From 7c8c2c30304b763d99fc3ded4a31e532082646a8 Mon Sep 17 00:00:00 2001
From: Eliza Weisman
Date: Thu, 22 Aug 2024 10:20:27 -0700
Subject: [PATCH 01/22] [gateway] Separate enum for SP lookup errors (#6413)

Currently, MGS has an `SpCommsError` enum that represents pretty much
every error that may occur, including failures to look up an SP's
address and communication failures while talking to that SP. This means
that almost all of the `ManagementSwitch` and `SingleSp` functions
return all of the possible errors of any of those operations. Perhaps
more importantly, this means that the *authors* of code that calls
these functions cannot easily determine which errors will actually be
returned by the functions they're calling.

For example, while working on #6354, I had incorrectly believed that
the `ManagementSwitch::sp` function, which returns a `SingleSp` for an
`SpIdentifier`, could fail if there were transient network issues, and
that my code would need to handle this. In fact, it turns out that this
function only returns an error if discovery hasn't completed yet, or if
the `SpIdentifier` is invalid, both of which are fatal errors (and
should never be returned at the point where my code calls that
function).

Therefore, this branch refactors the error types a bit to separate an
`SpLookupError` from `SpCommsError`, and changes the `ManagementSwitch`
functions that only return errors if discovery hasn't completed
successfully or an `SpIdentifier` is invalid to return an
`SpLookupError` instead. The `SpCommsError` has a `Discovery` variant
which is produced from an `SpLookupError`, so this is actually a fairly
minimal change overall --- functions returning an `SpCommsError` via the
`?` operator can continue doing so identically to how they do today.

I'd like to merge this before #6354, so that I can use it to clean up
some error handling code in that branch.
---
 gateway/src/error.rs             | 56 ++++++++++++++++++++------------
 gateway/src/management_switch.rs | 19 ++++++-----
 2 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/gateway/src/error.rs b/gateway/src/error.rs
index 5933daa340..ee148e0c98 100644
--- a/gateway/src/error.rs
+++ b/gateway/src/error.rs
@@ -26,12 +26,8 @@ pub enum StartupError {
 
 #[derive(Debug, Error, SlogInlineError)]
 pub enum SpCommsError {
-    #[error("discovery process not yet complete")]
-    DiscoveryNotYetComplete,
-    #[error("location discovery failed: {reason}")]
-    DiscoveryFailed { reason: String },
-    #[error("nonexistent SP {0:?}")]
-    SpDoesNotExist(SpIdentifier),
+    #[error(transparent)]
+    Discovery(#[from] SpLookupError),
     #[error("unknown socket address for SP {0:?}")]
     SpAddressUnknown(SpIdentifier),
     #[error(
@@ -52,13 +48,22 @@ pub enum SpCommsError {
     },
 }
 
+/// Errors returned by attempts to look up an SP in the management switch's
+/// discovery map.
+#[derive(Debug, Error, SlogInlineError)]
+pub enum SpLookupError {
+    #[error("discovery process not yet complete")]
+    DiscoveryNotYetComplete,
+    #[error("location discovery failed: {reason}")]
+    DiscoveryFailed { reason: String },
+    #[error("nonexistent SP {0:?}")]
+    SpDoesNotExist(SpIdentifier),
+}
+
 impl From<SpCommsError> for HttpError {
     fn from(error: SpCommsError) -> Self {
         match error {
-            SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request(
-                Some("InvalidSp".to_string()),
-                InlineErrorChain::new(&error).to_string(),
-            ),
+            SpCommsError::Discovery(err) => HttpError::from(err),
             SpCommsError::SpCommunicationFailed {
                 err:
                     CommunicationError::SpError(
@@ -124,21 +129,11 @@ impl From<SpCommsError> for HttpError {
                 "UpdateInProgress",
                 InlineErrorChain::new(&error).to_string(),
             ),
-            SpCommsError::DiscoveryNotYetComplete => http_err_with_message(
-                http::StatusCode::SERVICE_UNAVAILABLE,
-                "DiscoveryNotYetComplete",
-                InlineErrorChain::new(&error).to_string(),
-            ),
             SpCommsError::SpAddressUnknown(_) => http_err_with_message(
                 http::StatusCode::SERVICE_UNAVAILABLE,
                 "SpAddressUnknown",
                 InlineErrorChain::new(&error).to_string(),
             ),
-            SpCommsError::DiscoveryFailed { .. } => http_err_with_message(
-                http::StatusCode::SERVICE_UNAVAILABLE,
-                "DiscoveryFailed ",
-                InlineErrorChain::new(&error).to_string(),
-            ),
             SpCommsError::Timeout { .. } => http_err_with_message(
                 http::StatusCode::SERVICE_UNAVAILABLE,
                 "Timeout ",
@@ -160,6 +155,27 @@ impl From<SpCommsError> for HttpError {
     }
 }
 
+impl From<SpLookupError> for HttpError {
+    fn from(error: SpLookupError) -> Self {
+        match error {
+            SpLookupError::SpDoesNotExist(_) => HttpError::for_bad_request(
+                Some("InvalidSp".to_string()),
+                InlineErrorChain::new(&error).to_string(),
+            ),
+            SpLookupError::DiscoveryNotYetComplete => http_err_with_message(
+                http::StatusCode::SERVICE_UNAVAILABLE,
+                "DiscoveryNotYetComplete",
+                InlineErrorChain::new(&error).to_string(),
+            ),
+            SpLookupError::DiscoveryFailed { .. } => http_err_with_message(
+                http::StatusCode::SERVICE_UNAVAILABLE,
+                "DiscoveryFailed ",
+                InlineErrorChain::new(&error).to_string(),
+            ),
+        }
+    }
+}
+
 // Helper function to return an `HttpError` with the same internal and external
 // message. MGS is an "internal" service - even when we return a 500-level
 // status code, we want to give our caller some information about what is going
diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs
index a93c44d62c..23dfbe01a8 100644
--- a/gateway/src/management_switch.rs
+++ b/gateway/src/management_switch.rs
@@ -20,6 +20,7 @@ pub use self::location_map::SwitchPortConfig;
 pub use self::location_map::SwitchPortDescription;
 use self::location_map::ValidatedLocationConfig;
 use crate::error::SpCommsError;
+use crate::error::SpLookupError;
 use crate::error::StartupError;
 use gateway_messages::IgnitionState;
 use gateway_sp_comms::default_discovery_addr;
@@ -316,18 +317,18 @@ impl ManagementSwitch {
         self.location_map.get().is_some()
     }
 
-    fn location_map(&self) -> Result<&LocationMap, SpCommsError> {
+    fn location_map(&self) -> Result<&LocationMap, SpLookupError> {
         let discovery_result = self
             .location_map
             .get()
-            .ok_or(SpCommsError::DiscoveryNotYetComplete)?;
+            .ok_or(SpLookupError::DiscoveryNotYetComplete)?;
         discovery_result
             .as_ref()
-            .map_err(|s| SpCommsError::DiscoveryFailed { reason: s.clone() })
+            .map_err(|s| SpLookupError::DiscoveryFailed { reason: s.clone() })
     }
 
     /// Get the identifier of our local switch.
-    pub fn local_switch(&self) -> Result<SpIdentifier, SpCommsError> {
+    pub fn local_switch(&self) -> Result<SpIdentifier, SpLookupError> {
         let location_map = self.location_map()?;
         Ok(location_map.port_to_id(self.local_ignition_controller_port))
     }
@@ -347,11 +348,11 @@ impl ManagementSwitch {
     /// This method will fail if discovery is not yet complete (i.e., we don't
     /// know the logical identifiers of any SP yet!) or if `id` specifies an SP
     /// that doesn't exist in our discovered location map.
-    fn get_port(&self, id: SpIdentifier) -> Result<SwitchPort, SpCommsError> {
+    fn get_port(&self, id: SpIdentifier) -> Result<SwitchPort, SpLookupError> {
         let location_map = self.location_map()?;
         let port = location_map
             .id_to_port(id)
-            .ok_or(SpCommsError::SpDoesNotExist(id))?;
+            .ok_or(SpLookupError::SpDoesNotExist(id))?;
         Ok(port)
     }
 
@@ -362,7 +363,7 @@ impl ManagementSwitch {
     /// This method will fail if discovery is not yet complete (i.e., we don't
     /// know the logical identifiers of any SP yet!) or if `id` specifies an SP
     /// that doesn't exist in our discovered location map.
-    pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpCommsError> {
+    pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpLookupError> {
         let port = self.get_port(id)?;
         Ok(self.port_to_sp(port))
     }
@@ -377,7 +378,7 @@ impl ManagementSwitch {
     pub fn ignition_target(
         &self,
         id: SpIdentifier,
-    ) -> Result<u8, SpCommsError> {
+    ) -> Result<u8, SpLookupError> {
         let port = self.get_port(id)?;
         Ok(self.port_to_ignition_target[port.0])
     }
@@ -389,7 +390,7 @@ impl ManagementSwitch {
     /// therefore can't map our switch ports to SP identities).
    pub(crate) fn all_sps(
         &self,
-    ) -> Result<impl Iterator<Item = (SpIdentifier, &SingleSp)> + '_, SpCommsError>
+    ) -> Result<impl Iterator<Item = (SpIdentifier, &SingleSp)> + '_, SpLookupError>
     {
         let location_map = self.location_map()?;
         Ok(location_map

From 7b9fee2f65f6e76fa37de4125c720631de5fc236 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Thu, 22 Aug 2024 20:26:41 -0500
Subject: [PATCH 02/22] Bump web console (custom routers on subnets) (#6418)

https://github.com/oxidecomputer/console/compare/8dcddcef...9ff6ac6c

* [9ff6ac6c](https://github.com/oxidecomputer/console/commit/9ff6ac6c) oxidecomputer/console#2394
* [8028f9a5](https://github.com/oxidecomputer/console/commit/8028f9a5) oxidecomputer/console#2393
* [1bb92706](https://github.com/oxidecomputer/console/commit/1bb92706) oxidecomputer/console#2339
---
 tools/console_version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/console_version b/tools/console_version
index ef59f6e40c..6ed1b23c0b 100644
--- a/tools/console_version
+++ b/tools/console_version
@@ -1,2 +1,2 @@
-COMMIT="8dcddcef62b8d10dfcd3adb470439212b23b3d5e"
-SHA2="30a5ecc4d7b82dfc8bbd5ea59d5d92b8414d0362425c1ce1011da8c722a8ec4c"
+COMMIT="9ff6ac6cc709b9081347f2718b99a9a799a41610"
+SHA2="abeddddefcf70f1cea74178b6b7463eb834215a4168f16631ccae74e9d95a8e1"

From 02303a6f03b19b8476fc4bc4a65d6b4e29585c6c Mon Sep 17 00:00:00 2001
From: Benjamin Naecker
Date: Fri, 23 Aug 2024 09:30:53 -0700
Subject: [PATCH 03/22] Check histogram bin overflow in the support type, not
 the power type (#6409)

Fixes #6408
---
 oximeter/types/src/histogram.rs | 38 +++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/oximeter/types/src/histogram.rs b/oximeter/types/src/histogram.rs
index 0b85727ee0..2a4feab382 100644
--- a/oximeter/types/src/histogram.rs
+++ b/oximeter/types/src/histogram.rs
@@ -1029,8 +1029,13 @@ where
             return Err(QuantizationError::InvalidSteps);
         }
 
-        // The highest power must be representable in the target type.
-        if self.checked_pow(hi.into()).is_none() {
+        // The highest power must be representable in the target type. 
Note that + // we have to convert to that target type _before_ doing this check. + let base = >::from(*self); + let Some(highest) = base.checked_pow(hi.into()) else { + return Err(QuantizationError::Overflow); + }; + if ::from(highest).is_none() { return Err(QuantizationError::Overflow); } @@ -1039,7 +1044,6 @@ where // // Note that we unwrap in a few places below, where we're sure the // narrowing conversion cannot fail, such as to a u32. - let base = >::from(*self); let lo = >::from(lo); let hi = >::from(hi); let count = ::from(count.get()) @@ -1057,7 +1061,6 @@ where let lo = base.pow(lo as _); let hi = base.pow(hi as _); let distance = hi - lo; - dbg!(distance, count); distance.is_multiple_of(&count) }) } @@ -1767,4 +1770,31 @@ mod tests { HistogramError::EmptyBins )); } + + #[test] + fn test_log_linear_bins_does_not_overflow_wide_bin_type() { + let start: u16 = 3; + // 10u16 ** 10u16 overflows, but what we should be computing is 10u64 ** + // 10u16, which would not overflow. We need to compute whether it + // overflows in the _support_ type. + let stop = 10; + Histogram::::span_decades(start, stop).expect( + "expected not to overflow, since support type is wide enough", + ); + } + + #[test] + fn test_log_linear_bins_does_overflow_narrow_bin_type() { + // In this case, the start / stop powers _and_ their resulting bins are + // both representable as u16s and also u64s. But we're generating bins + // that are u8s, which _the powers do_ overflow. + let start: u16 = 1; + let stop: u16 = 4; + Histogram::::span_decades(start, stop).expect( + "expected not to overflow a u32, since support type is wide enough", + ); + Histogram::::span_decades(start, stop).expect_err( + "expected to overflow a u8, since support type is not wide enough", + ); + } } From f2463f4305320997c8e4d62a18162ef817722674 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 23 Aug 2024 13:20:01 -0500 Subject: [PATCH 04/22] Bump web console (instance list polling, status -> state) (#6419) https://github.com/oxidecomputer/console/compare/9ff6ac6c...77127657 * [77127657](https://github.com/oxidecomputer/console/commit/77127657) oxidecomputer/console#2395 * [342aa049](https://github.com/oxidecomputer/console/commit/342aa049) oxidecomputer/console#2391 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 6ed1b23c0b..b2fc99daf3 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="9ff6ac6cc709b9081347f2718b99a9a799a41610" -SHA2="abeddddefcf70f1cea74178b6b7463eb834215a4168f16631ccae74e9d95a8e1" +COMMIT="771276573549dd255c6749980636aa7140e8bab8" +SHA2="4d441de0784bb0d775e0a7f4067758fd6c37fbf050ed76b744cd37d6e81af3d3" From d96ea7ca8945b8ad78a53fd083850ea39789e5f0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 23 Aug 2024 21:00:03 +0100 Subject: [PATCH 05/22] [chore] Bump OPTE to v0.33.293 (#6400) * Move underlay NICs back into H/W Classification (oxidecomputer/opte#504) My disposition is to wait til R11 before we merge this -- I've done lengthy testing on `glasgow`, but I would like plenty of soak time on dogfood before this sees a release. 
--- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 837015f3bc..4edebcc911 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3810,7 +3810,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" [[package]] name = "illumos-utils" @@ -4246,7 +4246,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "quote", "syn 2.0.74", @@ -6717,7 +6717,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "dyn-clone", @@ -6734,7 +6734,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6746,7 +6746,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6820,7 +6820,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index cfb097ef3c..cbb0216d5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -450,7 +450,7 @@ omicron-test-utils = { path = "test-utils" } omicron-workspace-hack = "0.1.0" omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653", features = [ "api", "std" ] } oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.19.0" @@ -460,7 +460,7 @@ openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index dfbb589f24..0e2023666f 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.33.277 +0.33.293 From 9ac07441afc15cabc7dfeab59915b50da69f44be Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 23 Aug 2024 14:53:50 -0700 Subject: [PATCH 06/22] Expunge old switch-table timeseries schema (#6423) Fixes #6422 --- .../db/schema/replicated/11/timeseries-to-delete.txt | 9 +++++++++ .../db/schema/single-node/11/timeseries-to-delete.txt | 9 +++++++++ oximeter/db/src/model.rs | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 oximeter/db/schema/replicated/11/timeseries-to-delete.txt create mode 100644 oximeter/db/schema/single-node/11/timeseries-to-delete.txt diff --git a/oximeter/db/schema/replicated/11/timeseries-to-delete.txt b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt new file mode 100644 index 0000000000..4f0301a6b5 --- /dev/null +++ b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git a/oximeter/db/schema/single-node/11/timeseries-to-delete.txt b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt new file mode 100644 index 0000000000..4f0301a6b5 --- /dev/null +++ b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 7608f81e45..a3e9d109ff 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 10; +pub const OXIMETER_VERSION: u64 = 11; // Wrapper type to represent a boolean in the database. // From 41d36d75ac0e442212366f3d0567e33ecb47c067 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 23 Aug 2024 15:40:07 -0700 Subject: [PATCH 07/22] [oximeter] spell "Celsius" correctly (#6426) Thanks to @elaine-oxide for catching this --- I had misspelt "Celsius" as "Celcius" and it had made it all the way into the CLI thanks to its dependency on the Nexus API. This commit corrects the misspelling. 
---
 openapi/nexus.json             | 2 +-
 oximeter/schema/src/codegen.rs | 4 ++--
 oximeter/types/src/schema.rs   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/openapi/nexus.json b/openapi/nexus.json
index 285dcd82bb..2a8c227c64 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -19934,7 +19934,7 @@
         "nanoseconds",
         "volts",
         "amps",
-        "degrees_celcius"
+        "degrees_celsius"
       ]
     },
     {
diff --git a/oximeter/schema/src/codegen.rs b/oximeter/schema/src/codegen.rs
index 0429cf0534..c46c25c97d 100644
--- a/oximeter/schema/src/codegen.rs
+++ b/oximeter/schema/src/codegen.rs
@@ -512,8 +512,8 @@ fn quote_units(units: Units) -> TokenStream {
         }
         Units::Amps => quote! { ::oximeter::schema::Units::Amps },
         Units::Volts => quote! { ::oximeter::schema::Units::Volts },
-        Units::DegreesCelcius => {
-            quote! { ::oximeter::schema::Units::DegreesCelcius }
+        Units::DegreesCelsius => {
+            quote! { ::oximeter::schema::Units::DegreesCelsius }
         }
         Units::Rpm => quote! { ::oximeter::schema::Units::Rpm },
     }
diff --git a/oximeter/types/src/schema.rs b/oximeter/types/src/schema.rs
index 80aaa6f101..e06e6e2b57 100644
--- a/oximeter/types/src/schema.rs
+++ b/oximeter/types/src/schema.rs
@@ -189,7 +189,7 @@ pub enum Units {
     Nanoseconds,
     Volts,
     Amps,
-    DegreesCelcius,
+    DegreesCelsius,
     /// Rotations per minute.
     Rpm,
 }

From 876ae85fc86e21ce81f1c49783fb86907d99fe8e Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Fri, 23 Aug 2024 19:14:54 -0400
Subject: [PATCH 08/22] Use `BlueprintZoneConfig` in RSS service plan (#6410)

In anticipation of adding more `BlueprintZoneConfig` variants with more
auxiliary information, we stop converting from `OmicronZoneConfig` to
`BlueprintZoneConfig`, which is not going to be feasible for much
longer. Instead, we change the one production code place we do this,
RSS, to directly construct `BlueprintZoneConfig` structs rather than do
the conversion. This has some ripple effects, and results in a new
persistent v4 sled service plan.

There is one test that still does this conversion, but the function
that does it has been moved into that test module and commented
heavily. We hope to remove it shortly.
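For readers unfamiliar with the two types, the directionality argument
can be made concrete with a small sketch. This is not the real code ---
the actual types live in `nexus_types::deployment` and
`nexus_sled_agent_shared::inventory`, and the names below (`AuxInfo`,
`to_blueprint`) are invented stand-ins:

```rust
// Minimal sketch, assuming a blueprint type that is a strict superset
// of the sled-agent config type. Dropping fields is always possible;
// adding them back requires information the caller may not have.

struct AuxInfo {
    external_ip_id: u64, // stand-in for the real external IP UUID
}

struct BlueprintZone {
    id: u64,
    aux: Option<AuxInfo>, // control-plane-only state
}

struct OmicronZone {
    id: u64, // sled-agent only needs the zone's own config
}

// The lossy direction is infallible: just drop the auxiliary fields.
impl From<BlueprintZone> for OmicronZone {
    fn from(z: BlueprintZone) -> Self {
        OmicronZone { id: z.id }
    }
}

// The reverse direction is fallible: the missing fields must be
// supplied (or fabricated) by the caller.
fn to_blueprint(
    z: OmicronZone,
    aux: Option<AuxInfo>,
) -> Result<BlueprintZone, String> {
    let aux = aux.ok_or_else(|| format!("zone {} needs aux info", z.id))?;
    Ok(BlueprintZone { id: z.id, aux: Some(aux) })
}
```

Constructing `BlueprintZoneConfig` directly in RSS, as this patch does,
keeps production code on the infallible side of this asymmetry.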
--- Cargo.lock | 1 + nexus/reconfigurator/execution/Cargo.toml | 1 + nexus/reconfigurator/execution/src/dns.rs | 214 ++++- nexus/types/src/deployment.rs | 172 ---- schema/rss-service-plan-v4.json | 999 ++++++++++++++++++++++ sled-agent/src/rack_setup/mod.rs | 5 + sled-agent/src/rack_setup/plan/service.rs | 362 +++++--- sled-agent/src/rack_setup/service.rs | 93 +- sled-agent/src/sim/server.rs | 105 ++- 9 files changed, 1548 insertions(+), 404 deletions(-) create mode 100644 schema/rss-service-plan-v4.json diff --git a/Cargo.lock b/Cargo.lock index 4edebcc911..2630aa2a25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5245,6 +5245,7 @@ dependencies = [ "httptest", "internal-dns", "ipnet", + "newtype-uuid", "nexus-config", "nexus-db-model", "nexus-db-queries", diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index a531b66df4..1c62e553a8 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -16,6 +16,7 @@ dns-service-client.workspace = true chrono.workspace = true futures.workspace = true internal-dns.workspace = true +newtype-uuid.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 9ca14f8e24..1c878a9ada 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -467,6 +467,7 @@ mod test { use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use internal_dns::DNS_ZONE; + use newtype_uuid::GenericUuid; use nexus_db_model::DnsGroup; use nexus_db_model::Silo; use nexus_db_queries::authn; @@ -478,6 +479,8 @@ mod test { use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::example; use nexus_reconfigurator_preparation::PlanningInputFromDb; + use nexus_sled_agent_shared::inventory::OmicronZoneConfig; + use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_test_utils::resource_helpers::create_silo; use nexus_test_utils::resource_helpers::DiskTestBuilder; @@ -490,6 +493,9 @@ mod test { use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::CockroachDbSettings; + pub use nexus_types::deployment::OmicronZoneExternalFloatingAddr; + pub use nexus_types::deployment::OmicronZoneExternalFloatingIp; + pub use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::SledFilter; use nexus_types::external_api::params; use nexus_types::external_api::shared; @@ -539,6 +545,212 @@ mod test { } } + /// ********************************************************************** + /// DEPRECATION WARNING: + /// + /// Remove when `deprecated_omicron_zone_config_to_blueprint_zone_config` + /// is deleted. + /// ********************************************************************** + /// + /// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`]. + #[derive(Debug, Clone)] + pub enum InvalidOmicronZoneType { + #[allow(unused)] + ExternalIpIdRequired { kind: ZoneKind }, + } + + /// ********************************************************************** + /// DEPRECATION WARNING: Do not call this function in new code !!! 
+    /// **********************************************************************
+    ///
+    /// Convert an [`OmicronZoneConfig`] to a [`BlueprintZoneConfig`].
+    ///
+    /// A `BlueprintZoneConfig` is a superset of `OmicronZoneConfig` and
+    /// contains auxiliary information not present in an `OmicronZoneConfig`.
+    /// Therefore, the only valid direction for a real system to take is a
+    /// lossy conversion from `BlueprintZoneConfig` to `OmicronZoneConfig`.
+    /// This function, however, does the opposite. We therefore have to inject
+    /// fake information to fill in the unknown fields in the generated
+    /// `BlueprintZoneConfig`.
+    ///
+    /// This is bad, and we should generally feel bad for doing it :). At
+    /// the time this was done we were backporting the blueprint system into
+    /// RSS while trying not to change too much code. This was a judicious
+    /// shortcut used right before a release for stability reasons. As the
+    /// number of zones managed by the reconfigurator has grown, the use
+    /// of this function has become more egregious, and so it was moved
+    /// out of the production code path and into this test module. This move
+    /// itself is a judicious shortcut. We have a test in this module,
+    /// `test_blueprint_internal_dns_basic`, that is the last caller of this
+    /// function, and so we have moved this function into this module.
+    ///
+    /// Ideally, we would get rid of this function altogether and use another
+    /// method for generating `BlueprintZoneConfig` structures. Unfortunately,
+    /// there are still a few remaining zones that need to be implemented in the
+    /// `BlueprintBuilder`, and some of them require custom code. Until that is
+    /// done, we don't have a good way of generating a test representation of
+    /// the real system that would properly serve this test. We could generate
+    /// a `BlueprintZoneConfig` by hand for each zone type in this test, on
+    /// top of the more modern `SystemDescription` setup, but that isn't much
+    /// different than what we do in this test. We'd also eventually remove it
+    /// for better test setup when our `BlueprintBuilder` is capable of properly
+    /// constructing all zone types. Instead, we do the simple thing, and reuse
+    /// what we already have.
+    ///
+    /// # Errors
+    ///
+    /// If `config.zone_type` is a zone that has an external IP address (Nexus,
+    /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this
+    /// method will return an error.
+    pub fn deprecated_omicron_zone_config_to_blueprint_zone_config(
+        config: OmicronZoneConfig,
+        disposition: BlueprintZoneDisposition,
+        external_ip_id: Option<ExternalIpUuid>,
+    ) -> Result<BlueprintZoneConfig, InvalidOmicronZoneType> {
+        let kind = config.zone_type.kind();
+        let zone_type = match config.zone_type {
+            OmicronZoneType::BoundaryNtp {
+                address,
+                dns_servers,
+                domain,
+                nic,
+                ntp_servers,
+                snat_cfg,
+            } => {
+                let external_ip_id = external_ip_id.ok_or(
+                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+                )?;
+                BlueprintZoneType::BoundaryNtp(
+                    blueprint_zone_type::BoundaryNtp {
+                        address,
+                        ntp_servers,
+                        dns_servers,
+                        domain,
+                        nic,
+                        external_ip: OmicronZoneExternalSnatIp {
+                            id: external_ip_id,
+                            snat_cfg,
+                        },
+                    },
+                )
+            }
+            OmicronZoneType::Clickhouse { address, dataset } => {
+                BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse {
+                    address,
+                    dataset,
+                })
+            }
+            OmicronZoneType::ClickhouseKeeper { address, dataset } => {
+                BlueprintZoneType::ClickhouseKeeper(
+                    blueprint_zone_type::ClickhouseKeeper { address, dataset },
+                )
+            }
+            OmicronZoneType::ClickhouseServer { address, dataset } => {
+                BlueprintZoneType::ClickhouseServer(
+                    blueprint_zone_type::ClickhouseServer { address, dataset },
+                )
+            }
+            OmicronZoneType::CockroachDb { address, dataset } => {
+                BlueprintZoneType::CockroachDb(
+                    blueprint_zone_type::CockroachDb { address, dataset },
+                )
+            }
+            OmicronZoneType::Crucible { address, dataset } => {
+                BlueprintZoneType::Crucible(blueprint_zone_type::Crucible {
+                    address,
+                    dataset,
+                })
+            }
+            OmicronZoneType::CruciblePantry { address } => {
+                BlueprintZoneType::CruciblePantry(
+                    blueprint_zone_type::CruciblePantry { address },
+                )
+            }
+            OmicronZoneType::ExternalDns {
+                dataset,
+                dns_address,
+                http_address,
+                nic,
+            } => {
+                let external_ip_id = external_ip_id.ok_or(
+                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+                )?;
+                BlueprintZoneType::ExternalDns(
+                    blueprint_zone_type::ExternalDns {
+                        dataset,
+                        http_address,
+                        dns_address: OmicronZoneExternalFloatingAddr {
+                            id: external_ip_id,
+                            addr: dns_address,
+                        },
+                        nic,
+                    },
+                )
+            }
+            OmicronZoneType::InternalDns {
+                dataset,
+                dns_address,
+                gz_address,
+                gz_address_index,
+                http_address,
+            } => BlueprintZoneType::InternalDns(
+                blueprint_zone_type::InternalDns {
+                    dataset,
+                    http_address,
+                    dns_address,
+                    gz_address,
+                    gz_address_index,
+                },
+            ),
+            OmicronZoneType::InternalNtp {
+                address,
+                dns_servers,
+                domain,
+                ntp_servers,
+            } => BlueprintZoneType::InternalNtp(
+                blueprint_zone_type::InternalNtp {
+                    address,
+                    ntp_servers,
+                    dns_servers,
+                    domain,
+                },
+            ),
+            OmicronZoneType::Nexus {
+                external_dns_servers,
+                external_ip,
+                external_tls,
+                internal_address,
+                nic,
+            } => {
+                let external_ip_id = external_ip_id.ok_or(
+                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+                )?;
+                BlueprintZoneType::Nexus(blueprint_zone_type::Nexus {
+                    internal_address,
+                    external_ip: OmicronZoneExternalFloatingIp {
+                        id: external_ip_id,
+                        ip: external_ip,
+                    },
+                    nic,
+                    external_tls,
+                    external_dns_servers,
+                })
+            }
+            OmicronZoneType::Oximeter { address } => {
+                BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter {
+                    address,
+                })
+            }
+        };
+        Ok(BlueprintZoneConfig {
+            disposition,
+            id: OmicronZoneUuid::from_untyped_uuid(config.id),
+            underlay_address: config.underlay_address,
+            filesystem_pool: config.filesystem_pool,
+            zone_type,
+        })
+    }
+
     /// test blueprint_internal_dns_config(): trivial case of an empty blueprint
     #[test]
     fn test_blueprint_internal_dns_empty() {
@@ -589,7 +801,7 @@
             .zones
             .into_iter()
             .map(|config| -> 
BlueprintZoneConfig {
-                BlueprintZoneConfig::from_omicron_zone_config(
+                deprecated_omicron_zone_config_to_blueprint_zone_config(
                     config,
                     BlueprintZoneDisposition::InService,
                     // We don't get external IP IDs in inventory
diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs
index cc48f2646a..96de893fa3 100644
--- a/nexus/types/src/deployment.rs
+++ b/nexus/types/src/deployment.rs
@@ -27,20 +27,17 @@ use omicron_common::api::external::Generation;
 use omicron_common::disk::DiskIdentity;
 use omicron_common::disk::OmicronPhysicalDisksConfig;
 use omicron_uuid_kinds::CollectionUuid;
-use omicron_uuid_kinds::ExternalIpUuid;
 use omicron_uuid_kinds::OmicronZoneUuid;
 use omicron_uuid_kinds::SledUuid;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
-use slog_error_chain::SlogInlineError;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::fmt;
 use std::net::Ipv6Addr;
 use strum::EnumIter;
 use strum::IntoEnumIterator;
-use thiserror::Error;
 use uuid::Uuid;
 
 mod blueprint_diff;
@@ -595,13 +592,6 @@ fn zone_sort_key<T: ZoneSortKey>(z: &T) -> impl Ord {
     (z.kind(), z.id())
 }
 
-/// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`].
-#[derive(Debug, Clone, Error, SlogInlineError)]
-pub enum InvalidOmicronZoneType {
-    #[error("Omicron zone {} requires an external IP ID", kind.report_str())]
-    ExternalIpIdRequired { kind: ZoneKind },
-}
-
 /// Describes one Omicron-managed zone in a blueprint.
 ///
 /// Part of [`BlueprintZonesConfig`].
@@ -616,168 +606,6 @@ pub struct BlueprintZoneConfig {
     pub zone_type: BlueprintZoneType,
 }
 
-impl BlueprintZoneConfig {
-    /// Convert from an [`OmicronZoneConfig`].
-    ///
-    /// This method is annoying to call correctly and will become more so over
-    /// time. Ideally we'd remove all callers and then remove this method, but
-    /// for now we keep it.
-    ///
-    /// # Errors
-    ///
-    /// If `config.zone_type` is a zone that has an external IP address (Nexus,
-    /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this
-    /// method will return an error.
-    pub fn from_omicron_zone_config(
-        config: OmicronZoneConfig,
-        disposition: BlueprintZoneDisposition,
-        external_ip_id: Option<ExternalIpUuid>,
-    ) -> Result<Self, InvalidOmicronZoneType> {
-        let kind = config.zone_type.kind();
-        let zone_type = match config.zone_type {
-            OmicronZoneType::BoundaryNtp {
-                address,
-                dns_servers,
-                domain,
-                nic,
-                ntp_servers,
-                snat_cfg,
-            } => {
-                let external_ip_id = external_ip_id.ok_or(
-                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
-                )?;
-                BlueprintZoneType::BoundaryNtp(
-                    blueprint_zone_type::BoundaryNtp {
-                        address,
-                        ntp_servers,
-                        dns_servers,
-                        domain,
-                        nic,
-                        external_ip: OmicronZoneExternalSnatIp {
-                            id: external_ip_id,
-                            snat_cfg,
-                        },
-                    },
-                )
-            }
-            OmicronZoneType::Clickhouse { address, dataset } => {
-                BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse {
-                    address,
-                    dataset,
-                })
-            }
-            OmicronZoneType::ClickhouseKeeper { address, dataset } => {
-                BlueprintZoneType::ClickhouseKeeper(
-                    blueprint_zone_type::ClickhouseKeeper { address, dataset },
-                )
-            }
-            OmicronZoneType::ClickhouseServer { address, dataset } => {
-                BlueprintZoneType::ClickhouseServer(
-                    blueprint_zone_type::ClickhouseServer { address, dataset },
-                )
-            }
-            OmicronZoneType::CockroachDb { address, dataset } => {
-                BlueprintZoneType::CockroachDb(
-                    blueprint_zone_type::CockroachDb { address, dataset },
-                )
-            }
-            OmicronZoneType::Crucible { address, dataset } => {
-                BlueprintZoneType::Crucible(blueprint_zone_type::Crucible {
-                    address,
-                    dataset,
-                })
-            }
-            OmicronZoneType::CruciblePantry { address } => {
-                BlueprintZoneType::CruciblePantry(
-                    blueprint_zone_type::CruciblePantry { address },
-                )
-            }
-            OmicronZoneType::ExternalDns {
-                dataset,
-                dns_address,
-                http_address,
-                nic,
-            } => {
-                let external_ip_id = external_ip_id.ok_or(
-                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
-                )?;
-                BlueprintZoneType::ExternalDns(
-                    blueprint_zone_type::ExternalDns {
-                        dataset,
-                        http_address,
-                        dns_address: OmicronZoneExternalFloatingAddr {
-                            id: external_ip_id,
-                            addr: dns_address,
-                        },
-                        nic,
-                    },
-                )
-            }
-            OmicronZoneType::InternalDns {
-                dataset,
-                dns_address,
-                gz_address,
-                gz_address_index,
-                http_address,
-            } => BlueprintZoneType::InternalDns(
-                blueprint_zone_type::InternalDns {
-                    dataset,
-                    http_address,
-                    dns_address,
-                    gz_address,
-                    gz_address_index,
-                },
-            ),
-            OmicronZoneType::InternalNtp {
-                address,
-                dns_servers,
-                domain,
-                ntp_servers,
-            } => BlueprintZoneType::InternalNtp(
-                blueprint_zone_type::InternalNtp {
-                    address,
-                    ntp_servers,
-                    dns_servers,
-                    domain,
-                },
-            ),
-            OmicronZoneType::Nexus {
-                external_dns_servers,
-                external_ip,
-                external_tls,
-                internal_address,
-                nic,
-            } => {
-                let external_ip_id = external_ip_id.ok_or(
-                    InvalidOmicronZoneType::ExternalIpIdRequired { kind },
-                )?;
-                BlueprintZoneType::Nexus(blueprint_zone_type::Nexus {
-                    internal_address,
-                    external_ip: OmicronZoneExternalFloatingIp {
-                        id: external_ip_id,
-                        ip: external_ip,
-                    },
-                    nic,
-                    external_tls,
-                    external_dns_servers,
-                })
-            }
-            OmicronZoneType::Oximeter { address } => {
-                BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter {
-                    address,
-                })
-            }
-        };
-        Ok(Self {
-            disposition,
-            id: OmicronZoneUuid::from_untyped_uuid(config.id),
-            underlay_address: config.underlay_address,
-            filesystem_pool: config.filesystem_pool,
-            zone_type,
-        })
-    }
-}
-
 impl From<BlueprintZoneConfig> for OmicronZoneConfig {
     fn from(z: BlueprintZoneConfig) -> Self {
         Self {
diff --git a/schema/rss-service-plan-v4.json b/schema/rss-service-plan-v4.json
new file mode 100644
index 0000000000..badfaf4589
--- /dev/null
+++ 
b/schema/rss-service-plan-v4.json @@ -0,0 +1,999 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Plan", + "type": "object", + "required": [ + "dns_config", + "services" + ], + "properties": { + "dns_config": { + "$ref": "#/definitions/DnsConfigParams" + }, + "services": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/SledConfig" + } + } + }, + "definitions": { + "BlueprintZoneConfig": { + "description": "Describes one Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZonesConfig`].", + "type": "object", + "required": [ + "disposition", + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "disposition": { + "description": "The disposition (desired state) of this zone recorded in the blueprint.", + "allOf": [ + { + "$ref": "#/definitions/BlueprintZoneDisposition" + } + ] + }, + "filesystem_pool": { + "anyOf": [ + { + "$ref": "#/definitions/ZpoolName" + }, + { + "type": "null" + } + ] + }, + "id": { + "$ref": "#/definitions/TypedUuidForOmicronZoneKind" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/BlueprintZoneType" + } + } + }, + "BlueprintZoneDisposition": { + "description": "The desired state of an Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZoneConfig`].", + "oneOf": [ + { + "description": "The zone is in-service.", + "type": "string", + "enum": [ + "in_service" + ] + }, + { + "description": "The zone is not in service.", + "type": "string", + "enum": [ + "quiesced" + ] + }, + { + "description": "The zone is permanently gone.", + "type": "string", + "enum": [ + "expunged" + ] + } + ] + }, + "BlueprintZoneType": { + "oneOf": [ + { + "type": "object", + "required": [ + "address", + "dns_servers", + "external_ip", + "nic", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "external_ip": { + "$ref": "#/definitions/OmicronZoneExternalSnatIp" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + } + }, + { + "description": "Used in single-node clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "description": "Used in replicated clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + 
"dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingAddr" + } + ] + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ], + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingIp" + } + ] + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + 
"nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + } + } + ] + }, + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "required": [ + "model", + "serial", + "vendor" + ], + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + } + }, + "DnsConfigParams": { + "description": "DnsConfigParams\n\n
<details><summary>JSON schema</summary>\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```\n</details>
", + "type": "object", + "required": [ + "generation", + "time_created", + "zones" + ], + "properties": { + "generation": { + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "time_created": { + "type": "string", + "format": "date-time" + }, + "zones": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsConfigZone" + } + } + } + }, + "DnsConfigZone": { + "description": "DnsConfigZone\n\n
<details><summary>JSON schema</summary>\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```\n</details>
", + "type": "object", + "required": [ + "records", + "zone_name" + ], + "properties": { + "records": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsRecord" + } + } + }, + "zone_name": { + "type": "string" + } + } + }, + "DnsRecord": { + "description": "DnsRecord\n\n
<details><summary>JSON schema</summary>\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```\n</details>
", + "oneOf": [ + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv4" + }, + "type": { + "type": "string", + "enum": [ + "A" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv6" + }, + "type": { + "type": "string", + "enum": [ + "AAAA" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "$ref": "#/definitions/Srv" + }, + "type": { + "type": "string", + "enum": [ + "SRV" + ] + } + } + } + ] + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/definitions/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/definitions/Ipv6Net" + } + ] + } + ], + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::IpNet", + "version": "0.1.0" + } + }, + "Ipv4Net": { + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and prefix length", + "examples": [ + "192.168.1.0/24" + ], + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv4Net", + "version": "0.1.0" + } + }, + "Ipv6Net": { + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "examples": [ + "fd12:3456::/64" + ], + "type": "string", + "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv6Net", + "version": "0.1.0" + } + }, + "MacAddr": { + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "examples": [ + "ff:ff:ff:ff:ff:ff" + ], + "type": "string", + "maxLength": 17, + "minLength": 5, + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$" + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", + "type": "string", + "maxLength": 63, + "minLength": 1, + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/definitions/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/definitions/MacAddr" + }, + "name": { + "$ref": "#/definitions/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "subnet": { + "$ref": "#/definitions/IpNet" + }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/IpNet" + } + }, + "vni": { + "$ref": "#/definitions/Vni" + } + } + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + } + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } + } + ] + }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "required": [ + "id", + "identity", + "pool_id" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/definitions/DiskIdentity" + }, + "pool_id": { + "$ref": "#/definitions/TypedUuidForZpoolKind" + } + } + }, + "OmicronPhysicalDisksConfig": { + "type": "object", + "required": [ + "disks", + "generation" + ], + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneExternalFloatingAddr": { + "description": "Floating external address with port allocated to an Omicron-managed zone.", + "type": "object", + "required": [ + "addr", + "id" + ], + "properties": { + "addr": { + "type": "string" + }, + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + } + } + }, + "OmicronZoneExternalFloatingIp": { + "description": "Floating external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "ip" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "ip": { + "type": "string", + "format": "ip" + } + } + }, + "OmicronZoneExternalSnatIp": { + "description": "SNAT (outbound) external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "snat_cfg" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "snat_cfg": { + "$ref": "#/definitions/SourceNatConfig" + } + } + }, + "SledConfig": { + "type": "object", + "required": [ + "disks", + "zones" + ], + "properties": { + "disks": { + "description": "Control plane disks configured for this sled", + "allOf": [ + { + "$ref": "#/definitions/OmicronPhysicalDisksConfig" + } + ] + }, + "zones": { + "description": "zones configured for this sled", + "type": "array", + "items": { + "$ref": "#/definitions/BlueprintZoneConfig" + } + } + } + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "required": [ + "first_port", + "ip", + "last_port" + ], + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Srv": { + "description": "Srv\n\n
<details><summary>JSON schema</summary>\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```\n</details>
", + "type": "object", + "required": [ + "port", + "prio", + "target", + "weight" + ], + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "TypedUuidForExternalIpKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForOmicronZoneKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index 0ec14138fc..e1b12d6b2b 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -9,3 +9,8 @@ mod plan; pub mod service; pub use plan::service::SledConfig; +pub use plan::service::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, + from_source_nat_config_to_external_snat_ip, +}; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 8c26d0bf58..a376096a87 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -10,7 +10,13 @@ use illumos_utils::zpool::ZpoolName; use internal_dns::config::{Host, Zone}; use internal_dns::ServiceName; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, SledRole, + Inventory, OmicronZoneDataset, SledRole, +}; +use nexus_types::deployment::{ + blueprint_zone_type, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, + BlueprintZoneDisposition, BlueprintZoneType, + OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, + OmicronZoneExternalSnatIp, }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, @@ -33,7 +39,9 @@ use omicron_common::policy::{ BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, NEXUS_REDUNDANCY, }; -use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; +use omicron_uuid_kinds::{ + ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid, +}; use rand::prelude::SliceRandom; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -120,10 +128,10 @@ pub enum PlanError { #[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] pub struct SledConfig { /// Control plane disks configured for this sled - pub disks: OmicronPhysicalDisksConfig, + pub disks: BlueprintPhysicalDisksConfig, /// zones configured for this sled - pub zones: Vec, + pub zones: Vec, } #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] @@ -140,7 +148,53 @@ impl Ledgerable for Plan { } const RSS_SERVICE_PLAN_V1_FILENAME: &str = "rss-service-plan.json"; const RSS_SERVICE_PLAN_V2_FILENAME: &str = "rss-service-plan-v2.json"; -const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v3.json"; +const RSS_SERVICE_PLAN_V3_FILENAME: &str = "rss-service-plan-v3.json"; +const 
RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v4.json"; + +pub fn from_sockaddr_to_external_floating_addr( + addr: SocketAddr, +) -> OmicronZoneExternalFloatingAddr { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingAddr { id: ExternalIpUuid::new_v4(), addr } +} + +pub fn from_ipaddr_to_external_floating_ip( + ip: IpAddr, +) -> OmicronZoneExternalFloatingIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingIp { id: ExternalIpUuid::new_v4(), ip } +} + +pub fn from_source_nat_config_to_external_snat_ip( + snat_cfg: SourceNatConfig, +) -> OmicronZoneExternalSnatIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalSnatIp { id: ExternalIpUuid::new_v4(), snat_cfg } +} impl Plan { pub async fn load( @@ -200,6 +254,14 @@ impl Plan { } })? { Err(PlanError::FoundV2) + } else if Self::has_v3(storage_manager).await.map_err(|err| { + // Same as the comment above, but for version 3. + PlanError::Io { + message: String::from("looking for v3 RSS plan"), + err, + } + })? { + Err(PlanError::FoundV2) } else { Ok(None) } @@ -243,6 +305,25 @@ impl Plan { Ok(false) } + async fn has_v3( + storage_manager: &StorageHandle, + ) -> Result { + let paths = storage_manager + .get_latest_disks() + .await + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(RSS_SERVICE_PLAN_V3_FILENAME)); + + for p in paths { + if p.try_exists()? 
{ + return Ok(true); + } + } + + Ok(false) + } + async fn is_sled_scrimlet( log: &Logger, address: SocketAddrV6, @@ -419,20 +500,22 @@ impl Plan { sled.alloc_dataset_from_u2s(DatasetType::InternalDns)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), - }, - http_address, - dns_address, - gz_address: dns_subnet.gz_address(), - gz_address_index: i.try_into().expect("Giant indices?"), - }, filesystem_pool, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + gz_address: dns_subnet.gz_address(), + gz_address_index: i.try_into().expect("Giant indices?"), + }, + ), }); } @@ -458,16 +541,18 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::CockroachDb)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::CockroachDb { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - address, - }, + ), filesystem_pool, }); } @@ -499,23 +584,27 @@ impl Plan { ) .unwrap(); let dns_port = omicron_common::address::DNS_PORT; - let dns_address = SocketAddr::new(external_ip, dns_port); + let dns_address = from_sockaddr_to_external_floating_addr( + SocketAddr::new(external_ip, dns_port), + ); let dataset_kind = DatasetType::ExternalDns; let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: *http_address.ip(), - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + nic, }, - http_address, - dns_address, - nic, - }, + ), filesystem_pool, }); } @@ -539,28 +628,32 @@ impl Plan { .unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Nexus { - internal_address: SocketAddrV6::new( - address, - omicron_common::address::NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_ip, - nic, - // Tell Nexus to use TLS if and only if the 
caller - // provided TLS certificates. This effectively - // determines the status of TLS for the lifetime of - // the rack. In production-like deployments, we'd - // always expect TLS to be enabled. It's only in - // development that it might not be. - external_tls: !config.external_certificates.is_empty(), - external_dns_servers: config.dns_servers.clone(), - }, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: SocketAddrV6::new( + address, + omicron_common::address::NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_ip: from_ipaddr_to_external_floating_ip( + external_ip, + ), + nic, + // Tell Nexus to use TLS if and only if the caller + // provided TLS certificates. This effectively + // determines the status of TLS for the lifetime of + // the rack. In production-like deployments, we'd + // always expect TLS to be enabled. It's only in + // development that it might not be. + external_tls: !config.external_certificates.is_empty(), + external_dns_servers: config.dns_servers.clone(), + }, + ), filesystem_pool, }); } @@ -584,18 +677,20 @@ impl Plan { ) .unwrap(); let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Oximeter { - address: SocketAddrV6::new( - address, - omicron_common::address::OXIMETER_PORT, - 0, - 0, - ), - }, + zone_type: BlueprintZoneType::Oximeter( + blueprint_zone_type::Oximeter { + address: SocketAddrV6::new( + address, + omicron_common::address::OXIMETER_PORT, + 0, + 0, + ), + }, + ), filesystem_pool, }) } @@ -623,16 +718,18 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::Clickhouse)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Clickhouse { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), filesystem_pool, }); } @@ -664,16 +761,18 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::ClickhouseServer)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::ClickhouseServer { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), filesystem_pool, }); } @@ -703,16 +802,18 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::ClickhouseKeeper)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - 
id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::ClickhouseKeeper { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), filesystem_pool, }); } @@ -737,13 +838,15 @@ impl Plan { port, ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::CruciblePantry { - address: SocketAddrV6::new(address, port, 0, 0), - }, + zone_type: BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { + address: SocketAddrV6::new(address, port, 0, 0), + }, + ), filesystem_pool, }); } @@ -765,14 +868,18 @@ impl Plan { ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Crucible { - address, - dataset: OmicronZoneDataset { pool_name: pool.clone() }, - }, + zone_type: BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { + address, + dataset: OmicronZoneDataset { + pool_name: pool.clone(), + }, + }, + ), filesystem_pool: Some(pool.clone()), }); } @@ -793,24 +900,31 @@ impl Plan { .push(Host::for_zone(Zone::Other(id)).fqdn()); let (nic, snat_cfg) = svc_port_builder.next_snat(id)?; ( - OmicronZoneType::BoundaryNtp { - address: ntp_address, - ntp_servers: config.ntp_servers.clone(), - dns_servers: config.dns_servers.clone(), - domain: None, - nic, - snat_cfg, - }, + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: ntp_address, + ntp_servers: config.ntp_servers.clone(), + dns_servers: config.dns_servers.clone(), + domain: None, + nic, + external_ip: + from_source_nat_config_to_external_snat_ip( + snat_cfg, + ), + }, + ), ServiceName::BoundaryNtp, ) } else { ( - OmicronZoneType::InternalNtp { - address: ntp_address, - ntp_servers: boundary_ntp_servers.clone(), - dns_servers: rack_dns_servers.clone(), - domain: None, - }, + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: ntp_address, + ntp_servers: boundary_ntp_servers.clone(), + dns_servers: rack_dns_servers.clone(), + domain: None, + }, + ), ServiceName::InternalNtp, ) }; @@ -819,9 +933,9 @@ impl Plan { .host_zone_with_one_backend(id, address, svcname, NTP_PORT) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, zone_type, filesystem_pool, @@ -1379,10 +1493,10 @@ mod tests { } #[test] - fn test_rss_service_plan_v3_schema() { + fn test_rss_service_plan_v4_schema() { let schema = schemars::schema_for!(Plan); expectorate::assert_contents( - "../schema/rss-service-plan-v3.json", + "../schema/rss-service-plan-v4.json", &serde_json::to_string_pretty(&schema).unwrap(), ); } diff --git a/sled-agent/src/rack_setup/service.rs 
b/sled-agent/src/rack_setup/service.rs index 20cd5646c0..3f73e55d0f 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -71,7 +71,6 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; -use crate::params::OmicronZoneTypeExt; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, }; @@ -91,9 +90,8 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ - Blueprint, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, - CockroachDbPreserveDowngrade, InvalidOmicronZoneType, + blueprint_zone_type, Blueprint, BlueprintZoneType, BlueprintZonesConfig, + CockroachDbPreserveDowngrade, }; use nexus_types::external_api::views::SledState; use omicron_common::address::get_sled_address; @@ -108,8 +106,8 @@ use omicron_common::disk::{ }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; +use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use omicron_uuid_kinds::{ExternalIpUuid, GenericUuid}; use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -533,7 +531,7 @@ impl ServiceInner { .iter() .filter_map(|zone_config| { match &zone_config.zone_type { - OmicronZoneType::InternalDns { http_address, .. } + BlueprintZoneType::InternalDns(blueprint_zone_type::InternalDns{ http_address, .. }) => { Some(*http_address) }, @@ -719,15 +717,17 @@ impl ServiceInner { let mut datasets: Vec = vec![]; for sled_config in service_plan.services.values() { for zone in &sled_config.zones { - if let Some((dataset_name, dataset_address)) = - zone.dataset_name_and_address() - { + if let Some(dataset) = zone.zone_type.durable_dataset() { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset_name.pool().id().into_untyped_uuid(), - dataset_id: zone.id, + zpool_id: dataset + .dataset + .pool_name + .id() + .into_untyped_uuid(), + dataset_id: zone.id.into_untyped_uuid(), request: NexusTypes::DatasetPutRequest { - address: dataset_address.to_string(), - kind: dataset_name.dataset().kind(), + address: dataset.address.to_string(), + kind: dataset.kind, }, }) } @@ -981,7 +981,7 @@ impl ServiceInner { if sled_config.zones.iter().any(|zone_config| { matches!( &zone_config.zone_type, - OmicronZoneType::CockroachDb { .. } + BlueprintZoneType::CockroachDb(_) ) }) { Some(sled_address) @@ -1398,7 +1398,7 @@ fn build_initial_blueprint_from_plan( let blueprint = build_initial_blueprint_from_sled_configs( sled_configs_by_id, internal_dns_version, - )?; + ); Ok(blueprint) } @@ -1406,47 +1406,11 @@ fn build_initial_blueprint_from_plan( pub(crate) fn build_initial_blueprint_from_sled_configs( sled_configs_by_id: &BTreeMap, internal_dns_version: Generation, -) -> Result { - // Helper to convert an `OmicronZoneConfig` into a `BlueprintZoneConfig`. - // This is separate primarily so rustfmt doesn't lose its mind. - let to_bp_zone_config = |z: &OmicronZoneConfig| { - // All initial zones are in-service. 
- let disposition = BlueprintZoneDisposition::InService; - BlueprintZoneConfig::from_omicron_zone_config( - z.clone(), - disposition, - // This is pretty weird: IP IDs don't exist yet, so it's fine for us - // to make them up (Nexus will record them as a part of the - // handoff). We could pass `None` here for some zone types, but it's - // a little simpler to just always pass a new ID, which will only be - // used if the zone type has an external IP. - // - // This should all go away once RSS starts using blueprints more - // directly (instead of this conversion after the fact): - // https://github.com/oxidecomputer/omicron/issues/5272 - Some(ExternalIpUuid::new_v4()), - ) - }; - - let mut blueprint_disks = BTreeMap::new(); - for (sled_id, sled_config) in sled_configs_by_id { - blueprint_disks.insert( - *sled_id, - BlueprintPhysicalDisksConfig { - generation: sled_config.disks.generation, - disks: sled_config - .disks - .disks - .iter() - .map(|d| OmicronPhysicalDiskConfig { - identity: d.identity.clone(), - id: d.id, - pool_id: d.pool_id, - }) - .collect(), - }, - ); - } +) -> Blueprint { + let blueprint_disks: BTreeMap<_, _> = sled_configs_by_id + .iter() + .map(|(sled_id, sled_config)| (*sled_id, sled_config.disks.clone())) + .collect(); let mut blueprint_zones = BTreeMap::new(); let mut sled_state = BTreeMap::new(); @@ -1463,18 +1427,14 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( // value, we will need to revisit storing this in the serialized // RSS plan. generation: DeployStepVersion::V5_EVERYTHING, - zones: sled_config - .zones - .iter() - .map(to_bp_zone_config) - .collect::>()?, + zones: sled_config.zones.clone(), }; blueprint_zones.insert(*sled_id, zones_config); sled_state.insert(*sled_id, SledState::Active); } - Ok(Blueprint { + Blueprint { id: Uuid::new_v4(), blueprint_zones, blueprint_disks, @@ -1492,7 +1452,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), - }) + } } /// Facilitates creating a sequence of OmicronZonesConfig objects for each sled @@ -1570,11 +1530,14 @@ impl<'a> OmicronZonesConfigGenerator<'a> { sled_config .zones .iter() + .cloned() + .map(|bp_zone_config| { + OmicronZoneConfig::from(bp_zone_config) + }) .filter(|z| { !zones_already.contains(&z.id) && zone_filter(&z.zone_type) - }) - .cloned(), + }), ); let config = OmicronZonesConfig { generation: version, zones }; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 189f775adb..b546025654 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -12,6 +12,10 @@ use crate::nexus::d2n_params; use crate::nexus::NexusClient; use crate::rack_setup::service::build_initial_blueprint_from_sled_configs; use crate::rack_setup::SledConfig; +use crate::rack_setup::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, +}; use anyhow::anyhow; use crucible_agent_client::types::State as RegionState; use illumos_utils::zpool::ZpoolName; @@ -19,9 +23,11 @@ use internal_dns::ServiceName; use nexus_client::types as NexusTypes; use nexus_client::types::{IpRange, Ipv4Range, Ipv6Range}; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; -use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use nexus_sled_agent_shared::inventory::OmicronZoneType; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::{ + 
BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneType, +}; use nexus_types::inventory::NetworkInterfaceKind; use omicron_common::address::DNS_OPTE_IPV4_SUBNET; use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; @@ -36,6 +42,7 @@ use omicron_common::backoff::{ use omicron_common::disk::DiskIdentity; use omicron_common::FileKv; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use oxnet::Ipv6Net; @@ -375,19 +382,22 @@ pub async fn run_standalone_server( SocketAddr::V6(a) => a, }; let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - let mut zones = vec![OmicronZoneConfig { - id: Uuid::new_v4(), + let mut zones = vec![BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: OmicronZoneUuid::new_v4(), underlay_address: *http_bound.ip(), - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: http_bound, - dns_address: match dns.dns_server.local_address() { - SocketAddr::V4(_) => panic!("did not expect v4 address"), - SocketAddr::V6(a) => a, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, + http_address: http_bound, + dns_address: match dns.dns_server.local_address() { + SocketAddr::V4(_) => panic!("did not expect v4 address"), + SocketAddr::V6(a) => a, + }, + gz_address: Ipv6Addr::LOCALHOST, + gz_address_index: 0, }, - gz_address: Ipv6Addr::LOCALHOST, - gz_address_index: 0, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }]; @@ -396,23 +406,26 @@ pub async fn run_standalone_server( let mut macs = MacAddr::iter_system(); if let Some(nexus_external_addr) = rss_args.nexus_external_addr { let ip = nexus_external_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: match ip { IpAddr::V4(_) => panic!("did not expect v4 address"), IpAddr::V6(a) => a, }, - zone_type: OmicronZoneType::Nexus { + zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { internal_address: match config.nexus_address { SocketAddr::V4(_) => panic!("did not expect v4 address"), SocketAddr::V6(a) => a, }, - external_ip: ip, + external_ip: from_ipaddr_to_external_floating_ip(ip), nic: nexus_types::inventory::NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, + kind: NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, name: "nexus".parse().unwrap(), ip: NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) @@ -427,7 +440,7 @@ pub async fn run_standalone_server( }, external_tls: false, external_dns_servers: vec![], - }, + }), filesystem_pool: Some(get_random_zpool()), }); @@ -445,31 +458,40 @@ pub async fn run_standalone_server( rss_args.external_dns_internal_addr { let ip = *external_dns_internal_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: ip, - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: external_dns_internal_addr, - dns_address: 
SocketAddr::V6(external_dns_internal_addr), - nic: nexus_types::inventory::NetworkInterface { - id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, - name: "external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) - .unwrap() - .into(), - mac: macs.next().unwrap(), - subnet: (*DNS_OPTE_IPV4_SUBNET).into(), - vni: Vni::SERVICES_VNI, - primary: true, - slot: 0, - transit_ips: vec![], + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + http_address: external_dns_internal_addr, + dns_address: from_sockaddr_to_external_floating_addr( + SocketAddr::V6(external_dns_internal_addr), + ), + nic: nexus_types::inventory::NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, + name: "external-dns".parse().unwrap(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) + .unwrap() + .into(), + mac: macs.next().unwrap(), + subnet: (*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + transit_ips: vec![], + }, }, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }); @@ -530,8 +552,7 @@ pub async fn run_standalone_server( blueprint: build_initial_blueprint_from_sled_configs( &sled_configs, internal_dns_version, - ) - .expect("failed to construct initial blueprint"), + ), physical_disks, zpools, datasets, From 31ea57ea400f2ee68d9b2a3348f881bbad955069 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Fri, 23 Aug 2024 23:23:45 -0600 Subject: [PATCH 09/22] bgp: check md5 passkey length on create/update (#6428) --- nexus/src/app/switch_port.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index 9726a59d33..b616531f53 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -30,6 +30,7 @@ impl super::Nexus { params: params::SwitchPortSettingsCreate, ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + Self::switch_port_settings_validate(&params)?; //TODO race conditions on exists check versus update/create. // Normally I would use a DB lock here, but not sure what @@ -54,6 +55,36 @@ impl super::Nexus { } } + // TODO: more validation wanted + fn switch_port_settings_validate( + params: &params::SwitchPortSettingsCreate, + ) -> CreateResult<()> { + for x in params.bgp_peers.values() { + for p in x.peers.iter() { + if let Some(ref key) = p.md5_auth_key { + if key.len() > 80 { + return Err(Error::invalid_value( + "md5_auth_key", + format!("md5 auth key for {} is longer than 80 characters", p.addr) + )); + } + for c in key.chars() { + if !c.is_ascii() || c.is_ascii_control() { + return Err(Error::invalid_value( + "md5_auth_key", + format!( + "md5 auth key for {} must be printable ascii", + p.addr + ), + )); + } + } + } + } + } + Ok(()) + } + pub async fn switch_port_settings_create( self: &Arc<Self>, opctx: &OpContext, From 5afa0de7b3e91ee23168b12758f3451f46043032 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Sat, 24 Aug 2024 11:16:41 -0700 Subject: [PATCH 10/22] [gateway] ingest sensor measurements from SPs into oximeter (#6354) This branch adds code to the Management Gateway Service for periodically polling sensor measurements from SPs and emitting them to Oximeter.
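To make the buffering strategy concrete up front, here is a minimal standalone sketch (not code from this patch: the `Sample` tuple struct is a hypothetical stand-in for `oximeter::types::Sample`, and only the `tokio` crate is assumed). Poller tasks send chunks of samples into a `tokio::sync::broadcast` channel used as a bounded ring buffer, and the producer side drains whatever is buffered when Oximeter comes to collect:

```rust
use tokio::sync::broadcast::{self, error::TryRecvError};

// Hypothetical stand-in for `oximeter::types::Sample`.
#[derive(Clone, Debug)]
struct Sample(u64);

fn main() {
    // A broadcast channel acts like a bounded ring buffer: when it is full,
    // the *oldest* entries are overwritten instead of blocking the senders.
    let (sample_tx, mut sample_rx) = broadcast::channel::<Vec<Sample>>(4);

    // Each SP poll produces one chunk of samples; send more chunks than the
    // buffer holds to demonstrate the overwrite behavior.
    for i in 0..6 {
        sample_tx.send(vec![Sample(i)]).unwrap();
    }

    // The producer side drains whatever is currently buffered. `Lagged(n)`
    // reports that `n` old chunks were overwritten; note it and keep going.
    loop {
        match sample_rx.try_recv() {
            Ok(chunk) => println!("collected chunk: {chunk:?}"),
            Err(TryRecvError::Lagged(n)) => println!("dropped {n} old chunk(s)"),
            Err(_) => break, // `Empty` or `Closed`: the drain is complete.
        }
    }
}
```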
In particular, this consists of: - a task for managing the metrics endpoint, waiting until MGS knows its underlay network address to bind the endpoint and register it with the control plane, - tasks for polling sensor measurements from each individual SP that MGS knows about, - a task that waits until SP discovery has completed and the rack ID is known, and then spawns a poller task for every discovered SP slot. The SP poller tasks send samples to the Oximeter producer endpoint using a `tokio::sync::broadcast` channel, which I've chosen primarily because it can be used as a bounded ring buffer that actually overwrites the *oldest* value when the buffer is full. This way, we use a bounded amount of memory for samples, but prioritize the most recent samples if we have to throw anything away because Oximeter hasn't come along to collect them recently. The poller tasks cache the component inventory and identifying information from the SP, so that we don't have to re-read all this data from the SP on every poll. While MGS, running on a host, would probably be fine with doing this, it seems better to avoid making the SP do unnecessary work at a 1Hz poll frequency, especially when *both* switch zones are polling them. Instead, every time we poll sensor data from an SP, we first ask it for its current state, and only invalidate our cached understanding of the SP when the state changes. This way, if an SP starts reporting new metrics due to a firmware update, or gets replaced with a different chassis with a new serial number, revision, etc, we won't continue to report metrics for stale targets, but we don't have to reload all of that once per second. To detect scenarios where the SP's state and/or identity has changed in the midst of polling its sensors (which may result in mislabeled metrics), we check whether the SP's state at the end of the poll matches its state at the beginning, and if it doesn't, we poll again immediately with its new identity. At present, the timestamps for these metric samples are generated by MGS --- it's the time when MGS received the sensor data from the SP, as MGS understands it. Because we don't currently collect data that was recorded prior to the switch zone coming up, we don't need to worry about figuring out timestamps for data recorded by the SP prior to the existence of a wall clock. Figuring out the SP/MGS timebase synchronization is probably a lot of additional work, although it would be nice to do in the future. At present, [metrics emitted by sled-agent prior to NTP sync will also be from 1987][1], so I think it's fine to do something similar here, especially because the potential solutions to that [also have their fair share of tradeoffs][2]. The new metrics use a schema in `oximeter/oximeter/schema/hardware-component.toml`. The target of these metrics is a `hardware_component` that includes: - the rack ID and the identity of the MGS instance that collected the metric, - information identifying the chassis[^1] and the SP that recorded them (its serial number, model number, revision, and whether it's a switch, a sled, or a power shelf), - the SP's Hubris archive version (since the reported sensor data may change in future firmware releases) - the SP's ID for the hardware component (e.g. "dev-7"), the kind of device (e.g. "tmp117", "max5970"), and the human-readable description (e.g. "Southeast temperature sensor", "U.2 Sharkfin A hot swap controller", etc.)
reported by the SP Each kind of sensor reading has an individual metric (`hardware_component:temperature`, `hardware_component:current`, `hardware_component:voltage`, and so on). These metrics are labeled with the SP-reported name of the individual sensor measurement channel. For instance, a MAX5970 hotswap controller on sharkfin will have a voltage and current metric named "V12_U2A_A0" for the 12V rail, and a voltage and current metric named "V3P3_U2A_A0" for the 3.3V rail. Finally, a `hardware_component:sensor_errors` metric records sensor errors reported by the SP, labeled with the sensor name, what kind of sensor it is, and a string representation of the error. [1]: https://github.com/oxidecomputer/omicron/pull/6354#issuecomment-2308019422 [2]: https://github.com/oxidecomputer/omicron/pull/6354#issuecomment-2308475741 [^1]: I'm using "chassis" as a generic term to refer to "switch, sled, or power shelf". --- Cargo.lock | 4 + clients/nexus-client/src/lib.rs | 4 + clients/oximeter-client/src/lib.rs | 1 + common/src/api/internal/nexus.rs | 2 + dev-tools/mgs-dev/Cargo.toml | 1 + dev-tools/mgs-dev/src/main.rs | 24 +- dev-tools/omdb/tests/successes.out | 25 +- gateway-test-utils/configs/config.test.toml | 9 + .../configs/sp_sim_config.test.toml | 166 +++ gateway-test-utils/src/setup.rs | 21 +- gateway/Cargo.toml | 3 + gateway/examples/config.toml | 9 + gateway/src/config.rs | 7 +- gateway/src/lib.rs | 12 +- gateway/src/metrics.rs | 1159 +++++++++++++++++ .../tests/integration_tests/component_list.rs | 157 ++- nexus/db-model/src/producer_endpoint.rs | 7 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/tests/integration_tests/metrics.rs | 181 ++- nexus/tests/integration_tests/sp_updater.rs | 20 +- openapi/nexus-internal.json | 7 + openapi/nexus.json | 1 + openapi/oximeter.json | 7 + .../oximeter/schema/hardware-component.toml | 183 +++ oximeter/schema/src/codegen.rs | 1 + oximeter/types/src/schema.rs | 1 + .../up.sql | 2 + schema/crdb/dbinit.sql | 6 +- 28 files changed, 1990 insertions(+), 33 deletions(-) create mode 100644 gateway/src/metrics.rs create mode 100644 oximeter/oximeter/schema/hardware-component.toml create mode 100644 schema/crdb/add-management-gateway-producer-kind/up.sql diff --git a/Cargo.lock b/Cargo.lock index 2630aa2a25..249b7c5cea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4720,6 +4720,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "libc", + "omicron-gateway", "omicron-workspace-hack", "signal-hook-tokio", "tokio", @@ -5962,6 +5963,7 @@ dependencies = [ "anyhow", "base64 0.22.1", "camino", + "chrono", "clap", "dropshot", "expectorate", @@ -5980,6 +5982,8 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "once_cell", + "oximeter", + "oximeter-producer", "schemars", "serde", "serde_json", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 62366c45e1..a55c5d4013 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -213,6 +213,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + ProducerKind::ManagementGateway => Self::ManagementGateway, ProducerKind::SledAgent => Self::SledAgent, ProducerKind::Service => Self::Service, ProducerKind::Instance => Self::Instance, @@ -390,6 +391,9 @@ impl From fn from(kind: types::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + types::ProducerKind::ManagementGateway => { + 
ProducerKind::ManagementGateway + } types::ProducerKind::SledAgent => ProducerKind::SledAgent, types::ProducerKind::Instance => ProducerKind::Instance, types::ProducerKind::Service => ProducerKind::Service, diff --git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs index 74fc6968e8..c23e5177a0 100644 --- a/clients/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -26,6 +26,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus; match kind { + nexus::ProducerKind::ManagementGateway => Self::ManagementGateway, nexus::ProducerKind::Service => Self::Service, nexus::ProducerKind::SledAgent => Self::SledAgent, nexus::ProducerKind::Instance => Self::Instance, diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 7f4eb358a4..4daea6a198 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -223,6 +223,8 @@ pub enum ProducerKind { Service, /// The producer is a Propolis VMM managing a guest instance. Instance, + /// The producer is a management gateway service. + ManagementGateway, } /// Information announced by a metric server, used so that clients can contact it and collect diff --git a/dev-tools/mgs-dev/Cargo.toml b/dev-tools/mgs-dev/Cargo.toml index d5f61f4b96..70382c0469 100644 --- a/dev-tools/mgs-dev/Cargo.toml +++ b/dev-tools/mgs-dev/Cargo.toml @@ -14,6 +14,7 @@ futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true libc.workspace = true +omicron-gateway.workspace = true omicron-workspace-hack.workspace = true signal-hook-tokio.workspace = true tokio.workspace = true diff --git a/dev-tools/mgs-dev/src/main.rs b/dev-tools/mgs-dev/src/main.rs index 85b1313d68..77947999d9 100644 --- a/dev-tools/mgs-dev/src/main.rs +++ b/dev-tools/mgs-dev/src/main.rs @@ -8,6 +8,7 @@ use clap::{Args, Parser, Subcommand}; use futures::StreamExt; use libc::SIGINT; use signal_hook_tokio::Signals; +use std::net::SocketAddr; #[tokio::main] async fn main() -> anyhow::Result<()> { @@ -36,7 +37,12 @@ enum MgsDevCmd { } #[derive(Clone, Debug, Args)] -struct MgsRunArgs {} +struct MgsRunArgs { + /// Override the address of the Nexus instance to use when registering the + /// Oximeter producer. + #[clap(long)] + nexus_address: Option, +} impl MgsRunArgs { async fn exec(&self) -> Result<(), anyhow::Error> { @@ -46,9 +52,23 @@ impl MgsRunArgs { let mut signal_stream = signals.fuse(); println!("mgs-dev: setting up MGS ... 
"); - let gwtestctx = gateway_test_utils::setup::test_setup( + let (mut mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + if let Some(addr) = self.nexus_address { + mgs_config.metrics = + Some(gateway_test_utils::setup::MetricsConfig { + disabled: false, + dev_nexus_address: Some(addr), + dev_bind_loopback: true, + }); + } + + let gwtestctx = gateway_test_utils::setup::test_setup_with_config( "mgs-dev", gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + None, ) .await; println!("mgs-dev: MGS is running."); diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 2a9c9c8051..e939bfa864 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -141,9 +141,16 @@ SP DETAILS: type "Sled" slot 0 COMPONENTS - NAME DESCRIPTION DEVICE PRESENCE SERIAL - sp3-host-cpu FAKE host cpu sp3-host-cpu Present None - dev-0 FAKE temperature sensor fake-tmp-sensor Failed None + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor fake-tmp-sensor Failed None + dev-1 FAKE temperature sensor tmp117 Present None + dev-2 FAKE Southeast temperature sensor tmp117 Present None + dev-6 FAKE U.2 Sharkfin A VPD at24csw080 Present None + dev-7 FAKE U.2 Sharkfin A hot swap controller max5970 Present None + dev-8 FAKE U.2 A NVMe Basic Management Command nvme_bmc Present None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found @@ -167,8 +174,16 @@ SP DETAILS: type "Sled" slot 1 COMPONENTS - NAME DESCRIPTION DEVICE PRESENCE SERIAL - sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor tmp117 Present None + dev-1 FAKE temperature sensor tmp117 Present None + dev-2 FAKE Southeast temperature sensor tmp117 Present None + dev-6 FAKE U.2 Sharkfin A VPD at24csw080 Present None + dev-7 FAKE U.2 Sharkfin A hot swap controller max5970 Present None + dev-8 FAKE U.2 A NVMe Basic Management Command nvme_bmc Present None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found diff --git a/gateway-test-utils/configs/config.test.toml b/gateway-test-utils/configs/config.test.toml index 79975f4611..4e3e9c6e6e 100644 --- a/gateway-test-utils/configs/config.test.toml +++ b/gateway-test-utils/configs/config.test.toml @@ -88,6 +88,15 @@ addr = "[::1]:0" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. +dev_bind_loopback = true + # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". 
The actual path will be generated by the test suite for each diff --git a/gateway-test-utils/configs/sp_sim_config.test.toml b/gateway-test-utils/configs/sp_sim_config.test.toml index cc08eec30b..4f370a167c 100644 --- a/gateway-test-utils/configs/sp_sim_config.test.toml +++ b/gateway-test-utils/configs/sp_sim_config.test.toml @@ -20,6 +20,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 1" capabilities = 0x2 presence = "Present" +sensors = [ + {name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] [[simulated_sps.sidecar.components]] id = "dev-1" @@ -27,6 +30,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 2" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "South", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] [[simulated_sps.sidecar]] multicast_addr = "::1" @@ -56,6 +62,82 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "Southwest", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.45898438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.024414063, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.03125, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.328125, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2607.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2476.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2553.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2265.0, 
last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2649.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2275.0, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "::1" @@ -72,6 +154,90 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:0" + +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.3629, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.41893438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.025614603, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.02914, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.2618, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2510.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2390.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2467.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2195.0, last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2680.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2212.0, last_data.timestamp = 1234 }, +] + + # # NOTE: for the test suite, the [log] section is ignored; sp-sim logs are rolled # into the gateway logfile. 
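As an aside, the `sensors` entries in the simulator config above have a fairly regular shape. A rough serde model is sketched below for readers who want to experiment with the format; the struct and field names are inferred from the TOML and are not the actual `sp-sim` types, and the `serde` (with the `derive` feature) and `toml` crates are assumed:

```rust
use serde::Deserialize;

// Inferred shape of one entry in a component's `sensors` array; the real
// sp-sim types may differ.
#[derive(Debug, Deserialize)]
struct SensorConfig {
    name: String,
    kind: String, // e.g. "Temperature", "Current", "Voltage", "Speed"
    last_data: Option<SensorData>,
    last_error: Option<SensorError>,
}

#[derive(Debug, Deserialize)]
struct SensorData {
    value: f32,
    timestamp: u64,
}

#[derive(Debug, Deserialize)]
struct SensorError {
    value: String, // e.g. "DeviceError"
    timestamp: u64,
}

fn main() {
    let entry = r#"
        name = "Southwest"
        kind = "Temperature"
        last_data = { value = 41.7890625, timestamp = 1234 }
    "#;
    let sensor: SensorConfig = toml::from_str(entry).unwrap();
    println!("{sensor:?}");
}
```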
diff --git a/gateway-test-utils/src/setup.rs b/gateway-test-utils/src/setup.rs index 46bc55805a..056bb451f7 100644 --- a/gateway-test-utils/src/setup.rs +++ b/gateway-test-utils/src/setup.rs @@ -8,6 +8,7 @@ use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use dropshot::test_util::LogContext; use gateway_messages::SpPort; +pub use omicron_gateway::metrics::MetricsConfig; use omicron_gateway::MgsArguments; use omicron_gateway::SpType; use omicron_gateway::SwitchPortConfig; @@ -33,6 +34,7 @@ pub struct GatewayTestContext { pub server: omicron_gateway::Server, pub simrack: SimRack, pub logctx: LogContext, + pub gateway_id: Uuid, } impl GatewayTestContext { @@ -48,13 +50,18 @@ pub fn load_test_config() -> (omicron_gateway::Config, sp_sim::Config) { let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR")); let server_config_file_path = manifest_dir.join("configs/config.test.toml"); let server_config = - omicron_gateway::Config::from_file(&server_config_file_path) - .expect("failed to load config.test.toml"); + match omicron_gateway::Config::from_file(&server_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load MGS config: {e}"), + }; let sp_sim_config_file_path = manifest_dir.join("configs/sp_sim_config.test.toml"); - let sp_sim_config = sp_sim::Config::from_file(&sp_sim_config_file_path) - .expect("failed to load sp_sim_config.test.toml"); + let sp_sim_config = + match sp_sim::Config::from_file(&sp_sim_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load SP simulator config: {e}"), + }; (server_config, sp_sim_config) } @@ -143,8 +150,8 @@ pub async fn test_setup_with_config( // Start gateway server let rack_id = Some(Uuid::parse_str(RACK_UUID).unwrap()); - - let args = MgsArguments { id: Uuid::new_v4(), addresses, rack_id }; + let gateway_id = Uuid::new_v4(); + let args = MgsArguments { id: gateway_id, addresses, rack_id }; let server = omicron_gateway::Server::start( server_config.clone(), args, @@ -206,5 +213,5 @@ pub async fn test_setup_with_config( log.new(o!("component" => "client test context")), ); - GatewayTestContext { client, server, simrack, logctx } + GatewayTestContext { client, server, simrack, logctx, gateway_id } } diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 3cfd1d447b..2dce15892d 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -11,6 +11,7 @@ workspace = true anyhow.workspace = true base64.workspace = true camino.workspace = true +chrono.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -39,6 +40,8 @@ tokio-tungstenite.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +oximeter.workspace = true +oximeter-producer.workspace = true [dev-dependencies] expectorate.workspace = true diff --git a/gateway/examples/config.toml b/gateway/examples/config.toml index d29d9508b9..a76edcd7b5 100644 --- a/gateway/examples/config.toml +++ b/gateway/examples/config.toml @@ -71,6 +71,15 @@ addr = "[::1]:33320" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. 
+dev_bind_loopback = true + [log] # Show log messages of this level and more severe level = "debug" diff --git a/gateway/src/config.rs b/gateway/src/config.rs index afdb046881..edf895ef59 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -6,6 +6,7 @@ //! configuration use crate::management_switch::SwitchConfig; +use crate::metrics::MetricsConfig; use camino::Utf8Path; use camino::Utf8PathBuf; use dropshot::ConfigLogging; @@ -25,6 +26,8 @@ pub struct Config { pub switch: SwitchConfig, /// Server-wide logging configuration. pub log: ConfigLogging, + /// Configuration for SP sensor metrics. + pub metrics: Option<MetricsConfig>, } impl Config { @@ -47,13 +50,13 @@ pub struct PartialDropshotConfig { #[derive(Debug, Error, SlogInlineError)] pub enum LoadError { - #[error("error reading \"{path}\"")] + #[error("error reading \"{path}\": {err}")] Io { path: Utf8PathBuf, #[source] err: std::io::Error, }, - #[error("error parsing \"{path}\"")] + #[error("error parsing \"{path}\": {err}")] Parse { path: Utf8PathBuf, #[source] diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index e1eed05334..8e764dc63f 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -6,6 +6,7 @@ mod config; mod context; mod error; mod management_switch; +pub mod metrics; mod serial_console; pub mod http_entrypoints; // TODO pub only for testing - is this right? @@ -62,6 +63,8 @@ pub struct Server { /// `http_servers` all_servers_shutdown: FuturesUnordered, request_body_max_bytes: usize, + /// handle to the SP sensor metrics subsystem + metrics: metrics::Metrics, log: Logger, } @@ -151,6 +154,9 @@ impl Server { let mut http_servers = HashMap::with_capacity(args.addresses.len()); let all_servers_shutdown = FuturesUnordered::new(); + let metrics = + metrics::Metrics::new(&log, &args, config.metrics, apictx.clone()); + for addr in args.addresses { start_dropshot_server( &apictx, @@ -167,6 +173,7 @@ impl Server { http_servers, all_servers_shutdown, request_body_max_bytes: config.dropshot.request_body_max_bytes, + metrics, log, }) } @@ -275,12 +282,14 @@ impl Server { server.close().await?; } + self.metrics.update_server_addrs(addresses).await; + Ok(()) } /// The rack_id will be set on a refresh of the SMF property when the sled /// agent starts. - pub fn set_rack_id(&self, rack_id: Option<Uuid>) { + pub fn set_rack_id(&mut self, rack_id: Option<Uuid>) { if let Some(rack_id) = rack_id { let val = self.apictx.rack_id.get_or_init(|| rack_id); if *val != rack_id { @@ -291,6 +300,7 @@ impl Server { "ignored_new_rack_id" => %rack_id); } else { info!(self.apictx.log, "Set rack_id"; "rack_id" => %rack_id); + self.metrics.set_rack_id(rack_id); } } else { warn!(self.apictx.log, "SMF refresh called without a rack id"); diff --git a/gateway/src/metrics.rs b/gateway/src/metrics.rs new file mode 100644 index 0000000000..d4e0795ae0 --- /dev/null +++ b/gateway/src/metrics.rs @@ -0,0 +1,1159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+use crate::error::CommunicationError; +use crate::management_switch::SpIdentifier; +use crate::management_switch::SpType; +use crate::MgsArguments; +use crate::ServerContext; +use anyhow::Context; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_sp_comms::SingleSp; +use gateway_sp_comms::SpComponent; +use gateway_sp_comms::VersionedSpState; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; +use omicron_common::backoff; +use oximeter::types::Cumulative; +use oximeter::types::ProducerRegistry; +use oximeter::types::Sample; +use oximeter::MetricsError; +use std::borrow::Cow; +use std::collections::hash_map; +use std::collections::hash_map::HashMap; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::broadcast; +use tokio::sync::oneshot; +use tokio::sync::watch; +use tokio::task::JoinHandle; +use uuid::Uuid; + +oximeter::use_timeseries!("hardware-component.toml"); +use hardware_component as metric; + +/// Handle to the metrics tasks. +pub struct Metrics { + /// If the metrics subsystem is disabled, this is `None`. + inner: Option<Handles>, +} + +struct Handles { + addrs_tx: watch::Sender<Vec<SocketAddrV6>>, + rack_id_tx: Option<oneshot::Sender<Uuid>>, + server: JoinHandle<anyhow::Result<()>>, +} + +/// Configuration for metrics. +/// +/// In order to reduce the risk of a bad config file taking down the whole +/// management network, we try to keep the metrics-specific portion of the +/// config file as minimal as possible. At present, it only includes development +/// configurations that shouldn't be present in production configs. +#[derive( + Clone, Debug, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize, +)] +#[serde(deny_unknown_fields)] +pub struct MetricsConfig { + /// Completely disable the metrics subsystem. + /// + /// If `disabled = true`, sensor data metrics will not be collected, and the + /// metrics polling tasks will not be started. + #[serde(default)] + pub disabled: bool, + + /// Override the Nexus address used to register the SP metrics Oximeter + /// producer. This is intended for use in development and testing. + /// + /// If this argument is not present, Nexus is discovered through DNS. + #[serde(default)] + pub dev_nexus_address: Option<SocketAddr>, + + /// Allow the metrics producer endpoint to bind on loopback. + /// + /// This should be disabled in production, as Nexus will not be able to + /// reach the loopback interface, but is necessary for local development and + /// test purposes. + #[serde(default)] + pub dev_bind_loopback: bool, +} + +/// Polls sensor readings from an individual SP. +struct SpPoller { + spid: SpIdentifier, + known_state: Option<VersionedSpState>, + components: HashMap<SpComponent, ComponentMetrics>, + log: slog::Logger, + rack_id: Uuid, + mgs_id: Uuid, + sample_tx: broadcast::Sender<Vec<Sample>>, +} + +struct ComponentMetrics { + target: metric::HardwareComponent, + /// Counts of errors reported by sensors on this component. + sensor_errors: HashMap<SensorErrorKey, Cumulative<u64>>, + /// Counts of errors that occurred whilst polling the SP for measurements + /// from this component. + poll_errors: HashMap<&'static str, Cumulative<u64>>, +} + +#[derive(Eq, PartialEq, Hash)] +struct SensorErrorKey { + name: Cow<'static, str>, + kind: &'static str, + error: &'static str, +} + +/// Manages a metrics server and stuff.
+struct ServerManager { + log: slog::Logger, + addrs: watch::Receiver<Vec<SocketAddrV6>>, + registry: ProducerRegistry, +} + +#[derive(Debug)] +struct Producer { + /// Receiver for samples produced by SP pollers. + sample_rx: broadcast::Receiver<Vec<Sample>>, + /// Logging context. + /// + /// We stick this on the producer because we would like to be able to log + /// when stale samples are dropped. + log: slog::Logger, +} + +/// The maximum Dropshot request size for the metrics server. +const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024; + +/// Poll interval for requesting sensor readings from SPs. +/// +/// Bryan wants to try polling at 1Hz, so let's do that for now. +const SP_POLL_INTERVAL: Duration = Duration::from_secs(1); + +/// The interval at which we will ask Oximeter to collect our metric samples. +/// +/// Every ten seconds seems good. +const OXIMETER_COLLECTION_INTERVAL: Duration = Duration::from_secs(10); + +/// The expected number of SPs in a fully-loaded rack. +/// +/// N.B. that there *might* be more than this; we shouldn't ever panic or +/// otherwise misbehave if we see more than this number. This is just intended +/// for sizing buffers/map allocations and so forth; we can always realloc if we +/// see a bonus SP or two. That's why it's called "normal number of SPs" and not +/// "MAX_SPS" or similar. +/// +/// Additionally, note that we always determine the channel capacity based on +/// the assumption that *someday*, the rack might be fully loaded with compute +/// sleds, even if it isn't *right now*. A rack with 16 sleds could always grow +/// another 16 later! +const NORMAL_NUMBER_OF_SPS: usize = + 32 // 32 compute sleds + + 2 // two switches + + 2 // two power shelves, someday. + ; + +/// What size should we make the buffer of sample chunks? +const MAX_BUFFERED_SAMPLE_CHUNKS: usize = { + // Roughly how many times will we poll SPs for each metrics collection + // interval? + let polls_per_metrics_interval = { + let collection_interval_secs: usize = + OXIMETER_COLLECTION_INTERVAL.as_secs() as usize; + let poll_interval_secs: usize = SP_POLL_INTERVAL.as_secs() as usize; + + collection_interval_secs / poll_interval_secs + }; + + // How many sample collection intervals do we want to allow to elapse before + // we start putting stuff on the floor? + // + // Let's say 16. Chosen totally arbitrarily but seems reasonable-ish. + let sloppiness = 16; + let capacity = + NORMAL_NUMBER_OF_SPS * polls_per_metrics_interval * sloppiness; + // Finally, the buffer capacity will probably be allocated in a power of two + // anyway, so let's make sure our thing is a power of two so we don't waste + // the allocation we're gonna get anyway. + capacity.next_power_of_two() +}; + +impl Metrics { + pub fn new( + log: &slog::Logger, + args: &MgsArguments, + cfg: Option<MetricsConfig>, + apictx: Arc<ServerContext>, + ) -> Self { + let &MgsArguments { id, rack_id, ref addresses } = args; + + if cfg.as_ref().map(|c| c.disabled).unwrap_or(false) { + slog::warn!(&log, "metrics subsystem disabled by config"); + return Self { inner: None }; + } + + // Create a channel for the SP poller tasks to send samples to the + // Oximeter producer endpoint. + // + // A broadcast channel is used here, not because we are actually + // multi-consumer (`Producer::produce` is never called concurrently), + // but because the broadcast channel has properly ring-buffer-like + // behavior, where earlier messages are discarded, rather than exerting + // backpressure on senders (as Tokio's MPSC channel does).
This + // is what we want, as we would prefer a full buffer to result in + // clobbering the oldest measurements, rather than leaving the newest + // ones on the floor. + let (sample_tx, sample_rx) = + broadcast::channel(MAX_BUFFERED_SAMPLE_CHUNKS); + + // Using a channel for this is, admittedly, a bit of an end-run around + // the `OnceLock` on the `ServerContext` that *also* stores the rack ID, + // but it has the nice benefit of allowing the `PollerManager` task to _await_ + // the rack ID being set...we might want to change other code to use a + // similar approach in the future. + let (rack_id_tx, rack_id_rx) = oneshot::channel(); + let rack_id_tx = if let Some(rack_id) = rack_id { + rack_id_tx.send(rack_id).expect( + "we just created the channel; it therefore will not be \ + closed", + ); + None + } else { + Some(rack_id_tx) + }; + + tokio::spawn(start_pollers( + log.new(slog::o!("component" => "sensor-poller")), + apictx.clone(), + rack_id_rx, + id, + sample_tx, + )); + + let (addrs_tx, addrs_rx) = + tokio::sync::watch::channel(addresses.clone()); + let server = { + let log = log.new(slog::o!("component" => "producer-server")); + let registry = ProducerRegistry::with_id(id); + registry + .register_producer(Producer { sample_rx, log: log.clone() }) + // TODO(ben): when you change `register_producer` to not return + // a `Result`, delete this `expect`. thanks in advance! :) + .expect( + "`ProducerRegistry::register_producer()` will never \ + actually return an `Err`, so this shouldn't ever \ + happen...", + ); + + tokio::spawn( + ServerManager { log, addrs: addrs_rx, registry }.run(cfg), + ) + }; + Self { inner: Some(Handles { addrs_tx, rack_id_tx, server }) } + } + + pub fn set_rack_id(&mut self, rack_id: Uuid) { + let tx = self.inner.as_mut().and_then(|i| i.rack_id_tx.take()); + if let Some(tx) = tx { + // If the task that starts sensor pollers has gone away already, + // we're probably shutting down, and shouldn't panic. + let _ = tx.send(rack_id); + } + // Ignoring duplicate attempt to set the rack ID... + } + + pub async fn update_server_addrs(&self, new_addrs: &[SocketAddrV6]) { + if let Some(ref inner) = self.inner { + inner.addrs_tx.send_if_modified(|current_addrs| { + if current_addrs.len() == new_addrs.len() + // N.B. that we could make this "faster" with a `HashSet`, + // but...the size of this Vec of addresses is probably going to + // two or three items, max, so the linear scan actually probably + // outperforms it... + && current_addrs.iter().all(|addr| new_addrs.contains(addr)) + { + return false; + } + + // Reuse existing `Vec` capacity if possible.This is almost + // certainly not performance-critical, but it makes me feel happy. + current_addrs.clear(); + current_addrs.extend_from_slice(new_addrs); + true + }); + } + } +} + +impl Drop for Metrics { + fn drop(&mut self) { + // Clean up our children on drop. + if let Some(ref mut inner) = self.inner { + inner.server.abort(); + } + } +} + +impl oximeter::Producer for Producer { + fn produce( + &mut self, + ) -> Result>, MetricsError> { + // Drain all samples currently in the queue into a `Vec`. + // + // N.B. it may be tempting to pursue an alternative design where we + // implement `Iterator` for a `broadcast::Receiver>` and + // just return that using `Receiver::resubscribe`...DON'T DO THAT! The + // `resubscribe` function creates a receiver at the current *tail* of + // the ringbuffer, so it won't see any samples produced *before* now. + // Which is the opposite of what we want! 
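+ //
+ // N.B.: `broadcast::Receiver::len()` counts buffered *chunks* (each
+ // chunk is the `Vec` of samples from a single poll), so this sizes the
+ // buffer for one push per chunk rather than per individual sample.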
+ let mut samples = Vec::with_capacity(self.sample_rx.len()); + // Because we receive the individual samples in a `Vec` of all samples + // produced by a poller, let's also sum the length of each of those + // `Vec`s here, so we can log it later. + let mut total_samples = 0; + // Also, track whether any sample chunks were dropped off the end of the + // ring buffer. + let mut dropped_chunks = 0; + + use broadcast::error::TryRecvError; + loop { + match self.sample_rx.try_recv() { + Ok(sample_chunk) => { + total_samples += sample_chunk.len(); + samples.push(sample_chunk) + } + // This error indicates that an old ringbuffer entry was + // overwritten. That's fine, just get the next one. + Err(TryRecvError::Lagged(dropped)) => { + dropped_chunks += dropped; + } + // We've drained all currently available samples! We're done here! + Err(TryRecvError::Empty) => break, + // This should only happen when shutting down. + Err(TryRecvError::Closed) => { + slog::debug!(&self.log, "sample producer channel closed"); + break; + } + } + } + + if dropped_chunks > 0 { + slog::info!( + &self.log, + "produced metric samples. some old sample chunks were dropped!"; + "samples" => total_samples, + "sample_chunks" => samples.len(), + "dropped_chunks" => dropped_chunks, + ); + } else { + slog::debug!( + &self.log, + "produced metric samples"; + "samples" => total_samples, + "sample_chunks" => samples.len(), + ); + } + + // There you go, that's all I've got. + Ok(Box::new(samples.into_iter().flatten())) + } +} + +async fn start_pollers( + log: slog::Logger, + apictx: Arc, + rack_id: oneshot::Receiver, + mgs_id: Uuid, + sample_tx: broadcast::Sender>, +) -> anyhow::Result<()> { + let switch = &apictx.mgmt_switch; + + // First, wait until we know what the rack ID is known... + let rack_id = rack_id + .await + .context("rack ID sender has gone away...we must be shutting down")?; + + // Wait for SP discovery to complete, if it hasn't already. + // TODO(eliza): presently, we busy-poll here. 
It would be nicer to + // replace the `OnceLock` in `ManagementSwitch` + // with a `tokio::sync::watch` + let sps = backoff::retry_notify_ext( + backoff::retry_policy_local(), + || async { switch.all_sps().map_err(backoff::BackoffError::transient) }, + |err, _, elapsed| { + let secs = elapsed.as_secs(); + if secs < 30 { + slog::debug!( + &log, + "waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ); + } else if secs < 180 { + slog::info!( + &log, + "still waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ) + } else { + slog::warn!( + &log, + "we have been waiting for SP discovery to complete \ + for a pretty long time!"; + "elapsed" => ?elapsed, + "error" => err, + ) + } + }, + ) + .await + .context("we should never return a fatal error here")?; + + slog::info!( + &log, + "starting to poll SP sensor data every {SP_POLL_INTERVAL:?}" + ); + + for (spid, _) in sps { + slog::info!( + &log, + "found a new little friend!"; + "sp_slot" => ?spid.slot, + "chassis_type" => ?spid.typ, + ); + + let poller = SpPoller { + spid, + rack_id, + mgs_id, + log: log.new(slog::o!( + "sp_slot" => spid.slot, + "chassis_type" => format!("{:?}", spid.typ), + )), + components: HashMap::new(), + known_state: None, + sample_tx: sample_tx.clone(), + }; + tokio::spawn(poller.run(apictx.clone())); + } + + Ok(()) +} + +impl SpPoller { + async fn run(mut self, apictx: Arc) { + let mut interval = tokio::time::interval(SP_POLL_INTERVAL); + let switch = &apictx.mgmt_switch; + let sp = match switch.sp(self.spid) { + Ok(sp) => sp, + Err(e) => { + // This should never happen, but it's not worth taking down the + // entire management network over that... + const MSG: &'static str = + "the `SpPoller::run` function is only called after \ + discovery completes successfully, and the `SpIdentifier` \ + used was returned by the management switch, \ + so it should be valid."; + if cfg!(debug_assertions) { + unreachable!( + "{MSG} nonetheless, we saw a {e:?} error when looking \ + up {:?}", + self.spid + ); + } else { + slog::error!( + &self.log, + "THIS SHOULDN'T HAPPEN: {MSG}"; + "error" => e, + "sp" => ?self.spid, + ); + return; + } + } + }; + loop { + interval.tick().await; + slog::trace!(&self.log, "interval elapsed, polling SP..."); + + match self.poll(sp).await { + // No sense cluttering the ringbuffer with empty vecs... + Ok(samples) if samples.is_empty() => { + slog::trace!( + &self.log, + "polled SP, no samples returned"; + "num_samples" => 0usize + ); + } + Ok(samples) => { + slog::trace!( + &self.log, + "polled SP successfully"; + "num_samples" => samples.len(), + ); + + if let Err(_) = self.sample_tx.send(samples) { + slog::debug!( + &self.log, + "all sample receiver handles have been dropped! \ + presumably we are shutting down..."; + ); + return; + } + } + // No SP is currently present for this ID. This may change in + // the future: a cubby that is not populated at present may have + // a sled added to it in the future. So, let's wait until it + // changes. + Err(CommunicationError::NoSpDiscovered) => { + slog::info!( + &self.log, + "no SP is present for this slot. waiting for a \ + little buddy to appear..."; + ); + let mut watch = sp.sp_addr_watch().clone(); + loop { + if let Some((addr, port)) = *watch.borrow_and_update() { + // Ladies and gentlemen...we got him! + slog::info!( + &self.log, + "found a SP, resuming polling."; + "sp_addr" => ?addr, + "sp_port" => ?port, + ); + break; + } + + // Wait for an address to be discovered. 
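+ // (`watch::Receiver::changed()` fails only once the sender side
+ // has been dropped, so an error here means polling can never
+ // resume and we should bail out.)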
+ slog::debug!(&self.log, "waiting for a SP to appear."); + if watch.changed().await.is_err() { + slog::debug!( + &self.log, + "SP address watch has been closed, presumably \ + we are shutting down"; + ); + return; + } + } + } + Err(error) => { + slog::warn!( + &self.log, + "failed to poll SP, will try again momentarily..."; + "error" => %error, + ); + // TODO(eliza): we should probably have a metric for failed + // SP polls. + } + } + } + } + + async fn poll( + &mut self, + sp: &SingleSp, + ) -> Result, CommunicationError> { + let mut current_state = SpUnderstanding::from(sp.state().await?); + let mut samples = Vec::new(); + // If the SP's state changes dramatically *during* a poll, it may be + // necessary to re-do the metrics scrape, thus the loop. Normally, we + // will only loop a single time, but may retry if necessary. + loop { + // Check if the SP's state has changed. If it has, we need to make sure + // we still know what all of its sensors are. + if Some(¤t_state) != self.known_state.as_ref() { + // The SP's state appears to have changed. Time to make sure our + // understanding of its devices and identity is up to date! + + let chassis_kind = match self.spid.typ { + SpType::Sled => "sled", + SpType::Switch => "switch", + SpType::Power => "power", + }; + let model = stringify_byte_string(¤t_state.model[..]); + let serial = + stringify_byte_string(¤t_state.serial_number[..]); + let hubris_archive_id = + hex::encode(¤t_state.hubris_archive_id); + + slog::debug!( + &self.log, + "our little friend seems to have changed in some kind of way"; + "current_state" => ?current_state, + "known_state" => ?self.known_state, + "new_model" => %model, + "new_serial" => %serial, + "new_hubris_archive_id" => %hubris_archive_id, + ); + + let inv_devices = sp.inventory().await?.devices; + + // Clear out any previously-known devices, and preallocate capacity + // for all the new ones. + self.components.clear(); + self.components.reserve(inv_devices.len()); + + for dev in inv_devices { + // Skip devices which have nothing interesting for us. + if !dev + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + continue; + } + let component_id = match dev.component.as_str() { + Some(c) => Cow::Owned(c.to_string()), + None => { + // These are supposed to always be strings. But, if we + // see one that's not a string, fall back to the hex + // representation rather than panicking. + let hex = hex::encode(dev.component.id); + slog::warn!( + &self.log, + "a SP component ID was not a string! this isn't \ + supposed to happen!"; + "component" => %hex, + "device" => ?dev, + ); + Cow::Owned(hex) + } + }; + + // TODO(eliza): i hate having to clone all these strings for + // every device on the SP...it would be cool if Oximeter let us + // reference count them... + let target = metric::HardwareComponent { + rack_id: self.rack_id, + gateway_id: self.mgs_id, + chassis_model: Cow::Owned(model.clone()), + chassis_revision: current_state.revision, + chassis_kind: Cow::Borrowed(chassis_kind), + chassis_serial: Cow::Owned(serial.clone()), + hubris_archive_id: Cow::Owned( + hubris_archive_id.clone(), + ), + slot: self.spid.slot as u32, + component_kind: Cow::Owned(dev.device), + component_id, + description: Cow::Owned(dev.description), + }; + match self.components.entry(dev.component) { + // Found a new device! 
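+ // Insert a fresh metrics target for it, with empty
+ // error-counter maps; individual counters are created
+ // lazily the first time an error is observed.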
+ hash_map::Entry::Vacant(entry) => { + slog::debug!( + &self.log, + "discovered a new component!"; + "component_id" => %target.component_id, + "component_kind" => %target.component_kind, + "description" => %target.component_id, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + // We previously had a known device for this thing, but + // the metrics target has changed, so we should reset + // its cumulative metrics. + hash_map::Entry::Occupied(mut entry) + if entry.get().target != target => + { + slog::trace!( + &self.log, + "target has changed, resetting cumulative metrics \ + for component"; + "component" => ?dev.component, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + + // The target for this device hasn't changed, don't reset it. + hash_map::Entry::Occupied(_) => {} + } + } + + self.known_state = Some(current_state); + } + + // We will need capacity for *at least* the number of components on the + // SP --- it will probably be more, as several components have multiple + // measurement channels which will produce independent samples (e.g. a + // power rail will likely have both voltage and current measurements, + // and a device may have multiple rails...) but, this way, we can avoid + // *some* amount of reallocating... + samples.reserve(self.components.len()); + for (c, metrics) in &mut self.components { + // Metrics samples *should* always be well-formed. If we ever emit a + // messed up one, this is a programmer error, and therefore should + // fail in test, but should probably *not* take down the whole + // management gateway in a real-life rack, especially because it's + // probably going to happen again if we were to get restarted. + const BAD_SAMPLE: &str = + "we emitted a bad metrics sample! this should never happen"; + macro_rules! try_sample { + ($sample:expr) => { + match $sample { + Ok(sample) => samples.push(sample), + + Err(err) => { + slog::error!( + &self.log, + "{BAD_SAMPLE}!"; + "error" => %err, + ); + #[cfg(debug_assertions)] + unreachable!("{BAD_SAMPLE}: {err}"); + } + } + } + } + let details = match sp.component_details(*c).await { + Ok(deets) => deets, + // SP seems gone! + Err(CommunicationError::NoSpDiscovered) => { + return Err(CommunicationError::NoSpDiscovered) + } + Err(error) => { + slog::warn!( + &self.log, + "failed to read details on SP component"; + "sp_component" => %c, + "error" => %error, + ); + try_sample!(metrics.poll_error(comms_error_str(error))); + continue; + } + }; + if details.entries.is_empty() { + slog::warn!( + &self.log, + "a component which claimed to have measurement channels \ + had empty details. this seems weird..."; + "sp_component" => %c, + ); + try_sample!(metrics.poll_error("no_measurement_channels")); + continue; + } + + let ComponentMetrics { sensor_errors, target, .. } = metrics; + for d in details.entries { + let ComponentDetails::Measurement(m) = d else { + // If the component details are switch port details rather + // than measurement channels, ignore it for now. + continue; + }; + let sensor: Cow<'static, str> = Cow::Owned(m.name); + + // First, if there's a measurement error, increment the + // error count metric. We will synthesize a missing sample + // for the sensor's metric as well, after we produce the + // measurement error sample. 
+ // + // We do this first so that we only have to clone the + // sensor's name if there's an error, rather than always + // cloning it in *case* there's an error. + if let Err(error) = m.value { + let kind = match m.kind { + MeasurementKind::Temperature => "temperature", + MeasurementKind::Current => "current", + MeasurementKind::Voltage => "voltage", + MeasurementKind::Power => "power", + MeasurementKind::InputCurrent => "input_current", + MeasurementKind::InputVoltage => "input_voltage", + MeasurementKind::Speed => "fan_speed", + }; + let error = match error { + MeasurementError::InvalidSensor => "invalid_sensor", + MeasurementError::NoReading => "no_reading", + MeasurementError::NotPresent => "not_present", + MeasurementError::DeviceError => "device_error", + MeasurementError::DeviceUnavailable => { + "device_unavailable" + } + MeasurementError::DeviceTimeout => "device_timeout", + MeasurementError::DeviceOff => "device_off", + }; + let datum = sensor_errors + .entry(SensorErrorKey { + name: sensor.clone(), + kind, + error, + }) + .or_insert(Cumulative::new(0)); + // TODO(eliza): perhaps we should treat this as + // "level-triggered" and only increment the counter + // when the sensor has *changed* to an errored + // state after we have seen at least one good + // measurement from it since the last time the error + // was observed? + datum.increment(); + try_sample!(Sample::new( + target, + &metric::SensorErrorCount { + error: Cow::Borrowed(error), + sensor: sensor.clone(), + datum: *datum, + sensor_kind: Cow::Borrowed(kind), + }, + )); + } + + // I don't love this massive `match`, but because the + // `Sample::new_missing` constructor is a different function + // from `Sample::new`, we need separate branches for the + // error and not-error cases, rather than just doing + // something to produce a datum from both the `Ok` and + // `Error` cases... 
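+ //
+ // Note that a "missing" sample keeps this sensor's timeseries
+ // present even when the read failed; the zero datum passed in the
+ // error arms below only fixes the datum type and is not recorded
+ // as an actual measurement.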
+ let sample = match (m.value, m.kind) { + (Ok(datum), MeasurementKind::Temperature) => { + Sample::new( + target, + &metric::Temperature { sensor, datum }, + ) + } + (Err(_), MeasurementKind::Temperature) => { + Sample::new_missing( + target, + &metric::Temperature { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Current) => Sample::new( + target, + &metric::Current { sensor, datum }, + ), + (Err(_), MeasurementKind::Current) => { + Sample::new_missing( + target, + &metric::Current { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Voltage) => Sample::new( + target, + &metric::Voltage { sensor, datum }, + ), + + (Err(_), MeasurementKind::Voltage) => { + Sample::new_missing( + target, + &metric::Voltage { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Power) => Sample::new( + target, + &metric::Power { sensor, datum }, + ), + (Err(_), MeasurementKind::Power) => { + Sample::new_missing( + target, + &metric::Power { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::InputCurrent) => { + Sample::new( + target, + &metric::InputCurrent { sensor, datum }, + ) + } + (Err(_), MeasurementKind::InputCurrent) => { + Sample::new_missing( + target, + &metric::InputCurrent { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::InputVoltage) => { + Sample::new( + target, + &metric::InputVoltage { sensor, datum }, + ) + } + (Err(_), MeasurementKind::InputVoltage) => { + Sample::new_missing( + target, + &metric::InputVoltage { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Speed) => Sample::new( + target, + &metric::FanSpeed { sensor, datum }, + ), + (Err(_), MeasurementKind::Speed) => { + Sample::new_missing( + target, + &metric::FanSpeed { sensor, datum: 0.0 }, + ) + } + }; + try_sample!(sample); + } + } + + // Now, fetch the SP's state *again*. It is possible that, while we + // were scraping the SP's samples, the SP's identity changed in some + // way: perhaps its version was updated during the poll, or it + // was removed from the rack and replaced with an entirely different + // chassis! If that's the case, some of the samples we collected may + // have a metrics target describing the wrong thing (e.g. they could + // still have the previous firmware's `hubris_archive_id`, if the SP + // was updated). In that case, we need to throw away the samples we + // collected and try again, potentially rebuilding our understanding + // of the SP's inventory. + let state = SpUnderstanding::from(sp.state().await?); + if state == current_state { + // All good, the SP is still who we thought it was! We can + // "commit" this batch of samples + return Ok(samples); + } + + slog::info!( + &self.log, + "SP's state changed mid-poll! discarding current samples and \ + starting over!"; + "new_state" => ?state, + "current_state" => ?current_state, + ); + // Let's reuse the buffer we already have for the next batch of + // samples. + samples.clear(); + //...and try again with the new state. + current_state = state; + } + } +} + +/// The fields of the `gateway_messages` `VersionedSpState` and +/// `SpStateV1`/`SpStateV2`/`SpStateV3` that we actually care about for purposes +/// of determining whether our understanding of the SP's components are still +/// valid. +/// +/// In particular, we throw out the RoT state and the SP's power state, because +/// those changing won't actually invalidate our understanding of the SP's +/// components. 
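+///
+/// Two successive state reads compare equal iff none of these identity
+/// fields changed; the mid-poll re-check in `SpPoller::poll` relies on this
+/// to decide whether a batch of samples can be committed.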
+#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct SpUnderstanding { + hubris_archive_id: [u8; 8], + serial_number: [u8; 32], + model: [u8; 32], + revision: u32, +} + +impl From for SpUnderstanding { + fn from(v: VersionedSpState) -> Self { + match v { + VersionedSpState::V1(gateway_messages::SpStateV1 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + VersionedSpState::V2(gateway_messages::SpStateV2 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + VersionedSpState::V3(gateway_messages::SpStateV3 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + } + } +} + +// Reimplement this ourselves because we don't really care about +// reading the RoT state at present. This is unfortunately copied +// from `gateway_messages`. +fn stringify_byte_string(bytes: &[u8]) -> String { + // We expect serial and model numbers to be ASCII and 0-padded: find the first 0 + // byte and convert to a string. If that fails, hexlify the entire slice. + let first_zero = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); + + std::str::from_utf8(&bytes[..first_zero]) + .map(|s| s.to_string()) + .unwrap_or_else(|_err| hex::encode(bytes)) +} + +impl ServerManager { + async fn run(mut self, cfg: Option) -> anyhow::Result<()> { + let (registration_address, bind_loopback) = + if let Some(MetricsConfig { + dev_bind_loopback, + dev_nexus_address, + .. + }) = cfg + { + if dev_bind_loopback || dev_nexus_address.is_some() { + slog::warn!( + &self.log, + "using development metrics configuration overrides!"; + "nexus_address" => ?dev_nexus_address, + "bind_loopback" => dev_bind_loopback, + ); + } + (dev_nexus_address, dev_bind_loopback) + } else { + (None, false) + }; + let id = self.registry.producer_id(); + + let mut current_server: Option = None; + loop { + let current_ip = current_server.as_ref().map(|s| s.address().ip()); + let mut new_ip = None; + for addr in self.addrs.borrow_and_update().iter() { + let &ip = addr.ip(); + // Don't bind the metrics endpoint on ::1 + if ip.is_loopback() && !bind_loopback { + continue; + } + // If our current address is contained in the new addresses, + // no need to rebind. + if current_ip == Some(IpAddr::V6(ip)) { + new_ip = None; + break; + } else { + new_ip = Some(ip); + } + } + + if let Some(ip) = new_ip { + slog::debug!( + &self.log, + "rebinding producer server on new IP"; + "new_ip" => ?ip, + "current_ip" => ?current_ip, + "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL, + "producer_id" => ?id, + ); + let server = { + // Listen on any available socket, using the provided underlay IP. + let address = SocketAddr::new(ip.into(), 0); + + let server_info = ProducerEndpoint { + id, + kind: ProducerKind::ManagementGateway, + address, + interval: OXIMETER_COLLECTION_INTERVAL, + }; + let config = oximeter_producer::Config { + server_info, + registration_address, + request_body_max_bytes: METRIC_REQUEST_MAX_SIZE, + log: oximeter_producer::LogConfig::Logger( + self.log.clone(), + ), + }; + oximeter_producer::Server::with_registry( + self.registry.clone(), + &config, + ) + .context("failed to start producer server")? 
+ }; + + slog::info!( + &self.log, + "bound metrics producer server"; + "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL, + "producer_id" => ?id, + "address" => %server.address(), + ); + + if let Some(old_server) = current_server.replace(server) { + let old_addr = old_server.address(); + if let Err(error) = old_server.close().await { + slog::error!( + &self.log, + "failed to close old metrics producer server"; + "address" => %old_addr, + "error" => %error, + ); + } else { + slog::debug!( + &self.log, + "old metrics producer server shut down"; + "address" => %old_addr, + ) + } + } + } + + // Wait for a subsequent address change. + self.addrs.changed().await?; + } + } +} + +impl ComponentMetrics { + fn poll_error( + &mut self, + error_str: &'static str, + ) -> Result { + let datum = self + .poll_errors + .entry(error_str) + .or_insert_with(|| Cumulative::new(0)); + datum.increment(); + Sample::new( + &self.target, + &metric::PollErrorCount { + error: Cow::Borrowed(error_str), + datum: *datum, + }, + ) + } +} + +fn comms_error_str(error: CommunicationError) -> &'static str { + // TODO(eliza): a bunch of these probably can't be returned by the specific + // operations we try to do. It could be good to make the methods this code + // calls return a smaller enum of just the errors it might actually + // encounter? Figure this out later. + match error { + CommunicationError::NoSpDiscovered => "no_sp_discovered", + CommunicationError::InterfaceError(_) => "interface", + CommunicationError::ScopeIdChangingFrequently { .. } => { + "scope_id_changing_frequently" + } + CommunicationError::JoinMulticast { .. } => "join_multicast", + CommunicationError::UdpSendTo { .. } => "udp_send_to", + CommunicationError::UdpRecv(_) => "udp_recv", + CommunicationError::Deserialize { .. } => "deserialize", + CommunicationError::ExhaustedNumAttempts(_) => "exhausted_num_attempts", + CommunicationError::BadResponseType { .. } => "bad_response_type", + CommunicationError::SpError { .. } => "sp_error", + CommunicationError::BogusSerialConsoleState { .. } => { + "bogus_serial_console_state" + } + CommunicationError::VersionMismatch { .. } => { + "protocol_version_mismatch" + } + CommunicationError::TlvDeserialize { .. } => "tlv_deserialize", + CommunicationError::TlvDecode(_) => "tlv_decode", + CommunicationError::TlvPagination { .. } => "tlv_pagination", + CommunicationError::IpccKeyLookupValueTooLarge => { + "ipcc_key_lookup_value_too_large" + } + CommunicationError::UnexpectedTrailingData(_) => { + "unexpected_trailing_data" + } + CommunicationError::BadTrailingDataSize { .. 
} => { + "bad_trailing_data_size" + } + } +} diff --git a/gateway/tests/integration_tests/component_list.rs b/gateway/tests/integration_tests/component_list.rs index ec876c0783..993dcc9e93 100644 --- a/gateway/tests/integration_tests/component_list.rs +++ b/gateway/tests/integration_tests/component_list.rs @@ -57,7 +57,71 @@ async fn component_list() { capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS .bits(), presence: SpComponentPresence::Failed, - } + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-2".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE Southeast temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-6".to_string(), + device: "at24csw080".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A VPD".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-7".to_string(), + device: "max5970".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A hot swap controller" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-8".to_string(), + device: "nvme_bmc".to_string(), + serial_number: None, + description: "FAKE U.2 A NVMe Basic Management Command" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-39".to_string(), + device: "tmp451".to_string(), + serial_number: None, + description: "FAKE T6 temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-53".to_string(), + device: "max31790".to_string(), + serial_number: None, + description: "FAKE Fan controller".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, ] ); @@ -67,14 +131,89 @@ async fn component_list() { assert_eq!( resp.components, - &[SpComponentInfo { - component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - serial_number: None, - description: "FAKE host cpu".to_string(), - capabilities: 0, - presence: SpComponentPresence::Present, - },] + &[ + SpComponentInfo { + component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + serial_number: None, + description: "FAKE host cpu".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-0".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), 
+ capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-2".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE Southeast temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-6".to_string(), + device: "at24csw080".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A VPD".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-7".to_string(), + device: "max5970".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A hot swap controller" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-8".to_string(), + device: "nvme_bmc".to_string(), + serial_number: None, + description: "FAKE U.2 A NVMe Basic Management Command" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-39".to_string(), + device: "tmp451".to_string(), + serial_number: None, + description: "FAKE T6 temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-53".to_string(), + device: "max31790".to_string(), + serial_number: None, + description: "FAKE Fan controller".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + ] ); // Get the component list for switch 0. diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 74a7356adb..c2fab2de5a 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -22,6 +22,7 @@ impl_enum_type!( #[diesel(sql_type = ProducerKindEnum)] pub enum ProducerKind; + ManagementGateway => b"management_gateway" SledAgent => b"sled_agent" Service => b"service" Instance => b"instance" @@ -30,6 +31,9 @@ impl_enum_type!( impl From for ProducerKind { fn from(kind: internal::nexus::ProducerKind) -> Self { match kind { + internal::nexus::ProducerKind::ManagementGateway => { + ProducerKind::ManagementGateway + } internal::nexus::ProducerKind::SledAgent => ProducerKind::SledAgent, internal::nexus::ProducerKind::Service => ProducerKind::Service, internal::nexus::ProducerKind::Instance => ProducerKind::Instance, @@ -40,6 +44,9 @@ impl From for ProducerKind { impl From for internal::nexus::ProducerKind { fn from(kind: ProducerKind) -> Self { match kind { + ProducerKind::ManagementGateway => { + internal::nexus::ProducerKind::ManagementGateway + } ProducerKind::SledAgent => internal::nexus::ProducerKind::SledAgent, ProducerKind::Service => internal::nexus::ProducerKind::Service, ProducerKind::Instance => internal::nexus::ProducerKind::Instance, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index d0542874fb..aef95e6d53 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. 
Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(90, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(91, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(91, "add-management-gateway-producer-kind"), KnownVersion::new(90, "lookup-bgp-config-by-asn"), KnownVersion::new(89, "collapse_lldp_settings"), KnownVersion::new(88, "route-local-pref"), diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 3b808984ae..9f4652c2da 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -23,8 +23,11 @@ use nexus_types::external_api::views::OxqlQueryResult; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use oximeter::types::Datum; +use oximeter::types::FieldValue; use oximeter::types::Measurement; use oximeter::TimeseriesSchema; +use std::borrow::Borrow; +use std::collections::HashMap; use uuid::Uuid; pub async fn query_for_metrics( @@ -344,7 +347,6 @@ async fn test_instance_watcher_metrics( ); }}; } - use oximeter::types::FieldValue; const INSTANCE_ID_FIELD: &str = "instance_id"; const STATE_FIELD: &str = "state"; const STATE_STARTING: &str = "starting"; @@ -589,6 +591,183 @@ async fn test_instance_watcher_metrics( assert_gte!(ts2_running, 2); } +#[nexus_test] +async fn test_mgs_metrics( + cptestctx: &ControlPlaneTestContext, +) { + // Make a MGS + let (mut mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + let mgs = { + // munge the already-parsed MGS config file to point it at the test + // Nexus' address. + mgs_config.metrics = Some(gateway_test_utils::setup::MetricsConfig { + disabled: false, + dev_bind_loopback: true, + dev_nexus_address: Some(cptestctx.internal_client.bind_address), + }); + gateway_test_utils::setup::test_setup_with_config( + "test_mgs_metrics", + gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + None, + ) + .await + }; + + // Let's look at all the simulated SP components in the config file which + // have sensor readings, so we can assert that there are timeseries for all + // of them. + let all_sp_configs = { + let gimlet_configs = + sp_sim_config.simulated_sps.gimlet.iter().map(|g| &g.common); + let sidecar_configs = + sp_sim_config.simulated_sps.sidecar.iter().map(|s| &s.common); + gimlet_configs.chain(sidecar_configs) + }; + // XXX(eliza): yes, this code is repetitive. We could probably make it a + // little elss ugly with nested hash maps, but like...I already wrote it, so + // you don't have to. :) + // + // TODO(eliza): presently, we just expect that the number of timeseries for + // each serial number and sensor type lines up. If we wanted to be *really* + // fancy, we could also assert that all the component IDs, component kinds, + // and measurement values line up with the config. But, honestly, it's + // pretty unlikely that a bug in MGS' sensor metrics subsystem would mess + // that up --- the most important thing is just to make sure that the sensor + // data is *present*, as that should catch most regressions. 
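+ // Count the expected number of sensors of each measurement kind, per
+ // SP serial number; `check_all_timeseries_present` (below) compares
+ // these counts against the timeseries Oximeter actually collected.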
+ let mut temp_sensors = HashMap::new(); + let mut current_sensors = HashMap::new(); + let mut voltage_sensors = HashMap::new(); + let mut power_sensors = HashMap::new(); + let mut input_voltage_sensors = HashMap::new(); + let mut input_current_sensors = HashMap::new(); + let mut fan_speed_sensors = HashMap::new(); + for sp in all_sp_configs { + let mut temp = 0; + let mut current = 0; + let mut voltage = 0; + let mut input_voltage = 0; + let mut input_current = 0; + let mut power = 0; + let mut speed = 0; + for component in &sp.components { + for sensor in &component.sensors { + use gateway_messages::measurement::MeasurementKind as Kind; + match sensor.def.kind { + Kind::Temperature => temp += 1, + Kind::Current => current += 1, + Kind::Voltage => voltage += 1, + Kind::InputVoltage => input_voltage += 1, + Kind::InputCurrent => input_current += 1, + Kind::Speed => speed += 1, + Kind::Power => power += 1, + } + } + } + temp_sensors.insert(sp.serial_number.clone(), temp); + current_sensors.insert(sp.serial_number.clone(), current); + voltage_sensors.insert(sp.serial_number.clone(), voltage); + input_voltage_sensors.insert(sp.serial_number.clone(), input_voltage); + input_current_sensors.insert(sp.serial_number.clone(), input_current); + fan_speed_sensors.insert(sp.serial_number.clone(), speed); + power_sensors.insert(sp.serial_number.clone(), power); + } + + async fn check_all_timeseries_present( + cptestctx: &ControlPlaneTestContext, + name: &str, + expected: HashMap, + ) { + let metric_name = format!("hardware_component:{name}"); + eprintln!("\n=== checking timeseries for {metric_name} ===\n"); + + if expected.values().all(|&v| v == 0) { + eprintln!( + "-> SP sim config contains no {name} sensors, skipping it" + ); + return; + } + + let table = timeseries_query(&cptestctx, &format!("get {metric_name}")) + .await + .into_iter() + .find(|t| t.name() == metric_name); + let table = match table { + Some(table) => table, + None => panic!("missing table for {metric_name}"), + }; + + let mut found = expected + .keys() + .map(|serial| (serial.clone(), 0)) + .collect::>(); + for timeseries in table.timeseries() { + let fields = ×eries.fields; + let n_points = timeseries.points.len(); + assert!( + n_points > 0, + "{metric_name} timeseries {fields:?} should have points" + ); + let serial_str: &str = match timeseries.fields.get("chassis_serial") + { + Some(FieldValue::String(s)) => s.borrow(), + Some(x) => panic!( + "{metric_name} `chassis_serial` field should be a string, but got: {x:?}" + ), + None => { + panic!("{metric_name} timeseries should have a `chassis_serial` field") + } + }; + if let Some(count) = found.get_mut(serial_str) { + *count += 1; + } else { + panic!( + "{metric_name} timeseries had an unexpected chassis serial \ + number {serial_str:?} (not in the config file)", + ); + } + } + + eprintln!("-> {metric_name}: found timeseries: {found:#?}"); + assert_eq!( + found, expected, + "number of {metric_name} timeseries didn't match expected in {table:#?}", + ); + eprintln!("-> okay, looks good!"); + } + + // Wait until the MGS registers as a producer with Oximeter. + wait_for_producer(&cptestctx.oximeter, &mgs.gateway_id).await; + + // ...and collect its samples. 
+ cptestctx.oximeter.force_collect().await; + + check_all_timeseries_present(&cptestctx, "temperature", temp_sensors).await; + check_all_timeseries_present(&cptestctx, "voltage", voltage_sensors).await; + check_all_timeseries_present(&cptestctx, "current", current_sensors).await; + check_all_timeseries_present(&cptestctx, "power", power_sensors).await; + check_all_timeseries_present( + &cptestctx, + "input_voltage", + input_voltage_sensors, + ) + .await; + check_all_timeseries_present( + &cptestctx, + "input_current", + input_current_sensors, + ) + .await; + check_all_timeseries_present(&cptestctx, "fan_speed", fan_speed_sensors) + .await; + + // Because the `ControlPlaneTestContext` isn't managing the MGS we made for + // this test, we are responsible for removing its logs. + mgs.logctx.cleanup_successful(); +} + /// Wait until a producer is registered with Oximeter. /// /// This blocks until the producer is registered, for up to 60s. It panics if diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs index 8314d22173..6e482bc1ad 100644 --- a/nexus/tests/integration_tests/sp_updater.rs +++ b/nexus/tests/integration_tests/sp_updater.rs @@ -434,9 +434,23 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { #[tokio::test] async fn test_sp_updater_delivers_progress() { // Start MGS + Sim SP. - let mgstestctx = - mgs_setup::test_setup("test_sp_updater_delivers_progress", SpPort::One) - .await; + let mgstestctx = { + let (mut mgs_config, sp_sim_config) = mgs_setup::load_test_config(); + // Enabling SP metrics collection makes this alread-flaky test even + // flakier, so let's just turn it off. + // TODO(eliza): it would be nice if we didn't have to disable metrics in + // this test, so that we can better catch regressions that could be + // introduced by the metrics subsystem... + mgs_config.metrics.get_or_insert_with(Default::default).disabled = true; + mgs_setup::test_setup_with_config( + "test_sp_updater_delivers_progress", + SpPort::One, + mgs_config, + &sp_sim_config, + None, + ) + .await + }; // Configure an MGS client. 
let mut mgs_clients = diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 54b4822e51..111bd552d0 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4443,6 +4443,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] }, diff --git a/openapi/nexus.json b/openapi/nexus.json index 2a8c227c64..f6d140ed05 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19934,6 +19934,7 @@ "nanoseconds", "volts", "amps", + "watts", "degrees_celsius" ] }, diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f596ac6ee6..327351d961 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -277,6 +277,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] } diff --git a/oximeter/oximeter/schema/hardware-component.toml b/oximeter/oximeter/schema/hardware-component.toml new file mode 100644 index 0000000000..30a1d6510f --- /dev/null +++ b/oximeter/oximeter/schema/hardware-component.toml @@ -0,0 +1,183 @@ +format_version = 1 + +[target] +name = "hardware_component" +description = "A hardware component on a compute sled, switch, or power shelf" +authz_scope = "fleet" +versions = [ + { version = 1, fields = [ + "rack_id", + "slot", + "chassis_kind", + "chassis_serial", + "chassis_model", + "chassis_revision", + "hubris_archive_id", + "gateway_id", + "component_kind", + "component_id", + "description", + ]} +] + +[fields.rack_id] +type = "uuid" +description = "ID of the rack on which this measurement was recorded." + +[fields.slot] +type = "u32" +description = """ +The cubby number or switch slot of the service processor reporting the \ +measurement""" + +[fields.chassis_model] +type = "string" +description = "Model number of the sled, switch, or power shelf" + +[fields.chassis_revision] +type = "u32" +description = "Revision number of the sled, switch, or power shelf" + +[fields.chassis_serial] +type = "string" +description = "Serial number of the sled, switch, or power shelf" + +[fields.hubris_archive_id] +type = "string" +description = """ +Hubris firmware archive ID of the service processor when the measurement \ +was recorded.""" + +[fields.gateway_id] +type = "uuid" +description = """ +ID of the Management Gateway Service process which recorded the measurement.""" + +[fields.chassis_kind] +type = "string" +description = """ +What kind of thing the component resides on. + +This will be one of 'sled', for components on compute sleds; 'switch', for \ +components on rack switches; or 'power', for components on power shelves.""" + +[fields.component_id] +type = "string" +description = """ +The service processor component ID uniquely identifying the hardware \ +component on the sled, switch, or power shelf.""" + +[fields.component_kind] +type = "string" +description = "What type of hardware component this thing is." + +[fields.description] +type = "string" +description = """ +A human-readable description of the hardware component. This may include \ +its location or role in the system (e.g. 
a DIMM's number, or a temperature \ +sensor's location).""" + +[fields.sensor] +type = "string" +description = """The name of a sensor that recorded a sensor reading.""" + +[fields.error] +type = "string" +description = "The kind of sensor error that occurred" + +[fields.sensor_kind] +type = "string" +description = """ +Which kind of sensor could not be read due to a sensor error. + +This will be one of 'temperature', 'current', 'power', 'voltage', \ +'input_current', 'input_voltage', or 'fan_speed' (the same names as \ +the metrics emitted by these sensors when they are read successfully).""" + +[[metrics]] +name = "temperature" +description = "A temperature reading from a hardware component." +units = "degrees_celsius" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "current" +description = "Output current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "power" +description = "Power reading, in watts" +units = "watts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "voltage" +description = "Output voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_current" +description = "Input electric current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_voltage" +description = "Input electric voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + + +[[metrics]] +name = "fan_speed" +description = "A fan speed measurement, in rotations per minute" +units = "rpm" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "sensor_error_count" +description = "Cumulative count of errors reported by a sensor" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["sensor", "error", "sensor_kind"]} +] + +[[metrics]] +name = "poll_error_count" +description = """ +Cumulative count of errors encountered whilst polling a component's sensors. + +Unlike the `sensor_error_count` metric, this counts errors encountered by \ +the management gateway while polling the component, rather than errors \ +reported by the component itself.""" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["error"] } +] diff --git a/oximeter/schema/src/codegen.rs b/oximeter/schema/src/codegen.rs index c46c25c97d..1e6e352c15 100644 --- a/oximeter/schema/src/codegen.rs +++ b/oximeter/schema/src/codegen.rs @@ -512,6 +512,7 @@ fn quote_units(units: Units) -> TokenStream { } Units::Amps => quote! { ::oximeter::schema::Units::Amps }, Units::Volts => quote! { ::oximeter::schema::Units::Volts }, + Units::Watts => quote! { ::oximeter::schema::Units::Watts }, Units::DegreesCelsius => { quote! { ::oximeter::schema::Units::DegreesCelsius } } diff --git a/oximeter/types/src/schema.rs b/oximeter/types/src/schema.rs index e06e6e2b57..135c77462a 100644 --- a/oximeter/types/src/schema.rs +++ b/oximeter/types/src/schema.rs @@ -189,6 +189,7 @@ pub enum Units { Nanoseconds, Volts, Amps, + Watts, DegreesCelsius, /// Rotations per minute. 
Rpm, diff --git a/schema/crdb/add-management-gateway-producer-kind/up.sql b/schema/crdb/add-management-gateway-producer-kind/up.sql new file mode 100644 index 0000000000..e872278e2f --- /dev/null +++ b/schema/crdb/add-management-gateway-producer-kind/up.sql @@ -0,0 +1,2 @@ +ALTER TYPE omicron.public.producer_kind + ADD VALUE IF NOT EXISTS 'management_gateway' AFTER 'instance'; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index baef38e44f..1457532c49 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1334,7 +1334,9 @@ CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM ( -- removed). 'service', -- A Propolis VMM for an instance in the omicron.public.instance table - 'instance' + 'instance', + -- A management gateway service on a scrimlet. + 'management_gateway' ); /* @@ -4212,7 +4214,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '90.0.0', NULL) + (TRUE, NOW(), NOW(), '91.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 4a60d7843b0d5763021ce71b6a3410f8de0859e1 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 01:11:06 +0000 Subject: [PATCH 11/22] Update dendrite to 76c735d (#6434) Updated dendrite to commit 76c735d. Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 125861f610..cab3c1877e 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -710,8 +710,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" output.type = "zone" output.intermediate_only = true @@ -737,8 +737,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "ad02632713a57fe8c5371316320309e1fad52f0ce2f7e6f768859aa94dfbb1d9" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "0e68ea8fbb609bbe2c643fc8cadc0197bd641006a323149159893bfd0d816805" output.type = "zone" output.intermediate_only = true @@ -757,8 +757,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "23bca3873cdb0441cd18c0cf071b86d49755be06837479661876ac95d2f10f27" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "45484d6d8557a0656984d0e6db879589d841d43ab6a11116cb1da314b928a425" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 2d0f4d4887..a9e13c083a 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="21b16567f28e103f145cd18d53fac6958429c4ff" +COMMIT="76c735d472e3badaeca08982e22496fccb1ce210" SHA2="3a54305ab4b1270c9a5fb0603f481fce199f3767c174a03559ff642f7f44687e" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index e3d16d779c..075ead4752 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" -CIDL_SHA256_LINUX_DPD="6aa070ab0590aca7458f2555012acc5571e61b3b1523de862d4bbb04b9d34135" +CIDL_SHA256_ILLUMOS="3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" +CIDL_SHA256_LINUX_DPD="5c70318c6feb7595bdbf41d8b33827100d28fcdf34ad738a5af10e0411463f64" CIDL_SHA256_LINUX_SWADM="e1e35784538a4fdd76dc257cc636ac3f43f7ef2842dabfe981f17f8ce6b8e1a2" From 219121a5a5b918694f13f3c3a505a1d067d2476f Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 26 Aug 2024 12:46:52 -0700 Subject: [PATCH 12/22] `omdb nexus blueprints target set --diff` (#6435) --- dev-tools/omdb/src/bin/omdb/nexus.rs | 48 ++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index ede2743404..db9e2cba52 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -51,6 +51,7 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::str::FromStr; use tabled::Tabled; +use tokio::sync::OnceCell; use uuid::Uuid; /// Arguments to the "omdb nexus" subcommand @@ -244,6 +245,10 @@ struct BlueprintTargetSetArgs { blueprint_id: Uuid, /// whether this blueprint should be enabled enabled: BlueprintTargetSetEnabled, + /// if specified, diff against the current target and wait for confirmation + /// before proceeding + #[clap(long)] + diff: bool, } #[derive(Debug, Clone, Copy, ValueEnum)] @@ -1722,6 +1727,38 @@ async fn cmd_nexus_blueprints_target_set( args: &BlueprintTargetSetArgs, _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { + // Helper to only fetch the current target once. We may need it immediately + // if `args.diff` is true, or later if `args.enabled` is "inherit" (or + // both). + let current_target = OnceCell::new(); + let get_current_target = || async { + current_target + .get_or_try_init(|| client.blueprint_target_view()) + .await + .context("failed to fetch current target blueprint") + }; + + if args.diff { + let current_target = get_current_target().await?; + let blueprint1 = client + .blueprint_view(¤t_target.target_id) + .await + .context("failed to fetch target blueprint")? 
+ .into_inner(); + let blueprint2 = + client.blueprint_view(&args.blueprint_id).await.with_context( + || format!("fetching blueprint {}", args.blueprint_id), + )?; + let diff = blueprint2.diff_since_blueprint(&blueprint1); + println!("{}", diff.display()); + println!( + "\nDo you want to make {} the target blueprint?", + args.blueprint_id + ); + let mut prompt = ConfirmationPrompt::new(); + prompt.read_and_validate("y/N", "y")?; + } + let enabled = match args.enabled { BlueprintTargetSetEnabled::Enabled => true, BlueprintTargetSetEnabled::Disabled => false, @@ -1734,12 +1771,11 @@ async fn cmd_nexus_blueprints_target_set( // operator. (In the case of the current target blueprint being changed // entirely, that will result in a failure to set the current target // below, because its parent will no longer be the current target.) - BlueprintTargetSetEnabled::Inherit => client - .blueprint_target_view() - .await - .map(|current| current.into_inner().enabled) - .context("failed to fetch current target blueprint")?, + BlueprintTargetSetEnabled::Inherit => { + get_current_target().await?.enabled + } }; + client .blueprint_target_set(&nexus_client::types::BlueprintTargetSet { target_id: args.blueprint_id, @@ -1966,7 +2002,7 @@ impl ConfirmationPrompt { { Ok(input) } else { - bail!("expungement aborted") + bail!("operation aborted") } } From e434307e0dd3158a41aca81be0a49ff98eac3a78 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 26 Aug 2024 16:58:42 -0400 Subject: [PATCH 13/22] [#5333 5/6] Region snapshot replacement step (#6350) After the region snapshot replacement start saga finishes, the snapshot's volume is no longer in a degraded state: the requested read-only region was cloned to a new region, and the reference was replaced in the construction request. Any disk that is now created using the snapshot as a source will work without issues. The problem now is volumes that still reference the replaced read-only region, and any Upstairs constructed from a VCR that references that region: disks created using a snapshot as a source will clone the snapshot volume and use that as the read-only parent for the new disk. This commit adds a new background task that finds all volumes that reference the replaced read-only region, creates a "region snapshot replacement step" record for them, and triggers the region snapshot replacement step saga. This is a much less involved process than region replacement: no continuous monitoring and driving is required, only a single best effort replace call to any relevant propolis. No pantry notification is required because there's nothing that currently attaches a volume to a pantry other than bulk import, and that's not relevant because those volumes do not have read-only parents. This commit also adds a garbage collection for these new "step" records, as they also stash the old snapshot target in a volume for later deletion. 
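For illustration, each activation of the new background task boils down to
something like the following. This is a deliberately simplified,
self-contained sketch: the real task operates on CRDB records and steno
sagas, and every type and function name below is an invented stand-in, not
the actual nexus code.

    // Simplified sketch only -- all types here are illustrative stand-ins
    // for the real nexus-db models and saga machinery.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct VolumeId(u64);

    /// A "region snapshot replacement step" record for one affected volume.
    #[derive(Debug)]
    struct StepRecord {
        volume_id: VolumeId,
    }

    /// One activation: find volumes that still reference the replaced
    /// read-only region, create a step record for each, and kick off the
    /// step saga (a single best-effort replace call, with no continuous
    /// monitoring or driving required).
    fn activate(volumes: &[(VolumeId, bool)]) -> Vec<StepRecord> {
        let mut created = Vec::new();
        for &(volume_id, references_old_region) in volumes {
            if !references_old_region {
                continue;
            }
            let record = StepRecord { volume_id };
            // In the real task this is where the step saga would be
            // triggered; the garbage-collect saga later deletes the old
            // snapshot target stashed alongside the record.
            created.push(record);
        }
        created
    }

    fn main() {
        let volumes =
            [(VolumeId(1), true), (VolumeId(2), false), (VolumeId(3), true)];
        println!("step records created: {:?}", activate(&volumes));
    }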
--- dev-tools/omdb/src/bin/omdb/nexus.rs | 41 + dev-tools/omdb/tests/env.out | 15 + dev-tools/omdb/tests/successes.out | 25 + nexus-config/src/nexus_config.rs | 16 + nexus/examples/config-second.toml | 1 + nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 21 +- nexus/src/app/background/tasks/mod.rs | 1 + .../tasks/region_snapshot_replacement_step.rs | 775 ++++++++++++++++++ nexus/src/app/sagas/mod.rs | 8 + .../sagas/region_snapshot_replacement_step.rs | 603 ++++++++++++++ ...apshot_replacement_step_garbage_collect.rs | 233 ++++++ nexus/tests/config.test.toml | 1 + nexus/types/src/internal_api/background.rs | 10 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 16 files changed, 1752 insertions(+), 1 deletion(-) create mode 100644 nexus/src/app/background/tasks/region_snapshot_replacement_step.rs create mode 100644 nexus/src/app/sagas/region_snapshot_replacement_step.rs create mode 100644 nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index db9e2cba52..d45865b4a7 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -36,6 +36,7 @@ use nexus_types::internal_api::background::LookupRegionPortStatus; use nexus_types::internal_api::background::RegionReplacementDriverStatus; use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DemoSagaUuid; @@ -1509,6 +1510,46 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "region_snapshot_replacement_step" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total step records created ok: {}", + status.step_records_created_ok.len(), + ); + for line in &status.step_records_created_ok { + println!(" > {line}"); + } + + println!( + " total step garbage collect saga invoked ok: {}", + status.step_garbage_collect_invoked_ok.len(), + ); + for line in &status.step_garbage_collect_invoked_ok { + println!(" > {line}"); + } + + println!( + " total step saga invoked ok: {}", + status.step_invoked_ok.len(), + ); + for line in &status.step_invoked_ok { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index ec407cd123..2774a5d734 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -135,6 +135,11 @@ task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process +task: "region_snapshot_replacement_step" + detect what volumes were affected by a region snapshot replacement, and run + the step saga for them + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -292,6 +297,11 @@ task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process +task: 
"region_snapshot_replacement_step" + detect what volumes were affected by a region snapshot replacement, and run + the step saga for them + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -436,6 +446,11 @@ task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process +task: "region_snapshot_replacement_step" + detect what volumes were affected by a region snapshot replacement, and run + the step saga for them + + task: "saga_recovery" recovers sagas assigned to this Nexus diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index e939bfa864..757b4e8888 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -351,6 +351,11 @@ task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process +task: "region_snapshot_replacement_step" + detect what volumes were affected by a region snapshot replacement, and run + the step saga for them + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -606,6 +611,16 @@ task: "region_snapshot_replacement_start" total start saga invoked ok: 0 errors: 0 +task: "region_snapshot_replacement_step" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total step records created ok: 0 + total step garbage collect saga invoked ok: 0 + total step saga invoked ok: 0 + errors: 0 + task: "saga_recovery" configured period: every 10m currently executing: no @@ -1014,6 +1029,16 @@ task: "region_snapshot_replacement_start" total start saga invoked ok: 0 errors: 0 +task: "region_snapshot_replacement_step" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total step records created ok: 0 + total step garbage collect saga invoked ok: 0 + total step saga invoked ok: 0 + errors: 0 + task: "saga_recovery" configured period: every 10m currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index f6e60bb558..b3d189691c 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -396,6 +396,8 @@ pub struct BackgroundTaskConfig { /// configuration for region snapshot replacement garbage collection pub region_snapshot_replacement_garbage_collection: RegionSnapshotReplacementGarbageCollectionConfig, + /// configuration for region snapshot replacement step task + pub region_snapshot_replacement_step: RegionSnapshotReplacementStepConfig, } #[serde_as] @@ -648,6 +650,14 @@ pub struct RegionSnapshotReplacementGarbageCollectionConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegionSnapshotReplacementStepConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -897,6 +907,7 @@ mod test { lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 region_snapshot_replacement_garbage_collection.period_secs = 30 + region_snapshot_replacement_step.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -1067,6 +1078,10 @@ mod test { 
RegionSnapshotReplacementGarbageCollectionConfig { period_secs: Duration::from_secs(30), }, + region_snapshot_replacement_step: + RegionSnapshotReplacementStepConfig { + period_secs: Duration::from_secs(30), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -1145,6 +1160,7 @@ mod test { lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 region_snapshot_replacement_garbage_collection.period_secs = 30 + region_snapshot_replacement_step.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index c87e1255b5..e63b155fc6 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -141,6 +141,7 @@ saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f844adccbe..bca3f7f2c4 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -127,6 +127,7 @@ saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 37c276fa07..ae4309d8f9 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -110,6 +110,7 @@ use super::tasks::region_replacement; use super::tasks::region_replacement_driver; use super::tasks::region_snapshot_replacement_garbage_collect::*; use super::tasks::region_snapshot_replacement_start::*; +use super::tasks::region_snapshot_replacement_step::*; use super::tasks::saga_recovery; use super::tasks::service_firewall_rules; use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; @@ -165,6 +166,7 @@ pub struct BackgroundTasks { pub task_lookup_region_port: Activator, pub task_region_snapshot_replacement_start: Activator, pub task_region_snapshot_replacement_garbage_collection: Activator, + pub task_region_snapshot_replacement_step: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. These background tasks are implementation details as far as @@ -249,6 +251,7 @@ impl BackgroundTasksInitializer { task_region_snapshot_replacement_start: Activator::new(), task_region_snapshot_replacement_garbage_collection: Activator::new( ), + task_region_snapshot_replacement_step: Activator::new(), task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), @@ -312,6 +315,7 @@ impl BackgroundTasksInitializer { task_lookup_region_port, task_region_snapshot_replacement_start, task_region_snapshot_replacement_garbage_collection, + task_region_snapshot_replacement_step, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. 
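For context on the `Activator` handles created in the hunk above: an
activator lets any part of Nexus wake a background task immediately, while
the driver also runs the task on its configured periodic timer. A minimal
sketch of that wake-up pattern using `tokio::sync::Notify` follows; this
illustrates the idea only and is not Omicron's actual `Activator`
implementation:

    use std::sync::Arc;
    use std::time::Duration;
    use tokio::sync::Notify;

    // Illustrative stand-in: a cloneable handle that can wake a periodic
    // background task ahead of its timer.
    #[derive(Clone)]
    struct Activator(Arc<Notify>);

    impl Activator {
        fn new() -> Self {
            Activator(Arc::new(Notify::new()))
        }

        fn activate(&self) {
            self.0.notify_one();
        }
    }

    async fn run_task(activator: Activator, period: Duration) {
        loop {
            // Wake on whichever comes first: the periodic timer or an
            // explicit activation from elsewhere.
            tokio::select! {
                _ = tokio::time::sleep(period) => {}
                _ = activator.0.notified() => {}
            }
            // ... run one activation of the background task here ...
        }
    }

The `TaskDefinition` registered in the next hunk binds one such activator
to the task implementation and its configured period.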
@@ -761,7 +765,7 @@ impl BackgroundTasksInitializer { .region_snapshot_replacement_garbage_collection .period_secs, task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new( - datastore, + datastore.clone(), sagas.clone(), )), opctx: opctx.child(BTreeMap::new()), @@ -769,6 +773,21 @@ impl BackgroundTasksInitializer { activator: task_region_snapshot_replacement_garbage_collection, }); + driver.register(TaskDefinition { + name: "region_snapshot_replacement_step", + description: + "detect what volumes were affected by a region snapshot \ + replacement, and run the step saga for them", + period: config.region_snapshot_replacement_step.period_secs, + task_impl: Box::new(RegionSnapshotReplacementFindAffected::new( + datastore, + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_step, + }); + driver } } diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index 7ba68d0b80..6089ba8d65 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -27,6 +27,7 @@ pub mod region_replacement; pub mod region_replacement_driver; pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; +pub mod region_snapshot_replacement_step; pub mod saga_recovery; pub mod service_firewall_rules; pub mod sync_service_zone_nat; diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs new file mode 100644 index 0000000000..d78e304b75 --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs @@ -0,0 +1,775 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting volumes affected by a region snapshot +//! replacement, creating records for those, and triggering the "step" saga for +//! them. +//! +//! After the region snapshot replacement start saga finishes, the snapshot's +//! volume is no longer in a degraded state: the requested read-only region was +//! cloned to a new region, and the reference was replaced in the construction +//! request. Any disk that is now created using the snapshot as a source will +//! work without issues. +//! +//! The problem now is volumes that still reference the replaced read-only +//! region, and any Upstairs constructed from a VCR that references that region. +//! This task's responsibility is to find all volumes that reference the +//! replaced read-only region, create a record for them, and trigger the region +//! snapshot replacement step saga. This is a much less involved process than +//! region replacement: no continuous monitoring and driving is required. See +//! the "region snapshot replacement step" saga's docstring for more +//! information. 
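+//!
+//! In outline, each activation of this task runs three phases in order
+//! (see `activate` below): first, garbage collect any step records whose
+//! step saga has already completed, deleting the temporary volume that
+//! stashes the old snapshot target; second, create new step records for
+//! volumes that still reference the replaced region snapshot; third,
+//! invoke the step saga for any records still in state "Requested".
+//! Cleaning up first matters: the stash volumes themselves reference the
+//! old snapshot target, so skipping their deletion would cause this task
+//! to re-find them on every activation.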
+ +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_step::*; +use crate::app::sagas::region_snapshot_replacement_step_garbage_collect::*; +use crate::app::sagas::NexusSaga; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacementStep; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::identity::Asset; +use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementFindAffected { + datastore: Arc, + sagas: Arc, +} + +impl RegionSnapshotReplacementFindAffected { + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementFindAffected { datastore, sagas } + } + + async fn send_start_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacementStep, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_snapshot_replacement_step::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + request, + }; + + let saga_dag = SagaRegionSnapshotReplacementStep::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + async fn send_garbage_collect_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacementStep, + ) -> Result<(), omicron_common::api::external::Error> { + let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id + else { + // This state is illegal! + let s = format!( + "request {} old snapshot volume id is None!", + request.id, + ); + + return Err(omicron_common::api::external::Error::internal_error( + &s, + )); + }; + + let params = + sagas::region_snapshot_replacement_step_garbage_collect::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + old_snapshot_volume_id, + request, + }; + + let saga_dag = + SagaRegionSnapshotReplacementStepGarbageCollect::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + async fn clean_up_region_snapshot_replacement_step_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStepStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .region_snapshot_replacement_steps_requiring_garbage_collection( + opctx, + ) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!("querying for steps to collect failed! 
{e}"); + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = + self.send_garbage_collect_request(opctx, request.clone()).await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement step garbage \ + collect request ok for {request_id}" + ); + + info!( + &log, + "{s}"; + "request.volume_id" => %request.volume_id, + "request.old_snapshot_volume_id" => ?request.old_snapshot_volume_id, + ); + status.step_garbage_collect_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "sending region snapshot replacement step garbage \ + collect request failed: {e}", + ); + error!( + &log, + "{s}"; + "request.volume_id" => %request.volume_id, + "request.old_snapshot_volume_id" => ?request.old_snapshot_volume_id, + ); + status.errors.push(s); + } + } + } + } + + // Any request in state Running means that the target replacement has + // occurred already, meaning the region snapshot being replaced is not + // present as a target in the snapshot's volume construction request + // anymore. Any future usage of that snapshot (as a source for a disk or + // otherwise) will get a volume construction request that references the + // replacement read-only region. + // + // "step" records are created here for each volume found that still + // references the replaced region snapshot, most likely having been created + // by copying the snapshot's volume construction request before the target + // replacement occurred. These volumes also need to have target replacement + // performed, and this is captured in this "step" record. + async fn create_step_records_for_affected_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStepStatus, + ) { + let log = &opctx.log; + + // Find all region snapshot replacement requests in state "Running" + let requests = match self + .datastore + .get_running_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "get_running_region_snapshot_replacements failed: {e}", + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + // Find all volumes that reference the replaced snapshot + let region_snapshot = match self + .datastore + .region_snapshot_get( + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ) + .await + { + Ok(Some(region_snapshot)) => region_snapshot, + + Ok(None) => { + let s = format!( + "region snapshot {} {} {} not found!", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + + Err(e) => { + let s = format!( + "error querying for region snapshot {} {} {}: {e}", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + let snapshot_addr = match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + let s = format!( + "region snapshot addr {} could not be parsed: {e}", + region_snapshot.snapshot_addr, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + let volumes = match self + .datastore + .find_volumes_referencing_socket_addr(&opctx, snapshot_addr) + .await + { + Ok(volumes) => volumes, + + Err(e) => { + let s = format!("error finding referenced volumes: {e}"); + error!( + log, + "{s}"; + "request id" => ?request.id, + ); + 
status.errors.push(s); + + continue; + } + }; + + for volume in volumes { + // Any volume referencing the old socket addr needs to be + // replaced. Create a "step" record for this. + // + // Note: this function returns a conflict error if there already + // exists a step record referencing this volume ID because a + // volume repair record is also created using that volume ID, + // and only one of those can exist for a given volume at a time. + // + // Also note: this function returns a conflict error if another + // step record references this volume id in the "old snapshot + // volume id" column - this is ok! Region snapshot replacement + // step records are created for some volume id, and a null old + // snapshot volume id: + // + // volume_id: references snapshot_addr + // old_snapshot_volume_id: null + // + // The region snapshot replacement step saga will create a + // volume to stash the reference to snapshot_addr, and then call + // `volume_replace_snapshot`. This will swap snapshot_addr + // reference into the old snapshot volume for later deletion: + // + // volume_id: does _not_ reference snapshot_addr anymore + // old_snapshot_volume_id: now references snapshot_addr + // + // If `find_volumes_referencing_socket_addr` is executed before + // that volume is deleted, it will return the old snapshot + // volume id above, and then this for loop tries to make a + // region snapshot replacement step record for it! + // + // Allowing a region snapshot replacement step record to be + // created in this case would mean that (depending on when the + // functions execute), an indefinite amount of work would be + // created, continually "moving" the snapshot_addr from + // temporary volume to temporary volume. + + match self + .datastore + .create_region_snapshot_replacement_step( + opctx, + request.id, + volume.id(), + ) + .await + { + Ok(step_request_id) => { + let s = format!("created {step_request_id}"); + info!( + log, + "{s}"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + status.step_records_created_ok.push(s); + } + + Err(e) => { + let s = format!("error creating step request: {e}"); + error!( + log, + "{s}"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + status.errors.push(s); + } + } + } + } + } + + async fn invoke_step_saga_for_affected_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStepStatus, + ) { + let log = &opctx.log; + + // Once all region snapshot replacement step records have been created, + // trigger sagas as appropriate. 
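+        // For orientation: a step record moves from Requested to Running to
+        // Complete via the step saga (whose first node claims the record by
+        // storing an operating saga id), and from Complete to VolumeDeleted
+        // via the step garbage collect saga handled earlier in this
+        // activation. This function picks up the records still in Requested.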
+
+        let step_requests = match self
+            .datastore
+            .get_requested_region_snapshot_replacement_steps(opctx)
+            .await
+        {
+            Ok(step_requests) => step_requests,
+
+            Err(e) => {
+                let s = format!(
+                    "query for requested region snapshot replacement step \
+                    requests failed: {e}"
+                );
+                error!(&log, "{s}");
+                status.errors.push(s);
+
+                return;
+            }
+        };
+
+        for request in step_requests {
+            let request_id = request.id;
+
+            match self.send_start_request(opctx, request.clone()).await {
+                Ok(()) => {
+                    let s = format!(
+                        "region snapshot replacement step saga invoked ok for \
+                        {request_id}"
+                    );
+
+                    info!(
+                        &log,
+                        "{s}";
+                        "request.request_id" => %request.request_id,
+                        "request.volume_id" => %request.volume_id,
+                    );
+                    status.step_invoked_ok.push(s);
+                }
+
+                Err(e) => {
+                    let s = format!(
+                        "invoking region snapshot replacement step saga for \
+                        {request_id} failed: {e}"
+                    );
+
+                    error!(
+                        &log,
+                        "{s}";
+                        "request.request_id" => %request.request_id,
+                        "request.volume_id" => %request.volume_id,
+                    );
+                    status.errors.push(s);
+                }
+            };
+        }
+    }
+}
+
+impl BackgroundTask for RegionSnapshotReplacementFindAffected {
+    fn activate<'a>(
+        &'a mut self,
+        opctx: &'a OpContext,
+    ) -> BoxFuture<'a, serde_json::Value> {
+        async move {
+            let log = &opctx.log;
+            info!(
+                &log,
+                "region snapshot replacement find affected volumes task started"
+            );
+
+            let mut status = RegionSnapshotReplacementStepStatus::default();
+
+            // Importantly, clean up old steps before finding affected
+            // volumes! Otherwise, this task will continue to find the
+            // snapshot in the volumes pending deletion, and will continue
+            // to see conflicts in the next function.
+            self.clean_up_region_snapshot_replacement_step_volumes(
+                opctx,
+                &mut status,
+            )
+            .await;
+
+            self.create_step_records_for_affected_volumes(opctx, &mut status)
+                .await;
+
+            self.invoke_step_saga_for_affected_volumes(opctx, &mut status)
+                .await;
+
+            info!(
+                &log,
+                "region snapshot replacement find affected volumes task done"
+            );
+
+            json!(status)
+        }
+        .boxed()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::app::background::init::test::NoopStartSaga;
+    use nexus_db_model::RegionSnapshot;
+    use nexus_db_model::RegionSnapshotReplacement;
+    use nexus_db_model::RegionSnapshotReplacementStep;
+    use nexus_db_model::RegionSnapshotReplacementStepState;
+    use nexus_db_model::Volume;
+    use nexus_test_utils_macros::nexus_test;
+    use sled_agent_client::types::CrucibleOpts;
+    use sled_agent_client::types::VolumeConstructionRequest;
+    use uuid::Uuid;
+
+    type ControlPlaneTestContext =
+        nexus_test_utils::ControlPlaneTestContext<crate::Server>;
+
+    async fn add_fake_volume_for_snapshot_addr(
+        datastore: &DataStore,
+        snapshot_addr: String,
+    ) -> Uuid {
+        let new_volume_id = Uuid::new_v4();
+
+        let volume_construction_request = VolumeConstructionRequest::Volume {
+            id: new_volume_id,
+            block_size: 0,
+            sub_volumes: vec![],
+            read_only_parent: Some(Box::new(
+                VolumeConstructionRequest::Region {
+                    block_size: 0,
+                    blocks_per_extent: 0,
+                    extent_count: 0,
+                    gen: 0,
+                    opts: CrucibleOpts {
+                        id: Uuid::new_v4(),
+                        target: vec![snapshot_addr],
+                        lossy: false,
+                        flush_timeout: None,
+                        key: None,
+                        cert_pem: None,
+                        key_pem: None,
+                        root_cert_pem: None,
+                        control: None,
+                        read_only: true,
+                    },
+                },
+            )),
+        };
+
+        let volume_data =
+            serde_json::to_string(&volume_construction_request).unwrap();
+
+        let volume = Volume::new(new_volume_id, volume_data);
+
+        datastore.volume_create(volume).await.unwrap();
+
+        new_volume_id
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn
test_region_snapshot_replacement_step_task( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementFindAffected::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStepStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add a region snapshot replacement request for a fake region snapshot. + + let dataset_id = Uuid::new_v4(); + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + let snapshot_addr = String::from("[fd00:1122:3344::101]:9876"); + + let fake_region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + snapshot_addr.clone(), + ); + + datastore.region_snapshot_create(fake_region_snapshot).await.unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + + let request_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Transition that to Allocating -> ReplacementDone -> DeletingOldVolume + // -> Running + + let operating_saga_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_allocating( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + let new_region_id = Uuid::new_v4(); + let old_snapshot_volume_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_replacement_done( + &opctx, + request_id, + operating_saga_id, + new_region_id, + old_snapshot_volume_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_running( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + // Add some fake volumes that reference the region snapshot being + // replaced + + let new_volume_1_id = add_fake_volume_for_snapshot_addr( + &datastore, + snapshot_addr.clone(), + ) + .await; + let new_volume_2_id = add_fake_volume_for_snapshot_addr( + &datastore, + snapshot_addr.clone(), + ) + .await; + + // Add some fake volumes that do not + + let other_volume_1_id = add_fake_volume_for_snapshot_addr( + &datastore, + String::from("[fd00:1122:3344::101]:1000"), + ) + .await; + + let other_volume_2_id = add_fake_volume_for_snapshot_addr( + &datastore, + String::from("[fd12:5544:3344::912]:3901"), + ) + .await; + + // Activate the task - it should pick the running request up and try to + // run the region snapshot replacement step saga for the volumes + + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + let requested_region_snapshot_replacement_steps = datastore + .get_requested_region_snapshot_replacement_steps(&opctx) + .await + .unwrap(); + + assert_eq!(requested_region_snapshot_replacement_steps.len(), 2); + + for step in &requested_region_snapshot_replacement_steps { + let s: String = format!("created {}", step.id); + assert!(result.step_records_created_ok.contains(&s)); + + let s: String = format!( + "region snapshot replacement step saga invoked ok for {}", + 
step.id + ); + assert!(result.step_invoked_ok.contains(&s)); + + if step.volume_id == new_volume_1_id + || step.volume_id == new_volume_2_id + { + // ok! + } else if step.volume_id == other_volume_1_id + || step.volume_id == other_volume_2_id + { + // error! + assert!(false); + } else { + // error! + assert!(false); + } + } + + // No garbage collection would be invoked yet, as the step records are + // not in state Complete + assert!(result.step_garbage_collect_invoked_ok.is_empty()); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_step_task_gc( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementFindAffected::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStepStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Now, add some Complete records and make sure the garbage collection + // saga is invoked. + + datastore + .insert_region_snapshot_replacement_step(&opctx, { + let mut record = RegionSnapshotReplacementStep::new( + Uuid::new_v4(), + Uuid::new_v4(), + ); + + record.replacement_state = + RegionSnapshotReplacementStepState::Complete; + record.old_snapshot_volume_id = Some(Uuid::new_v4()); + + record + }) + .await + .unwrap(); + + datastore + .insert_region_snapshot_replacement_step(&opctx, { + let mut record = RegionSnapshotReplacementStep::new( + Uuid::new_v4(), + Uuid::new_v4(), + ); + + record.replacement_state = + RegionSnapshotReplacementStepState::Complete; + record.old_snapshot_volume_id = Some(Uuid::new_v4()); + + record + }) + .await + .unwrap(); + + // Activate the task - it should pick the complete steps up and try to + // run the region snapshot replacement step garbage collect saga + + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + let region_snapshot_replacement_steps_requiring_gc = datastore + .region_snapshot_replacement_steps_requiring_garbage_collection( + &opctx, + ) + .await + .unwrap(); + + assert_eq!(region_snapshot_replacement_steps_requiring_gc.len(), 2); + + eprintln!("{:?}", result); + + for step in ®ion_snapshot_replacement_steps_requiring_gc { + let s: String = format!( + "region snapshot replacement step garbage collect request ok \ + for {}", + step.id + ); + assert!(result.step_garbage_collect_invoked_ok.contains(&s)); + } + + assert!(result.step_records_created_ok.is_empty()); + + assert!(result.step_invoked_ok.is_empty()); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } +} diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 926b983460..bd3ae62996 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -41,6 +41,8 @@ pub mod region_replacement_finish; pub mod region_replacement_start; pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; +pub mod region_snapshot_replacement_step; +pub mod region_snapshot_replacement_step_garbage_collect; pub mod snapshot_create; pub mod 
snapshot_delete; pub mod test_saga; @@ -198,6 +200,12 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); #[cfg(test)] ::register_actions(&mut registry); diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step.rs b/nexus/src/app/sagas/region_snapshot_replacement_step.rs new file mode 100644 index 0000000000..600bb155bf --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_step.rs @@ -0,0 +1,603 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Region snapshot replacement is distinct from region replacement: replacing +//! parts of a volume's read-only parent (and all the layers under it) is easier +//! because this does _not_ incur a live repair or reconciliation. Each part of +//! a read-only region set contains the same data that will never be modified. +//! +//! A region snapshot replacement request starts off in the "Requested" state, +//! just like a region replacement request. A background task will search for +//! region snapshot replacement requests in this state and trigger the "region +//! snapshot replacement start" saga. This will allocate a new region to replace +//! the requested one, and modify the snapshot VCR accordingly. If any disks are +//! then created using that snapshot as a source, they will have the replacement +//! and will not need a replace request. +//! +//! However, any past use of that snapshot as a source means that the Volume +//! created from that will have a copy of the unmodified snapshot Volume as a +//! read-only parent. Any construction of the Volume will be referencing the +//! replaced region snapshot (which could be gone if it is expunged). It is this +//! saga's responsibility to update all Volumes that reference the region +//! snapshot being replaced, and send a replacement request to any Upstairs that +//! were constructed. +//! +//! Some difficulty comes from the requirement to notify existing Upstairs that +//! reference the replaced read-only part, but even this is not as difficult as +//! region replacement: Nexus does not have to continually monitor and drive +//! either live repair or reconciliation, just ensure that the read-only +//! replacement occurs. Read-only replacements should be basically +//! instantaneous. +//! +//! A replace request only needs to be done once per Upstairs that has the old +//! reference. This is done as a "region snapshot replacement step", and once +//! all those are done, the region snapshot replacement request can be +//! "completed". +//! +//! Region snapshot replacement steps need to be written into the database and +//! have an associated state and operating saga id for the same reason that +//! region snapshot replacement requests do: multiple background tasks will +//! invoke multiple sagas, and there needs to be some exclusive access. +//! +//! See the documentation for the "region snapshot replacement step garbage +//! collect" saga for the next step in the process. 
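+//!
+//! In DAG order, the saga defined below: generates a saga id and a new
+//! volume id; claims the step record by moving it from "Requested" to
+//! "Running" (the record stores the operating saga id, which provides the
+//! exclusive access described above); resolves the old snapshot address
+//! and the new region address; creates a blank "fake" volume; swaps the
+//! old target out of the affected volume and into that fake volume via
+//! `volume_replace_snapshot`; makes a best-effort replacement request to
+//! any running Propolis; and finally marks the step record "Complete".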
+ +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::db::datastore::ExistingTarget; +use crate::app::db::datastore::ReplacementTarget; +use crate::app::db::datastore::VolumeToDelete; +use crate::app::db::datastore::VolumeWithTarget; +use crate::app::db::lookup::LookupPath; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use nexus_db_model::VmmState; +use nexus_types::identity::Resource; +use omicron_common::api::external::Error; +use propolis_client::types::ReplaceResult; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::types::CrucibleOpts; +use sled_agent_client::types::VolumeConstructionRequest; +use std::net::SocketAddrV6; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement step saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionSnapshotReplacementStep, +} + +// region snapshot replacement step saga: actions + +declare_saga_actions! { + region_snapshot_replacement_step; + SET_SAGA_ID -> "unused_1" { + + rsrss_set_saga_id + - rsrss_set_saga_id_undo + } + CREATE_REPLACE_PARAMS -> "replace_params" { + + rsrss_create_replace_params + } + CREATE_FAKE_VOLUME -> "unused_2" { + + rssrs_create_fake_volume + - rssrs_create_fake_volume_undo + } + REPLACE_SNAPSHOT_IN_VOLUME -> "unused_3" { + + rsrss_replace_snapshot_in_volume + - rsrss_replace_snapshot_in_volume_undo + } + NOTIFY_UPSTAIRS -> "unused_4" { + + rsrss_notify_upstairs + } + UPDATE_REQUEST_RECORD -> "unused_5" { + + rsrss_update_request_record + } +} + +// region snapshot replacement step saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStep; +impl NexusSaga for SagaRegionSnapshotReplacementStep { + const NAME: &'static str = "region-snapshot-replacement-step"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_step_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(Node::action( + "new_volume_id", + "GenerateNewVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + builder.append(create_replace_params_action()); + builder.append(create_fake_volume_action()); + builder.append(replace_snapshot_in_volume_action()); + builder.append(notify_upstairs_action()); + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement step saga: action implementations + +async fn rsrss_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "running" state to + // block out other sagas that will be triggered for the same request. 
+
+    osagactx
+        .datastore()
+        .set_region_snapshot_replacement_step_running(
+            &opctx,
+            params.request.id,
+            saga_id,
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    Ok(())
+}
+
+async fn rsrss_set_saga_id_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    let saga_id = sagactx.lookup::<Uuid>("saga_id")?;
+
+    osagactx
+        .datastore()
+        .undo_set_region_snapshot_replacement_step_running(
+            &opctx,
+            params.request.id,
+            saga_id,
+        )
+        .await?;
+
+    Ok(())
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct ReplaceParams {
+    old_snapshot_address: SocketAddrV6,
+    new_region_address: SocketAddrV6,
+}
+
+async fn rsrss_create_replace_params(
+    sagactx: NexusActionContext,
+) -> Result<ReplaceParams, ActionError> {
+    let log = sagactx.user_data().log();
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    // look up region snapshot replace request by id
+
+    let region_snapshot_replace_request = osagactx
+        .datastore()
+        .get_region_snapshot_replacement_request_by_id(
+            &opctx,
+            params.request.request_id,
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let region_snapshot = osagactx
+        .datastore()
+        .region_snapshot_get(
+            region_snapshot_replace_request.old_dataset_id,
+            region_snapshot_replace_request.old_region_id,
+            region_snapshot_replace_request.old_snapshot_id,
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let Some(region_snapshot) = region_snapshot else {
+        return Err(ActionError::action_failed(format!(
+            "region snapshot {} {} {} deleted!",
+            region_snapshot_replace_request.old_dataset_id,
+            region_snapshot_replace_request.old_region_id,
+            region_snapshot_replace_request.old_snapshot_id,
+        )));
+    };
+
+    let old_snapshot_address: SocketAddrV6 =
+        match region_snapshot.snapshot_addr.parse() {
+            Ok(addr) => addr,
+
+            Err(e) => {
+                return Err(ActionError::action_failed(format!(
+                    "parsing {} as SocketAddrV6 failed: {e}",
+                    region_snapshot.snapshot_addr,
+                )));
+            }
+        };
+
+    let Some(new_region_id) = region_snapshot_replace_request.new_region_id
+    else {
+        return Err(ActionError::action_failed(format!(
+            "request {} does not have a new_region_id!",
+            region_snapshot_replace_request.id,
+        )));
+    };
+
+    let new_region_address = osagactx
+        .nexus()
+        .region_addr(&log, new_region_id)
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    Ok(ReplaceParams { old_snapshot_address, new_region_address })
+}
+
+async fn rssrs_create_fake_volume(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let osagactx = sagactx.user_data();
+
+    let new_volume_id = sagactx.lookup::<Uuid>("new_volume_id")?;
+
+    // Create a fake volume record for the old snapshot target. This will be
+    // deleted after the region snapshot replacement step saga has finished,
+    // and the region snapshot replacement step garbage collect saga has run.
+    // It can be completely blank here; it will be replaced by
+    // `volume_replace_snapshot`.
+
+    let volume_construction_request = VolumeConstructionRequest::Volume {
+        id: new_volume_id,
+        block_size: 0,
+        sub_volumes: vec![VolumeConstructionRequest::Region {
+            block_size: 0,
+            blocks_per_extent: 0,
+            extent_count: 0,
+            gen: 0,
+            opts: CrucibleOpts {
+                id: new_volume_id,
+                target: vec![],
+                lossy: false,
+                flush_timeout: None,
+                key: None,
+                cert_pem: None,
+                key_pem: None,
+                root_cert_pem: None,
+                control: None,
+                read_only: true,
+            },
+        }],
+        read_only_parent: None,
+    };
+
+    let volume_data = serde_json::to_string(&volume_construction_request)
+        .map_err(|e| {
+            ActionError::action_failed(Error::internal_error(&e.to_string()))
+        })?;
+
+    let volume = db::model::Volume::new(new_volume_id, volume_data);
+
+    osagactx
+        .datastore()
+        .volume_create(volume)
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    Ok(())
+}
+
+async fn rssrs_create_fake_volume_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let osagactx = sagactx.user_data();
+
+    // Delete the fake volume.
+
+    let new_volume_id = sagactx.lookup::<Uuid>("new_volume_id")?;
+    osagactx.datastore().volume_hard_delete(new_volume_id).await?;
+
+    Ok(())
+}
+
+async fn rsrss_replace_snapshot_in_volume(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+
+    let replace_params = sagactx.lookup::<ReplaceParams>("replace_params")?;
+
+    let new_volume_id = sagactx.lookup::<Uuid>("new_volume_id")?;
+
+    // `volume_replace_snapshot` will swap the old snapshot for the new
+    // region. No repair or reconciliation needs to occur after this.
+    osagactx
+        .datastore()
+        .volume_replace_snapshot(
+            VolumeWithTarget(params.request.volume_id),
+            ExistingTarget(replace_params.old_snapshot_address),
+            ReplacementTarget(replace_params.new_region_address),
+            VolumeToDelete(new_volume_id),
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    Ok(())
+}
+
+async fn rsrss_replace_snapshot_in_volume_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+
+    let replace_params = sagactx.lookup::<ReplaceParams>("replace_params")?;
+
+    let new_volume_id = sagactx.lookup::<Uuid>("new_volume_id")?;
+
+    osagactx
+        .datastore()
+        .volume_replace_snapshot(
+            VolumeWithTarget(params.request.volume_id),
+            ExistingTarget(replace_params.new_region_address),
+            ReplacementTarget(replace_params.old_snapshot_address),
+            VolumeToDelete(new_volume_id),
+        )
+        .await?;
+
+    Ok(())
+}
+
+async fn rsrss_notify_upstairs(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+    let log = sagactx.user_data().log();
+
+    // Make an effort to notify a Propolis if one was booted for this volume.
+    // This is best-effort: if there is a failure, this saga will unwind and
+    // be triggered again for the same request. If there is no Propolis booted
+    // for this volume, then there's nothing to be done: any future Propolis
+    // will receive the updated Volume.
+    //
+    // Unlike for region replacement, there's no step required here if there
+    // isn't an active Propolis: any Upstairs created after the snapshot_addr
+    // is replaced will reference the cloned data.
+
+    let Some(disk) = osagactx
+        .datastore()
+        .disk_for_volume_id(params.request.volume_id)
+        .await
+        .map_err(ActionError::action_failed)?
+ else { + return Ok(()); + }; + + let Some(instance_id) = disk.runtime().attach_instance_id else { + return Ok(()); + }; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let instance_and_vmm = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + let Some(vmm) = instance_and_vmm.vmm() else { + return Ok(()); + }; + + let state = vmm.runtime.state; + + info!( + log, + "volume associated with disk attached to instance with vmm in \ + state {state}"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + match &state { + VmmState::Running | VmmState::Rebooting => { + // Propolis server is ok to receive the volume replacement request. + } + + VmmState::Starting + | VmmState::Stopping + | VmmState::Stopped + | VmmState::Migrating + | VmmState::Failed + | VmmState::Destroyed + | VmmState::SagaUnwound => { + // Propolis server is not ok to receive volume replacement requests + // - unwind so that this saga can run again. + return Err(ActionError::action_failed(format!( + "vmm {} propolis not in a state to receive request", + vmm.id, + ))); + } + } + + let new_volume_vcr = match osagactx + .datastore() + .volume_get(params.request.volume_id) + .await + .map_err(ActionError::action_failed)? + { + Some(volume) => volume.data().to_string(), + + None => { + return Err(ActionError::action_failed(Error::internal_error( + "new volume is gone!", + ))); + } + }; + + let instance_lookup = + LookupPath::new(&opctx, &osagactx.datastore()).instance_id(instance_id); + + let (vmm, client) = osagactx + .nexus() + .propolis_client_for_instance( + &opctx, + &instance_lookup, + authz::Action::Modify, + ) + .await + .map_err(ActionError::action_failed)?; + + info!( + log, + "sending replacement request for disk volume to propolis"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + let result = client + .instance_issue_crucible_vcr_request() + .id(disk.id()) + .body(propolis_client::types::InstanceVcrReplace { + name: disk.name().to_string(), + vcr_json: new_volume_vcr, + }) + .send() + .await + .map_err(|e| match e { + propolis_client::Error::ErrorResponse(rv) => { + ActionError::action_failed(rv.message.clone()) + } + + _ => ActionError::action_failed(format!( + "unexpected failure during \ + `instance_issue_crucible_vcr_request`: {e}", + )), + })?; + + let replace_result = result.into_inner(); + + info!( + log, + "saw replace result {replace_result:?}"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + match &replace_result { + ReplaceResult::Started => { + // This saga's call just started the replacement + } + + ReplaceResult::StartedAlready => { + // A previous run of this saga (or saga node) started the + // replacement + } + + ReplaceResult::CompletedAlready => { + // It's done! We see this if the same propolis that received the + // original replace request started and finished the replacement. 
+ } + + ReplaceResult::VcrMatches => { + // This propolis booted with the updated VCR + } + + ReplaceResult::Missing => { + // The volume does not contain the region to be replaced. This is an + // error! + return Err(ActionError::action_failed(String::from( + "saw ReplaceResult::Missing", + ))); + } + } + + Ok(()) +} + +async fn rsrss_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // Update the request record to 'Completed' and clear the operating saga id. + // There is no undo step for this, it should succeed idempotently. + datastore + .set_region_snapshot_replacement_step_complete( + &opctx, + params.request.id, + saga_id, + new_volume_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs new file mode 100644 index 0000000000..93335b6125 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs @@ -0,0 +1,233 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Delete the volume that stashes the target replaced during a region snapshot +//! replacement step saga. After that's done, change the region snapshot +//! replacement step's state to "VolumeDeleted". + +use super::{ActionRegistry, NexusActionContext, NexusSaga, SagaInitError}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement step garbage collect saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the snapshot that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub old_snapshot_volume_id: Uuid, + pub request: db::model::RegionSnapshotReplacementStep, +} + +// region snapshot replacement step garbage collect saga: actions + +declare_saga_actions! 
{ + region_snapshot_replacement_step_garbage_collect; + UPDATE_REQUEST_RECORD -> "unused_1" { + + srsgs_update_request_record + } +} + +// region snapshot replacement step garbage collect saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStepGarbageCollect; +impl NexusSaga for SagaRegionSnapshotReplacementStepGarbageCollect { + const NAME: &'static str = + "region-snapshot-replacement-step-garbage-collect"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_step_garbage_collect_register_actions( + registry, + ); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.old_snapshot_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement step garbage collect saga: action implementations + +async fn srsgs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Now that the region snapshot step volume has been deleted, update the + // replacement request record to 'VolumeDeleted'. There is no undo step for + // this, it should succeed idempotently. 
+ + datastore + .set_region_snapshot_replacement_step_volume_deleted( + &opctx, + params.request.id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::app::sagas::region_snapshot_replacement_step_garbage_collect::*; + use nexus_db_model::RegionSnapshotReplacementStep; + use nexus_db_model::RegionSnapshotReplacementStepState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_step_garbage_collect_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_snapshot_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_snapshot_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_snapshot_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_snapshot_volume_id, volume_data)) + .await + .unwrap(); + + let mut request = + RegionSnapshotReplacementStep::new(Uuid::new_v4(), Uuid::new_v4()); + request.replacement_state = + RegionSnapshotReplacementStepState::Complete; + request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + + datastore + .insert_region_snapshot_replacement_step(&opctx, request.clone()) + .await + .unwrap(); + + // Run the saga + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + old_snapshot_volume_id, + request: request.clone(), + }; + + let _output = nexus + .sagas + .saga_execute::( + params, + ) + .await + .unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_step_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementStepState::VolumeDeleted + ); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_snapshot_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index d9cbb5eb34..6859e992ca 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -139,6 +139,7 @@ instance_updater.disable = true instance_updater.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/types/src/internal_api/background.rs 
index 8e4b6b3013..e5fd35d1e3 100644
--- a/nexus/types/src/internal_api/background.rs
+++ b/nexus/types/src/internal_api/background.rs
@@ -36,3 +36,13 @@ pub struct RegionSnapshotReplacementGarbageCollectStatus {
     pub garbage_collect_requested: Vec<String>,
     pub errors: Vec<String>,
 }
+
+/// The status of a `region_snapshot_replacement_step` background task
+/// activation
+#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)]
+pub struct RegionSnapshotReplacementStepStatus {
+    pub step_records_created_ok: Vec<String>,
+    pub step_garbage_collect_invoked_ok: Vec<String>,
+    pub step_invoked_ok: Vec<String>,
+    pub errors: Vec<String>,
+}
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
index 2e3a8fe578..f0f40d282e 100644
--- a/smf/nexus/multi-sled/config-partial.toml
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -67,6 +67,7 @@ lookup_region_port.period_secs = 60
 instance_updater.period_secs = 30
 region_snapshot_replacement_start.period_secs = 30
 region_snapshot_replacement_garbage_collection.period_secs = 30
+region_snapshot_replacement_step.period_secs = 30
 
 [default_region_allocation_strategy]
 # by default, allocate across 3 distinct sleds
diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
index dbd61e953d..23340b3c36 100644
--- a/smf/nexus/single-sled/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -67,6 +67,7 @@ lookup_region_port.period_secs = 60
 instance_updater.period_secs = 30
 region_snapshot_replacement_start.period_secs = 30
 region_snapshot_replacement_garbage_collection.period_secs = 30
+region_snapshot_replacement_step.period_secs = 30
 
 [default_region_allocation_strategy]
 # by default, allocate without requirement for distinct sleds.

From a032d2a74597fb5331949222a5e2df601350e2eb Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Mon, 26 Aug 2024 15:13:16 -0700
Subject: [PATCH 14/22] Nullable lldp_link_config_id column (#6436)

https://github.com/oxidecomputer/omicron/pull/6185 introduced a schema
change which added a non-nullable column `lldp_link_config_id` to the
`switch_port_settings_link_config` table. However, on systems where
`switch_port_settings_link_config` had rows, there was no "default"
value, and the schema update appears to fail. This is currently the
case on our dogfood system.

To mitigate: this PR makes the `lldp_link_config_id` column nullable,
by updating the existing schema change (for cases where it could not
complete previously) and by adding a new schema change (for cases where
the `switch_port_settings_link_config` table was empty, and the schema
change **did** previously complete).

Fixes #6433

---
 common/src/api/external/mod.rs | 2 +-
 nexus/db-model/src/schema.rs | 2 +-
 nexus/db-model/src/schema_versions.rs | 3 ++-
 nexus/db-model/src/switch_port.rs | 4 ++--
 .../src/db/datastore/switch_port.rs | 4 ++--
 openapi/nexus.json | 2 +-
 schema/crdb/collapse_lldp_settings/up2.sql | 2 +-
 schema/crdb/dbinit.sql | 4 ++--
 schema/crdb/lldp-link-config-nullable/up1.sql | 20 +++++++++++++++++++
 9 files changed, 32 insertions(+), 11 deletions(-)
 create mode 100644 schema/crdb/lldp-link-config-nullable/up1.sql

diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
index 07e4fd0b83..58cace3032 100644
--- a/common/src/api/external/mod.rs
+++ b/common/src/api/external/mod.rs
@@ -2371,7 +2371,7 @@ pub struct SwitchPortLinkConfig {
     /// The link-layer discovery protocol service configuration id for this
     /// link.
-    pub lldp_link_config_id: Uuid,
+    pub lldp_link_config_id: Option<Uuid>,
 
     /// The name of this link.
     pub link_name: String,
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index f630bbbeac..f01f33c39d 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -144,7 +144,7 @@ table! {
         fec -> crate::SwitchLinkFecEnum,
         speed -> crate::SwitchLinkSpeedEnum,
         autoneg -> Bool,
-        lldp_link_config_id -> Uuid,
+        lldp_link_config_id -> Nullable<Uuid>,
     }
 }
 
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index aef95e6d53..eaed2990c5 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(91, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(92, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(92, "lldp-link-config-nullable"),
         KnownVersion::new(91, "add-management-gateway-producer-kind"),
         KnownVersion::new(90, "lookup-bgp-config-by-asn"),
         KnownVersion::new(89, "collapse_lldp_settings"),
diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs
index 09f1327be2..bbcbb0748a 100644
--- a/nexus/db-model/src/switch_port.rs
+++ b/nexus/db-model/src/switch_port.rs
@@ -381,7 +381,7 @@ impl Into<external::SwitchPortConfig> for SwitchPortConfig
 #[diesel(table_name = switch_port_settings_link_config)]
 pub struct SwitchPortLinkConfig {
     pub port_settings_id: Uuid,
-    pub lldp_link_config_id: Uuid,
+    pub lldp_link_config_id: Option<Uuid>,
     pub link_name: String,
     pub mtu: SqlU16,
     pub fec: SwitchLinkFec,
@@ -401,7 +401,7 @@ impl SwitchPortLinkConfig {
     ) -> Self {
         Self {
             port_settings_id,
-            lldp_link_config_id,
+            lldp_link_config_id: Some(lldp_link_config_id),
             link_name,
             fec,
             speed,
diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs
index 2e09c1ac13..59748aa4db 100644
--- a/nexus/db-queries/src/db/datastore/switch_port.rs
+++ b/nexus/db-queries/src/db/datastore/switch_port.rs
@@ -455,7 +455,7 @@ impl DataStore {
         let lldp_link_ids: Vec<Uuid> = result
             .links
             .iter()
-            .map(|link| link.lldp_link_config_id)
+            .filter_map(|link| link.lldp_link_config_id)
             .collect();
 
         use db::schema::lldp_link_config;
@@ -1511,7 +1511,7 @@ async fn do_switch_port_settings_delete(
     // delete lldp configs
     use db::schema::lldp_link_config;
     let lldp_link_ids: Vec<Uuid> =
-        links.iter().map(|link| link.lldp_link_config_id).collect();
+        links.iter().filter_map(|link| link.lldp_link_config_id).collect();
 
     diesel::delete(lldp_link_config::dsl::lldp_link_config)
         .filter(lldp_link_config::id.eq_any(lldp_link_ids))
         .execute_async(conn)
diff --git a/openapi/nexus.json b/openapi/nexus.json
index f6d140ed05..47f1f0822b 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -19335,6 +19335,7 @@
           "type": "string"
         },
         "lldp_link_config_id": {
+          "nullable": true,
           "description": "The link-layer discovery protocol service configuration id for this link.",
           "type": "string",
           "format": "uuid"
@@ -19363,7 +19364,6 @@
         "autoneg",
         "fec",
         "link_name",
-        "lldp_link_config_id",
         "mtu",
         "port_settings_id",
         "speed"
diff --git a/schema/crdb/collapse_lldp_settings/up2.sql b/schema/crdb/collapse_lldp_settings/up2.sql
index b2d884d068..8ead8a29b4 100644
--- a/schema/crdb/collapse_lldp_settings/up2.sql
+++ b/schema/crdb/collapse_lldp_settings/up2.sql
@@ -1,4 +1,4 @@
 /*
  * Add a pointer to this link's LLDP config settings.
  */
-ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS lldp_link_config_id UUID NOT NULL;
+ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS lldp_link_config_id UUID;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 1457532c49..d531672832 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -2657,7 +2657,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_link_config (
     fec omicron.public.switch_link_fec,
     speed omicron.public.switch_link_speed,
     autoneg BOOL NOT NULL DEFAULT false,
-    lldp_link_config_id UUID NOT NULL,
+    lldp_link_config_id UUID,
     PRIMARY KEY (port_settings_id, link_name)
 );
 
@@ -4214,7 +4214,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '91.0.0', NULL)
+    (TRUE, NOW(), NOW(), '92.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/schema/crdb/lldp-link-config-nullable/up1.sql b/schema/crdb/lldp-link-config-nullable/up1.sql
new file mode 100644
index 0000000000..c8e1122f68
--- /dev/null
+++ b/schema/crdb/lldp-link-config-nullable/up1.sql
@@ -0,0 +1,20 @@
+-- Refer to https://github.com/oxidecomputer/omicron/issues/6433 for the justification
+-- behind this schema change.
+--
+-- In short: the "collapse_lldp_settings" schema change was edited after
+-- merging. That change included a schema change which added a non-null column
+-- to an existing table. Such a data-modifying statement is only valid for
+-- tables with no rows - however, in our test systems, we observed rows, which
+-- prevented this schema change from progressing.
+--
+-- To resolve:
+-- 1. Within the old "collapse_lldp_settings" change, we retroactively dropped the
+-- non-null constraint. For systems with populated
+-- "switch_port_settings_link_config" tables, this allows the schema update to
+-- complete without an error.
+-- 2. Within this new "lldp-link-config-nullable" change, we ALSO dropped the
+-- non-null constraint. For systems without populated
+-- "switch_port_settings_link_config" tables -- which may have been able to
+-- apply the "collapse_lldp_settings" change successfully -- this converges the state
+-- of the database to the same outcome, where the column is nullable.
+ALTER TABLE omicron.public.switch_port_settings_link_config ALTER COLUMN lldp_link_config_id DROP NOT NULL;

From 6e0bf12278ccaca58d74edb57c11a5dadc1f2f3d Mon Sep 17 00:00:00 2001
From: Eliza Weisman
Date: Mon, 26 Aug 2024 16:19:24 -0700
Subject: [PATCH 15/22] [gateway] Add Oximeter HTTP service metrics (#6432)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that #6354 has added an Oximeter producer endpoint to MGS for
publishing SP sensor metrics, it seemed like a nice idea to also
instrument the MGS HTTP server, similar to the existing instrumentation
for other control plane services.
I don't think we'll be doing a lot of tuning of MGS performance, but the metrics seem like they could still be useful because they also include the distribution of HTTP status codes, and in many cases, the latency measurements also serve as a proxy for how long it takes the *SP* to perform a certain operation, which could be a valuable signal. This commit adds an `oximeter_instruments::http::LatencyTracker` to the MGS HTTP servers. To test that it works, I started a local Clickhouse and a standalone Oximeter, and ran MGS and the SP simulator using `cargo xtask mgs-dev run`. Then, I made a few HTTP requests to various MGS APIs using `curl`; most of which were expected to succeed, and a few for SP slots that the simulator wasn't configured to simulate a SP in (to ensure that the request would fail). We can see the new metrics in OxQL: ``` 0x〉\d hardware_component:current hardware_component:fan_speed hardware_component:sensor_error_count hardware_component:temperature hardware_component:voltage http_service:request_latency_histogram oximeter_collector:collections 0x〉get http_service:request_latency_histogram | last 1 http_service:request_latency_histogram id: 1ac73746-2d3b-46d8-ac7c-44512c5f2263 name: management-gateway-service operation_id: sp_get status_code: 200 [2024-08-24 18:54:47.978590056, 2024-08-24 18:58:18.125731231]: [-179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000: 0, 0.000001: 0, 0.000002: 0, 0.000003: 0, 0.000004: 0, 0.000005: 0, 0.0000059999999999999985: 0, 0.000007: 0, 0.000008: 0, 0.000009: 0, 0.00001: 0, 0.00002: 0, 0.000030000000000000004: 0, 0.00004: 0, 0.00005: 0, 0.00006: 0, 0.00007000000000000001: 0, 0.00008: 0, 0.00009: 0, 0.0001: 0, 0.0002: 0, 0.0003: 0, 0.0004: 0, 0.0005: 1, 0.0006000000000000001: 1, 0.0007: 0, 0.0007999999999999999: 0, 0.0009: 0, 0.001: 0, 0.002: 0, 0.003: 0, 0.004: 0, 0.005: 0, 0.006: 0, 0.007: 0, 0.008: 0, 0.009000000000000001: 0, 0.01: 0, 0.020000000000000004: 0, 0.03000000000000001: 0, 0.04000000000000001: 0, 0.05000000000000001: 0, 0.06000000000000001: 0, 0.07: 0, 0.08: 0, 0.09000000000000001: 0, 0.1: 0, 0.2: 0, 0.30000000000000004: 0, 0.4: 0, 0.5: 0, 0.6: 0, 0.7000000000000001: 0, 0.8: 0, 0.9: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 20: 0, 30: 0, 40: 0, 50: 0, 60: 0, 70: 0, 80: 0, 90: 0, 100: 0, 200: 0, 300: 0, 400: 0, 500: 0, 600: 0, 700: 0, 800: 0, 900: 0, 1000: 0, min: 0.000556233, max: 0.000603704, mean: 0.0005799685000000001, std_dev: 0.00002373549999999997, p50: 0, p90: 0.000603704, p99: 0.000603704] id: 1ac73746-2d3b-46d8-ac7c-44512c5f2263 name: management-gateway-service operation_id: ignition_list status_code: 200 [2024-08-24 18:54:47.978590056, 2024-08-24 18:58:18.125290346]: [-179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000: 0, 0.000001: 0, 0.000002: 0, 0.000003: 0, 0.000004: 0, 0.000005: 0, 0.0000059999999999999985: 0, 0.000007: 0, 0.000008: 0, 0.000009: 0, 0.00001: 0, 0.00002: 0, 0.000030000000000000004: 0, 0.00004: 0, 0.00005: 0, 0.00006: 0, 0.00007000000000000001: 0, 0.00008: 0, 0.00009: 0, 
0.0001: 0, 0.0002: 0, 0.0003: 0, 0.0004: 1, 0.0005: 0, 0.0006000000000000001: 0, 0.0007: 0, 0.0007999999999999999: 0, 0.0009: 0, 0.001: 0, 0.002: 0, 0.003: 0, 0.004: 0, 0.005: 0, 0.006: 0, 0.007: 0, 0.008: 0, 0.009000000000000001: 0, 0.01: 0, 0.020000000000000004: 0, 0.03000000000000001: 0, 0.04000000000000001: 0, 0.05000000000000001: 0, 0.06000000000000001: 0, 0.07: 0, 0.08: 0, 0.09000000000000001: 0, 0.1: 0, 0.2: 0, 0.30000000000000004: 0, 0.4: 0, 0.5: 0, 0.6: 0, 0.7000000000000001: 0, 0.8: 0, 0.9: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 20: 0, 30: 0, 40: 0, 50: 0, 60: 0, 70: 0, 80: 0, 90: 0, 100: 0, 200: 0, 300: 0, 400: 0, 500: 0, 600: 0, 700: 0, 800: 0, 900: 0, 1000: 0, min: 0.000427249, max: 0.000427249, mean: 0.000427249, std_dev: 0, p50: 0, p90: 0.000427249, p99: 0.000427249] id: 1ac73746-2d3b-46d8-ac7c-44512c5f2263 name: management-gateway-service operation_id: sp_get status_code: 400 [2024-08-24 18:54:47.978590056, 2024-08-24 18:58:18.126114126]: [-179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000: 0, 0.000001: 0, 0.000002: 0, 0.000003: 0, 0.000004: 0, 0.000005: 0, 0.0000059999999999999985: 0, 0.000007: 0, 0.000008: 0, 0.000009: 0, 0.00001: 0, 0.00002: 2, 0.000030000000000000004: 0, 0.00004: 0, 0.00005: 0, 0.00006: 0, 0.00007000000000000001: 0, 0.00008: 0, 0.00009: 0, 0.0001: 0, 0.0002: 0, 0.0003: 0, 0.0004: 0, 0.0005: 0, 0.0006000000000000001: 0, 0.0007: 0, 0.0007999999999999999: 0, 0.0009: 0, 0.001: 0, 0.002: 0, 0.003: 0, 0.004: 0, 0.005: 0, 0.006: 0, 0.007: 0, 0.008: 0, 0.009000000000000001: 0, 0.01: 0, 0.020000000000000004: 0, 0.03000000000000001: 0, 0.04000000000000001: 0, 0.05000000000000001: 0, 0.06000000000000001: 0, 0.07: 0, 0.08: 0, 0.09000000000000001: 0, 0.1: 0, 0.2: 0, 0.30000000000000004: 0, 0.4: 0, 0.5: 0, 0.6: 0, 0.7000000000000001: 0, 0.8: 0, 0.9: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 20: 0, 30: 0, 40: 0, 50: 0, 60: 0, 70: 0, 80: 0, 90: 0, 100: 0, 200: 0, 300: 0, 400: 0, 500: 0, 600: 0, 700: 0, 800: 0, 900: 0, 1000: 0, min: 0.000020368, max: 0.000021581, mean: 0.0000209745, std_dev: 0.0000006064999999999992, p50: 0, p90: 0.000021581, p99: 0.000021581] 0x〉exit ``` --- Cargo.lock | 1 + gateway/Cargo.toml | 1 + gateway/src/context.rs | 16 + gateway/src/http_entrypoints.rs | 784 ++++++++++++++++++-------------- gateway/src/lib.rs | 11 +- gateway/src/metrics.rs | 10 + 6 files changed, 478 insertions(+), 345 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 249b7c5cea..7074e40993 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5983,6 +5983,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "oximeter", + "oximeter-instruments", "oximeter-producer", "schemars", "serde", diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 2dce15892d..bdf4a911af 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -42,6 +42,7 @@ uuid.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true oximeter-producer.workspace = true +oximeter-instruments = { workspace = true, features = ["http-instruments"] } [dev-dependencies] expectorate.workspace = true diff --git a/gateway/src/context.rs b/gateway/src/context.rs index 939bb9b6b9..15592145cf 100644 --- a/gateway/src/context.rs +++ b/gateway/src/context.rs @@ -16,11 +16,13 @@ pub struct 
ServerContext { pub mgmt_switch: ManagementSwitch, pub host_phase2_provider: Arc, pub rack_id: OnceLock, + pub latencies: oximeter_instruments::http::LatencyTracker, pub log: Logger, } impl ServerContext { pub async fn new( + id: Uuid, host_phase2_provider: Arc, switch_config: SwitchConfig, rack_id_config: Option, @@ -37,7 +39,21 @@ impl ServerContext { OnceLock::new() }; + const START_LATENCY_DECADE: i16 = -6; + const END_LATENCY_DECADE: i16 = 3; + let latencies = + oximeter_instruments::http::LatencyTracker::with_latency_decades( + oximeter_instruments::http::HttpService { + name: "management-gateway-service".into(), + id, + }, + START_LATENCY_DECADE, + END_LATENCY_DECADE, + ) + .expect("start and end decades are hardcoded and should be valid"); + Ok(Arc::new(ServerContext { + latencies, mgmt_switch, host_phase2_provider, rack_id, diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index 332f50ed8a..c10e71ad61 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -81,18 +81,22 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let state = sp.state().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let state = sp.state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + let rot_state = sp + .rot_state(gateway_messages::RotBootInfo::HIGHEST_KNOWN_VERSION) + .await; - let rot_state = sp - .rot_state(gateway_messages::RotBootInfo::HIGHEST_KNOWN_VERSION) - .await; + let final_state = sp_state_from_comms(state, rot_state); - let final_state = sp_state_from_comms(state, rot_state); - Ok(HttpResponseOk(final_state)) + Ok(HttpResponseOk(final_state)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_startup_options_get( @@ -100,15 +104,18 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let mgmt_switch = &apictx.mgmt_switch; - let sp_id = path.into_inner().sp.into(); - let sp = mgmt_switch.sp(sp_id)?; + let handler = async { + let mgmt_switch = &apictx.mgmt_switch; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - let options = sp.get_startup_options().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let options = sp.get_startup_options().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(options.into())) + Ok(HttpResponseOk(options.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_startup_options_set( @@ -119,13 +126,16 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; let sp_id = path.into_inner().sp.into(); - let sp = mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = mgmt_switch.sp(sp_id)?; - sp.set_startup_options(body.into_inner().into()).await.map_err( - |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, - )?; + sp.set_startup_options(body.into_inner().into()).await.map_err( + |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, + )?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_sensor_read_value( @@ 
-135,12 +145,17 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpSensorId { sp, sensor_id } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let value = sp.read_sensor_value(sensor_id).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let value = + sp.read_sensor_value(sensor_id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk(value.into())) + }; - Ok(HttpResponseOk(value.into())) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_list( @@ -149,12 +164,15 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let inventory = sp.inventory().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let inventory = sp.inventory().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(sp_component_list_from_comms(inventory))) + Ok(HttpResponseOk(sp_component_list_from_comms(inventory))) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_get( @@ -164,16 +182,21 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - - let details = sp.component_details(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + + let details = + sp.component_details(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk( + details.entries.into_iter().map(Into::into).collect(), + )) + }; - Ok(HttpResponseOk( - details.entries.into_iter().map(Into::into).collect(), - )) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } // Implementation notes: @@ -198,66 +221,79 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let ComponentCabooseSlot { firmware_slot } = query_params.into_inner(); - let component = component_from_str(&component)?; - let from_utf8 = |key: &[u8], bytes| { - // This helper closure is only called with the ascii-printable [u8; 4] - // key constants we define above, so we can unwrap this conversion. - let key = str::from_utf8(key).unwrap(); - String::from_utf8(bytes).map_err(|_| { - http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "InvalidCaboose", - format!("non-utf8 data returned for caboose key {key}"), + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let ComponentCabooseSlot { firmware_slot } = + query_params.into_inner(); + let component = component_from_str(&component)?; + + let from_utf8 = |key: &[u8], bytes| { + // This helper closure is only called with the ascii-printable [u8; 4] + // key constants we define above, so we can unwrap this conversion. 
+ let key = str::from_utf8(key).unwrap(); + String::from_utf8(bytes).map_err(|_| { + http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "InvalidCaboose", + format!("non-utf8 data returned for caboose key {key}"), + ) + }) + }; + + let git_commit = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_GIT_COMMIT, ) - }) - }; + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let board = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_BOARD, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let name = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_NAME, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let version = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_VERSION, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let git_commit = - sp.read_component_caboose( - component, - firmware_slot, - CABOOSE_KEY_GIT_COMMIT, - ) - .await - .map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - let board = sp - .read_component_caboose(component, firmware_slot, CABOOSE_KEY_BOARD) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - let name = sp - .read_component_caboose(component, firmware_slot, CABOOSE_KEY_NAME) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - let version = - sp.read_component_caboose( - component, - firmware_slot, - CABOOSE_KEY_VERSION, - ) - .await - .map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; + let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; + let name = from_utf8(&CABOOSE_KEY_NAME, name)?; + let version = from_utf8(&CABOOSE_KEY_VERSION, version)?; - let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; - let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; - let name = from_utf8(&CABOOSE_KEY_NAME, name)?; - let version = from_utf8(&CABOOSE_KEY_VERSION, version)?; + let caboose = + SpComponentCaboose { git_commit, board, name, version }; - let caboose = SpComponentCaboose { git_commit, board, name, version }; + Ok(HttpResponseOk(caboose)) + }; - Ok(HttpResponseOk(caboose)) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_clear_status( @@ -267,14 +303,18 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - sp.component_clear_status(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + sp.component_clear_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; - Ok(HttpResponseUpdatedNoContent {}) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_active_slot_get( @@ -284,15 +324,18 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp 
= apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let slot = - sp.component_active_slot(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let slot = + sp.component_active_slot(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) + Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_active_slot_set( @@ -304,16 +347,22 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - let slot = body.into_inner().slot; - let persist = query_params.into_inner().persist; - - sp.set_component_active_slot(component, slot, persist).await.map_err( - |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, - )?; - - Ok(HttpResponseUpdatedNoContent {}) + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + let slot = body.into_inner().slot; + let persist = query_params.into_inner().persist; + + sp.set_component_active_slot(component, slot, persist) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_serial_console_attach( @@ -321,6 +370,10 @@ impl GatewayApi for GatewayImpl { path: Path, websocket: WebsocketUpgrade, ) -> WebsocketEndpointResult { + // TODO(eliza): I'm not sure whether there's a way to make + // `oximeter_instruments`'s HTTP latency tracker work with websockets + // requests? It would be nice to get the latency and any error returned + // prior to actually returning the websocket stream... let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); @@ -356,13 +409,15 @@ impl GatewayApi for GatewayImpl { // we don't use it at all to detach. 
let PathSpComponent { sp, component: _ } = path.into_inner(); let sp_id = sp.into(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + sp.serial_console_detach().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let sp = apictx.mgmt_switch.sp(sp_id)?; - sp.serial_console_detach().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_reset( @@ -372,20 +427,23 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - - sp.reset_component_prepare(component) - // We always want to run with the watchdog when resetting as - // disabling the watchdog should be considered a debug only feature - .and_then(|()| sp.reset_component_trigger(component, false)) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - - Ok(HttpResponseUpdatedNoContent {}) + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + + sp.reset_component_prepare(component) + // We always want to run with the watchdog when resetting as + // disabling the watchdog should be considered a debug only feature + .and_then(|()| sp.reset_component_trigger(component, false)) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update( @@ -398,19 +456,22 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - let ComponentUpdateIdSlot { id, firmware_slot } = - query_params.into_inner(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + let ComponentUpdateIdSlot { id, firmware_slot } = + query_params.into_inner(); - // TODO-performance: this makes a full copy of the uploaded data - let image = body.as_bytes().to_vec(); + // TODO-performance: this makes a full copy of the uploaded data + let image = body.as_bytes().to_vec(); - sp.start_update(component, id, firmware_slot, image) - .await - .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; + sp.start_update(component, id, firmware_slot, image) + .await + .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update_status( @@ -421,14 +482,17 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let status = sp.update_status(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let status = sp.update_status(component).await.map_err(|err| { + 
SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(status.into())) + Ok(HttpResponseOk(status.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update_abort( @@ -440,15 +504,18 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let UpdateAbortBody { id } = body.into_inner(); - sp.update_abort(component, id).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let UpdateAbortBody { id } = body.into_inner(); + sp.update_abort(component, id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_rot_cmpa_get( @@ -459,24 +526,26 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); + let handler = async { + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? != SpComponent::ROT { + return Err(HttpError::for_bad_request( + Some("RequestUnsupportedForComponent".to_string()), + "Only the RoT has a CFPA".into(), + )); + } + + let sp = apictx.mgmt_switch.sp(sp_id)?; + let data = sp.read_rot_cmpa().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - // Ensure the caller knows they're asking for the RoT - if component_from_str(&component)? != SpComponent::ROT { - return Err(HttpError::for_bad_request( - Some("RequestUnsupportedForComponent".to_string()), - "Only the RoT has a CFPA".into(), - )); - } - - let sp = apictx.mgmt_switch.sp(sp_id)?; - let data = sp.read_rot_cmpa().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - - let base64_data = - base64::engine::general_purpose::STANDARD.encode(data); + let base64_data = + base64::engine::general_purpose::STANDARD.encode(data); - Ok(HttpResponseOk(RotCmpa { base64_data })) + Ok(HttpResponseOk(RotCmpa { base64_data })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_rot_cfpa_get( @@ -490,29 +559,32 @@ impl GatewayApi for GatewayImpl { let GetCfpaParams { slot } = params.into_inner(); let sp_id = sp.into(); - // Ensure the caller knows they're asking for the RoT - if component_from_str(&component)? != SpComponent::ROT { - return Err(HttpError::for_bad_request( - Some("RequestUnsupportedForComponent".to_string()), - "Only the RoT has a CFPA".into(), - )); - } + let handler = async { + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? 
!= SpComponent::ROT { + return Err(HttpError::for_bad_request( + Some("RequestUnsupportedForComponent".to_string()), + "Only the RoT has a CFPA".into(), + )); + } + + let sp = apictx.mgmt_switch.sp(sp_id)?; + let data = match slot { + RotCfpaSlot::Active => sp.read_rot_active_cfpa().await, + RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await, + RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await, + } + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let sp = apictx.mgmt_switch.sp(sp_id)?; - let data = match slot { - RotCfpaSlot::Active => sp.read_rot_active_cfpa().await, - RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await, - RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await, - } - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + let base64_data = + base64::engine::general_purpose::STANDARD.encode(data); - let base64_data = - base64::engine::general_purpose::STANDARD.encode(data); + Ok(HttpResponseOk(RotCfpa { base64_data, slot })) + }; - Ok(HttpResponseOk(RotCfpa { base64_data, slot })) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_rot_boot_info( @@ -526,20 +598,24 @@ impl GatewayApi for GatewayImpl { let GetRotBootInfoParams { version } = params.into_inner(); let sp_id = sp.into(); - // Ensure the caller knows they're asking for the RoT - if component_from_str(&component)? != SpComponent::ROT { - return Err(HttpError::for_bad_request( - Some("RequestUnsupportedForComponent".to_string()), - "rot_boot_info only makes sent for a RoT".into(), - )); - } + let handler = async { + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? != SpComponent::ROT { + return Err(HttpError::for_bad_request( + Some("RequestUnsupportedForComponent".to_string()), + "rot_boot_info only makes sent for a RoT".into(), + )); + } + + let sp = apictx.mgmt_switch.sp(sp_id)?; + let state = sp.rot_state(version).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let sp = apictx.mgmt_switch.sp(sp_id)?; - let state = sp.rot_state(version).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + Ok(HttpResponseOk(state.into())) + }; - Ok(HttpResponseOk(state.into())) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn ignition_list( @@ -547,17 +623,19 @@ impl GatewayApi for GatewayImpl { ) -> Result>, HttpError> { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - - let out = mgmt_switch - .bulk_ignition_state() - .await? - .map(|(id, state)| SpIgnitionInfo { - id: id.into(), - details: state.into(), - }) - .collect(); - - Ok(HttpResponseOk(out)) + let handler = async { + let out = mgmt_switch + .bulk_ignition_state() + .await? 
+ .map(|(id, state)| SpIgnitionInfo { + id: id.into(), + details: state.into(), + }) + .collect(); + + Ok(HttpResponseOk(out)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn ignition_get( @@ -568,19 +646,23 @@ impl GatewayApi for GatewayImpl { let mgmt_switch = &apictx.mgmt_switch; let sp_id = path.into_inner().sp.into(); - let ignition_target = mgmt_switch.ignition_target(sp_id)?; - - let state = mgmt_switch - .ignition_controller() - .ignition_state(ignition_target) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - - let info = SpIgnitionInfo { id: sp_id.into(), details: state.into() }; - Ok(HttpResponseOk(info)) + let handler = async { + let ignition_target = mgmt_switch.ignition_target(sp_id)?; + + let state = mgmt_switch + .ignition_controller() + .ignition_state(ignition_target) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + let info = + SpIgnitionInfo { id: sp_id.into(), details: state.into() }; + Ok(HttpResponseOk(info)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn ignition_command( @@ -591,18 +673,22 @@ impl GatewayApi for GatewayImpl { let mgmt_switch = &apictx.mgmt_switch; let PathSpIgnitionCommand { sp, command } = path.into_inner(); let sp_id = sp.into(); - let ignition_target = mgmt_switch.ignition_target(sp_id)?; - mgmt_switch - .ignition_controller() - .ignition_command(ignition_target, command.into()) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + let handler = async { + let ignition_target = mgmt_switch.ignition_target(sp_id)?; - Ok(HttpResponseUpdatedNoContent {}) + mgmt_switch + .ignition_controller() + .ignition_command(ignition_target, command.into()) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_power_state_get( @@ -611,13 +697,16 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = sp.power_state().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let power_state = sp.power_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(power_state.into())) + Ok(HttpResponseOk(power_state.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_power_state_set( @@ -627,14 +716,17 @@ impl GatewayApi for GatewayImpl { ) -> Result { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = body.into_inner(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let power_state = body.into_inner(); - sp.set_power_state(power_state.into()).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + sp.set_power_state(power_state.into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_installinator_image_id_set( @@ -646,21 +738,23 @@ impl 
GatewayApi for GatewayImpl { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); + let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); - sp.set_ipcc_key_lookup_value( - Key::InstallinatorImageId as u8, - image_id.serialize(), - ) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + sp.set_ipcc_key_lookup_value( + Key::InstallinatorImageId as u8, + image_id.serialize(), + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_installinator_image_id_delete( @@ -671,20 +765,22 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - // We clear the image ID by setting it to a 0-length vec. - sp.set_ipcc_key_lookup_value( - Key::InstallinatorImageId as u8, - Vec::new(), - ) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + // We clear the image ID by setting it to a 0-length vec. + sp.set_ipcc_key_lookup_value( + Key::InstallinatorImageId as u8, + Vec::new(), + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_host_phase2_progress_get( @@ -692,37 +788,41 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - - let Some(progress) = sp.most_recent_host_phase2_request().await else { - return Ok(HttpResponseOk(HostPhase2Progress::None)); - }; - - // Our `host_phase2_provider` is using an in-memory cache, so the only way - // we can fail to get the total size is if we no longer have the image that - // this SP most recently requested. We'll treat that as "no progress - // information", since it almost certainly means our progress info on this - // SP is very stale. - let Ok(total_size) = - apictx.host_phase2_provider.total_size(progress.hash).await - else { - return Ok(HttpResponseOk(HostPhase2Progress::None)); - }; - - let image_id = HostPhase2RecoveryImageId { - sha256_hash: ArtifactHash(progress.hash), + let handler = async { + let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + + let Some(progress) = sp.most_recent_host_phase2_request().await + else { + return Ok(HttpResponseOk(HostPhase2Progress::None)); + }; + + // Our `host_phase2_provider` is using an in-memory cache, so the only way + // we can fail to get the total size is if we no longer have the image that + // this SP most recently requested. We'll treat that as "no progress + // information", since it almost certainly means our progress info on this + // SP is very stale. 
+ let Ok(total_size) = + apictx.host_phase2_provider.total_size(progress.hash).await + else { + return Ok(HttpResponseOk(HostPhase2Progress::None)); + }; + + let image_id = HostPhase2RecoveryImageId { + sha256_hash: ArtifactHash(progress.hash), + }; + + // `progress` tells us the offset the SP requested and the amount of data we + // sent starting at that offset; report the end of that chunk to our caller. + let offset = progress.offset.saturating_add(progress.data_sent); + + Ok(HttpResponseOk(HostPhase2Progress::Available { + image_id, + offset, + total_size, + age: progress.received.elapsed(), + })) }; - - // `progress` tells us the offset the SP requested and the amount of data we - // sent starting at that offset; report the end of that chunk to our caller. - let offset = progress.offset.saturating_add(progress.data_sent); - - Ok(HttpResponseOk(HostPhase2Progress::Available { - image_id, - offset, - total_size, - age: progress.received.elapsed(), - })) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_host_phase2_progress_delete( @@ -730,11 +830,14 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let handler = async { + let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - sp.clear_most_recent_host_phase2_request().await; + sp.clear_most_recent_host_phase2_request().await; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn recovery_host_phase2_upload( @@ -742,44 +845,55 @@ impl GatewayApi for GatewayImpl { body: UntypedBody, ) -> Result, HttpError> { let apictx = rqctx.context(); - - // TODO: this makes a full copy of the host image, potentially unnecessarily - // if it's malformed. - let image = body.as_bytes().to_vec(); - - let sha256_hash = - apictx.host_phase2_provider.insert(image).await.map_err(|err| { - // Any cache-insertion failure indicates a malformed image; map them - // to bad requests. - HttpError::for_bad_request( - Some("BadHostPhase2Image".to_string()), - err.to_string(), - ) - })?; - let sha256_hash = ArtifactHash(sha256_hash); - - Ok(HttpResponseOk(HostPhase2RecoveryImageId { sha256_hash })) + let handler = async { + // TODO: this makes a full copy of the host image, potentially unnecessarily + // if it's malformed. + let image = body.as_bytes().to_vec(); + + let sha256_hash = + apictx.host_phase2_provider.insert(image).await.map_err( + |err| { + // Any cache-insertion failure indicates a malformed image; map them + // to bad requests. 
+ HttpError::for_bad_request( + Some("BadHostPhase2Image".to_string()), + err.to_string(), + ) + }, + )?; + let sha256_hash = ArtifactHash(sha256_hash); + + Ok(HttpResponseOk(HostPhase2RecoveryImageId { sha256_hash })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_local_switch_id( rqctx: RequestContext, ) -> Result, HttpError> { let apictx = rqctx.context(); + let handler = async { + let id = apictx.mgmt_switch.local_switch()?; - let id = apictx.mgmt_switch.local_switch()?; - - Ok(HttpResponseOk(id.into())) + Ok(HttpResponseOk(id.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_all_ids( rqctx: RequestContext, ) -> Result>, HttpError> { let apictx = rqctx.context(); - - let all_ids = - apictx.mgmt_switch.all_sps()?.map(|(id, _)| id.into()).collect(); - - Ok(HttpResponseOk(all_ids)) + let handler = async { + let all_ids = apictx + .mgmt_switch + .all_sps()? + .map(|(id, _)| id.into()) + .collect(); + + Ok(HttpResponseOk(all_ids)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 8e764dc63f..e07df0cfb9 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -143,6 +143,7 @@ impl Server { config.host_phase2_recovery_image_cache_max_images, )); let apictx = ServerContext::new( + args.id, host_phase2_provider, config.switch, args.rack_id, @@ -306,15 +307,6 @@ impl Server { warn!(self.apictx.log, "SMF refresh called without a rack id"); } } - - // TODO does MGS register itself with oximeter? - // Register the Nexus server as a metric producer with `oximeter. - // pub async fn register_as_producer(&self) { - // self.apictx - // .nexus - // .register_as_producer(self.http_server_internal.local_addr()) - // .await; - // } } /// Start an instance of the [Server]. @@ -337,6 +329,5 @@ pub async fn start_server( debug!(log, "registered DTrace probes"); } let server = Server::start(config, args, log).await?; - // server.register_as_producer().await; Ok(server) } diff --git a/gateway/src/metrics.rs b/gateway/src/metrics.rs index d4e0795ae0..7c133f5d97 100644 --- a/gateway/src/metrics.rs +++ b/gateway/src/metrics.rs @@ -242,6 +242,7 @@ impl Metrics { let server = { let log = log.new(slog::o!("component" => "producer-server")); let registry = ProducerRegistry::with_id(id); + // Register the producer for SP sensor metrics. registry .register_producer(Producer { sample_rx, log: log.clone() }) // TODO(ben): when you change `register_producer` to not return @@ -251,6 +252,15 @@ impl Metrics { actually return an `Err`, so this shouldn't ever \ happen...", ); + // Also, register the producer for the HTTP API metrics. + registry + .register_producer(apictx.latencies.clone()) + // TODO(ben): do this one too pls + .expect( + "`ProducerRegistry::register_producer()` will never \ + actually return an `Err`, so this shouldn't ever \ + happen...", + ); tokio::spawn( ServerManager { log, addrs: addrs_rx, registry }.run(cfg), From 6207e195bbb813d17a038702f056832d335d9180 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 27 Aug 2024 08:27:05 -0700 Subject: [PATCH 16/22] Add virtual disk timeseries schema (#6420) This adds a new set of timeseries that track block operations on virtual disks. This builds on and replaces the pre-existing Crucible data, adding more information about the disk and instance it's attached to. It also tracks I/O latencies and sizes in histograms. 
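Once these schemas are registered, the new timeseries should be inspectable
with OxQL in the same way as the `http_service` data shown in the previous
patch's description; for example (illustrative only, assuming at least one
virtual disk has produced samples):

```
0x〉get virtual_disk:io_latency | last 1
```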
--- oximeter/oximeter/schema/virtual-disk.toml | 127 +++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 oximeter/oximeter/schema/virtual-disk.toml diff --git a/oximeter/oximeter/schema/virtual-disk.toml b/oximeter/oximeter/schema/virtual-disk.toml new file mode 100644 index 0000000000..54cedae6e6 --- /dev/null +++ b/oximeter/oximeter/schema/virtual-disk.toml @@ -0,0 +1,127 @@ +format_version = 1 + +[target] +name = "virtual_disk" +description = "A virtual disk" +authz_scope = "project" +versions = [ + { version = 1, fields = [ "attached_instance_id", "block_size", "disk_id", "project_id", "silo_id", ] }, +] + +[fields.attached_instance_id] +type = "uuid" +description = "ID of the instance the disk is attached to" + +[fields.block_size] +type = "u32" +description = "Block size of the disk, in bytes" + +[fields.disk_id] +type = "uuid" +description = "ID of the disk" + +[fields.failure_reason] +type = "string" +description = "The reason an I/O operation failed" + +[fields.io_kind] +type = "string" +description = "The kind of I/O operation" + +[fields.project_id] +type = "uuid" +description = "ID of the project containing the disk" + +[fields.silo_id] +type = "uuid" +description = "ID for the silo containing the disk" + +[[metrics]] +name = "bytes_read" +description = "Number of bytes read from the disk" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "reads" +description = "Total number of read operations from the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_reads" +description = "Total number of failed read operations from the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "bytes_written" +description = "Number of bytes written to the disk" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "writes" +description = "Total number of write operations to the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_writes" +description = "Total number of failed write operations to the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "flushes" +description = "Total number of flush operations on the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_flushes" +description = "Total number of failed flush operations on the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "io_latency" +description = "Histogram of latency for I/O operations by kind" +units = "nanoseconds" +datum_type = "histogram_u64" +versions = [ + { added_in = 1, fields = [ "io_kind" ] } +] + +[[metrics]] +name = "io_size" +description = "Histogram of sizes for I/O operations by kind" +units = "bytes" +datum_type = "histogram_u64" +versions = [ + { added_in = 1, fields = [ "io_kind" ] } +] From 758818aea3f0d375b6bbf9bd87f713a5da09ffbd Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 27 Aug 2024 08:59:03 -0700 Subject: [PATCH 17/22] omdb could interpret status of blueprint tasks (#6440) --- dev-tools/omdb/src/bin/omdb/nexus.rs | 56 
+++++
 .../background/tasks/blueprint_execution.rs | 8 ++-
 .../app/background/tasks/blueprint_load.rs | 9 ++-
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index d45865b4a7..5af75fac8f 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -1556,6 +1556,62 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
                 }
             }
         }
+    } else if name == "blueprint_loader" {
+        #[derive(Deserialize)]
+        struct BlueprintLoaderStatus {
+            target_id: Uuid,
+            time_created: DateTime<Utc>,
+            status: String,
+            enabled: bool,
+        }
+
+        match serde_json::from_value::<BlueprintLoaderStatus>(details.clone()) {
+            Err(error) => eprintln!(
+                "warning: failed to interpret task details: {:?}: {:?}",
+                error, details
+            ),
+            Ok(status) => {
+                println!("    target blueprint: {}", status.target_id);
+                println!(
+                    "    execution: {}",
+                    if status.enabled { "enabled" } else { "disabled" }
+                );
+                println!(
+                    "    created at: {}",
+                    humantime::format_rfc3339_millis(
+                        status.time_created.into()
+                    )
+                );
+                println!("    status: {}", status.status);
+            }
+        }
+    } else if name == "blueprint_executor" {
+        #[derive(Deserialize)]
+        struct BlueprintExecutorStatus {
+            target_id: Uuid,
+            enabled: bool,
+            errors: Option<Vec<String>>,
+        }
+
+        match serde_json::from_value::<BlueprintExecutorStatus>(details.clone())
+        {
+            Err(error) => eprintln!(
+                "warning: failed to interpret task details: {:?}: {:?}",
+                error, details
+            ),
+            Ok(status) => {
+                println!("    target blueprint: {}", status.target_id);
+                println!(
+                    "    execution: {}",
+                    if status.enabled { "enabled" } else { "disabled" }
+                );
+                let errors = status.errors.as_deref().unwrap_or(&[]);
+                println!("    errors: {}", errors.len());
+                for (i, e) in errors.iter().enumerate() {
+                    println!("      error {}: {}", i, e);
+                }
+            }
+        }
     } else {
         println!(
             "warning: unknown background task: {:?} \
diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs
index dbbfcb3b14..2b1e3eedca 100644
--- a/nexus/src/app/background/tasks/blueprint_execution.rs
+++ b/nexus/src/app/background/tasks/blueprint_execution.rs
@@ -83,7 +83,7 @@ impl BlueprintExecutor {
                 "target_id" => %blueprint.id);
             return json!({
                 "target_id": blueprint.id.to_string(),
-                "error": "blueprint disabled"
+                "enabled": false,
             });
         }
 
@@ -111,6 +111,7 @@ impl BlueprintExecutor {
                 json!({
                     "target_id": blueprint.id.to_string(),
+                    "enabled": true,
                     "needs_saga_recovery": needs_saga_recovery,
                 })
             }
@@ -119,6 +120,7 @@ impl BlueprintExecutor {
                     errors.into_iter().map(|e| format!("{:#}", e)).collect();
                 json!({
                     "target_id": blueprint.id.to_string(),
+                    "enabled": true,
                     "errors": errors
                 })
             }
@@ -316,6 +318,7 @@ mod test {
             value,
             json!({
                 "target_id": blueprint_id,
+                "enabled": true,
                 "needs_saga_recovery": false,
             })
         );
@@ -410,6 +413,7 @@ mod test {
             value,
             json!({
                 "target_id": blueprint.1.id.to_string(),
+                "enabled": true,
                 "needs_saga_recovery": false,
             })
         );
@@ -427,7 +431,7 @@ mod test {
         assert_eq!(
             value,
             json!({
-                "error": "blueprint disabled",
+                "enabled": false,
                 "target_id": blueprint.1.id.to_string()
             })
         );
diff --git a/nexus/src/app/background/tasks/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs
index 31bc00441d..70fcf713bc 100644
--- a/nexus/src/app/background/tasks/blueprint_load.rs
+++ b/nexus/src/app/background/tasks/blueprint_load.rs
@@ -78,6 +78,7 @@ impl BackgroundTask for TargetBlueprintLoader {
         };
 
         // Decide what to do with the new blueprint
+        let enabled = new_bp_target.enabled;
Some((old_bp_target, old_blueprint)) = self.last.as_deref() else { // We've found a target blueprint for the first time. @@ -97,6 +98,7 @@ impl BackgroundTask for TargetBlueprintLoader { "time_created": time_created, "time_found": chrono::Utc::now(), "status": "first target blueprint", + "enabled": enabled, }); }; @@ -116,7 +118,8 @@ impl BackgroundTask for TargetBlueprintLoader { "target_id": target_id, "time_created": time_created, "time_found": chrono::Utc::now(), - "status": "target blueprint updated" + "status": "target blueprint updated", + "enabled": enabled, }) } else { // The new target id matches the old target id @@ -159,6 +162,7 @@ impl BackgroundTask for TargetBlueprintLoader { "time_created": time_created, "time_found": chrono::Utc::now(), "status": format!("target blueprint {status}"), + "enabled": enabled, }) } else { // We found a new target blueprint that exactly @@ -173,7 +177,8 @@ impl BackgroundTask for TargetBlueprintLoader { json!({ "target_id": target_id, "time_created": time_created, - "status": "target blueprint unchanged" + "status": "target blueprint unchanged", + "enabled": enabled, }) } } From a0cdce7395cec5813fc697172b8a28a69d27093d Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 27 Aug 2024 09:54:30 -0700 Subject: [PATCH 18/22] sled agent: index running VMMs by VMM ID, not instance ID (#6429) Change sled agent's instance lookup tables so that Propolis jobs are indexed by Propolis/VMM IDs instead of instance IDs. This is a prerequisite to revisiting how the Failed instance state works. See RFD 486 section 6.1 for all the details of why this is needed, but very broadly: when an instance's VMM is Destroyed, we'd like sled agent to tell Nexus that *before* the agent deregisters the instance from the sled, for reasons described in the RFD; but if we do that with no other changes, there's a race where Nexus may try to restart the instance on the same sled before sled agent can update its instance table, causing instance start to fail. To achieve this: - In sled agent, change the `InstanceManagerRunner`'s instance map to a `BTreeMap<PropolisUuid, Instance>`, then clean up all the compilation errors. - In Nexus: - Make callers of instance APIs furnish a Propolis ID instead of an instance ID. This is generally very straightforward because they already had to get a VMM record to figure out what sled to talk to. - Change `cpapi_instances_put` to take a Propolis ID instead of an instance ID. Regular sled agent still has both IDs, but with these changes, simulated sled agents only have a Propolis ID to work with, and plumbing an instance ID down to them requires significant code changes. - Update test code: - Unify the Nexus helper routines that let integration tests get sled agent clients or sled IDs; now they get a single struct containing both of those and the instance's Propolis IDs. - Update users of the simulated agent's `poke` endpoints to use Propolis IDs. - Delete the "detach disks on instance stop" bits of simulated sled agent. These don't appear to be load-bearing, they don't correspond to any behavior in the actual sled agent (which doesn't manage disk attachment or detachment), and it was a pain to rework them to work with Propolis IDs. Tests: cargo nextest. Related: #4226 and #4872, among others.
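To make the race and the fix concrete, here is a minimal, hypothetical sketch of the re-keying described above. The newtypes, `PropolisJob`, and `VmmTable` are illustrative stand-ins only, not the sled agent's actual types; the real table is `InstanceManagerRunner`'s map, keyed by `PropolisUuid` from `omicron-uuid-kinds`:

use std::collections::BTreeMap;

/// Illustrative stand-ins for the real typed UUIDs.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct PropolisId(u64);
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct InstanceId(u64);

/// Stand-in for the sled agent's per-VMM state (the real `Instance` handle).
#[derive(Debug)]
struct PropolisJob {
    instance_id: InstanceId,
}

/// Keyed by Propolis/VMM ID rather than instance ID: a restarted instance
/// gets a fresh Propolis ID, so registering the new VMM cannot collide with
/// an old VMM for the same instance that has not been deregistered yet.
#[derive(Debug, Default)]
struct VmmTable {
    jobs: BTreeMap<PropolisId, PropolisJob>,
}

impl VmmTable {
    fn register(&mut self, propolis_id: PropolisId, instance_id: InstanceId) {
        // Under an instance-ID key, this insert would have clobbered (or been
        // blocked by) the Destroyed-but-not-yet-deregistered VMM's entry.
        self.jobs.insert(propolis_id, PropolisJob { instance_id });
    }

    fn unregister(&mut self, propolis_id: PropolisId) -> Option<PropolisJob> {
        self.jobs.remove(&propolis_id)
    }
}

fn main() {
    let mut table = VmmTable::default();
    let instance = InstanceId(1);
    // The old VMM is Destroyed but the agent has not deregistered it yet...
    table.register(PropolisId(100), instance);
    // ...while Nexus already starts a replacement VMM for the same instance.
    table.register(PropolisId(101), instance);
    assert_eq!(table.jobs.len(), 2);
    assert_eq!(table.jobs[&PropolisId(101)].instance_id, instance);
    // Tearing down the old job leaves the new one untouched.
    table.unregister(PropolisId(100));
    assert_eq!(table.jobs.len(), 1);
}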
--- clients/nexus-client/src/lib.rs | 9 +- clients/sled-agent-client/src/lib.rs | 30 +- common/src/api/internal/nexus.rs | 10 +- nexus/db-queries/src/db/datastore/vmm.rs | 25 +- nexus/internal-api/src/lib.rs | 20 +- .../background/tasks/abandoned_vmm_reaper.rs | 4 +- .../app/background/tasks/instance_watcher.rs | 24 +- nexus/src/app/instance.rs | 207 ++-- nexus/src/app/sagas/instance_common.rs | 61 +- nexus/src/app/sagas/instance_create.rs | 3 +- nexus/src/app/sagas/instance_ip_attach.rs | 59 +- nexus/src/app/sagas/instance_ip_detach.rs | 45 +- nexus/src/app/sagas/instance_migrate.rs | 21 +- nexus/src/app/sagas/instance_start.rs | 14 +- nexus/src/app/sagas/instance_update/mod.rs | 31 +- nexus/src/app/sagas/snapshot_create.rs | 36 +- nexus/src/app/sagas/test_helpers.rs | 45 +- nexus/src/app/snapshot.rs | 2 +- nexus/src/app/test_interfaces.rs | 148 +-- nexus/src/internal_api/http_entrypoints.rs | 12 +- nexus/tests/integration_tests/disks.rs | 7 +- nexus/tests/integration_tests/instances.rs | 131 ++- nexus/tests/integration_tests/ip_pools.rs | 6 +- nexus/tests/integration_tests/pantry.rs | 6 +- openapi/nexus-internal.json | 162 ++- openapi/sled-agent.json | 1004 ++++++++--------- sled-agent/api/src/lib.rs | 79 +- sled-agent/src/common/instance.rs | 27 +- sled-agent/src/fakes/nexus.rs | 23 +- sled-agent/src/http_entrypoints.rs | 87 +- sled-agent/src/instance.rs | 120 +- sled-agent/src/instance_manager.rs | 263 ++--- sled-agent/src/sim/collection.rs | 59 +- sled-agent/src/sim/http_entrypoints.rs | 112 +- sled-agent/src/sim/instance.rs | 42 +- sled-agent/src/sim/sled_agent.rs | 163 ++- sled-agent/src/sim/storage.rs | 4 +- sled-agent/src/sled_agent.rs | 49 +- sled-agent/types/src/instance.rs | 45 +- 39 files changed, 1565 insertions(+), 1630 deletions(-) diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index a55c5d4013..97f6373e29 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -131,14 +131,11 @@ impl From } } -impl From<omicron_common::api::internal::nexus::SledInstanceState> - for types::SledInstanceState +impl From<omicron_common::api::internal::nexus::SledVmmState> + for types::SledVmmState { - fn from( - s: omicron_common::api::internal::nexus::SledInstanceState, - ) -> Self { + fn from(s: omicron_common::api::internal::nexus::SledVmmState) -> Self { Self { - propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), migration_in: s.migration_in.map(Into::into), migration_out: s.migration_out.map(Into::into), diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index ed96d762dc..b14cf5a96f 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -5,6 +5,7 @@ //! Interface for making API requests to a Sled Agent use async_trait::async_trait; +use omicron_uuid_kinds::PropolisUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -161,12 +162,11 @@ impl From } } -impl From<types::SledInstanceState> - for omicron_common::api::internal::nexus::SledInstanceState +impl From<types::SledVmmState> + for omicron_common::api::internal::nexus::SledVmmState { - fn from(s: types::SledInstanceState) -> Self { + fn from(s: types::SledVmmState) -> Self { Self { - propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), migration_in: s.migration_in.map(Into::into), migration_out: s.migration_out.map(Into::into), @@ -448,11 +448,11 @@ impl From /// are bonus endpoints, not generated in the real client.
#[async_trait] pub trait TestInterfaces { - async fn instance_single_step(&self, id: Uuid); - async fn instance_finish_transition(&self, id: Uuid); - async fn instance_simulate_migration_source( + async fn vmm_single_step(&self, id: PropolisUuid); + async fn vmm_finish_transition(&self, id: PropolisUuid); + async fn vmm_simulate_migration_source( &self, - id: Uuid, + id: PropolisUuid, params: SimulateMigrationSource, ); async fn disk_finish_transition(&self, id: Uuid); @@ -460,10 +460,10 @@ pub trait TestInterfaces { #[async_trait] impl TestInterfaces for Client { - async fn instance_single_step(&self, id: Uuid) { + async fn vmm_single_step(&self, id: PropolisUuid) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{}/instances/{}/poke-single-step", baseurl, id); + let url = format!("{}/vmms/{}/poke-single-step", baseurl, id); client .post(url) .send() .await .expect("instance_single_step() failed unexpectedly"); } - async fn instance_finish_transition(&self, id: Uuid) { + async fn vmm_finish_transition(&self, id: PropolisUuid) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{}/instances/{}/poke", baseurl, id); + let url = format!("{}/vmms/{}/poke", baseurl, id); client .post(url) .send() .await .expect("instance_finish_transition() failed unexpectedly"); } @@ -493,14 +493,14 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } - async fn instance_simulate_migration_source( + async fn vmm_simulate_migration_source( &self, - id: Uuid, + id: PropolisUuid, params: SimulateMigrationSource, ) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{baseurl}/instances/{id}/sim-migration-source"); + let url = format!("{baseurl}/vmms/{id}/sim-migration-source"); client .post(url) .json(&params) diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 4daea6a198..996b000ddc 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -113,13 +113,9 @@ pub struct VmmRuntimeState { pub time_updated: DateTime<Utc>, } -/// A wrapper type containing a sled's total knowledge of the state of a -/// specific VMM and the instance it incarnates. +/// A wrapper type containing a sled's total knowledge of the state of a VMM. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SledInstanceState { - /// The ID of the VMM whose state is being reported. - pub propolis_id: PropolisUuid, - +pub struct SledVmmState { /// The most recent state of the sled's VMM process. pub vmm_state: VmmRuntimeState, @@ -142,7 +138,7 @@ impl Migrations<'_> { } } -impl SledInstanceState { +impl SledVmmState { pub fn migrations(&self) -> Migrations<'_> { Migrations { migration_in: self.migration_in.as_ref(), diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index 14c3405a70..089a2914be 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -5,7 +5,6 @@ //! [`DataStore`] helpers for working with VMM records. use super::DataStore; -use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; @@ -40,8 +39,13 @@ use uuid::Uuid; /// The result of an [`DataStore::vmm_and_migration_update_runtime`] call, /// indicating which records were updated. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub struct VmmStateUpdateResult { + /// The VMM record that the update query found and possibly updated.
+ /// + /// NOTE: This is the record prior to the update! + pub found_vmm: Vmm, + /// `true` if the VMM record was updated, `false` otherwise. pub vmm_updated: bool, @@ -108,14 +112,10 @@ impl DataStore { pub async fn vmm_fetch( &self, opctx: &OpContext, - authz_instance: &authz::Instance, vmm_id: &PropolisUuid, ) -> LookupResult<Vmm> { - opctx.authorize(authz::Action::Read, authz_instance).await?; - let vmm = dsl::vmm .filter(dsl::id.eq(vmm_id.into_untyped_uuid())) - .filter(dsl::instance_id.eq(authz_instance.id())) .filter(dsl::time_deleted.is_null()) .select(Vmm::as_select()) .get_result_async(&*self.pool_connection_authorized(opctx).await?) @@ -233,13 +233,21 @@ impl DataStore { .transaction(&conn, |conn| { let err = err.clone(); async move { - let vmm_updated = self + let vmm_update_result = self .vmm_update_runtime_on_connection( &conn, &vmm_id, new_runtime, ) - .await.map(|r| match r.status { UpdateStatus::Updated => true, UpdateStatus::NotUpdatedButExists => false })?; + .await?; + + + let found_vmm = vmm_update_result.found; + let vmm_updated = match vmm_update_result.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false + }; + let migration_out_updated = match migration_out { Some(migration) => { let r = self.migration_update_source_on_connection( @@ -287,6 +295,7 @@ impl DataStore { None => false, }; Ok(VmmStateUpdateResult { + found_vmm, vmm_updated, migration_in_updated, migration_out_updated, diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index 7ac3e42f57..12e99ba23b 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -33,14 +33,14 @@ use omicron_common::{ DiskRuntimeState, DownstairsClientStopRequest, DownstairsClientStopped, ProducerEndpoint, ProducerRegistrationResponse, RepairFinishInfo, RepairProgress, - RepairStartInfo, SledInstanceState, + RepairStartInfo, SledVmmState, }, }, update::ArtifactId, }; use omicron_uuid_kinds::{ - DemoSagaUuid, DownstairsKind, SledUuid, TypedUuid, UpstairsKind, - UpstairsRepairKind, + DemoSagaUuid, DownstairsKind, PropolisUuid, SledUuid, TypedUuid, + UpstairsKind, UpstairsRepairKind, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -108,15 +108,15 @@ pub trait NexusInternalApi { body: TypedBody, ) -> Result, HttpError>; - /// Report updated state for an instance. + /// Report updated state for a VMM. #[endpoint { method = PUT, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] async fn cpapi_instances_put( rqctx: RequestContext<Self::Context>, - path_params: Path<InstancePathParam>, - new_runtime_state: TypedBody<SledInstanceState>, + path_params: Path<VmmPathParam>, + new_runtime_state: TypedBody<SledVmmState>, ) -> Result<HttpResponseUpdatedNoContent, HttpError>; #[endpoint { @@ -568,6 +568,12 @@ pub struct InstancePathParam { pub instance_id: Uuid, } +/// Path parameters for VMM requests (internal API) +#[derive(Deserialize, JsonSchema)] +pub struct VmmPathParam { + pub propolis_id: PropolisUuid, +} + #[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] pub struct CollectorIdPathParams { /// The ID of the oximeter collector. diff --git a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index a81080ec75..ca6e7e4271 100644 --- a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -28,8 +28,8 @@ //! remains alive and continues to own its virtual provisioning resources. //! //! Cleanup of instance resources when an instance's *active* VMM is destroyed -//!
is handled elsewhere, by `notify_instance_updated` and (eventually) the -//! `instance-update` saga. +//! is handled elsewhere, by `process_vmm_update` and the `instance-update` +//! saga. use crate::app::background::BackgroundTask; use anyhow::Context; diff --git a/nexus/src/app/background/tasks/instance_watcher.rs b/nexus/src/app/background/tasks/instance_watcher.rs index f63c21105e..ae78392ea3 100644 --- a/nexus/src/app/background/tasks/instance_watcher.rs +++ b/nexus/src/app/background/tasks/instance_watcher.rs @@ -19,9 +19,9 @@ use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::external::InstanceState; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::InstanceUuid; +use omicron_uuid_kinds::PropolisUuid; use oximeter::types::ProducerRegistry; use sled_agent_client::Client as SledAgentClient; use std::borrow::Cow; @@ -81,12 +81,12 @@ impl InstanceWatcher { let client = client.clone(); async move { - slog::trace!(opctx.log, "checking on instance..."); - let rsp = client - .instance_get_state(&InstanceUuid::from_untyped_uuid( - target.instance_id, - )) - .await; + let vmm_id = PropolisUuid::from_untyped_uuid(target.vmm_id); + slog::trace!( + opctx.log, "checking on VMM"; "propolis_id" => %vmm_id + ); + + let rsp = client.vmm_get_state(&vmm_id).await; let mut check = Check { target, outcome: Default::default(), @@ -151,7 +151,7 @@ impl InstanceWatcher { } }; - let new_runtime_state: SledInstanceState = state.into(); + let new_runtime_state: SledVmmState = state.into(); check.outcome = CheckOutcome::Success(new_runtime_state.vmm_state.state.into()); debug!( opctx.log, "updating instance state"; "state" => ?new_runtime_state.vmm_state.state, ); - match crate::app::instance::notify_instance_updated( + match crate::app::instance::process_vmm_update( &datastore, &opctx, - InstanceUuid::from_untyped_uuid(target.instance_id), + PropolisUuid::from_untyped_uuid(target.vmm_id), &new_runtime_state, ) .await @@ -176,7 +176,7 @@ impl InstanceWatcher { _ => Err(Incomplete::UpdateFailed), }; } - Ok(Some(saga)) => { + Ok(Some((_, saga))) => { check.update_saga_queued = true; if let Err(e) = sagas.saga_start(saga).await { warn!(opctx.log, "update saga failed"; "error" => ?e); diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 3106ab9f2a..b715b6bbd3 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -60,7 +60,7 @@ use propolis_client::support::WebSocketStream; use sagas::instance_common::ExternalIpAttach; use sled_agent_client::types::InstanceMigrationTargetParams; use sled_agent_client::types::InstanceProperties; -use sled_agent_client::types::InstancePutStateBody; +use sled_agent_client::types::VmmPutStateBody; use std::matches; use std::net::SocketAddr; use std::sync::Arc; @@ -154,7 +154,7 @@ pub(crate) enum InstanceStateChangeRequest { } impl From<InstanceStateChangeRequest> - for sled_agent_client::types::InstanceStateRequested + for sled_agent_client::types::VmmStateRequested { fn from(value: InstanceStateChangeRequest) -> Self { match value { @@ -176,7 +176,7 @@ enum InstanceStateChangeRequestAction { /// Request the appropriate state change from the sled with the specified /// UUID.
- SendToSled(SledUuid), + SendToSled { sled_id: SledUuid, propolis_id: PropolisUuid }, } /// What is the higher level operation that is calling @@ -553,7 +553,6 @@ impl super::Nexus { if let Err(e) = self .instance_request_state( opctx, - &authz_instance, state.instance(), state.vmm(), InstanceStateChangeRequest::Reboot, @@ -632,7 +631,6 @@ impl super::Nexus { if let Err(e) = self .instance_request_state( opctx, - &authz_instance, state.instance(), state.vmm(), InstanceStateChangeRequest::Stop, @@ -664,21 +662,18 @@ impl super::Nexus { /// this sled, this operation rudely terminates it. pub(crate) async fn instance_ensure_unregistered( &self, - opctx: &OpContext, - authz_instance: &authz::Instance, + propolis_id: &PropolisUuid, sled_id: &SledUuid, - ) -> Result<Option<nexus::SledInstanceState>, InstanceStateChangeError> - { - opctx.authorize(authz::Action::Modify, authz_instance).await?; + ) -> Result<Option<nexus::SledVmmState>, InstanceStateChangeError> { let sa = self.sled_client(&sled_id).await?; - sa.instance_unregister(&InstanceUuid::from_untyped_uuid( - authz_instance.id(), - )) - .await - .map(|res| res.into_inner().updated_runtime.map(Into::into)) - .map_err(|e| { - InstanceStateChangeError::SledAgent(SledAgentInstancePutError(e)) - }) + sa.vmm_unregister(propolis_id) + .await + .map(|res| res.into_inner().updated_runtime.map(Into::into)) + .map_err(|e| { + InstanceStateChangeError::SledAgent(SledAgentInstancePutError( + e, + )) + }) } /// Determines the action to take on an instance's active VMM given a @@ -712,8 +707,11 @@ impl super::Nexus { // Requests that operate on active instances have to be directed to the // instance's current sled agent. If there is none, the request needs to // be handled specially based on its type. - let sled_id = if let Some(vmm) = vmm_state { - SledUuid::from_untyped_uuid(vmm.sled_id) + let (sled_id, propolis_id) = if let Some(vmm) = vmm_state { + ( + SledUuid::from_untyped_uuid(vmm.sled_id), + PropolisUuid::from_untyped_uuid(vmm.id), + ) } else { match effective_state { // If there's no active sled because the instance is stopped, @@ -814,7 +812,10 @@ impl super::Nexus { }; if allowed { - Ok(InstanceStateChangeRequestAction::SendToSled(sled_id)) + Ok(InstanceStateChangeRequestAction::SendToSled { + sled_id, + propolis_id, + }) } else { Err(Error::invalid_request(format!( "instance state cannot be changed from state \"{}\"", @@ -826,26 +827,25 @@ impl super::Nexus { pub(crate) async fn instance_request_state( &self, opctx: &OpContext, - authz_instance: &authz::Instance, prev_instance_state: &db::model::Instance, prev_vmm_state: &Option<db::model::Vmm>, requested: InstanceStateChangeRequest, ) -> Result<(), InstanceStateChangeError> { - opctx.authorize(authz::Action::Modify, authz_instance).await?; - let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id()); - match self.select_runtime_change_action( prev_instance_state, prev_vmm_state, &requested, )? { InstanceStateChangeRequestAction::AlreadyDone => Ok(()), - InstanceStateChangeRequestAction::SendToSled(sled_id) => { + InstanceStateChangeRequestAction::SendToSled { + sled_id, + propolis_id, + } => { let sa = self.sled_client(&sled_id).await?; let instance_put_result = sa - .instance_put_state( - &instance_id, - &InstancePutStateBody { state: requested.into() }, + .vmm_put_state( + &propolis_id, + &VmmPutStateBody { state: requested.into() }, ) .await .map(|res| res.into_inner().updated_runtime.map(Into::into)) @@ -862,7 +862,7 @@ impl super::Nexus { // Ok(None) here, in which case, there's nothing to write back.
match instance_put_result { Ok(Some(ref state)) => self - .notify_instance_updated(opctx, instance_id, state) + .notify_vmm_updated(opctx, propolis_id, state) .await .map_err(Into::into), Ok(None) => Ok(()), @@ -1120,13 +1120,13 @@ impl super::Nexus { .sled_client(&SledUuid::from_untyped_uuid(initial_vmm.sled_id)) .await?; let instance_register_result = sa - .instance_register( - &instance_id, + .vmm_register( + propolis_id, &sled_agent_client::types::InstanceEnsureBody { hardware: instance_hardware, instance_runtime: db_instance.runtime().clone().into(), vmm_runtime: initial_vmm.clone().into(), - propolis_id: *propolis_id, + instance_id, propolis_addr: SocketAddr::new( initial_vmm.propolis_ip.ip(), initial_vmm.propolis_port.into(), @@ -1141,8 +1141,7 @@ impl super::Nexus { match instance_register_result { Ok(state) => { - self.notify_instance_updated(opctx, instance_id, &state) - .await?; + self.notify_vmm_updated(opctx, *propolis_id, &state).await?; } Err(e) => { if e.instance_unhealthy() { @@ -1321,19 +1320,22 @@ impl super::Nexus { /// Invoked by a sled agent to publish an updated runtime state for an /// Instance. - pub(crate) async fn notify_instance_updated( + pub(crate) async fn notify_vmm_updated( &self, opctx: &OpContext, - instance_id: InstanceUuid, - new_runtime_state: &nexus::SledInstanceState, + propolis_id: PropolisUuid, + new_runtime_state: &nexus::SledVmmState, ) -> Result<(), Error> { - let saga = notify_instance_updated( + let Some((instance_id, saga)) = process_vmm_update( &self.db_datastore, opctx, - instance_id, + propolis_id, new_runtime_state, ) - .await?; + .await? + else { + return Ok(()); + }; // We don't need to wait for the instance update saga to run to // completion to return OK to the sled-agent --- all it needs to care @@ -1344,53 +1346,51 @@ impl super::Nexus { // one is eventually executed. // // Therefore, just spawn the update saga in a new task, and return. - if let Some(saga) = saga { - info!(opctx.log, "starting update saga for {instance_id}"; - "instance_id" => %instance_id, - "vmm_state" => ?new_runtime_state.vmm_state, - "migration_state" => ?new_runtime_state.migrations(), - ); - let sagas = self.sagas.clone(); - let task_instance_updater = - self.background_tasks.task_instance_updater.clone(); - let log = opctx.log.clone(); - tokio::spawn(async move { - // TODO(eliza): maybe we should use the lower level saga API so - // we can see if the saga failed due to the lock being held and - // retry it immediately? - let running_saga = async move { - let runnable_saga = sagas.saga_prepare(saga).await?; - runnable_saga.start().await - } - .await; - let result = match running_saga { - Err(error) => { - error!(&log, "failed to start update saga for {instance_id}"; - "instance_id" => %instance_id, - "error" => %error, - ); - // If we couldn't start the update saga for this - // instance, kick the instance-updater background task - // to try and start it again in a timely manner. 
- task_instance_updater.activate(); - return; - } - Ok(saga) => { - saga.wait_until_stopped().await.into_omicron_result() - } - }; - if let Err(error) = result { - error!(&log, "update saga for {instance_id} failed"; + info!(opctx.log, "starting update saga for {instance_id}"; + "instance_id" => %instance_id, + "vmm_state" => ?new_runtime_state.vmm_state, + "migration_state" => ?new_runtime_state.migrations(), + ); + let sagas = self.sagas.clone(); + let task_instance_updater = + self.background_tasks.task_instance_updater.clone(); + let log = opctx.log.clone(); + tokio::spawn(async move { + // TODO(eliza): maybe we should use the lower level saga API so + // we can see if the saga failed due to the lock being held and + // retry it immediately? + let running_saga = async move { + let runnable_saga = sagas.saga_prepare(saga).await?; + runnable_saga.start().await + } + .await; + let result = match running_saga { + Err(error) => { + error!(&log, "failed to start update saga for {instance_id}"; "instance_id" => %instance_id, "error" => %error, ); - // If we couldn't complete the update saga for this + // If we couldn't start the update saga for this // instance, kick the instance-updater background task // to try and start it again in a timely manner. task_instance_updater.activate(); + return; } - }); - } + Ok(saga) => { + saga.wait_until_stopped().await.into_omicron_result() + } + }; + if let Err(error) = result { + error!(&log, "update saga for {instance_id} failed"; + "instance_id" => %instance_id, + "error" => %error, + ); + // If we couldn't complete the update saga for this + // instance, kick the instance-updater background task + // to try and start it again in a timely manner. + task_instance_updater.activate(); + } + }); Ok(()) } @@ -1830,21 +1830,27 @@ impl super::Nexus { } } -/// Invoked by a sled agent to publish an updated runtime state for an -/// Instance, returning an update saga for that instance (if one must be -/// executed). -pub(crate) async fn notify_instance_updated( +/// Writes the VMM and migration state supplied in `new_runtime_state` to the +/// database (provided that it's newer than what's already there). +/// +/// # Return value +/// +/// - `Ok(Some((instance_id, saga)))` if the new VMM state obsoletes the current +/// instance state. The caller should execute the returned instance update +/// saga to reconcile the instance to the new VMM state. +/// - `Ok(None)` if the new state was successfully published but does not +/// require an instance update. +/// - `Err` if an error occurred. +pub(crate) async fn process_vmm_update( datastore: &DataStore, opctx: &OpContext, - instance_id: InstanceUuid, - new_runtime_state: &nexus::SledInstanceState, -) -> Result<Option<steno::SagaDag>, Error> { + propolis_id: PropolisUuid, + new_runtime_state: &nexus::SledVmmState, +) -> Result<Option<(InstanceUuid, steno::SagaDag)>, Error> { use sagas::instance_update; let migrations = new_runtime_state.migrations(); - let propolis_id = new_runtime_state.propolis_id; info!(opctx.log, "received new VMM runtime state from sled agent"; - "instance_id" => %instance_id, "propolis_id" => %propolis_id, "vmm_state" => ?new_runtime_state.vmm_state, "migration_state" => ?migrations, @@ -1864,21 +1870,34 @@ pub(crate) async fn notify_instance_updated( // prepare and return it.
if instance_update::update_saga_needed( &opctx.log, - instance_id, + propolis_id, new_runtime_state, &result, ) { + let instance_id = + InstanceUuid::from_untyped_uuid(result.found_vmm.instance_id); + let (.., authz_instance) = LookupPath::new(&opctx, datastore) .instance_id(instance_id.into_untyped_uuid()) .lookup_for(authz::Action::Modify) .await?; - let saga = instance_update::SagaInstanceUpdate::prepare( + + match instance_update::SagaInstanceUpdate::prepare( &instance_update::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), authz_instance, }, - )?; - Ok(Some(saga)) + ) { + Ok(saga) => Ok(Some((instance_id, saga))), + Err(e) => { + error!(opctx.log, "failed to prepare instance update saga"; + "error" => ?e, + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); + + Err(e) + } + } } else { Ok(None) } diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 6e431aaca7..049673d2ee 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -25,6 +25,12 @@ use super::NexusActionContext; /// The port propolis-server listens on inside the propolis zone. const DEFAULT_PROPOLIS_PORT: u16 = 12400; +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(super) struct VmmAndSledIds { + pub(super) vmm_id: PropolisUuid, + pub(super) sled_id: SledUuid, +} + /// Reserves resources for a new VMM whose instance has `ncpus` guest logical /// processors and `guest_memory` bytes of guest RAM. The selected sled is /// random within the set of sleds allowed by the supplied `constraints`. @@ -213,12 +219,12 @@ pub async fn instance_ip_move_state( /// the Attaching or Detaching state so that concurrent attempts to start the /// instance will notice that the IP state is in flux and ask the caller to /// retry. -pub async fn instance_ip_get_instance_state( +pub(super) async fn instance_ip_get_instance_state( sagactx: &NexusActionContext, serialized_authn: &authn::saga::Serialized, authz_instance: &authz::Instance, verb: &str, -) -> Result<Option<SledUuid>, ActionError> { +) -> Result<Option<VmmAndSledIds>, ActionError> { // XXX: we can get instance state (but not sled ID) in same transaction // as attach (but not detach) with current design. We need to re-query // for sled ID anyhow, so keep consistent between attach/detach. @@ -236,7 +242,11 @@ pub async fn instance_ip_get_instance_state( inst_and_vmm.vmm().as_ref().map(|vmm| vmm.runtime.state); let found_instance_state = inst_and_vmm.instance().runtime_state.nexus_state; - let mut sled_id = inst_and_vmm.sled_id(); + let mut propolis_and_sled_id = + inst_and_vmm.vmm().as_ref().map(|vmm| VmmAndSledIds { + vmm_id: PropolisUuid::from_untyped_uuid(vmm.id), + sled_id: SledUuid::from_untyped_uuid(vmm.sled_id), + }); slog::debug!( osagactx.log(), "evaluating instance state for IP attach/detach"; @@ -257,7 +267,7 @@ pub async fn instance_ip_get_instance_state( match (found_instance_state, found_vmm_state) { // If there's no VMM, the instance is definitely not on any sled.
(InstanceState::NoVmm, _) | (_, Some(VmmState::SagaUnwound)) => { - sled_id = None; + propolis_and_sled_id = None; } // If the instance is running normally or rebooting, it's resident on @@ -340,7 +350,7 @@ pub async fn instance_ip_get_instance_state( } } - Ok(sled_id) + Ok(propolis_and_sled_id) } /// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a @@ -441,18 +451,19 @@ pub async fn instance_ip_remove_nat( /// Inform the OPTE port for a running instance that it should start /// sending/receiving traffic on a given IP address. /// -/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly -/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). -pub async fn instance_ip_add_opte( +/// This call is a no-op if the instance is not active (`propolis_and_sled` is +/// `None`) or the calling saga is explicitly set to be inactive in the event of +/// a double attach/detach (`!target_ip.do_saga`). +pub(super) async fn instance_ip_add_opte( sagactx: &NexusActionContext, - authz_instance: &authz::Instance, - sled_uuid: Option<SledUuid>, + vmm_and_sled: Option<VmmAndSledIds>, target_ip: ModifyStateForExternalIp, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); // No physical sled? Don't inform OPTE. - let Some(sled_uuid) = sled_uuid else { + let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) = vmm_and_sled + else { return Ok(()); }; @@ -470,17 +481,14 @@ pub async fn instance_ip_add_opte( osagactx .nexus() - .sled_client(&sled_uuid) + .sled_client(&sled_id) .await .map_err(|_| { ActionError::action_failed(Error::unavail( "sled agent client went away mid-attach/detach", )) })? - .instance_put_external_ip( - &InstanceUuid::from_untyped_uuid(authz_instance.id()), - &sled_agent_body, - ) + .vmm_put_external_ip(&propolis_id, &sled_agent_body) .await .map_err(|e| { ActionError::action_failed(match e { @@ -499,18 +507,20 @@ pub async fn instance_ip_add_opte( /// Inform the OPTE port for a running instance that it should cease /// sending/receiving traffic on a given IP address. /// -/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly -/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). -pub async fn instance_ip_remove_opte( +/// This call is a no-op if the instance is not active (`propolis_and_sled` is +/// `None`) or the calling saga is explicitly set to be inactive in the event of +/// a double attach/detach (`!target_ip.do_saga`). +pub(super) async fn instance_ip_remove_opte( sagactx: &NexusActionContext, - authz_instance: &authz::Instance, - sled_uuid: Option<SledUuid>, + propolis_and_sled: Option<VmmAndSledIds>, target_ip: ModifyStateForExternalIp, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); // No physical sled? Don't inform OPTE. - let Some(sled_uuid) = sled_uuid else { + let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) = + propolis_and_sled + else { return Ok(()); }; @@ -528,17 +538,14 @@ pub async fn instance_ip_remove_opte( osagactx .nexus() - .sled_client(&sled_uuid) + .sled_client(&sled_id) .await .map_err(|_| { ActionError::action_failed(Error::unavail( "sled agent client went away mid-attach/detach", )) })?
- .instance_delete_external_ip( - &InstanceUuid::from_untyped_uuid(authz_instance.id()), - &sled_agent_body, - ) + .vmm_delete_external_ip(&propolis_id, &sled_agent_body) .await .map_err(|e| { ActionError::action_failed(match e { diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index d19230892f..0b6d8cc0f8 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1220,8 +1220,7 @@ pub mod test { } async fn no_instances_or_disks_on_sled(sled_agent: &SledAgent) -> bool { - sled_agent.instance_count().await == 0 - && sled_agent.disk_count().await == 0 + sled_agent.vmm_count().await == 0 && sled_agent.disk_count().await == 0 } pub(crate) async fn verify_clean_slate( diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index a14054cf66..e6fb8654ea 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -5,7 +5,7 @@ use super::instance_common::{ instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, instance_ip_move_state, instance_ip_remove_opte, ExternalIpAttach, - ModifyStateForExternalIp, + ModifyStateForExternalIp, VmmAndSledIds, }; use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz}; use nexus_db_model::{IpAttachState, Ipv4NatEntry}; use nexus_types::external_api::views; use omicron_common::api::external::Error; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -161,7 +161,7 @@ async fn siia_begin_attach_ip_undo( async fn siia_get_instance_state( sagactx: NexusActionContext, -) -> Result<Option<SledUuid>, ActionError> { +) -> Result<Option<VmmAndSledIds>, ActionError> { let params = sagactx.saga_params::<Params>()?; instance_ip_get_instance_state( &sagactx, &params.serialized_authn, &params.authz_instance, "attach", ) .await } @@ -177,7 +177,10 @@ async fn siia_nat( sagactx: NexusActionContext, ) -> Result<Option<Ipv4NatEntry>, ActionError> { let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let sled_id = sagactx + .lookup::<Option<VmmAndSledIds>>("instance_state")? + .map(|ids| ids.sled_id); + let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; instance_ip_add_nat( &sagactx, &params.serialized_authn, &params.authz_instance, sled_id, target_ip, ) .await } @@ -245,28 +248,18 @@ async fn siia_nat_undo( async fn siia_update_opte( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let ids = sagactx.lookup::<Option<VmmAndSledIds>>("instance_state")?; let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; - instance_ip_add_opte(&sagactx, &params.authz_instance, sled_id, target_ip) - .await + instance_ip_add_opte(&sagactx, ids, target_ip).await } async fn siia_update_opte_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); - let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let ids = sagactx.lookup::<Option<VmmAndSledIds>>("instance_state")?; let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; - if let Err(e) = instance_ip_remove_opte( - &sagactx, - &params.authz_instance, - sled_id, - target_ip, - ) - .await - { + if let Err(e) = instance_ip_remove_opte(&sagactx, ids, target_ip).await { error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}"); } Ok(()) @@ -436,8 +429,14 @@ pub(crate) mod test { } // Sled agent has a record of the new external IPs. + let VmmAndSledIds { vmm_id, ..
} = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips .iter() .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); @@ -458,7 +457,7 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, - instance_id: Uuid, + instance_id: InstanceUuid, ) { use nexus_db_queries::db::schema::external_ip::dsl; @@ -471,7 +470,7 @@ pub(crate) mod test { assert!(dsl::external_ip .filter(dsl::kind.eq(IpKind::Floating)) .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) .filter(dsl::state.ne(IpAttachState::Detached)) .select(ExternalIp::as_select()) .first_async::(&*conn) @@ -492,8 +491,14 @@ pub(crate) mod test { .is_none()); // No IP bindings remain on sled-agent. + let VmmAndSledIds { vmm_id, .. } = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips.is_empty()); } @@ -512,9 +517,10 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); crate::app::sagas::test_helpers::instance_simulate( cptestctx, - &InstanceUuid::from_untyped_uuid(instance.identity.id), + &instance_id, ) .await; @@ -522,7 +528,7 @@ pub(crate) mod test { test_helpers::action_failure_can_unwind::( nexus, || Box::pin(new_test_params(&opctx, datastore, use_float) ), - || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + || Box::pin(verify_clean_slate(&cptestctx, instance_id)), log, ) .await; @@ -544,9 +550,10 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); crate::app::sagas::test_helpers::instance_simulate( cptestctx, - &InstanceUuid::from_untyped_uuid(instance.identity.id), + &instance_id, ) .await; @@ -558,7 +565,7 @@ pub(crate) mod test { >( nexus, || Box::pin(new_test_params(&opctx, datastore, use_float)), - || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + || Box::pin(verify_clean_slate(&cptestctx, instance_id)), log, ) .await; diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index a5b51ce375..d9da9fc05c 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -5,7 +5,7 @@ use super::instance_common::{ instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte, - ModifyStateForExternalIp, + ModifyStateForExternalIp, VmmAndSledIds, }; use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; @@ -15,7 +15,7 @@ use nexus_db_model::IpAttachState; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::external_api::views; use omicron_common::api::external::NameOrId; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use ref_cast::RefCast; use serde::Deserialize; 
use serde::Serialize; @@ -155,7 +155,7 @@ async fn siid_begin_detach_ip_undo( async fn siid_get_instance_state( sagactx: NexusActionContext, -) -> Result<Option<SledUuid>, ActionError> { +) -> Result<Option<VmmAndSledIds>, ActionError> { let params = sagactx.saga_params::<Params>()?; instance_ip_get_instance_state( &sagactx, &params.serialized_authn, &params.authz_instance, "detach", ) .await } async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> { let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let sled_id = sagactx + .lookup::<Option<VmmAndSledIds>>("instance_state")? + .map(|ids| ids.sled_id); let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; instance_ip_remove_nat( &sagactx, &params.serialized_authn, sled_id, target_ip, ) .await } @@ -184,7 +186,9 @@ async fn siid_nat_undo( ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let sled_id = sagactx + .lookup::<Option<VmmAndSledIds>>("instance_state")? + .map(|ids| ids.sled_id); let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; if let Err(e) = instance_ip_add_nat( &sagactx, @@ -204,33 +208,18 @@ async fn siid_nat_undo( async fn siid_update_opte( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let ids = sagactx.lookup::<Option<VmmAndSledIds>>("instance_state")?; let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; - instance_ip_remove_opte( - &sagactx, - &params.authz_instance, - sled_id, - target_ip, - ) - .await + instance_ip_remove_opte(&sagactx, ids, target_ip).await } async fn siid_update_opte_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); - let params = sagactx.saga_params::<Params>()?; - let sled_id = sagactx.lookup::<Option<SledUuid>>("instance_state")?; + let ids = sagactx.lookup::<Option<VmmAndSledIds>>("instance_state")?; let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?; - if let Err(e) = instance_ip_add_opte( - &sagactx, - &params.authz_instance, - sled_id, - target_ip, - ) - .await - { + if let Err(e) = instance_ip_add_opte(&sagactx, ids, target_ip).await { error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}"); } Ok(()) @@ -410,8 +399,14 @@ pub(crate) mod test { } // Sled agent has removed its records of the external IPs. + let VmmAndSledIds { vmm_id, .. } = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips.is_empty()); // DB only has record for SNAT.
diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 19bef2f046..24d11fcae2 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -437,20 +437,10 @@ async fn sim_ensure_destination_propolis_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::<Params>()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - + let dst_propolis_id = sagactx.lookup::<PropolisUuid>("dst_propolis_id")?; let dst_sled_id = sagactx.lookup::<SledUuid>("dst_sled_id")?; let db_instance = sagactx.lookup::<db::model::Instance>("set_migration_ids")?; - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; info!(osagactx.log(), "unregistering destination vmm for migration unwind"; "instance_id" => %db_instance.id(), @@ -465,7 +455,7 @@ async fn sim_ensure_destination_propolis_undo( // needed. match osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &dst_sled_id) + .instance_ensure_unregistered(&dst_propolis_id, &dst_sled_id) .await { Ok(_) => Ok(()), @@ -500,12 +490,6 @@ async fn sim_instance_migrate( let src_propolis_id = db_instance.runtime().propolis_id.unwrap(); let dst_vmm = sagactx.lookup::<db::model::Vmm>("dst_vmm_record")?; - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - info!(osagactx.log(), "initiating migration from destination sled"; "instance_id" => %db_instance.id(), "dst_vmm_record" => ?dst_vmm, @@ -529,7 +513,6 @@ async fn sim_instance_migrate( .nexus() .instance_request_state( &opctx, - &authz_instance, &db_instance, &Some(dst_vmm), InstanceStateChangeRequest::Migrate( diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 55fc312ae7..b6b78bd43c 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -538,6 +538,7 @@ async fn sis_ensure_registered_undo( let params = sagactx.saga_params::<Params>()?; let datastore = osagactx.datastore(); let instance_id = InstanceUuid::from_untyped_uuid(params.db_instance.id()); + let propolis_id = sagactx.lookup::<PropolisUuid>("propolis_id")?; let sled_id = sagactx.lookup::<SledUuid>("sled_id")?; let opctx = crate::context::op_context_for_saga_action( &sagactx, &params.serialized_authn, ); info!(osagactx.log(), "start saga: unregistering instance from sled"; "instance_id" => %instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id); // Fetch the latest record so that this callee can drive the instance into // a Failed state if the unregister call fails. - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) + let (.., db_instance) = LookupPath::new(&opctx, &datastore) .instance_id(instance_id.into_untyped_uuid()) .fetch() .await @@ -563,7 +565,7 @@ async fn sis_ensure_registered_undo( // returned.
if let Err(e) = osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &sled_id) + .instance_ensure_unregistered(&propolis_id, &sled_id) .await { error!(osagactx.log(), @@ -644,7 +646,6 @@ async fn sis_ensure_running( ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::<Params>()?; - let datastore = osagactx.datastore(); let opctx = crate::context::op_context_for_saga_action( &sagactx, &params.serialized_authn, ); @@ -659,17 +660,10 @@ async fn sis_ensure_running( "instance_id" => %instance_id, "sled_id" => %sled_id); - let (.., authz_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id.into_untyped_uuid()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - match osagactx .nexus() .instance_request_state( &opctx, - &authz_instance, &db_instance, &Some(db_vmm), crate::app::instance::InstanceStateChangeRequest::Run, diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 5f226480b8..4c4c4deff2 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -30,10 +30,9 @@ //! Nexus' `cpapi_instances_put` internal API endpoint, when a Nexus' //! `instance-watcher` background task *pulls* instance states from sled-agents //! periodically, or as the return value of an API call from Nexus to a -//! sled-agent. When a Nexus receives a new [`SledInstanceState`] from a -//! sled-agent through any of these mechanisms, the Nexus will write any changed -//! state to the `vmm` and/or `migration` tables directly on behalf of the -//! sled-agent. +//! sled-agent. When a Nexus receives a new [`SledVmmState`] from a sled-agent +//! through any of these mechanisms, the Nexus will write any changed state to +//! the `vmm` and/or `migration` tables directly on behalf of the sled-agent. //! //! Although Nexus is technically the party responsible for the database query //! that writes VMM and migration state updates received from sled-agent, it is
chaining into a new one as its last action, the `instance-watcher` //! background task is activated. Similarly, when a `start-instance-update` saga @@ -326,7 +325,8 @@ //! crate::app::db::datastore::DataStore::instance_updater_inherit_lock //! [instance_updater_unlock]: //! crate::app::db::datastore::DataStore::instance_updater_unlock -//! [`notify_instance_updated`]: crate::app::Nexus::notify_instance_updated +//! [`notify_vmm_updated`]: crate::app::Nexus::notify_vmm_updated +//! [`process_vmm_update`]: crate::app::instance::process_vmm_update //! //! [dist-locking]: //! https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html @@ -362,7 +362,7 @@ use nexus_db_queries::{authn, authz}; use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; @@ -388,8 +388,8 @@ pub(crate) use self::start::{Params, SagaInstanceUpdate}; mod destroyed; /// Returns `true` if an `instance-update` saga should be executed as a result -/// of writing the provided [`SledInstanceState`] to the database with the -/// provided [`VmmStateUpdateResult`]. +/// of writing the provided [`SledVmmState`] to the database with the provided +/// [`VmmStateUpdateResult`]. /// /// We determine this only after actually updating the database records, /// because we don't know whether a particular VMM or migration state is @@ -407,8 +407,8 @@ mod destroyed; /// VMM/migration states. pub fn update_saga_needed( log: &slog::Logger, - instance_id: InstanceUuid, - state: &SledInstanceState, + propolis_id: PropolisUuid, + state: &SledVmmState, result: &VmmStateUpdateResult, ) -> bool { // Currently, an instance-update saga is required if (and only if): @@ -443,8 +443,7 @@ pub fn update_saga_needed( debug!(log, "new VMM runtime state from sled agent requires an \ instance-update saga"; - "instance_id" => %instance_id, - "propolis_id" => %state.propolis_id, + "propolis_id" => %propolis_id, "vmm_needs_update" => vmm_needs_update, "migration_in_needs_update" => migration_in_needs_update, "migration_out_needs_update" => migration_out_needs_update, diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index eeb14091b2..540ab90e28 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -106,11 +106,12 @@ use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external; use omicron_common::api::external::Error; use omicron_common::retry_until_known_result; +use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; use rand::{rngs::StdRng, RngCore, SeedableRng}; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::CrucibleOpts; -use sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody; +use sled_agent_client::types::VmmIssueDiskSnapshotRequestBody; use sled_agent_client::types::VolumeConstructionRequest; use slog::info; use std::collections::BTreeMap; @@ -826,39 +827,43 @@ async fn ssc_send_snapshot_request_to_sled_agent( .await .map_err(ActionError::action_failed)?; - let sled_id = osagactx + let instance_and_vmm = osagactx .datastore() .instance_fetch_with_vmm(&opctx, &authz_instance) .await - .map_err(ActionError::action_failed)? 
- .sled_id(); + .map_err(ActionError::action_failed)?; + + let vmm = instance_and_vmm.vmm(); // If this instance does not currently have a sled, we can't continue this // saga - the user will have to reissue the snapshot request and it will get // run on a Pantry. - let Some(sled_id) = sled_id else { + let Some((propolis_id, sled_id)) = + vmm.as_ref().map(|vmm| (vmm.id, vmm.sled_id)) + else { return Err(ActionError::action_failed(Error::unavail( - "sled id is None!", + "instance no longer has an active VMM!", ))); }; info!(log, "asking for disk snapshot from Propolis via sled agent"; "disk_id" => %params.disk_id, "instance_id" => %attach_instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sled_agent_client = osagactx .nexus() - .sled_client(&sled_id) + .sled_client(&SledUuid::from_untyped_uuid(sled_id)) .await .map_err(ActionError::action_failed)?; retry_until_known_result(log, || async { sled_agent_client - .instance_issue_disk_snapshot_request( - &attach_instance_id, + .vmm_issue_disk_snapshot_request( + &PropolisUuid::from_untyped_uuid(propolis_id), &params.disk_id, - &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, + &VmmIssueDiskSnapshotRequestBody { snapshot_id }, ) .await }) @@ -2151,12 +2156,15 @@ mod test { .await .unwrap(); - let sled_id = instance_state - .sled_id() - .expect("starting instance should have a sled"); + let vmm_state = instance_state + .vmm() + .as_ref() + .expect("starting instance should have a vmm"); + let propolis_id = PropolisUuid::from_untyped_uuid(vmm_state.id); + let sled_id = SledUuid::from_untyped_uuid(vmm_state.sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.vmm_finish_transition(propolis_id).await; - sa.instance_finish_transition(instance.identity.id).await; let instance_state = nexus .datastore() .instance_fetch_with_vmm(&opctx, &authz_instance) diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index b9388a1116..1572ba4330 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -5,11 +5,8 @@ //! Helper functions for writing saga undo tests and working with instances in //! saga tests.
-use super::NexusSaga; -use crate::{ - app::{saga::create_saga_dag, test_interfaces::TestInterfaces as _}, - Nexus, -}; +use super::{instance_common::VmmAndSledIds, NexusSaga}; +use crate::{app::saga::create_saga_dag, Nexus}; use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use camino::Utf8Path; use diesel::{ @@ -137,13 +134,14 @@ pub(crate) async fn instance_simulate( info!(&cptestctx.logctx.log, "Poking simulated instance"; "instance_id" => %instance_id); let nexus = &cptestctx.server.server_context().nexus; + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, instance_id).await; let sa = nexus - .instance_sled_by_id(instance_id) + .sled_client(&sled_id) .await - .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(instance_id.into_untyped_uuid()).await; + sa.vmm_finish_transition(vmm_id).await; } pub(crate) async fn instance_single_step_on_sled( @@ -158,12 +156,14 @@ pub(crate) async fn instance_single_step_on_sled( "sled_id" => %sled_id, ); let nexus = &cptestctx.server.server_context().nexus; + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, instance_id).await; let sa = nexus - .sled_client(sled_id) + .sled_client(&sled_id) .await - .expect("sled must exist to simulate a state change"); + .expect("instance must be on a sled to simulate a state change"); - sa.instance_single_step(instance_id.into_untyped_uuid()).await; + sa.vmm_single_step(vmm_id).await; } pub(crate) async fn instance_simulate_by_name( @@ -186,12 +186,14 @@ pub(crate) async fn instance_simulate_by_name( let instance_lookup = nexus.instance_lookup(&opctx, instance_selector).unwrap(); let (.., instance) = instance_lookup.fetch().await.unwrap(); + let instance_id = InstanceUuid::from_untyped_uuid(instance.id()); + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, &instance_id).await; let sa = nexus - .instance_sled_by_id(&InstanceUuid::from_untyped_uuid(instance.id())) + .sled_client(&sled_id) .await - .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(instance.id()).await; + sa.vmm_finish_transition(vmm_id).await; } pub async fn instance_fetch( @@ -218,6 +220,21 @@ pub async fn instance_fetch( db_state } +pub(super) async fn instance_fetch_vmm_and_sled_ids( + cptestctx: &ControlPlaneTestContext, + instance_id: &InstanceUuid, +) -> VmmAndSledIds { + let instance_and_vmm = instance_fetch(cptestctx, *instance_id).await; + let vmm = instance_and_vmm + .vmm() + .as_ref() + .expect("can only fetch VMM and sled IDs for an active instance"); + + let vmm_id = PropolisUuid::from_untyped_uuid(vmm.id); + let sled_id = SledUuid::from_untyped_uuid(vmm.sled_id); + VmmAndSledIds { vmm_id, sled_id } +} + pub async fn instance_fetch_all( cptestctx: &ControlPlaneTestContext, instance_id: InstanceUuid, diff --git a/nexus/src/app/snapshot.rs b/nexus/src/app/snapshot.rs index 040c9fc082..57b8edd1f0 100644 --- a/nexus/src/app/snapshot.rs +++ b/nexus/src/app/snapshot.rs @@ -109,7 +109,7 @@ impl super::Nexus { // If a Propolis _may_ exist, send the snapshot request there, // otherwise use the pantry. - !instance_state.vmm().is_some() + instance_state.vmm().is_none() } else { // This disk is not attached to an instance, use the pantry. 
true diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index adfafa523d..9852225e8c 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -6,8 +6,7 @@ use async_trait::async_trait; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::Error; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::{InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use sled_agent_client::Client as SledAgentClient; use std::sync::Arc; use uuid::Uuid; @@ -19,25 +18,47 @@ pub use super::update::SpUpdater; pub use super::update::UpdateProgress; pub use gateway_client::types::SpType; +/// The information needed to talk to a sled agent about an instance that is +/// active on that sled. +pub struct InstanceSledAgentInfo { + /// The ID of the Propolis job to send to sled agent. + pub propolis_id: PropolisUuid, + + /// The ID of the sled where the Propolis job is running. + pub sled_id: SledUuid, + + /// A client for talking to the Propolis's host sled. + pub sled_client: Arc, + + /// The ID of the instance's migration target Propolis, if it has one. + pub dst_propolis_id: Option, +} + /// Exposes additional [`super::Nexus`] interfaces for use by the test suite #[async_trait] pub trait TestInterfaces { /// Access the Rack ID of the currently executing Nexus. fn rack_id(&self) -> Uuid; - /// Returns the SledAgentClient for an Instance from its id. We may also - /// want to split this up into instance_lookup_by_id() and instance_sled(), - /// but after all it's a test suite special to begin with. - async fn instance_sled_by_id( + /// Attempts to obtain the Propolis ID and sled agent information for an + /// instance. + /// + /// # Arguments + /// + /// - `id`: The ID of the instance of interest. + /// - `opctx`: An optional operation context to use for authorization + /// checks. If `None`, this routine supplies the default test opctx. + /// + /// # Return value + /// + /// - `Ok(Some(info))` if the instance has an active Propolis. + /// - `Ok(None)` if the instance has no active Propolis. + /// - `Err` if an error occurred. + async fn active_instance_info( &self, id: &InstanceUuid, - ) -> Result>, Error>; - - async fn instance_sled_by_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result>, Error>; + opctx: Option<&OpContext>, + ) -> Result, Error>; /// Returns the SledAgentClient for the sled running an instance to which a /// disk is attached. @@ -46,18 +67,6 @@ pub trait TestInterfaces { id: &Uuid, ) -> Result>, Error>; - /// Returns the supplied instance's current active sled ID. 
- async fn instance_sled_id( - &self, - instance_id: &InstanceUuid, - ) -> Result, Error>; - - async fn instance_sled_id_with_opctx( - &self, - instance_id: &InstanceUuid, - opctx: &OpContext, - ) -> Result, Error>; - async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result; fn set_samael_max_issue_delay(&self, max_issue_delay: chrono::Duration); @@ -69,30 +78,49 @@ impl TestInterfaces for super::Nexus { self.rack_id } - async fn instance_sled_by_id( + async fn active_instance_info( &self, id: &InstanceUuid, - ) -> Result>, Error> { - let opctx = OpContext::for_tests( - self.log.new(o!()), - Arc::clone(&self.db_datastore) - as Arc, - ); + opctx: Option<&OpContext>, + ) -> Result, Error> { + let local_opctx; + let opctx = match opctx { + Some(o) => o, + None => { + local_opctx = OpContext::for_tests( + self.log.new(o!()), + Arc::clone(&self.db_datastore) + as Arc, + ); + &local_opctx + } + }; - self.instance_sled_by_id_with_opctx(id, &opctx).await - } + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(nexus_db_queries::authz::Action::Read) + .await?; - async fn instance_sled_by_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result>, Error> { - let sled_id = self.instance_sled_id_with_opctx(id, opctx).await?; - if let Some(sled_id) = sled_id { - Ok(Some(self.sled_client(&sled_id).await?)) - } else { - Ok(None) - } + let state = self + .datastore() + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + + let Some(vmm) = state.vmm() else { + return Ok(None); + }; + + let sled_id = SledUuid::from_untyped_uuid(vmm.sled_id); + Ok(Some(InstanceSledAgentInfo { + propolis_id: PropolisUuid::from_untyped_uuid(vmm.id), + sled_id, + sled_client: self.sled_client(&sled_id).await?, + dst_propolis_id: state + .instance() + .runtime() + .dst_propolis_id + .map(PropolisUuid::from_untyped_uuid), + })) } async fn disk_sled_by_id( @@ -112,37 +140,11 @@ impl TestInterfaces for super::Nexus { let instance_id = InstanceUuid::from_untyped_uuid( db_disk.runtime().attach_instance_id.unwrap(), ); - self.instance_sled_by_id(&instance_id).await - } - - async fn instance_sled_id( - &self, - id: &InstanceUuid, - ) -> Result, Error> { - let opctx = OpContext::for_tests( - self.log.new(o!()), - Arc::clone(&self.db_datastore) - as Arc, - ); - - self.instance_sled_id_with_opctx(id, &opctx).await - } - - async fn instance_sled_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result, Error> { - let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(id.into_untyped_uuid()) - .lookup_for(nexus_db_queries::authz::Action::Read) - .await?; Ok(self - .datastore() - .instance_fetch_with_vmm(opctx, &authz_instance) + .active_instance_info(&instance_id, Some(&opctx)) .await? 
- .sled_id()) + .map(|info| info.sled_client)) } async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result { diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 9965b6e21e..66a8090f11 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -52,7 +52,7 @@ use omicron_common::api::internal::nexus::ProducerRegistrationResponse; use omicron_common::api::internal::nexus::RepairFinishInfo; use omicron_common::api::internal::nexus::RepairProgress; use omicron_common::api::internal::nexus::RepairStartInfo; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::update::ArtifactId; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; @@ -168,8 +168,8 @@ impl NexusInternalApi for NexusInternalApiImpl { async fn cpapi_instances_put( rqctx: RequestContext, - path_params: Path, - new_runtime_state: TypedBody, + path_params: Path, + new_runtime_state: TypedBody, ) -> Result { let apictx = &rqctx.context().context; let nexus = &apictx.nexus; @@ -178,11 +178,7 @@ impl NexusInternalApi for NexusInternalApiImpl { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let handler = async { nexus - .notify_instance_updated( - &opctx, - InstanceUuid::from_untyped_uuid(path.instance_id), - &new_state, - ) + .notify_vmm_updated(&opctx, path.propolis_id, &new_state) .await?; Ok(HttpResponseUpdatedNoContent()) }; diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 234ab5f382..fe6aab2770 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -188,12 +188,13 @@ async fn set_instance_state( } async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { - let sa = nexus - .instance_sled_by_id(id) + let info = nexus + .active_instance_info(id, None) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + info.sled_client.vmm_finish_transition(info.propolis_id).await; } #[nexus_test] diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index eb3c88eb38..a7228e0841 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -780,12 +780,13 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { let instance_next = instance_get(&client, &instance_url).await; assert_eq!(instance_next.runtime.run_state, InstanceState::Running); - let original_sled = nexus - .instance_sled_id(&instance_id) + let sled_info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should have a sled"); + let original_sled = sled_info.sled_id; let dst_sled_id = if original_sled == default_sled_id { other_sled_id } else { @@ -808,12 +809,13 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { .parsed_body::() .unwrap(); - let current_sled = nexus - .instance_sled_id(&instance_id) + let new_sled_info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should have a sled"); + let current_sled = new_sled_info.sled_id; assert_eq!(current_sled, original_sled); // Ensure that both sled agents report that the migration is in progress. 
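The migration test below (and every simulation helper it calls) now addresses sled agents by Propolis ID, so the per-test setup funnels through the `active_instance_info` test interface introduced earlier in this patch. A sketch of that common flow, assembled from the hunks in this file (assuming `nexus` and `instance_id` are in scope as above; the expect message is illustrative):

    // One lookup yields everything needed to talk to the active VMM: its
    // Propolis ID, the hosting sled's ID, a client for that sled, and the
    // migration-target Propolis ID when a migration is underway.
    let info = nexus
        .active_instance_info(&instance_id, None)
        .await
        .unwrap()
        .expect("instance should have an active VMM");

    // Simulation calls are VMM-scoped; the sled agent no longer maps an
    // instance ID to its running VMM.
    info.sled_client.vmm_finish_transition(info.propolis_id).await;

Tests that simulate a migration pull `info.dst_propolis_id` from the same lookup to single-step the target VMM, as the next hunks show.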
@@ -840,6 +842,15 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(migration.target_state, MigrationState::Pending.into()); assert_eq!(migration.source_state, MigrationState::Pending.into()); + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Simulate the migration. We will use `instance_single_step_on_sled` to // single-step both sled-agents through the migration state machine and // ensure that the migration state looks nice at each step. @@ -847,15 +858,15 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { cptestctx, nexus, original_sled, - instance_id, + src_propolis_id, migration_id, ) .await; // Move source to "migrating". - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -865,9 +876,9 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move target to "migrating". - instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -877,7 +888,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move the source to "completed" - instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -887,15 +898,16 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move the target to "completed". 
- instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; let current_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("migrated instance should still have a sled"); + .expect("migrated instance should still have a sled") + .sled_id; assert_eq!(current_sled, dst_sled_id); @@ -978,11 +990,13 @@ async fn test_instance_migrate_v2p_and_routes( .derive_guest_network_interface_info(&opctx, &authz_instance) .await .unwrap(); + let original_sled_id = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("running instance should have a sled"); + .expect("running instance should have a sled") + .sled_id; let mut sled_agents = vec![cptestctx.sled_agent.sled_agent.clone()]; sled_agents.extend(other_sleds.iter().map(|tup| tup.1.sled_agent.clone())); @@ -1035,25 +1049,35 @@ async fn test_instance_migrate_v2p_and_routes( .expect("since we've started a migration, the instance record must have a migration id!") }; + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Tell both sled-agents to pretend to do the migration. instance_simulate_migration_source( cptestctx, nexus, original_sled_id, - instance_id, + src_propolis_id, migration_id, ) .await; - instance_simulate_on_sled(cptestctx, nexus, original_sled_id, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled_id, src_propolis_id) .await; - instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; let current_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("migrated instance should have a sled"); + .expect("migrated instance should have a sled") + .sled_id; assert_eq!(current_sled, dst_sled_id); for sled_agent in &sled_agents { @@ -1373,10 +1397,11 @@ async fn test_instance_metrics_with_migration( // Request migration to the other sled. This reserves resources on the // target sled, but shouldn't change the virtual provisioning counters. let original_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("running instance should have a sled"); + .expect("running instance should have a sled") + .sled_id; let dst_sled_id = if original_sled == default_sled_id { other_sled_id @@ -1420,6 +1445,15 @@ async fn test_instance_metrics_with_migration( .expect("since we've started a migration, the instance record must have a migration id!") }; + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Wait for the instance to be in the `Migrating` state. 
Otherwise, the // subsequent `instance_wait_for_state(..., Running)` may see the `Running` // state from the *old* VMM, rather than waiting for the migration to @@ -1428,13 +1462,13 @@ async fn test_instance_metrics_with_migration( cptestctx, nexus, original_sled, - instance_id, + src_propolis_id, migration_id, ) .await; - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; instance_wait_for_state(&client, instance_id, InstanceState::Migrating) .await; @@ -1444,9 +1478,9 @@ async fn test_instance_metrics_with_migration( // Complete migration on the target. Simulated migrations always succeed. // After this the instance should be running and should continue to appear // to be provisioned. - instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; check_provisioning_state(4, 1).await; @@ -3337,10 +3371,11 @@ async fn test_disks_detached_when_instance_destroyed( let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let sa = nexus - .instance_sled_by_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("instance should be on a sled while it's running"); + .expect("instance should be on a sled while it's running") + .sled_client; // Stop and delete instance instance_post(&client, instance_name, InstanceOp::Stop).await; @@ -5080,28 +5115,29 @@ pub async fn assert_sled_vpc_routes( /// instance, and then tell it to finish simulating whatever async transition is /// going on. pub async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { - let sa = nexus - .instance_sled_by_id(id) + let sled_info = nexus + .active_instance_info(id, None) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + sled_info.sled_client.vmm_finish_transition(sled_info.propolis_id).await; } /// Simulate one step of an ongoing instance state transition. To do this, we /// have to look up the instance, then get the sled agent associated with that /// instance, and then tell it to finish simulating whatever async transition is /// going on. 
-async fn instance_single_step_on_sled( +async fn vmm_single_step_on_sled( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ) { info!(&cptestctx.logctx.log, "Single-stepping simulated instance on sled"; - "instance_id" => %instance_id, "sled_id" => %sled_id); + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_single_step(instance_id.into_untyped_uuid()).await; + sa.vmm_single_step(propolis_id).await; } pub async fn instance_simulate_with_opctx( @@ -5109,27 +5145,28 @@ pub async fn instance_simulate_with_opctx( id: &InstanceUuid, opctx: &OpContext, ) { - let sa = nexus - .instance_sled_by_id_with_opctx(id, opctx) + let sled_info = nexus + .active_instance_info(id, Some(opctx)) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + sled_info.sled_client.vmm_finish_transition(sled_info.propolis_id).await; } /// Simulates state transitions for the incarnation of the instance on the /// supplied sled (which may not be the sled ID currently stored in the /// instance's CRDB record). -async fn instance_simulate_on_sled( +async fn vmm_simulate_on_sled( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ) { info!(&cptestctx.logctx.log, "Poking simulated instance on sled"; - "instance_id" => %instance_id, "sled_id" => %sled_id); + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_finish_transition(instance_id.into_untyped_uuid()).await; + sa.vmm_finish_transition(propolis_id).await; } /// Simulates a migration source for the provided instance ID, sled ID, and @@ -5138,19 +5175,19 @@ async fn instance_simulate_migration_source( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, migration_id: Uuid, ) { info!( &cptestctx.logctx.log, "Simulating migration source sled"; - "instance_id" => %instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id, "migration_id" => %migration_id, ); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_simulate_migration_source( - instance_id.into_untyped_uuid(), + sa.vmm_simulate_migration_source( + propolis_id, sled_agent_client::SimulateMigrationSource { migration_id, result: sled_agent_client::SimulatedMigrationResult::Success, diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index e872cc6fe3..f56755d85c 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -1344,12 +1344,12 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( .expect("Failed to stop instance"); // Simulate the transition, wait until it is in fact stopped. 
- let sa = nexus - .instance_sled_by_id(&instance_id) + let info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should be on a sled"); - sa.instance_finish_transition(instance.identity.id).await; + info.sled_client.vmm_finish_transition(info.propolis_id).await; instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; // Delete the instance diff --git a/nexus/tests/integration_tests/pantry.rs b/nexus/tests/integration_tests/pantry.rs index d77ad49db6..22d35b01b5 100644 --- a/nexus/tests/integration_tests/pantry.rs +++ b/nexus/tests/integration_tests/pantry.rs @@ -88,12 +88,12 @@ async fn set_instance_state( } async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { - let sa = nexus - .instance_sled_by_id(id) + let info = nexus + .active_instance_info(id, None) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + info.sled_client.vmm_finish_transition(info.propolis_id).await; } async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 111bd552d0..619a2187b5 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -746,44 +746,6 @@ } } }, - "/instances/{instance_id}": { - "put": { - "summary": "Report updated state for an instance.", - "operationId": "cpapi_instances_put", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SledInstanceState" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/instances/{instance_id}/migrate": { "post": { "operationId": "instance_migrate", @@ -1470,6 +1432,43 @@ } } }, + "/vmms/{propolis_id}": { + "put": { + "summary": "Report updated state for a VMM.", + "operationId": "cpapi_instances_put", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledVmmState" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/volume/{volume_id}/remove-read-only-parent": { "post": { "summary": "Request removal of a read_only_parent from a volume.", @@ -5062,50 +5061,6 @@ "id" ] }, - "SledInstanceState": { - "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", - "type": "object", - "properties": { - "migration_in": { - "nullable": true, - "description": "The current state of any inbound migration to this VMM.", - "allOf": [ - { - "$ref": "#/components/schemas/MigrationRuntimeState" - } - ] - }, - "migration_out": { - "nullable": true, - "description": "The state of any outbound migration from this VMM.", - "allOf": [ - { - "$ref": "#/components/schemas/MigrationRuntimeState" - } - ] - }, - "propolis_id": { - "description": "The ID of the VMM whose state is 
being reported.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, - "vmm_state": { - "description": "The most recent state of the sled's VMM process.", - "allOf": [ - { - "$ref": "#/components/schemas/VmmRuntimeState" - } - ] - } - }, - "required": [ - "propolis_id", - "vmm_state" - ] - }, "SledPolicy": { "description": "The operator-defined policy of a sled.", "oneOf": [ @@ -5220,6 +5175,41 @@ } ] }, + "SledVmmState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a VMM.", + "type": "object", + "properties": { + "migration_in": { + "nullable": true, + "description": "The current state of any inbound migration to this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "migration_out": { + "nullable": true, + "description": "The state of any outbound migration from this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "vmm_state": { + "description": "The most recent state of the sled's VMM process.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] + } + }, + "required": [ + "vmm_state" + ] + }, "SourceNatConfig": { "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", "type": "object", @@ -5332,10 +5322,6 @@ "type": "string", "format": "uuid" }, - "TypedUuidForPropolisKind": { - "type": "string", - "format": "uuid" - }, "TypedUuidForSledKind": { "type": "string", "format": "uuid" @@ -5597,6 +5583,10 @@ ] } ] + }, + "TypedUuidForPropolisKind": { + "type": "string", + "format": "uuid" } }, "responses": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 4c40fb5da0..ec2a8bfc4d 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -220,36 +220,17 @@ } } }, - "/instances/{instance_id}": { - "put": { - "operationId": "instance_register", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceEnsureBody" - } - } - }, - "required": true - }, + "/inventory": { + "get": { + "summary": "Fetch basic information about this sled", + "operationId": "inventory", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/Inventory" } } } @@ -261,26 +242,20 @@ "$ref": "#/components/responses/Error" } } - }, - "delete": { - "operationId": "instance_unregister", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + } + }, + "/network-bootstore-config": { + "get": { + "summary": "This API endpoint is only reading the local sled agent's view of the", + "description": "bootstore. The boostore is a distributed data store that is eventually consistent. 
Reads from individual nodes may not represent the latest state.", + "operationId": "read_network_bootstore_config_cache", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceUnregisterResponse" + "$ref": "#/components/schemas/EarlyNetworkConfig" } } } @@ -292,52 +267,22 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/instances/{instance_id}/disks/{disk_id}/snapshot": { - "post": { - "summary": "Take a snapshot of a disk that is attached to an instance", - "operationId": "instance_issue_disk_snapshot_request", - "parameters": [ - { - "in": "path", - "name": "disk_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - }, - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], + }, + "put": { + "operationId": "write_network_bootstore_config", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceIssueDiskSnapshotRequestBody" + "$ref": "#/components/schemas/EarlyNetworkConfig" } } }, "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceIssueDiskSnapshotRequestResponse" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -348,33 +293,20 @@ } } }, - "/instances/{instance_id}/external-ip": { - "put": { - "operationId": "instance_put_external_ip", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceExternalIpBody" + "/omicron-physical-disks": { + "get": { + "operationId": "omicron_physical_disks_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + } } } }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, "4XX": { "$ref": "#/components/responses/Error" }, @@ -383,31 +315,28 @@ } } }, - "delete": { - "operationId": "instance_delete_external_ip", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "put": { + "operationId": "omicron_physical_disks_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceExternalIpBody" + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DisksManagementResult" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -418,26 +347,16 @@ } } }, - "/instances/{instance_id}/state": { + "/omicron-zones": { "get": { - "operationId": "instance_get_state", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "operationId": "omicron_zones_get", "responses": { "200": { "description": "successful operation", 
"content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/OmicronZonesConfig" } } } @@ -451,37 +370,20 @@ } }, "put": { - "operationId": "instance_put_state", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "operationId": "omicron_zones_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstancePutStateBody" + "$ref": "#/components/schemas/OmicronZonesConfig" } } }, "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstancePutStateResponse" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -492,17 +394,17 @@ } } }, - "/inventory": { + "/sled-identifiers": { "get": { - "summary": "Fetch basic information about this sled", - "operationId": "inventory", + "summary": "Fetch sled identifiers", + "operationId": "sled_identifiers", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Inventory" + "$ref": "#/components/schemas/SledIdentifiers" } } } @@ -516,18 +418,16 @@ } } }, - "/network-bootstore-config": { + "/sled-role": { "get": { - "summary": "This API endpoint is only reading the local sled agent's view of the", - "description": "bootstore. The boostore is a distributed data store that is eventually consistent. Reads from individual nodes may not represent the latest state.", - "operationId": "read_network_bootstore_config_cache", + "operationId": "sled_role_get", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EarlyNetworkConfig" + "$ref": "#/components/schemas/SledRole" } } } @@ -539,14 +439,17 @@ "$ref": "#/components/responses/Error" } } - }, + } + }, + "/sleds": { "put": { - "operationId": "write_network_bootstore_config", + "summary": "Add a sled to a rack that was already initialized via RSS", + "operationId": "sled_add", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EarlyNetworkConfig" + "$ref": "#/components/schemas/AddSledRequest" } } }, @@ -565,16 +468,42 @@ } } }, - "/omicron-physical-disks": { + "/switch-ports": { + "post": { + "operationId": "uplink_ensure", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SwitchPorts" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/timesync": { "get": { - "operationId": "omicron_physical_disks_get", + "operationId": "timesync_get", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + "$ref": "#/components/schemas/TimeSync" } } } @@ -586,29 +515,24 @@ "$ref": "#/components/responses/Error" } } - }, - "put": { - "operationId": "omicron_physical_disks_put", + } + }, + "/update": { + "post": { + "operationId": "update_artifact", "requestBody": { "content": { "application/json": { "schema": { - "$ref": 
"#/components/schemas/OmicronPhysicalDisksConfig" + "$ref": "#/components/schemas/UpdateArtifactId" } } }, "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DisksManagementResult" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -619,16 +543,21 @@ } } }, - "/omicron-zones": { + "/v2p": { "get": { - "operationId": "omicron_zones_get", + "summary": "List v2p mappings present on sled", + "operationId": "list_v2p", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronZonesConfig" + "title": "Array_of_VirtualNetworkInterfaceHost", + "type": "array", + "items": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } } } } @@ -642,12 +571,13 @@ } }, "put": { - "operationId": "omicron_zones_put", + "summary": "Create a mapping from a virtual NIC to a physical host", + "operationId": "set_v2p", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronZonesConfig" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -664,23 +594,24 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/sled-identifiers": { - "get": { - "summary": "Fetch sled identifiers", - "operationId": "sled_identifiers", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SledIdentifiers" - } + }, + "delete": { + "summary": "Delete a mapping from a virtual NIC to a physical host", + "operationId": "del_v2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, "4XX": { "$ref": "#/components/responses/Error" }, @@ -690,16 +621,36 @@ } } }, - "/sled-role": { - "get": { - "operationId": "sled_role_get", + "/vmms/{propolis_id}": { + "put": { + "operationId": "vmm_register", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceEnsureBody" + } + } + }, + "required": true + }, "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledRole" + "$ref": "#/components/schemas/SledVmmState" } } } @@ -711,25 +662,29 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/sleds": { - "put": { - "summary": "Add a sled to a rack that was already initialized via RSS", - "operationId": "sled_add", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AddSledRequest" - } + }, + "delete": { + "operationId": "vmm_unregister", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" } - }, - "required": true - }, + } + ], "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmUnregisterResponse" + } + } + } }, "4XX": { 
"$ref": "#/components/responses/Error" @@ -740,22 +695,49 @@ } } }, - "/switch-ports": { + "/vmms/{propolis_id}/disks/{disk_id}/snapshot": { "post": { - "operationId": "uplink_ensure", + "summary": "Take a snapshot of a disk that is attached to an instance", + "operationId": "vmm_issue_disk_snapshot_request", + "parameters": [ + { + "in": "path", + "name": "disk_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SwitchPorts" + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestBody" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestResponse" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -766,20 +748,33 @@ } } }, - "/timesync": { - "get": { - "operationId": "timesync_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/TimeSync" - } + "/vmms/{propolis_id}/external-ip": { + "put": { + "operationId": "vmm_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" } } }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, "4XX": { "$ref": "#/components/responses/Error" }, @@ -787,16 +782,24 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/update": { - "post": { - "operationId": "update_artifact", + }, + "delete": { + "operationId": "vmm_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UpdateArtifactId" + "$ref": "#/components/schemas/InstanceExternalIpBody" } } }, @@ -815,21 +818,26 @@ } } }, - "/v2p": { + "/vmms/{propolis_id}/state": { "get": { - "summary": "List v2p mappings present on sled", - "operationId": "list_v2p", + "operationId": "vmm_get_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "title": "Array_of_VirtualNetworkInterfaceHost", - "type": "array", - "items": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" - } + "$ref": "#/components/schemas/SledVmmState" } } } @@ -843,46 +851,37 @@ } }, "put": { - "summary": "Create a mapping from a virtual NIC to a physical host", - "operationId": "set_v2p", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" - } + "operationId": "vmm_put_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": 
"#/components/schemas/TypedUuidForPropolisKind" } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" } - } - }, - "delete": { - "summary": "Delete a mapping from a virtual NIC to a physical host", - "operationId": "del_v2p", + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VmmPutStateBody" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmPutStateResponse" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -2837,6 +2836,14 @@ } ] }, + "instance_id": { + "description": "The ID of the instance for which this VMM is being created.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForInstanceKind" + } + ] + }, "instance_runtime": { "description": "The instance runtime state for the instance being registered.", "allOf": [ @@ -2857,14 +2864,6 @@ "description": "The address at which this VMM should serve a Propolis server API.", "type": "string" }, - "propolis_id": { - "description": "The ID of the VMM being registered. This may not be the active VMM ID in the instance runtime state (e.g. if the new VMM is going to be a migration target).", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, "vmm_runtime": { "description": "The initial VMM runtime state for the VMM being registered.", "allOf": [ @@ -2876,10 +2875,10 @@ }, "required": [ "hardware", + "instance_id", "instance_runtime", "metadata", "propolis_addr", - "propolis_id", "vmm_runtime" ] }, @@ -2985,30 +2984,6 @@ "source_nat" ] }, - "InstanceIssueDiskSnapshotRequestBody": { - "type": "object", - "properties": { - "snapshot_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "snapshot_id" - ] - }, - "InstanceIssueDiskSnapshotRequestResponse": { - "type": "object", - "properties": { - "snapshot_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "snapshot_id" - ] - }, "InstanceMetadata": { "description": "Metadata used to track statistics about an instance.", "type": "object", @@ -3052,187 +3027,71 @@ "properties": { "hostname": { "description": "RFC1035-compliant hostname for the instance.", - "allOf": [ - { - "$ref": "#/components/schemas/Hostname" - } - ] - }, - "memory": { - "$ref": "#/components/schemas/ByteCount" - }, - "ncpus": { - "$ref": "#/components/schemas/InstanceCpuCount" - } - }, - "required": [ - "hostname", - "memory", - "ncpus" - ] - }, - "InstancePutStateBody": { - "description": "The body of a request to move a previously-ensured instance into a specific runtime state.", - "type": "object", - "properties": { - "state": { - "description": "The state into which the instance should be driven.", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceStateRequested" - } - ] - } - }, - "required": [ - "state" - ] - }, - "InstancePutStateResponse": { - "description": "The response sent from a request to move an instance into a specific runtime state.", - "type": "object", - "properties": { - "updated_runtime": { - "nullable": true, - "description": "The current runtime state of the instance after handling the request to change its state. 
If the instance's state did not change, this field is `None`.", - "allOf": [ - { - "$ref": "#/components/schemas/SledInstanceState" - } - ] - } - } - }, - "InstanceRuntimeState": { - "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", - "type": "object", - "properties": { - "dst_propolis_id": { - "nullable": true, - "description": "If a migration is active, the ID of the target VMM.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, - "gen": { - "description": "Generation number for this state.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, - "migration_id": { - "nullable": true, - "description": "If a migration is active, the ID of that migration.", - "type": "string", - "format": "uuid" - }, - "propolis_id": { - "nullable": true, - "description": "The instance's currently active VMM ID.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, - "time_updated": { - "description": "Timestamp for this information.", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "gen", - "time_updated" - ] - }, - "InstanceStateRequested": { - "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", - "oneOf": [ - { - "description": "Run this instance by migrating in from a previous running incarnation of the instance.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "migration_target" - ] - }, - "value": { - "$ref": "#/components/schemas/InstanceMigrationTargetParams" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Start the instance if it is not already running.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "running" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Stop the instance.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "stopped" - ] + "allOf": [ + { + "$ref": "#/components/schemas/Hostname" } - }, - "required": [ - "type" ] }, - { - "description": "Immediately reset the instance, as though it had stopped and immediately began to run again.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "reboot" - ] - } - }, - "required": [ - "type" - ] + "memory": { + "$ref": "#/components/schemas/ByteCount" + }, + "ncpus": { + "$ref": "#/components/schemas/InstanceCpuCount" } + }, + "required": [ + "hostname", + "memory", + "ncpus" ] }, - "InstanceUnregisterResponse": { - "description": "The response sent from a request to unregister an instance.", + "InstanceRuntimeState": { + "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", "type": "object", "properties": { - "updated_runtime": { + "dst_propolis_id": { "nullable": true, - "description": "The current state of the instance after handling the request to unregister it. 
If the instance's state did not change, this field is `None`.", + "description": "If a migration is active, the ID of the target VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + ] + }, + "gen": { + "description": "Generation number for this state.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "migration_id": { + "nullable": true, + "description": "If a migration is active, the ID of that migration.", + "type": "string", + "format": "uuid" + }, + "propolis_id": { + "nullable": true, + "description": "The instance's currently active VMM ID.", "allOf": [ { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/TypedUuidForPropolisKind" } ] + }, + "time_updated": { + "description": "Timestamp for this information.", + "type": "string", + "format": "date-time" } - } + }, + "required": [ + "gen", + "time_updated" + ] }, "Inventory": { "description": "Identity and basic status information about this sled agent", @@ -4667,8 +4526,27 @@ "sled_id" ] }, - "SledInstanceState": { - "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", + "SledRole": { + "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", + "oneOf": [ + { + "description": "The sled is a general compute sled.", + "type": "string", + "enum": [ + "gimlet" + ] + }, + { + "description": "The sled is attached to the network switch, and has additional responsibilities.", + "type": "string", + "enum": [ + "scrimlet" + ] + } + ] + }, + "SledVmmState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a VMM.", "type": "object", "properties": { "migration_in": { @@ -4689,14 +4567,6 @@ } ] }, - "propolis_id": { - "description": "The ID of the VMM whose state is being reported.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, "vmm_state": { "description": "The most recent state of the sled's VMM process.", "allOf": [ @@ -4707,29 +4577,9 @@ } }, "required": [ - "propolis_id", "vmm_state" ] }, - "SledRole": { - "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", - "oneOf": [ - { - "description": "The sled is a general compute sled.", - "type": "string", - "enum": [ - "gimlet" - ] - }, - { - "description": "The sled is attached to the network switch, and has additional responsibilities.", - "type": "string", - "enum": [ - "scrimlet" - ] - } - ] - }, "Slot": { "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\n\n
JSON schema\n\n```json { \"description\": \"A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\", \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 } ```
", "type": "integer", @@ -4912,6 +4762,10 @@ "sync" ] }, + "TypedUuidForInstanceKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForPropolisKind": { "type": "string", "format": "uuid" @@ -4996,6 +4850,62 @@ "vni" ] }, + "VmmIssueDiskSnapshotRequestBody": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmIssueDiskSnapshotRequestResponse": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmPutStateBody": { + "description": "The body of a request to move a previously-ensured instance into a specific runtime state.", + "type": "object", + "properties": { + "state": { + "description": "The state into which the instance should be driven.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmStateRequested" + } + ] + } + }, + "required": [ + "state" + ] + }, + "VmmPutStateResponse": { + "description": "The response sent from a request to move an instance into a specific runtime state.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current runtime state of the instance after handling the request to change its state. If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, "VmmRuntimeState": { "description": "The dynamic runtime properties of an individual VMM process.", "type": "object", @@ -5089,6 +4999,90 @@ } ] }, + "VmmStateRequested": { + "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", + "oneOf": [ + { + "description": "Run this instance by migrating in from a previous running incarnation of the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "migration_target" + ] + }, + "value": { + "$ref": "#/components/schemas/InstanceMigrationTargetParams" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "Start the instance if it is not already running.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "running" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Stop the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "stopped" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Immediately reset the instance, as though it had stopped and immediately began to run again.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "reboot" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, + "VmmUnregisterResponse": { + "description": "The response sent from a request to unregister an instance.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current state of the instance after handling the request to unregister it. 
If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", @@ -5408,10 +5402,6 @@ "A", "B" ] - }, - "TypedUuidForInstanceKind": { - "type": "string", - "format": "uuid" } }, "responses": { diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index c44b24d712..410747bf46 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -15,7 +15,7 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::{ api::internal::{ - nexus::{DiskRuntimeState, SledInstanceState, UpdateArtifactId}, + nexus::{DiskRuntimeState, SledVmmState, UpdateArtifactId}, shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, @@ -23,7 +23,7 @@ use omicron_common::{ }, disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig}, }; -use omicron_uuid_kinds::{InstanceUuid, ZpoolUuid}; +use omicron_uuid_kinds::{PropolisUuid, ZpoolUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_types::{ @@ -36,8 +36,8 @@ use sled_agent_types::{ early_networking::EarlyNetworkConfig, firewall_rules::VpcFirewallRulesEnsureBody, instance::{ - InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, + InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody, + VmmPutStateResponse, VmmUnregisterResponse, }, sled::AddSledRequest, time_sync::TimeSync, @@ -212,59 +212,59 @@ pub trait SledAgentApi { #[endpoint { method = PUT, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] - async fn instance_register( + async fn vmm_register( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, - ) -> Result, HttpError>; + ) -> Result, HttpError>; #[endpoint { method = DELETE, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] - async fn instance_unregister( + async fn vmm_unregister( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError>; + path_params: Path, + ) -> Result, HttpError>; #[endpoint { method = PUT, - path = "/instances/{instance_id}/state", + path = "/vmms/{propolis_id}/state", }] - async fn instance_put_state( + async fn vmm_put_state( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result, HttpError>; + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; #[endpoint { method = GET, - path = "/instances/{instance_id}/state", + path = "/vmms/{propolis_id}/state", }] - async fn instance_get_state( + async fn vmm_get_state( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError>; + path_params: Path, + ) -> Result, HttpError>; #[endpoint { method = PUT, - path = "/instances/{instance_id}/external-ip", + path = "/vmms/{propolis_id}/external-ip", }] - async fn instance_put_external_ip( + async fn vmm_put_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result; #[endpoint { method = DELETE, - path = "/instances/{instance_id}/external-ip", + path = "/vmms/{propolis_id}/external-ip", }] - async fn instance_delete_external_ip( + async fn vmm_delete_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result; @@ -290,16 +290,13 @@ pub trait SledAgentApi { /// Take a snapshot of a disk that is attached to an instance #[endpoint { method = POST, - path = 
"/instances/{instance_id}/disks/{disk_id}/snapshot", + path = "/vmms/{propolis_id}/disks/{disk_id}/snapshot", }] - async fn instance_issue_disk_snapshot_request( + async fn vmm_issue_disk_snapshot_request( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result< - HttpResponseOk, - HttpError, - >; + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; #[endpoint { method = PUT, @@ -516,8 +513,8 @@ impl From for DiskType { /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] -pub struct InstancePathParam { - pub instance_id: InstanceUuid, +pub struct VmmPathParam { + pub propolis_id: PropolisUuid, } /// Path parameters for Disk requests (sled agent API) @@ -527,18 +524,18 @@ pub struct DiskPathParam { } #[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - pub instance_id: Uuid, +pub struct VmmIssueDiskSnapshotRequestPathParam { + pub propolis_id: PropolisUuid, pub disk_id: Uuid, } #[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { +pub struct VmmIssueDiskSnapshotRequestBody { pub snapshot_id: Uuid, } #[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { +pub struct VmmIssueDiskSnapshotRequestResponse { pub snapshot_id: Uuid, } diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index adbeb9158f..f95bf0cb64 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -7,10 +7,9 @@ use chrono::{DateTime, Utc}; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ - MigrationRuntimeState, MigrationState, SledInstanceState, VmmRuntimeState, + MigrationRuntimeState, MigrationState, SledVmmState, VmmRuntimeState, VmmState, }; -use omicron_uuid_kinds::PropolisUuid; use propolis_client::types::{ InstanceMigrationStatus, InstanceState as PropolisApiState, InstanceStateMonitorResponse, MigrationState as PropolisMigrationState, @@ -21,7 +20,6 @@ use uuid::Uuid; #[derive(Clone, Debug)] pub struct InstanceStates { vmm: VmmRuntimeState, - propolis_id: PropolisUuid, migration_in: Option, migration_out: Option, } @@ -173,11 +171,7 @@ pub enum Action { } impl InstanceStates { - pub fn new( - vmm: VmmRuntimeState, - propolis_id: PropolisUuid, - migration_id: Option, - ) -> Self { + pub fn new(vmm: VmmRuntimeState, migration_id: Option) -> Self { // If this instance is created with a migration ID, we are the intended // target of a migration in. Set that up now. let migration_in = @@ -187,17 +181,13 @@ impl InstanceStates { gen: Generation::new(), time_updated: Utc::now(), }); - InstanceStates { vmm, propolis_id, migration_in, migration_out: None } + InstanceStates { vmm, migration_in, migration_out: None } } pub fn vmm(&self) -> &VmmRuntimeState { &self.vmm } - pub fn propolis_id(&self) -> PropolisUuid { - self.propolis_id - } - pub fn migration_in(&self) -> Option<&MigrationRuntimeState> { self.migration_in.as_ref() } @@ -209,10 +199,9 @@ impl InstanceStates { /// Creates a `SledInstanceState` structure containing the entirety of this /// structure's runtime state. This requires cloning; for simple read access /// use the `instance` or `vmm` accessors instead. 
- pub fn sled_instance_state(&self) -> SledInstanceState { - SledInstanceState { + pub fn sled_instance_state(&self) -> SledVmmState { + SledVmmState { vmm_state: self.vmm.clone(), - propolis_id: self.propolis_id, migration_in: self.migration_in.clone(), migration_out: self.migration_out.clone(), } @@ -377,7 +366,6 @@ mod test { use uuid::Uuid; fn make_instance() -> InstanceStates { - let propolis_id = PropolisUuid::new_v4(); let now = Utc::now(); let vmm = VmmRuntimeState { @@ -386,7 +374,7 @@ mod test { time_updated: now, }; - InstanceStates::new(vmm, propolis_id, None) + InstanceStates::new(vmm, None) } fn make_migration_source_instance() -> InstanceStates { @@ -406,7 +394,6 @@ mod test { } fn make_migration_target_instance() -> InstanceStates { - let propolis_id = PropolisUuid::new_v4(); let now = Utc::now(); let vmm = VmmRuntimeState { @@ -415,7 +402,7 @@ mod test { time_updated: now, }; - InstanceStates::new(vmm, propolis_id, Some(Uuid::new_v4())) + InstanceStates::new(vmm, Some(Uuid::new_v4())) } fn make_observed_state( diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs index 246ef07b60..bd4680563e 100644 --- a/sled-agent/src/fakes/nexus.rs +++ b/sled-agent/src/fakes/nexus.rs @@ -15,12 +15,11 @@ use hyper::Body; use internal_dns::ServiceName; use nexus_client::types::SledAgentInfo; use omicron_common::api::external::Error; -use omicron_common::api::internal::nexus::{ - SledInstanceState, UpdateArtifactId, -}; -use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_common::api::internal::nexus::{SledVmmState, UpdateArtifactId}; +use omicron_uuid_kinds::{OmicronZoneUuid, PropolisUuid}; use schemars::JsonSchema; use serde::Deserialize; +use sled_agent_api::VmmPathParam; use uuid::Uuid; /// Implements a fake Nexus. @@ -50,8 +49,8 @@ pub trait FakeNexusServer: Send + Sync { fn cpapi_instances_put( &self, - _instance_id: Uuid, - _new_runtime_state: SledInstanceState, + _propolis_id: PropolisUuid, + _new_runtime_state: SledVmmState, ) -> Result<(), Error> { Err(Error::internal_error("Not implemented")) } @@ -118,22 +117,18 @@ async fn sled_agent_put( Ok(HttpResponseUpdatedNoContent()) } -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: Uuid, -} #[endpoint { method = PUT, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] async fn cpapi_instances_put( request_context: RequestContext, - path_params: Path, - new_runtime_state: TypedBody, + path_params: Path, + new_runtime_state: TypedBody, ) -> Result { let context = request_context.context(); context.cpapi_instances_put( - path_params.into_inner().instance_id, + path_params.into_inner().propolis_id, new_runtime_state.into_inner(), )?; Ok(HttpResponseUpdatedNoContent()) diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2bf8067d1c..221224a2e9 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -21,7 +21,7 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ - DiskRuntimeState, SledInstanceState, UpdateArtifactId, + DiskRuntimeState, SledVmmState, UpdateArtifactId, }; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, @@ -30,7 +30,6 @@ use omicron_common::api::internal::shared::{ use omicron_common::disk::{ DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, }; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use 
sled_agent_api::*; use sled_agent_types::boot_disk::{ BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, @@ -41,8 +40,8 @@ use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, + InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody, + VmmPutStateResponse, VmmUnregisterResponse, }; use sled_agent_types::sled::AddSledRequest; use sled_agent_types::time_sync::TimeSync; @@ -294,18 +293,18 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn instance_register( + async fn vmm_register( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let propolis_id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); Ok(HttpResponseOk( sa.instance_ensure_registered( - instance_id, - body_args.propolis_id, + body_args.instance_id, + propolis_id, body_args.hardware, body_args.instance_runtime, body_args.vmm_runtime, @@ -316,58 +315,56 @@ impl SledAgentApi for SledAgentImpl { )) } - async fn instance_unregister( + async fn vmm_unregister( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_ensure_unregistered(id).await?)) } - async fn instance_put_state( + async fn vmm_put_state( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result, HttpError> { + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) + Ok(HttpResponseOk(sa.instance_ensure_state(id, body_args.state).await?)) } - async fn instance_get_state( + async fn vmm_get_state( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_get_state(id).await?)) } - async fn instance_put_external_ip( + async fn vmm_put_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; + sa.instance_put_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } - async fn instance_delete_external_ip( + async fn vmm_delete_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = 
path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; + sa.instance_delete_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } @@ -399,26 +396,24 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn instance_issue_disk_snapshot_request( + async fn vmm_issue_disk_snapshot_request( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result< - HttpResponseOk, - HttpError, - > { + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> + { let sa = rqctx.context(); let path_params = path_params.into_inner(); let body = body.into_inner(); - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), + sa.vmm_issue_disk_snapshot_request( + path_params.propolis_id, path_params.disk_id, body.snapshot_id, ) .await?; - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + Ok(HttpResponseOk(VmmIssueDiskSnapshotRequestResponse { snapshot_id: body.snapshot_id, })) } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 0bcbc97fd2..b035ef7e71 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -25,9 +25,7 @@ use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; -use omicron_common::api::internal::nexus::{ - SledInstanceState, VmmRuntimeState, -}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmRuntimeState}; use omicron_common::api::internal::shared::{ NetworkInterface, ResolvedVpcFirewallRule, SledIdentifiers, SourceNatConfig, }; @@ -104,11 +102,11 @@ pub enum Error { #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns::resolver::ResolveError), - #[error("Instance {0} not running!")] - InstanceNotRunning(InstanceUuid), + #[error("Propolis job with ID {0} is registered but not running")] + VmNotRunning(PropolisUuid), - #[error("Instance already registered with Propolis ID {0}")] - InstanceAlreadyRegistered(PropolisUuid), + #[error("Propolis job with ID {0} already registered")] + PropolisAlreadyRegistered(PropolisUuid), #[error("No U.2 devices found")] U2NotFound, @@ -217,15 +215,15 @@ enum InstanceRequest { tx: oneshot::Sender>, }, CurrentState { - tx: oneshot::Sender, + tx: oneshot::Sender, }, PutState { - state: InstanceStateRequested, - tx: oneshot::Sender>, + state: VmmStateRequested, + tx: oneshot::Sender>, }, Terminate { mark_failed: bool, - tx: oneshot::Sender>, + tx: oneshot::Sender>, }, IssueSnapshotRequest { disk_id: Uuid, @@ -414,12 +412,12 @@ impl InstanceRunner { }, Some(PutState{ state, tx }) => { tx.send(self.put_state(state).await - .map(|r| InstancePutStateResponse { updated_runtime: Some(r) }) + .map(|r| VmmPutStateResponse { updated_runtime: Some(r) }) .map_err(|e| e.into())) .map_err(|_| Error::FailedSendClientClosed) }, Some(Terminate { mark_failed, tx }) => { - tx.send(Ok(InstanceUnregisterResponse { + tx.send(Ok(VmmUnregisterResponse { updated_runtime: Some(self.terminate(mark_failed).await) })) .map_err(|_| Error::FailedSendClientClosed) @@ -499,15 +497,10 @@ impl InstanceRunner { } /// Yields this instance's ID. 
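The publish_state_to_nexus body just below retries the notification until Nexus acknowledges it, backing off between attempts and giving up only on errors it deems permanent. A std-only sketch of that retry shape (the error split and backoff constants here are illustrative, not omicron's actual backoff policy):

use std::{thread, time::Duration};

enum PublishError {
    Transient, // e.g. a network blip; worth retrying
    Permanent, // e.g. a client error from Nexus; retrying cannot help
}

// Stand-in for the real notification call.
fn try_publish(attempt: u32) -> Result<(), PublishError> {
    // Pretend the first two attempts hit a transient network error.
    if attempt < 2 { Err(PublishError::Transient) } else { Ok(()) }
}

fn publish_until_acked() -> Result<(), PublishError> {
    let mut delay = Duration::from_millis(100);
    for attempt in 0u32.. {
        match try_publish(attempt) {
            Ok(()) => return Ok(()),
            Err(PublishError::Permanent) => {
                return Err(PublishError::Permanent)
            }
            Err(PublishError::Transient) => {
                // Exponential backoff, capped so retries stay frequent
                // enough to notice recovery.
                thread::sleep(delay);
                delay = (delay * 2).min(Duration::from_secs(5));
            }
        }
    }
    unreachable!()
}

fn main() {
    assert!(publish_until_acked().is_ok());
}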
- fn id(&self) -> InstanceUuid { + fn instance_id(&self) -> InstanceUuid { InstanceUuid::from_untyped_uuid(self.properties.id) } - /// Yields this instance's Propolis's ID. - fn propolis_id(&self) -> &PropolisUuid { - &self.propolis_id - } - async fn publish_state_to_nexus(&self) { // Retry until Nexus acknowledges that it has applied this state update. // Note that Nexus may receive this call but then fail while reacting @@ -518,15 +511,13 @@ impl InstanceRunner { || async { let state = self.state.sled_instance_state(); info!(self.log, "Publishing instance state update to Nexus"; - "instance_id" => %self.id(), + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, "state" => ?state, ); self.nexus_client - .cpapi_instances_put( - &self.id().into_untyped_uuid(), - &state.into(), - ) + .cpapi_instances_put(&self.propolis_id, &state.into()) .await .map_err(|err| -> backoff::BackoffError { match &err { @@ -576,7 +567,8 @@ impl InstanceRunner { warn!(self.log, "Failed to publish instance state to Nexus: {}", err.to_string(); - "instance_id" => %self.id(), + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, "retry_after" => ?delay); }, ) @@ -586,7 +578,8 @@ impl InstanceRunner { error!( self.log, "Failed to publish state to Nexus, will not retry: {:?}", e; - "instance_id" => %self.id() + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, ); } } @@ -622,7 +615,7 @@ impl InstanceRunner { info!( self.log, "updated state after observing Propolis state change"; - "propolis_id" => %self.state.propolis_id(), + "propolis_id" => %self.propolis_id, "new_vmm_state" => ?self.state.vmm() ); @@ -634,7 +627,8 @@ impl InstanceRunner { match action { Some(InstanceAction::Destroy) => { info!(self.log, "terminating VMM that has exited"; - "instance_id" => %self.id()); + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id); let mark_failed = false; self.terminate(mark_failed).await; Reaction::Terminate @@ -780,7 +774,7 @@ impl InstanceRunner { /// This routine is safe to call even if the instance's zone was never /// started. It is also safe to call multiple times on a single instance. async fn terminate_inner(&mut self) { - let zname = propolis_zone_name(self.propolis_id()); + let zname = propolis_zone_name(&self.propolis_id); // First fetch the running state. // @@ -948,8 +942,10 @@ impl InstanceRunner { } } -/// A reference to a single instance running a running Propolis server. +/// Describes a single Propolis server that incarnates a specific instance. pub struct Instance { + id: InstanceUuid, + tx: mpsc::Sender, #[allow(dead_code)] @@ -1091,7 +1087,7 @@ impl Instance { dhcp_config, requested_disks: hardware.disks, cloud_init_bytes: hardware.cloud_init_bytes, - state: InstanceStates::new(vmm_runtime, propolis_id, migration_id), + state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, storage, @@ -1104,7 +1100,11 @@ impl Instance { let runner_handle = tokio::task::spawn(async move { runner.run().await }); - Ok(Instance { tx, runner_handle }) + Ok(Instance { id, tx, runner_handle }) + } + + pub fn id(&self) -> InstanceUuid { + self.id } /// Create bundle from an instance zone. @@ -1130,7 +1130,7 @@ impl Instance { Ok(rx.await?) 
} - pub async fn current_state(&self) -> Result { + pub async fn current_state(&self) -> Result { let (tx, rx) = oneshot::channel(); self.tx .send(InstanceRequest::CurrentState { tx }) @@ -1152,8 +1152,8 @@ impl Instance { /// Rebooting to Running to Stopping to Stopped. pub async fn put_state( &self, - tx: oneshot::Sender>, - state: InstanceStateRequested, + tx: oneshot::Sender>, + state: VmmStateRequested, ) -> Result<(), Error> { self.tx .send(InstanceRequest::PutState { state, tx }) @@ -1166,7 +1166,7 @@ impl Instance { /// immediately transitions the instance to the Destroyed state. pub async fn terminate( &self, - tx: oneshot::Sender>, + tx: oneshot::Sender>, mark_failed: bool, ) -> Result<(), Error> { self.tx @@ -1224,7 +1224,7 @@ impl InstanceRunner { async fn request_zone_bundle( &self, ) -> Result { - let name = propolis_zone_name(self.propolis_id()); + let name = propolis_zone_name(&self.propolis_id); match &self.running_state { None => Err(BundleError::Unavailable { name }), Some(RunningState { ref running_zone, .. }) => { @@ -1242,7 +1242,7 @@ impl InstanceRunner { run_state.running_zone.root_zpool().map(|p| p.clone()) } - fn current_state(&self) -> SledInstanceState { + fn current_state(&self) -> SledVmmState { self.state.sled_instance_state() } @@ -1300,19 +1300,19 @@ impl InstanceRunner { async fn put_state( &mut self, - state: InstanceStateRequested, - ) -> Result { + state: VmmStateRequested, + ) -> Result { use propolis_client::types::InstanceStateRequested as PropolisRequest; let (propolis_state, next_published) = match state { - InstanceStateRequested::MigrationTarget(migration_params) => { + VmmStateRequested::MigrationTarget(migration_params) => { self.propolis_ensure(Some(migration_params)).await?; (None, None) } - InstanceStateRequested::Running => { + VmmStateRequested::Running => { self.propolis_ensure(None).await?; (Some(PropolisRequest::Run), None) } - InstanceStateRequested::Stopped => { + VmmStateRequested::Stopped => { // If the instance has not started yet, unregister it // immediately. Since there is no Propolis to push updates when // this happens, generate an instance record bearing the @@ -1328,9 +1328,9 @@ impl InstanceRunner { ) } } - InstanceStateRequested::Reboot => { + VmmStateRequested::Reboot => { if self.running_state.is_none() { - return Err(Error::InstanceNotRunning(self.id())); + return Err(Error::VmNotRunning(self.propolis_id)); } ( Some(PropolisRequest::Reboot), @@ -1379,7 +1379,7 @@ impl InstanceRunner { // Create a zone for the propolis instance, using the previously // configured VNICs. 
- let zname = propolis_zone_name(self.propolis_id()); + let zname = propolis_zone_name(&self.propolis_id); let mut rng = rand::rngs::StdRng::from_entropy(); let latest_disks = self .storage @@ -1399,7 +1399,7 @@ impl InstanceRunner { .with_zone_root_path(root) .with_zone_image_paths(&["/opt/oxide".into()]) .with_zone_type("propolis-server") - .with_unique_name(self.propolis_id().into_untyped_uuid()) + .with_unique_name(self.propolis_id.into_untyped_uuid()) .with_datasets(&[]) .with_filesystems(&[]) .with_data_links(&[]) @@ -1483,7 +1483,7 @@ impl InstanceRunner { Ok(PropolisSetup { client, running_zone }) } - async fn terminate(&mut self, mark_failed: bool) -> SledInstanceState { + async fn terminate(&mut self, mark_failed: bool) -> SledVmmState { self.terminate_inner().await; self.state.terminate_rudely(mark_failed); @@ -1508,9 +1508,7 @@ impl InstanceRunner { Ok(()) } else { - Err(Error::InstanceNotRunning(InstanceUuid::from_untyped_uuid( - self.properties.id, - ))) + Err(Error::VmNotRunning(self.propolis_id)) } } @@ -1604,7 +1602,7 @@ mod tests { enum ReceivedInstanceState { #[default] None, - InstancePut(SledInstanceState), + InstancePut(SledVmmState), } struct NexusServer { @@ -1614,8 +1612,8 @@ mod tests { impl FakeNexusServer for NexusServer { fn cpapi_instances_put( &self, - _instance_id: Uuid, - new_runtime_state: SledInstanceState, + _propolis_id: PropolisUuid, + new_runtime_state: SledVmmState, ) -> Result<(), omicron_common::api::external::Error> { self.observed_runtime_state .send(ReceivedInstanceState::InstancePut(new_runtime_state)) @@ -1760,7 +1758,7 @@ mod tests { let id = InstanceUuid::new_v4(); let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID); - let ticket = InstanceTicket::new_without_manager_for_test(id); + let ticket = InstanceTicket::new_without_manager_for_test(propolis_id); let initial_state = fake_instance_initial_state(propolis_addr); @@ -1917,7 +1915,7 @@ mod tests { // pretending we're InstanceManager::ensure_state, start our "instance" // (backed by fakes and propolis_mock_server) - inst.put_state(put_tx, InstanceStateRequested::Running) + inst.put_state(put_tx, VmmStateRequested::Running) .await .expect("failed to send Instance::put_state"); @@ -2011,7 +2009,7 @@ mod tests { // pretending we're InstanceManager::ensure_state, try in vain to start // our "instance", but no propolis server is running - inst.put_state(put_tx, InstanceStateRequested::Running) + inst.put_state(put_tx, VmmStateRequested::Running) .await .expect("failed to send Instance::put_state"); @@ -2025,7 +2023,7 @@ mod tests { .await .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); - if let ReceivedInstanceState::InstancePut(SledInstanceState { + if let ReceivedInstanceState::InstancePut(SledVmmState { vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. 
}) = state_rx.borrow().to_owned() @@ -2118,7 +2116,7 @@ mod tests { // pretending we're InstanceManager::ensure_state, try in vain to start // our "instance", but the zone never finishes installing - inst.put_state(put_tx, InstanceStateRequested::Running) + inst.put_state(put_tx, VmmStateRequested::Running) .await .expect("failed to send Instance::put_state"); @@ -2133,7 +2131,7 @@ mod tests { .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); debug!(log, "Zone-boot timeout awaited"); - if let ReceivedInstanceState::InstancePut(SledInstanceState { + if let ReceivedInstanceState::InstancePut(SledVmmState { vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. }) = state_rx.borrow().to_owned() @@ -2256,7 +2254,7 @@ mod tests { .await .unwrap(); - mgr.ensure_state(instance_id, InstanceStateRequested::Running) + mgr.ensure_state(propolis_id, VmmStateRequested::Running) .await .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 63164ed290..24be8be89f 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -4,13 +4,13 @@ //! API for controlling multiple instances on a sled. -use crate::instance::propolis_zone_name; use crate::instance::Instance; use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; +use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use omicron_common::api::external::ByteCount; use anyhow::anyhow; @@ -20,7 +20,7 @@ use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::InstanceRuntimeState; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::nexus::VmmRuntimeState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::InstanceUuid; @@ -44,8 +44,8 @@ pub enum Error { #[error("Instance error: {0}")] Instance(#[from] crate::instance::Error), - #[error("No such instance ID: {0}")] - NoSuchInstance(InstanceUuid), + #[error("VMM with ID {0} not found")] + NoSuchVmm(PropolisUuid), #[error("OPTE port management error: {0}")] Opte(#[from] illumos_utils::opte::Error), @@ -117,7 +117,7 @@ impl InstanceManager { terminate_tx, terminate_rx, nexus_client, - instances: BTreeMap::new(), + jobs: BTreeMap::new(), vnic_allocator: VnicAllocator::new("Instance", etherstub), port_manager, storage_generation: None, @@ -150,7 +150,7 @@ impl InstanceManager { propolis_addr: SocketAddr, sled_identifiers: SledIdentifiers, metadata: InstanceMetadata, - ) -> Result { + ) -> Result { let (tx, rx) = oneshot::channel(); self.inner .tx @@ -172,13 +172,13 @@ impl InstanceManager { pub async fn ensure_unregistered( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let (tx, rx) = oneshot::channel(); self.inner .tx .send(InstanceManagerRequest::EnsureUnregistered { - instance_id, + propolis_id, tx, }) .await @@ -188,14 +188,14 @@ impl InstanceManager { pub async fn ensure_state( &self, - instance_id: InstanceUuid, - target: InstanceStateRequested, - ) -> Result { + propolis_id: PropolisUuid, + target: VmmStateRequested, + ) -> Result { let (tx, rx) = oneshot::channel(); self.inner .tx .send(InstanceManagerRequest::EnsureState { - instance_id, + 
propolis_id, target, tx, }) @@ -206,31 +206,32 @@ impl InstanceManager { // these may involve a long-running zone creation, so avoid HTTP // request timeouts by decoupling the response // (see InstanceRunner::put_state) - InstanceStateRequested::MigrationTarget(_) - | InstanceStateRequested::Running => { + VmmStateRequested::MigrationTarget(_) + | VmmStateRequested::Running => { // We don't want the sending side of the channel to see an // error if we drop rx without awaiting it. // Since we don't care about the response here, we spawn rx // into a task which will await it for us in the background. tokio::spawn(rx); - Ok(InstancePutStateResponse { updated_runtime: None }) + Ok(VmmPutStateResponse { updated_runtime: None }) + } + VmmStateRequested::Stopped | VmmStateRequested::Reboot => { + rx.await? } - InstanceStateRequested::Stopped - | InstanceStateRequested::Reboot => rx.await?, } } - pub async fn instance_issue_disk_snapshot_request( + pub async fn issue_disk_snapshot_request( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); self.inner .tx - .send(InstanceManagerRequest::InstanceIssueDiskSnapshot { - instance_id, + .send(InstanceManagerRequest::IssueDiskSnapshot { + propolis_id, disk_id, snapshot_id, tx, @@ -259,14 +260,14 @@ impl InstanceManager { pub async fn add_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ip: &InstanceExternalIpBody, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); self.inner .tx - .send(InstanceManagerRequest::InstanceAddExternalIp { - instance_id, + .send(InstanceManagerRequest::AddExternalIp { + propolis_id, ip: *ip, tx, }) @@ -277,14 +278,14 @@ impl InstanceManager { pub async fn delete_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ip: &InstanceExternalIpBody, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); self.inner .tx - .send(InstanceManagerRequest::InstanceDeleteExternalIp { - instance_id, + .send(InstanceManagerRequest::DeleteExternalIp { + propolis_id, ip: *ip, tx, }) @@ -300,12 +301,12 @@ impl InstanceManager { pub async fn get_instance_state( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let (tx, rx) = oneshot::channel(); self.inner .tx - .send(InstanceManagerRequest::GetState { instance_id, tx }) + .send(InstanceManagerRequest::GetState { propolis_id, tx }) .await .map_err(|_| Error::FailedSendInstanceManagerClosed)?; rx.await? @@ -351,20 +352,20 @@ enum InstanceManagerRequest { // reasonable choice... 
sled_identifiers: Box, metadata: InstanceMetadata, - tx: oneshot::Sender>, + tx: oneshot::Sender>, }, EnsureUnregistered { - instance_id: InstanceUuid, - tx: oneshot::Sender>, + propolis_id: PropolisUuid, + tx: oneshot::Sender>, }, EnsureState { - instance_id: InstanceUuid, - target: InstanceStateRequested, - tx: oneshot::Sender>, + propolis_id: PropolisUuid, + target: VmmStateRequested, + tx: oneshot::Sender>, }, - InstanceIssueDiskSnapshot { - instance_id: InstanceUuid, + IssueDiskSnapshot { + propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, tx: oneshot::Sender>, @@ -373,19 +374,19 @@ enum InstanceManagerRequest { name: String, tx: oneshot::Sender>, }, - InstanceAddExternalIp { - instance_id: InstanceUuid, + AddExternalIp { + propolis_id: PropolisUuid, ip: InstanceExternalIpBody, tx: oneshot::Sender>, }, - InstanceDeleteExternalIp { - instance_id: InstanceUuid, + DeleteExternalIp { + propolis_id: PropolisUuid, ip: InstanceExternalIpBody, tx: oneshot::Sender>, }, GetState { - instance_id: InstanceUuid, - tx: oneshot::Sender>, + propolis_id: PropolisUuid, + tx: oneshot::Sender>, }, OnlyUseDisks { disks: AllDisks, @@ -396,7 +397,7 @@ enum InstanceManagerRequest { // Requests that the instance manager stop processing information about a // particular instance. struct InstanceDeregisterRequest { - id: InstanceUuid, + id: PropolisUuid, } struct InstanceManagerRunner { @@ -422,8 +423,8 @@ struct InstanceManagerRunner { // TODO: If we held an object representing an enum of "Created OR Running" // instance, we could avoid the methods within "instance.rs" that panic // if the Propolis client hasn't been initialized. - /// A mapping from a Sled Agent "Instance ID" to ("Propolis ID", [Instance]). - instances: BTreeMap, + /// A mapping from a Propolis ID to the [Instance] that Propolis incarnates. 
+ jobs: BTreeMap, vnic_allocator: VnicAllocator, port_manager: PortManager, @@ -451,7 +452,7 @@ impl InstanceManagerRunner { request = self.terminate_rx.recv() => { match request { Some(request) => { - self.instances.remove(&request.id); + self.jobs.remove(&request.id); }, None => { warn!(self.log, "InstanceManager's 'instance terminate' channel closed; shutting down"); @@ -484,31 +485,31 @@ impl InstanceManagerRunner { metadata ).await).map_err(|_| Error::FailedSendClientClosed) }, - Some(EnsureUnregistered { instance_id, tx }) => { - self.ensure_unregistered(tx, instance_id).await + Some(EnsureUnregistered { propolis_id, tx }) => { + self.ensure_unregistered(tx, propolis_id).await }, - Some(EnsureState { instance_id, target, tx }) => { - self.ensure_state(tx, instance_id, target).await + Some(EnsureState { propolis_id, target, tx }) => { + self.ensure_state(tx, propolis_id, target).await }, - Some(InstanceIssueDiskSnapshot { instance_id, disk_id, snapshot_id, tx }) => { - self.instance_issue_disk_snapshot_request(tx, instance_id, disk_id, snapshot_id).await + Some(IssueDiskSnapshot { propolis_id, disk_id, snapshot_id, tx }) => { + self.issue_disk_snapshot_request(tx, propolis_id, disk_id, snapshot_id).await }, Some(CreateZoneBundle { name, tx }) => { self.create_zone_bundle(tx, &name).await.map_err(Error::from) }, - Some(InstanceAddExternalIp { instance_id, ip, tx }) => { - self.add_external_ip(tx, instance_id, &ip).await + Some(AddExternalIp { propolis_id, ip, tx }) => { + self.add_external_ip(tx, propolis_id, &ip).await }, - Some(InstanceDeleteExternalIp { instance_id, ip, tx }) => { - self.delete_external_ip(tx, instance_id, &ip).await + Some(DeleteExternalIp { propolis_id, ip, tx }) => { + self.delete_external_ip(tx, propolis_id, &ip).await }, - Some(GetState { instance_id, tx }) => { + Some(GetState { propolis_id, tx }) => { // TODO(eliza): it could potentially be nice to // refactor this to use `tokio::sync::watch`, rather // than having to force `GetState` requests to // serialize with the requests that actually update // the state... 
- self.get_instance_state(tx, instance_id).await + self.get_instance_state(tx, propolis_id).await }, Some(OnlyUseDisks { disks, tx } ) => { self.use_only_these_disks(disks).await; @@ -533,8 +534,8 @@ impl InstanceManagerRunner { } } - fn get_instance(&self, instance_id: InstanceUuid) -> Option<&Instance> { - self.instances.get(&instance_id).map(|(_id, v)| v) + fn get_propolis(&self, propolis_id: PropolisUuid) -> Option<&Instance> { + self.jobs.get(&propolis_id) } /// Ensures that the instance manager contains a registered instance with @@ -565,7 +566,7 @@ impl InstanceManagerRunner { propolis_addr: SocketAddr, sled_identifiers: SledIdentifiers, metadata: InstanceMetadata, - ) -> Result { + ) -> Result { info!( &self.log, "ensuring instance is registered"; @@ -579,17 +580,16 @@ impl InstanceManagerRunner { ); let instance = { - if let Some((existing_propolis_id, existing_instance)) = - self.instances.get(&instance_id) - { - if propolis_id != *existing_propolis_id { + if let Some(existing_instance) = self.jobs.get(&propolis_id) { + if instance_id != existing_instance.id() { info!(&self.log, - "instance already registered with another Propolis ID"; - "instance_id" => %instance_id, - "existing_propolis_id" => %*existing_propolis_id); + "Propolis ID already used by another instance"; + "propolis_id" => %propolis_id, + "existing_instanceId" => %existing_instance.id()); + return Err(Error::Instance( - crate::instance::Error::InstanceAlreadyRegistered( - *existing_propolis_id, + crate::instance::Error::PropolisAlreadyRegistered( + propolis_id, ), )); } else { @@ -602,11 +602,16 @@ impl InstanceManagerRunner { } else { info!(&self.log, "registering new instance"; - "instance_id" => ?instance_id); - let instance_log = - self.log.new(o!("instance_id" => format!("{instance_id}"))); + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); + + let instance_log = self.log.new(o!( + "instance_id" => instance_id.to_string(), + "propolis_id" => propolis_id.to_string(), + )); + let ticket = - InstanceTicket::new(instance_id, self.terminate_tx.clone()); + InstanceTicket::new(propolis_id, self.terminate_tx.clone()); let services = InstanceManagerServices { nexus_client: self.nexus_client.clone(), @@ -635,27 +640,26 @@ impl InstanceManagerRunner { sled_identifiers, metadata, )?; - let _old = - self.instances.insert(instance_id, (propolis_id, instance)); + let _old = self.jobs.insert(propolis_id, instance); assert!(_old.is_none()); - &self.instances.get(&instance_id).unwrap().1 + &self.jobs.get(&propolis_id).unwrap() } }; Ok(instance.current_state().await?) } - /// Idempotently ensures the instance is not registered with this instance - /// manager. If the instance exists and has a running Propolis, that - /// Propolis is rudely terminated. + /// Idempotently ensures this VM is not registered with this instance + /// manager. If this Propolis job is registered and has a running zone, the + /// zone is rudely terminated. async fn ensure_unregistered( &mut self, - tx: oneshot::Sender>, - instance_id: InstanceUuid, + tx: oneshot::Sender>, + propolis_id: PropolisUuid, ) -> Result<(), Error> { // If the instance does not exist, we response immediately. 
- let Some(instance) = self.get_instance(instance_id) else { - tx.send(Ok(InstanceUnregisterResponse { updated_runtime: None })) + let Some(instance) = self.get_propolis(propolis_id) else { + tx.send(Ok(VmmUnregisterResponse { updated_runtime: None })) .map_err(|_| Error::FailedSendClientClosed)?; return Ok(()); }; @@ -667,15 +671,15 @@ impl InstanceManagerRunner { Ok(()) } - /// Idempotently attempts to drive the supplied instance into the supplied + /// Idempotently attempts to drive the supplied Propolis into the supplied /// runtime state. async fn ensure_state( &mut self, - tx: oneshot::Sender>, - instance_id: InstanceUuid, - target: InstanceStateRequested, + tx: oneshot::Sender>, + propolis_id: PropolisUuid, + target: VmmStateRequested, ) -> Result<(), Error> { - let Some(instance) = self.get_instance(instance_id) else { + let Some(instance) = self.get_propolis(propolis_id) else { match target { // If the instance isn't registered, then by definition it // isn't running here. Allow requests to stop or destroy the @@ -685,14 +689,12 @@ impl InstanceManagerRunner { // Propolis handled it, sled agent unregistered the // instance, and only then did a second stop request // arrive. - InstanceStateRequested::Stopped => { - tx.send(Ok(InstancePutStateResponse { - updated_runtime: None, - })) - .map_err(|_| Error::FailedSendClientClosed)?; + VmmStateRequested::Stopped => { + tx.send(Ok(VmmPutStateResponse { updated_runtime: None })) + .map_err(|_| Error::FailedSendClientClosed)?; } _ => { - tx.send(Err(Error::NoSuchInstance(instance_id))) + tx.send(Err(Error::NoSuchVmm(propolis_id))) .map_err(|_| Error::FailedSendClientClosed)?; } } @@ -702,20 +704,15 @@ impl InstanceManagerRunner { Ok(()) } - async fn instance_issue_disk_snapshot_request( + async fn issue_disk_snapshot_request( &self, tx: oneshot::Sender>, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - let instance = { - let (_, instance) = self - .instances - .get(&instance_id) - .ok_or(Error::NoSuchInstance(instance_id))?; - instance - }; + let instance = + self.jobs.get(&propolis_id).ok_or(Error::NoSuchVmm(propolis_id))?; instance .issue_snapshot_request(tx, disk_id, snapshot_id) @@ -729,11 +726,19 @@ impl InstanceManagerRunner { tx: oneshot::Sender>, name: &str, ) -> Result<(), BundleError> { - let Some((_propolis_id, instance)) = - self.instances.values().find(|(propolis_id, _instance)| { - name == propolis_zone_name(propolis_id) - }) - else { + // A well-formed Propolis zone name must consist of + // `PROPOLIS_ZONE_PREFIX` and the Propolis ID. If the prefix is not + // present or the Propolis ID portion of the supplied zone name isn't + // parseable as a UUID, there is no Propolis zone with the specified + // name to capture into a bundle, so return a `NoSuchZone` error. 
+ let vmm_id: PropolisUuid = name + .strip_prefix(PROPOLIS_ZONE_PREFIX) + .and_then(|uuid_str| uuid_str.parse::().ok()) + .ok_or_else(|| BundleError::NoSuchZone { + name: name.to_string(), + })?; + + let Some(instance) = self.jobs.get(&vmm_id) else { return Err(BundleError::NoSuchZone { name: name.to_string() }); }; instance.request_zone_bundle(tx).await @@ -742,11 +747,11 @@ impl InstanceManagerRunner { async fn add_external_ip( &self, tx: oneshot::Sender>, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ip: &InstanceExternalIpBody, ) -> Result<(), Error> { - let Some(instance) = self.get_instance(instance_id) else { - return Err(Error::NoSuchInstance(instance_id)); + let Some(instance) = self.get_propolis(propolis_id) else { + return Err(Error::NoSuchVmm(propolis_id)); }; instance.add_external_ip(tx, ip).await?; Ok(()) @@ -755,11 +760,11 @@ impl InstanceManagerRunner { async fn delete_external_ip( &self, tx: oneshot::Sender>, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ip: &InstanceExternalIpBody, ) -> Result<(), Error> { - let Some(instance) = self.get_instance(instance_id) else { - return Err(Error::NoSuchInstance(instance_id)); + let Some(instance) = self.get_propolis(propolis_id) else { + return Err(Error::NoSuchVmm(propolis_id)); }; instance.delete_external_ip(tx, ip).await?; @@ -768,12 +773,12 @@ impl InstanceManagerRunner { async fn get_instance_state( &self, - tx: oneshot::Sender>, - instance_id: InstanceUuid, + tx: oneshot::Sender>, + propolis_id: PropolisUuid, ) -> Result<(), Error> { - let Some(instance) = self.get_instance(instance_id) else { + let Some(instance) = self.get_propolis(propolis_id) else { return tx - .send(Err(Error::NoSuchInstance(instance_id))) + .send(Err(Error::NoSuchVmm(propolis_id))) .map_err(|_| Error::FailedSendClientClosed); }; @@ -801,7 +806,7 @@ impl InstanceManagerRunner { let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); let mut to_remove = vec![]; - for (id, (_, instance)) in self.instances.iter() { + for (id, instance) in self.jobs.iter() { // If we can read the filesystem pool, consider it. Otherwise, move // on, to prevent blocking the cleanup of other instances. let Ok(Some(filesystem_pool)) = @@ -817,7 +822,7 @@ impl InstanceManagerRunner { for id in to_remove { info!(self.log, "use_only_these_disks: Removing instance"; "instance_id" => ?id); - if let Some((_, instance)) = self.instances.remove(&id) { + if let Some(instance) = self.jobs.remove(&id) { let (tx, rx) = oneshot::channel(); let mark_failed = true; if let Err(e) = instance.terminate(tx, mark_failed).await { @@ -835,22 +840,22 @@ impl InstanceManagerRunner { /// Represents membership of an instance in the [`InstanceManager`]. pub struct InstanceTicket { - id: InstanceUuid, + id: PropolisUuid, terminate_tx: Option>, } impl InstanceTicket { - // Creates a new instance ticket for instance "id" to be removed - // from the manger on destruction. + // Creates a new instance ticket for the Propolis job with the supplied `id` + // to be removed from the manager on destruction. 
fn new( - id: InstanceUuid, + id: PropolisUuid, terminate_tx: mpsc::UnboundedSender, ) -> Self { InstanceTicket { id, terminate_tx: Some(terminate_tx) } } #[cfg(all(test, target_os = "illumos"))] - pub(crate) fn new_without_manager_for_test(id: InstanceUuid) -> Self { + pub(crate) fn new_without_manager_for_test(id: PropolisUuid) -> Self { Self { id, terminate_tx: None } } diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index 6057d03f70..d75081f1e4 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -364,35 +364,6 @@ impl SimCollection { pub async fn contains_key(self: &Arc, id: &Uuid) -> bool { self.objects.lock().await.contains_key(id) } - - /// Iterates over all of the existing objects in the collection and, for any - /// that meet `condition`, asks to transition them into the supplied target - /// state. - /// - /// If any such transition fails, this routine short-circuits and does not - /// attempt to transition any other objects. - // - // TODO: It's likely more idiomatic to have an `iter_mut` routine that - // returns a struct that impls Iterator and yields &mut S references. The - // tricky bit is that the struct must hold the objects lock during the - // iteration. Figure out if there's a better way to arrange all this. - pub async fn sim_ensure_for_each_where( - self: &Arc, - condition: C, - target: &S::RequestedState, - ) -> Result<(), Error> - where - C: Fn(&S) -> bool, - { - let mut objects = self.objects.lock().await; - for o in objects.values_mut() { - if condition(&o.object) { - o.transition(target.clone())?; - } - } - - Ok(()) - } } impl SimCollection { @@ -421,30 +392,24 @@ mod test { use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::DiskRuntimeState; - use omicron_common::api::internal::nexus::SledInstanceState; + use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::nexus::VmmRuntimeState; use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::PropolisUuid; use sled_agent_types::disk::DiskStateRequested; - use sled_agent_types::instance::InstanceStateRequested; + use sled_agent_types::instance::VmmStateRequested; fn make_instance( logctx: &LogContext, ) -> (SimObject, Receiver<()>) { - let propolis_id = PropolisUuid::new_v4(); let vmm_state = VmmRuntimeState { state: VmmState::Starting, gen: Generation::new(), time_updated: Utc::now(), }; - let state = SledInstanceState { - vmm_state, - propolis_id, - migration_in: None, - migration_out: None, - }; + let state = + SledVmmState { vmm_state, migration_in: None, migration_out: None }; SimObject::new_simulated_auto(&state, logctx.log.new(o!())) } @@ -488,8 +453,7 @@ mod test { // Stopping an instance that was never started synchronously destroys // its VMM. let rprev = r1; - let dropped = - instance.transition(InstanceStateRequested::Stopped).unwrap(); + let dropped = instance.transition(VmmStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); let rnext = instance.object.current(); @@ -529,8 +493,7 @@ mod test { // simulated instance's state, but it does queue up a transition. 
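The queueing behavior these tests assert, as a toy model: requesting a transition only enqueues a target, and each simulated step applies one queued state:

use std::collections::VecDeque;

#[derive(Clone, Copy, Debug, PartialEq)]
enum VmmState {
    Starting,
    Running,
    Stopped,
}

struct SimVmm {
    current: VmmState,
    queue: VecDeque<VmmState>,
}

impl SimVmm {
    // Requesting a transition does not change the visible state yet.
    fn transition(&mut self, target: VmmState) {
        self.queue.push_back(target);
    }
    // Each simulated step applies at most one queued state.
    fn transition_finish(&mut self) {
        if let Some(next) = self.queue.pop_front() {
            self.current = next;
        }
    }
}

fn main() {
    let mut vmm =
        SimVmm { current: VmmState::Starting, queue: VecDeque::new() };
    vmm.transition(VmmState::Running);
    assert_eq!(vmm.current, VmmState::Starting); // queued, not yet applied
    vmm.transition_finish();
    assert_eq!(vmm.current, VmmState::Running);
}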
let mut rprev = r1; assert!(rx.try_next().is_err()); - let dropped = - instance.transition(InstanceStateRequested::Running).unwrap(); + let dropped = instance.transition(VmmStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); assert!(rx.try_next().is_err()); @@ -562,8 +525,7 @@ mod test { // If we transition again to "Running", the process should complete // immediately. - let dropped = - instance.transition(InstanceStateRequested::Running).unwrap(); + let dropped = instance.transition(VmmStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); @@ -576,8 +538,7 @@ mod test { // If we go back to any stopped state, we go through the async process // again. assert!(rx.try_next().is_err()); - let dropped = - instance.transition(InstanceStateRequested::Stopped).unwrap(); + let dropped = instance.transition(VmmStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); let rnext = instance.object.current(); @@ -634,7 +595,7 @@ mod test { assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); assert!(instance - .transition(InstanceStateRequested::Running) + .transition(VmmStateRequested::Running) .unwrap() .is_none()); instance.transition_finish(); @@ -650,7 +611,7 @@ mod test { // Now reboot the instance. This is dispatched to Propolis, which will // move to the Rebooting state and then back to Running. assert!(instance - .transition(InstanceStateRequested::Reboot) + .transition(VmmStateRequested::Reboot) .unwrap() .is_none()); let (rprev, rnext) = (rnext, instance.object.current()); diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index e93bebad98..aead47658f 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -23,7 +23,7 @@ use dropshot::TypedBody; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::nexus::UpdateArtifactId; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; @@ -32,7 +32,6 @@ use omicron_common::api::internal::shared::{ }; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use sled_agent_api::*; use sled_agent_types::boot_disk::BootDiskOsWriteStatus; use sled_agent_types::boot_disk::BootDiskPathParams; @@ -44,9 +43,9 @@ use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::InstanceEnsureBody; use sled_agent_types::instance::InstanceExternalIpBody; -use sled_agent_types::instance::InstancePutStateBody; -use sled_agent_types::instance::InstancePutStateResponse; -use sled_agent_types::instance::InstanceUnregisterResponse; +use sled_agent_types::instance::VmmPutStateBody; +use sled_agent_types::instance::VmmPutStateResponse; +use sled_agent_types::instance::VmmUnregisterResponse; use sled_agent_types::sled::AddSledRequest; use sled_agent_types::time_sync::TimeSync; use 
sled_agent_types::zone_bundle::BundleUtilization; @@ -83,18 +82,18 @@ enum SledAgentSimImpl {} impl SledAgentApi for SledAgentSimImpl { type Context = Arc; - async fn instance_register( + async fn vmm_register( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let propolis_id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); Ok(HttpResponseOk( sa.instance_register( - instance_id, - body_args.propolis_id, + body_args.instance_id, + propolis_id, body_args.hardware, body_args.instance_runtime, body_args.vmm_runtime, @@ -104,58 +103,56 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn instance_unregister( + async fn vmm_unregister( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_unregister(id).await?)) } - async fn instance_put_state( + async fn vmm_put_state( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result, HttpError> { + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) + Ok(HttpResponseOk(sa.instance_ensure_state(id, body_args.state).await?)) } - async fn instance_get_state( + async fn vmm_get_state( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_get_state(id).await?)) } - async fn instance_put_external_ip( + async fn vmm_put_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; + sa.instance_put_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } - async fn instance_delete_external_ip( + async fn vmm_delete_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; + sa.instance_delete_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } @@ -192,27 +189,25 @@ impl SledAgentApi for SledAgentSimImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn instance_issue_disk_snapshot_request( + async fn vmm_issue_disk_snapshot_request( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result< - HttpResponseOk, - HttpError, - > { + path_params: Path, + body: TypedBody, + ) -> Result, 
HttpError> + { let sa = rqctx.context(); let path_params = path_params.into_inner(); let body = body.into_inner(); sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.propolis_id, path_params.disk_id, body.snapshot_id, ) .await .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + Ok(HttpResponseOk(VmmIssueDiskSnapshotRequestResponse { snapshot_id: body.snapshot_id, })) } @@ -512,45 +507,44 @@ fn method_unimplemented() -> Result { #[endpoint { method = POST, - path = "/instances/{instance_id}/poke", + path = "/vmms/{propolis_id}/poke", }] async fn instance_poke_post( rqctx: RequestContext>, - path_params: Path, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::Drain).await; + let id = path_params.into_inner().propolis_id; + sa.vmm_poke(id, PokeMode::Drain).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { method = POST, - path = "/instances/{instance_id}/poke-single-step", + path = "/vmms/{propolis_id}/poke-single-step", }] async fn instance_poke_single_step_post( rqctx: RequestContext>, - path_params: Path, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::SingleStep).await; + let id = path_params.into_inner().propolis_id; + sa.vmm_poke(id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { method = POST, - path = "/instances/{instance_id}/sim-migration-source", + path = "/vmms/{propolis_id}/sim-migration-source", }] async fn instance_post_sim_migration_source( rqctx: RequestContext>, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_simulate_migration_source(instance_id, body.into_inner()) - .await?; + let id = path_params.into_inner().propolis_id; + sa.instance_simulate_migration_source(id, body.into_inner()).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 33bc1c40c1..eb7ea0ca79 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -14,13 +14,14 @@ use nexus_client; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::ResourceType; -use omicron_common::api::internal::nexus::{SledInstanceState, VmmState}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmState}; +use omicron_uuid_kinds::{GenericUuid, PropolisUuid}; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateResponse, InstanceMigrationStatus as PropolisMigrationStatus, InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, }; -use sled_agent_types::instance::InstanceStateRequested; +use sled_agent_types::instance::VmmStateRequested; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Mutex; @@ -170,13 +171,13 @@ impl SimInstanceInner { /// returning an action for the caller to simulate. 
fn request_transition( &mut self, - target: &InstanceStateRequested, + target: &VmmStateRequested, ) -> Result, Error> { match target { // When Nexus intends to migrate into a VMM, it should create that // VMM in the Migrating state and shouldn't request anything else // from it before asking to migrate in. - InstanceStateRequested::MigrationTarget(_) => { + VmmStateRequested::MigrationTarget(_) => { if !self.queue.is_empty() { return Err(Error::invalid_request(&format!( "can't request migration in with a non-empty state @@ -207,7 +208,7 @@ impl SimInstanceInner { SimulatedMigrationResult::Success, ); } - InstanceStateRequested::Running => { + VmmStateRequested::Running => { match self.next_resting_state() { VmmState::Starting => { self.queue_propolis_state( @@ -234,7 +235,7 @@ impl SimInstanceInner { } } } - InstanceStateRequested::Stopped => { + VmmStateRequested::Stopped => { match self.next_resting_state() { VmmState::Starting => { let mark_failed = false; @@ -256,7 +257,7 @@ impl SimInstanceInner { } } } - InstanceStateRequested::Reboot => match self.next_resting_state() { + VmmStateRequested::Reboot => match self.next_resting_state() { VmmState::Running => { // Further requests to reboot are ignored if the instance // is currently rebooting or about to reboot. @@ -315,7 +316,7 @@ impl SimInstanceInner { /// If the state change queue contains at least once instance state change, /// returns the requested instance state associated with the last instance /// state on the queue. Returns None otherwise. - fn desired(&self) -> Option { + fn desired(&self) -> Option { self.last_queued_instance_state().map(|terminal| match terminal { // State change requests may queue these states as intermediate // states, but the simulation (and the tests that rely on it) is @@ -331,13 +332,11 @@ impl SimInstanceInner { "pending resting state {:?} doesn't map to a requested state", terminal ), - PropolisInstanceState::Running => InstanceStateRequested::Running, + PropolisInstanceState::Running => VmmStateRequested::Running, PropolisInstanceState::Stopping | PropolisInstanceState::Stopped - | PropolisInstanceState::Destroyed => { - InstanceStateRequested::Stopped - } - PropolisInstanceState::Rebooting => InstanceStateRequested::Reboot, + | PropolisInstanceState::Destroyed => VmmStateRequested::Stopped, + PropolisInstanceState::Rebooting => VmmStateRequested::Reboot, }) } @@ -388,7 +387,7 @@ impl SimInstanceInner { /// Simulates rude termination by moving the instance to the Destroyed state /// immediately and clearing the queue of pending state transitions. 
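Rude termination, continuing the toy model above: pending transitions are discarded and the VMM lands in Destroyed immediately:

use std::collections::VecDeque;

#[derive(Clone, Copy, Debug, PartialEq)]
enum VmmState {
    Running,
    Destroyed,
}

struct SimVmm {
    current: VmmState,
    queue: VecDeque<VmmState>,
}

impl SimVmm {
    fn terminate(&mut self) -> VmmState {
        // Clear anything still queued, then report the terminal state.
        self.queue.clear();
        self.current = VmmState::Destroyed;
        self.current
    }
}

fn main() {
    let mut vmm = SimVmm {
        current: VmmState::Running,
        queue: VecDeque::from([VmmState::Running]),
    };
    assert_eq!(vmm.terminate(), VmmState::Destroyed);
    assert!(vmm.queue.is_empty());
}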
- fn terminate(&mut self) -> SledInstanceState { + fn terminate(&mut self) -> SledVmmState { let mark_failed = false; self.state.terminate_rudely(mark_failed); self.queue.clear(); @@ -418,7 +417,7 @@ pub struct SimInstance { } impl SimInstance { - pub fn terminate(&self) -> SledInstanceState { + pub fn terminate(&self) -> SledVmmState { self.inner.lock().unwrap().terminate() } @@ -435,12 +434,12 @@ impl SimInstance { #[async_trait] impl Simulatable for SimInstance { - type CurrentState = SledInstanceState; - type RequestedState = InstanceStateRequested; + type CurrentState = SledVmmState; + type RequestedState = VmmStateRequested; type ProducerArgs = (); type Action = InstanceAction; - fn new(current: SledInstanceState) -> Self { + fn new(current: SledVmmState) -> Self { assert!(matches!( current.vmm_state.state, VmmState::Starting | VmmState::Migrating), @@ -453,7 +452,6 @@ impl Simulatable for SimInstance { inner: Arc::new(Mutex::new(SimInstanceInner { state: InstanceStates::new( current.vmm_state, - current.propolis_id, current.migration_in.map(|m| m.migration_id), ), last_response: InstanceStateMonitorResponse { @@ -480,7 +478,7 @@ impl Simulatable for SimInstance { fn request_transition( &mut self, - target: &InstanceStateRequested, + target: &VmmStateRequested, ) -> Result, Error> { self.inner.lock().unwrap().request_transition(target) } @@ -512,8 +510,8 @@ impl Simulatable for SimInstance { ) -> Result<(), Error> { nexus_client .cpapi_instances_put( - id, - &nexus_client::types::SledInstanceState::from(current), + &PropolisUuid::from_untyped_uuid(*id), + &nexus_client::types::SledVmmState::from(current), ) .await .map(|_| ()) diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 10536c8c80..7292b3dee1 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -24,7 +24,7 @@ use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, }; use omicron_common::api::internal::nexus::{ - DiskRuntimeState, MigrationRuntimeState, MigrationState, SledInstanceState, + DiskRuntimeState, MigrationRuntimeState, MigrationState, SledVmmState, }; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, @@ -50,8 +50,7 @@ use sled_agent_types::early_networking::{ }; use sled_agent_types::instance::{ InstanceExternalIpBody, InstanceHardware, InstanceMetadata, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, + VmmPutStateResponse, VmmStateRequested, VmmUnregisterResponse, }; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; @@ -71,8 +70,8 @@ use uuid::Uuid; pub struct SledAgent { pub id: Uuid, pub ip: IpAddr, - /// collection of simulated instances, indexed by instance uuid - instances: Arc>, + /// collection of simulated VMMs, indexed by Propolis uuid + vmms: Arc>, /// collection of simulated disks, indexed by disk uuid disks: Arc>, storage: Mutex, @@ -84,7 +83,8 @@ pub struct SledAgent { mock_propolis: Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances - pub external_ips: Mutex>>, + pub external_ips: + Mutex>>, pub vpc_routes: Mutex>, config: Config, fake_zones: Mutex, @@ -170,7 +170,7 @@ impl SledAgent { Arc::new(SledAgent { id, ip: config.dropshot.bind_address.ip(), - instances: Arc::new(SimCollection::new( + vmms: Arc::new(SimCollection::new( Arc::clone(&nexus_client), instance_log, sim_mode, @@ -269,7 +269,7 @@ impl SledAgent { instance_runtime: InstanceRuntimeState, vmm_runtime: VmmRuntimeState, 
metadata: InstanceMetadata, - ) -> Result { + ) -> Result { // respond with a fake 500 level failure if asked to ensure an instance // with more than 16 CPUs. let ncpus: i64 = (&hardware.properties.ncpus).into(); @@ -317,11 +317,7 @@ impl SledAgent { // point to the correct address. let mock_lock = self.mock_propolis.lock().await; if let Some((_srv, client)) = mock_lock.as_ref() { - if !self - .instances - .contains_key(&instance_id.into_untyped_uuid()) - .await - { + if !self.vmms.contains_key(&instance_id.into_untyped_uuid()).await { let metadata = propolis_client::types::InstanceMetadata { project_id: metadata.project_id, silo_id: metadata.silo_id, @@ -379,12 +375,11 @@ impl SledAgent { }); let instance_run_time_state = self - .instances + .vmms .sim_ensure( - &instance_id.into_untyped_uuid(), - SledInstanceState { + &propolis_id.into_untyped_uuid(), + SledVmmState { vmm_state: vmm_runtime, - propolis_id, migration_in, migration_out: None, }, @@ -417,56 +412,53 @@ impl SledAgent { /// not notified. pub async fn instance_unregister( self: &Arc, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let instance = match self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await { Ok(instance) => instance, Err(Error::ObjectNotFound { .. }) => { - return Ok(InstanceUnregisterResponse { updated_runtime: None }) + return Ok(VmmUnregisterResponse { updated_runtime: None }) } Err(e) => return Err(e), }; - self.detach_disks_from_instance(instance_id).await?; - let response = InstanceUnregisterResponse { + let response = VmmUnregisterResponse { updated_runtime: Some(instance.terminate()), }; - self.instances.sim_force_remove(instance_id.into_untyped_uuid()).await; + self.vmms.sim_force_remove(propolis_id.into_untyped_uuid()).await; Ok(response) } /// Asks the supplied instance to transition to the requested state. 
pub async fn instance_ensure_state( self: &Arc, - instance_id: InstanceUuid, - state: InstanceStateRequested, - ) -> Result { + propolis_id: PropolisUuid, + state: VmmStateRequested, + ) -> Result { if let Some(e) = self.instance_ensure_state_error.lock().await.as_ref() { return Err(e.clone()); } let current = match self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await { Ok(i) => i.current().clone(), Err(_) => match state { - InstanceStateRequested::Stopped => { - return Ok(InstancePutStateResponse { - updated_runtime: None, - }); + VmmStateRequested::Stopped => { + return Ok(VmmPutStateResponse { updated_runtime: None }); } _ => { return Err(Error::invalid_request(&format!( - "instance {} not registered on sled", - instance_id, + "Propolis {} not registered on sled", + propolis_id, ))); } }, @@ -475,43 +467,41 @@ impl SledAgent { let mock_lock = self.mock_propolis.lock().await; if let Some((_srv, client)) = mock_lock.as_ref() { let body = match state { - InstanceStateRequested::MigrationTarget(_) => { + VmmStateRequested::MigrationTarget(_) => { return Err(Error::internal_error( "migration not implemented for mock Propolis", )); } - InstanceStateRequested::Running => { - let instances = self.instances.clone(); + VmmStateRequested::Running => { + let vmms = self.vmms.clone(); let log = self.log.new( o!("component" => "SledAgent-insure_instance_state"), ); tokio::spawn(async move { tokio::time::sleep(Duration::from_secs(10)).await; - match instances + match vmms .sim_ensure( - &instance_id.into_untyped_uuid(), + &propolis_id.into_untyped_uuid(), current, Some(state), ) .await { Ok(state) => { - let instance_state: nexus_client::types::SledInstanceState = state.into(); - info!(log, "sim_ensure success"; "instance_state" => #?instance_state); + let vmm_state: nexus_client::types::SledVmmState = state.into(); + info!(log, "sim_ensure success"; "vmm_state" => #?vmm_state); } Err(instance_put_error) => { error!(log, "sim_ensure failure"; "error" => #?instance_put_error); } } }); - return Ok(InstancePutStateResponse { - updated_runtime: None, - }); + return Ok(VmmPutStateResponse { updated_runtime: None }); } - InstanceStateRequested::Stopped => { + VmmStateRequested::Stopped => { propolis_client::types::InstanceStateRequested::Stop } - InstanceStateRequested::Reboot => { + VmmStateRequested::Reboot => { propolis_client::types::InstanceStateRequested::Reboot } }; @@ -521,30 +511,24 @@ impl SledAgent { } let new_state = self - .instances - .sim_ensure(&instance_id.into_untyped_uuid(), current, Some(state)) + .vmms + .sim_ensure(&propolis_id.into_untyped_uuid(), current, Some(state)) .await?; - // If this request will shut down the simulated instance, look for any - // disks that are attached to it and drive them to the Detached state. 
- if matches!(state, InstanceStateRequested::Stopped) { - self.detach_disks_from_instance(instance_id).await?; - } - - Ok(InstancePutStateResponse { updated_runtime: Some(new_state) }) + Ok(VmmPutStateResponse { updated_runtime: Some(new_state) }) } pub async fn instance_get_state( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let instance = self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await .map_err(|_| { crate::sled_agent::Error::Instance( - crate::instance_manager::Error::NoSuchInstance(instance_id), + crate::instance_manager::Error::NoSuchVmm(propolis_id), ) })?; Ok(instance.current()) @@ -552,16 +536,16 @@ impl SledAgent { pub async fn instance_simulate_migration_source( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, migration: instance::SimulateMigrationSource, ) -> Result<(), HttpError> { let instance = self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await .map_err(|_| { crate::sled_agent::Error::Instance( - crate::instance_manager::Error::NoSuchInstance(instance_id), + crate::instance_manager::Error::NoSuchVmm(propolis_id), ) })?; instance.set_simulated_migration_source(migration); @@ -572,25 +556,6 @@ impl SledAgent { *self.instance_ensure_state_error.lock().await = error; } - async fn detach_disks_from_instance( - &self, - instance_id: InstanceUuid, - ) -> Result<(), Error> { - self.disks - .sim_ensure_for_each_where( - |disk| match disk.current().disk_state { - DiskState::Attached(id) | DiskState::Attaching(id) => { - id == instance_id.into_untyped_uuid() - } - _ => false, - }, - &DiskStateRequested::Detached, - ) - .await?; - - Ok(()) - } - /// Idempotently ensures that the given API Disk (described by `api_disk`) /// is attached (or not) as specified. This simulates disk attach and /// detach, similar to instance boot and halt. @@ -607,16 +572,16 @@ impl SledAgent { &self.updates } - pub async fn instance_count(&self) -> usize { - self.instances.size().await + pub async fn vmm_count(&self) -> usize { + self.vmms.size().await } pub async fn disk_count(&self) -> usize { self.disks.size().await } - pub async fn instance_poke(&self, id: InstanceUuid, mode: PokeMode) { - self.instances.sim_poke(id.into_untyped_uuid(), mode).await; + pub async fn vmm_poke(&self, id: PropolisUuid, mode: PokeMode) { + self.vmms.sim_poke(id.into_untyped_uuid(), mode).await; } pub async fn disk_poke(&self, id: Uuid) { @@ -699,7 +664,7 @@ impl SledAgent { /// snapshot here. 
pub async fn instance_issue_disk_snapshot_request( &self, - _instance_id: InstanceUuid, + _propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { @@ -760,18 +725,17 @@ impl SledAgent { pub async fn instance_put_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, body_args: &InstanceExternalIpBody, ) -> Result<(), Error> { - if !self.instances.contains_key(&instance_id.into_untyped_uuid()).await - { + if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await { return Err(Error::internal_error( - "can't alter IP state for nonexistent instance", + "can't alter IP state for VMM that's not registered", )); } let mut eips = self.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(propolis_id).or_default(); // High-level behaviour: this should always succeed UNLESS // trying to add a double ephemeral. @@ -794,18 +758,17 @@ impl SledAgent { pub async fn instance_delete_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, body_args: &InstanceExternalIpBody, ) -> Result<(), Error> { - if !self.instances.contains_key(&instance_id.into_untyped_uuid()).await - { + if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await { return Err(Error::internal_error( - "can't alter IP state for nonexistent instance", + "can't alter IP state for VMM that's not registered", )); } let mut eips = self.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(propolis_id).or_default(); my_eips.remove(&body_args); diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 556388ce93..ac8f80069b 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -24,8 +24,8 @@ use omicron_common::disk::DiskVariant; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; use slog::Logger; @@ -869,7 +869,7 @@ impl Pantry { self.sled_agent .instance_issue_disk_snapshot_request( - InstanceUuid::new_v4(), // instance id, not used by function + PropolisUuid::new_v4(), // instance id, not used by function volume_id.parse().unwrap(), snapshot_id.parse().unwrap(), ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 50e5611027..d69ccedb7d 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -38,9 +38,7 @@ use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, }; use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; -use omicron_common::api::internal::nexus::{ - SledInstanceState, VmmRuntimeState, -}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmRuntimeState}; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, ResolvedVpcFirewallRule, ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, @@ -61,8 +59,7 @@ use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ InstanceExternalIpBody, InstanceHardware, InstanceMetadata, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, + 
VmmPutStateResponse, VmmStateRequested, VmmUnregisterResponse, }; use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; use sled_agent_types::time_sync::TimeSync; @@ -227,7 +224,7 @@ impl From for dropshot::HttpError { } } Error::Instance( - e @ crate::instance_manager::Error::NoSuchInstance(_), + e @ crate::instance_manager::Error::NoSuchVmm(_), ) => HttpError::for_not_found( Some(NO_SUCH_INSTANCE.to_string()), e.to_string(), @@ -966,7 +963,7 @@ impl SledAgent { vmm_runtime: VmmRuntimeState, propolis_addr: SocketAddr, metadata: InstanceMetadata, - ) -> Result { + ) -> Result { self.inner .instances .ensure_registered( @@ -990,11 +987,11 @@ impl SledAgent { /// rudely terminates the instance. pub async fn instance_ensure_unregistered( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { self.inner .instances - .ensure_unregistered(instance_id) + .ensure_unregistered(propolis_id) .await .map_err(|e| Error::Instance(e)) } @@ -1003,12 +1000,12 @@ impl SledAgent { /// state. pub async fn instance_ensure_state( &self, - instance_id: InstanceUuid, - target: InstanceStateRequested, - ) -> Result { + propolis_id: PropolisUuid, + target: VmmStateRequested, + ) -> Result { self.inner .instances - .ensure_state(instance_id, target) + .ensure_state(propolis_id, target) .await .map_err(|e| Error::Instance(e)) } @@ -1020,12 +1017,12 @@ impl SledAgent { /// does not match the current ephemeral IP. pub async fn instance_put_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, external_ip: &InstanceExternalIpBody, ) -> Result<(), Error> { self.inner .instances - .add_external_ip(instance_id, external_ip) + .add_external_ip(propolis_id, external_ip) .await .map_err(|e| Error::Instance(e)) } @@ -1034,12 +1031,12 @@ impl SledAgent { /// specified external IP address in either its ephemeral or floating IP set. pub async fn instance_delete_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, external_ip: &InstanceExternalIpBody, ) -> Result<(), Error> { self.inner .instances - .delete_external_ip(instance_id, external_ip) + .delete_external_ip(propolis_id, external_ip) .await .map_err(|e| Error::Instance(e)) } @@ -1047,11 +1044,11 @@ impl SledAgent { /// Returns the state of the instance with the provided ID. 
pub async fn instance_get_state( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { self.inner .instances - .get_instance_state(instance_id) + .get_instance_state(propolis_id) .await .map_err(|e| Error::Instance(e)) } @@ -1082,19 +1079,15 @@ impl SledAgent { } /// Issue a snapshot request for a Crucible disk attached to an instance - pub async fn instance_issue_disk_snapshot_request( + pub async fn vmm_issue_disk_snapshot_request( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { self.inner .instances - .instance_issue_disk_snapshot_request( - instance_id, - disk_id, - snapshot_id, - ) + .issue_disk_snapshot_request(propolis_id, disk_id, snapshot_id) .await .map_err(Error::from) } diff --git a/sled-agent/types/src/instance.rs b/sled-agent/types/src/instance.rs index 0753e273dc..bd0f536aa3 100644 --- a/sled-agent/types/src/instance.rs +++ b/sled-agent/types/src/instance.rs @@ -11,14 +11,13 @@ use std::{ use omicron_common::api::internal::{ nexus::{ - InstanceProperties, InstanceRuntimeState, SledInstanceState, - VmmRuntimeState, + InstanceProperties, InstanceRuntimeState, SledVmmState, VmmRuntimeState, }, shared::{ DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, SourceNatConfig, }, }; -use omicron_uuid_kinds::PropolisUuid; +use omicron_uuid_kinds::InstanceUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -37,10 +36,8 @@ pub struct InstanceEnsureBody { /// The initial VMM runtime state for the VMM being registered. pub vmm_runtime: VmmRuntimeState, - /// The ID of the VMM being registered. This may not be the active VMM ID in - /// the instance runtime state (e.g. if the new VMM is going to be a - /// migration target). - pub propolis_id: PropolisUuid, + /// The ID of the instance for which this VMM is being created. + pub instance_id: InstanceUuid, /// The address at which this VMM should serve a Propolis server API. pub propolis_addr: SocketAddr, @@ -80,19 +77,19 @@ pub struct InstanceMetadata { /// The body of a request to move a previously-ensured instance into a specific /// runtime state. #[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateBody { +pub struct VmmPutStateBody { /// The state into which the instance should be driven. - pub state: InstanceStateRequested, + pub state: VmmStateRequested, } /// The response sent from a request to move an instance into a specific runtime /// state. #[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateResponse { +pub struct VmmPutStateResponse { /// The current runtime state of the instance after handling the request to /// change its state. If the instance's state did not change, this field is /// `None`. - pub updated_runtime: Option, + pub updated_runtime: Option, } /// Requestable running state of an Instance. @@ -100,7 +97,7 @@ pub struct InstancePutStateResponse { /// A subset of [`omicron_common::api::external::InstanceState`]. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceStateRequested { +pub enum VmmStateRequested { /// Run this instance by migrating in from a previous running incarnation of /// the instance. 
    MigrationTarget(InstanceMigrationTargetParams),
@@ -113,40 +110,40 @@ pub enum InstanceStateRequested {
    Reboot,
}

-impl fmt::Display for InstanceStateRequested {
+impl fmt::Display for VmmStateRequested {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.label())
    }
}

-impl InstanceStateRequested {
+impl VmmStateRequested {
    fn label(&self) -> &str {
        match self {
-            InstanceStateRequested::MigrationTarget(_) => "migrating in",
-            InstanceStateRequested::Running => "running",
-            InstanceStateRequested::Stopped => "stopped",
-            InstanceStateRequested::Reboot => "reboot",
+            VmmStateRequested::MigrationTarget(_) => "migrating in",
+            VmmStateRequested::Running => "running",
+            VmmStateRequested::Stopped => "stopped",
+            VmmStateRequested::Reboot => "reboot",
        }
    }

    /// Returns true if the state represents a stopped Instance.
    pub fn is_stopped(&self) -> bool {
        match self {
-            InstanceStateRequested::MigrationTarget(_) => false,
-            InstanceStateRequested::Running => false,
-            InstanceStateRequested::Stopped => true,
-            InstanceStateRequested::Reboot => false,
+            VmmStateRequested::MigrationTarget(_) => false,
+            VmmStateRequested::Running => false,
+            VmmStateRequested::Stopped => true,
+            VmmStateRequested::Reboot => false,
        }
    }
}

/// The response sent from a request to unregister an instance.
#[derive(Serialize, Deserialize, JsonSchema)]
-pub struct InstanceUnregisterResponse {
+pub struct VmmUnregisterResponse {
    /// The current state of the instance after handling the request to
    /// unregister it. If the instance's state did not change, this field is
    /// `None`.
-    pub updated_runtime: Option<SledInstanceState>,
+    pub updated_runtime: Option<SledVmmState>,
}

/// Parameters used when directing Propolis to initialize itself via live

From 7a6f45c5504bb092ce738d165cc88736ba4a9092 Mon Sep 17 00:00:00 2001
From: Rain
Date: Tue, 27 Aug 2024 12:39:21 -0700
Subject: [PATCH 19/22] [meta] update tokio to 1.39.3 (again) (#6452)

Tokio 1.39/mio 1.0 switches the illumos impl over to being eventfd-based.
For release 10 we decided that that was too risky, so we switched back to
Tokio 1.38. Now that the r10 branch has been cut, we can go back and update
Tokio to 1.39.3. We'd like to land this early in the cycle to get as much
soak time as possible.
See: * #6356 * #6249 * https://github.com/oxidecomputer/helios/issues/169 * https://github.com/oxidecomputer/helios/pull/171 * #6391 --- Cargo.lock | 18 ++++++++---------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 22 ++++++++++------------ 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7074e40993..4b8f8cdf6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3616,7 +3616,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2 0.5.7", "tokio", "tower-service", "tracing", @@ -6501,7 +6501,7 @@ dependencies = [ "log", "managed", "memchr", - "mio 0.8.11", + "mio 1.0.2", "nix 0.28.0", "nom", "num-bigint-dig", @@ -6536,7 +6536,6 @@ dependencies = [ "similar", "slog", "smallvec 1.13.2", - "socket2 0.5.7", "spin 0.9.8", "string_cache", "subtle", @@ -10623,28 +10622,27 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", "libc", - "mio 0.8.11", - "num_cpus", + "mio 1.0.2", "parking_lot 0.12.2", "pin-project-lite", "signal-hook-registry", "socket2 0.5.7", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index cbb0216d5f..2c3902f7bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -580,7 +580,7 @@ textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.38.1" +tokio = "1.39.3" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a39daa5735..edb92c8c77 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -102,13 +102,12 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } -socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -211,7 +210,6 @@ sha2 = { version = "0.10.8", features = ["oid"] } 
similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } -socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -219,7 +217,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -239,7 +237,7 @@ zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } [target.x86_64-unknown-linux-gnu.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -248,35 +246,35 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-linux-gnu.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.build-dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = 
"1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.build-dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -284,7 +282,7 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-illumos.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -293,7 +291,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } From a24fa8cf2e65c1945c2af7b460c7359316b05970 Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Tue, 27 Aug 2024 13:00:27 -0700 Subject: [PATCH 20/22] stop logging `cloud_init_bytes` in sled-agent (#6439) aka "want sled-agent to stop screaming at base64 floppy disks" Fixes #6387. 1. Create a `NoDebug` wrapper struct for this purpose in omicron-common. (Note that the OpenAPI schema for sled-agent does not change; schemars seems to honor `#[serde(transparent)]`.) 2. Apply `NoDebug` inside the `cloud_init_bytes` `Option`. Thus we log everything else about the instance hardware, and whether or not there is cidata, but not the cidata itself. 3. Demote an INFO-level log of the Propolis client's `InstanceEnsureRequest` to DEBUG. --- common/src/lib.rs | 24 ++++++++++++++++++++++++ sled-agent/src/instance.rs | 7 ++++--- sled-agent/types/src/instance.rs | 3 ++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 6da32c56ba..b9d6dd3172 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -118,3 +118,27 @@ where async fn never_bail() -> Result { Ok(false) } + +/// A wrapper struct that does nothing other than elide the inner value from +/// [`std::fmt::Debug`] output. 
+/// +/// We define this within Omicron instead of using one of the many available +/// crates that do the same thing because it's trivial to do so, and we want the +/// flexibility to add traits to this type without needing to wait on upstream +/// to add an optional dependency. +/// +/// If you want to use this for secrets, consider that it might not do +/// everything you expect (it does not zeroize memory on drop, nor get in the +/// way of you removing the inner value from this wrapper struct). +#[derive( + Clone, Copy, serde::Deserialize, serde::Serialize, schemars::JsonSchema, +)] +#[repr(transparent)] +#[serde(transparent)] +pub struct NoDebug(pub T); + +impl std::fmt::Debug for NoDebug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "..") + } +} diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index b035ef7e71..33b2d0cf67 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -31,6 +31,7 @@ use omicron_common::api::internal::shared::{ }; use omicron_common::backoff; use omicron_common::zpool_name::ZpoolName; +use omicron_common::NoDebug; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid}; use propolis_client::Client as PropolisClient; use rand::prelude::IteratorRandom; @@ -335,7 +336,7 @@ struct InstanceRunner { // Disk related properties requested_disks: Vec, - cloud_init_bytes: Option, + cloud_init_bytes: Option>, // Internal State management state: InstanceStates, @@ -718,10 +719,10 @@ impl InstanceRunner { .map(Into::into) .collect(), migrate, - cloud_init_bytes: self.cloud_init_bytes.clone(), + cloud_init_bytes: self.cloud_init_bytes.clone().map(|x| x.0), }; - info!(self.log, "Sending ensure request to propolis: {:?}", request); + debug!(self.log, "Sending ensure request to propolis: {:?}", request); let result = client.instance_ensure().body(request).send().await; info!(self.log, "result of instance_ensure call is {:?}", result); result?; diff --git a/sled-agent/types/src/instance.rs b/sled-agent/types/src/instance.rs index bd0f536aa3..a39fae414b 100644 --- a/sled-agent/types/src/instance.rs +++ b/sled-agent/types/src/instance.rs @@ -17,6 +17,7 @@ use omicron_common::api::internal::{ DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, SourceNatConfig, }, }; +use omicron_common::NoDebug; use omicron_uuid_kinds::InstanceUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -60,7 +61,7 @@ pub struct InstanceHardware { pub dhcp_config: DhcpConfig, // TODO: replace `propolis_client::*` with locally-modeled request type pub disks: Vec, - pub cloud_init_bytes: Option, + pub cloud_init_bytes: Option>, } /// Metadata used to track statistics about an instance. From e3ec364eed53e1486879a0b96bc17aac3dcbcd0e Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 27 Aug 2024 14:28:00 -0700 Subject: [PATCH 21/22] [wicket] attempt to fix test_inventory (#6456) I haven't been able to reproduce this locally, but this is my best guess as to what's going wrong here: MGS/wicketd learns about SPs but due to a race/load on the system, misses out on populating their state and instead leaves it empty. That causes the SPs to be filtered out here: https://github.com/oxidecomputer/omicron/blob/7a6f45c5504bb092ce738d165cc88736ba4a9092/wicketd/src/rss_config.rs#L129 This theory is buttressed by the fact that in failing logs, the returned inventory is a lot smaller than what I'm seeing locally. 
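Concretely, the filter in question boils down to an `Option` short-circuit
inside a `filter_map`: an SP whose state has not been populated yet is
silently dropped rather than surfaced as an error, which shrinks the
returned inventory. A minimal stand-in sketch (the `Sp` type here is
hypothetical, not wicketd's real one):

```rust
// Hypothetical stand-in for the SP record wicketd receives from MGS.
struct Sp {
    state: Option<String>,
}

// Schematic of the filter at rss_config.rs:129: the `?` on `as_ref()`
// drops any SP whose state is still `None`.
fn populated_states(sps: &[Sp]) -> Vec<&str> {
    sps.iter()
        .filter_map(|sp| {
            let state = sp.state.as_ref()?; // `None` => SP filtered out
            Some(state.as_str())
        })
        .collect()
}
```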
For example, in the logs for [this failing test](https://buildomat.eng.oxide.computer/wg/0/details/01J69AR918WAQNFKSBS85EAQPV/kkFMDYhAM3Vxb5ujRHlyAO9thmIAc7mHjHuicct0gS2bL8xu/01J69ARHYXXSKXKG8J49SRZVTA) I see [a 1430 byte response](https://buildomat.eng.oxide.computer/wg/0/artefact/01J69AR918WAQNFKSBS85EAQPV/kkFMDYhAM3Vxb5ujRHlyAO9thmIAc7mHjHuicct0gS2bL8xu/01J69ARHYXXSKXKG8J49SRZVTA/01J69ENP3EF3A212GVAGEMBDVQ/mod-ff551cc639cd8d16-test_inventory.21679.0.log?format=x-bunyan#L640): ``` test_inventory (wicketd test client): client response result = Ok(Response { url: Url { scheme: "http", cannot_be_a_base: false, username: "", password: None, host: Some(Ipv6(::1)), port: Some(45364), path: "/inventory", query: None, fragment: None }, status: 200, headers: {"content-type": "application/json", "x-request-id": "e68141e2-4c4f-46ec-a49b-9f8aa11a3410", "content-length": "1430", "date": "Tue, 27 Aug 2024 08:13:01 GMT"} }) ``` But in passing runs locally, I see a much larger 8654 byte response ([full logs](https://gist.github.com/sunshowers/b9c1868ba4c8c4bd3eec49cc4b56516d)): ``` 19:32:43.847Z DEBG test_inventory (wicketd test client): client response result = Ok(Response { url: Url { scheme: "http", cannot_be_a_base: false, username: "", password: None, host: Some(Ipv6(::1)), port: Some(44183), path: "/inventory", query: None, fragment: None }, status: 200, headers: {"content-type": "application/json", "x-request-id": "8b48dae0-025d-426a-82f0-1dd8323670d5", "content-length": "8654", "date": "Tue, 27 Aug 2024 19:32:43 GMT"} }) ``` Based on this theory, this PR changes the exit condition for the poll loop to also consider all of the SP states being present. In case there's something else going on, the PR also adds a bunch of additional logging. Fixes #6300. --- wicketd/src/http_entrypoints.rs | 2 + wicketd/src/rss_config.rs | 12 +++++- wicketd/tests/integration_tests/inventory.rs | 39 +++++++++++++++----- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs index 55b4d61c9a..3f460f1e37 100644 --- a/wicketd/src/http_entrypoints.rs +++ b/wicketd/src/http_entrypoints.rs @@ -82,6 +82,7 @@ impl WicketdApi for WicketdApiImpl { config.update_with_inventory_and_bootstrap_peers( &inventory, &ctx.bootstrap_peers, + &ctx.log, ); Ok(HttpResponseOk((&*config).into())) @@ -101,6 +102,7 @@ impl WicketdApi for WicketdApiImpl { config.update_with_inventory_and_bootstrap_peers( &inventory, &ctx.bootstrap_peers, + &ctx.log, ); config .update(body.into_inner(), ctx.baseboard.as_ref()) diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index 56e83fcd41..46ede25eaa 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -26,6 +26,7 @@ use omicron_common::api::external::AllowedSourceIps; use omicron_common::api::external::SwitchLocation; use once_cell::sync::Lazy; use sled_hardware_types::Baseboard; +use slog::debug; use slog::warn; use std::collections::btree_map; use std::collections::BTreeMap; @@ -115,6 +116,7 @@ impl CurrentRssConfig { &mut self, inventory: &RackV1Inventory, bootstrap_peers: &BootstrapPeers, + log: &slog::Logger, ) { let bootstrap_sleds = bootstrap_peers.sleds(); @@ -126,7 +128,15 @@ impl CurrentRssConfig { return None; } - let state = sp.state.as_ref()?; + let Some(state) = sp.state.as_ref() else { + debug!( + log, + "in update_with_inventory_and_bootstrap_peers, \ + filtering out SP with no state"; + "sp" => ?sp, + ); + return None; + }; let baseboard = Baseboard::new_gimlet( 
state.serial_number.clone(), state.model.clone(), diff --git a/wicketd/tests/integration_tests/inventory.rs b/wicketd/tests/integration_tests/inventory.rs index ed5ad22d5d..c7057e3adc 100644 --- a/wicketd/tests/integration_tests/inventory.rs +++ b/wicketd/tests/integration_tests/inventory.rs @@ -10,6 +10,7 @@ use super::setup::WicketdTestContext; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; use sled_hardware_types::Baseboard; +use slog::{info, warn}; use wicket::OutputKind; use wicket_common::inventory::{SpIdentifier, SpType}; use wicket_common::rack_setup::BootstrapSledDescription; @@ -32,13 +33,29 @@ async fn test_inventory() { .into_inner(); match response { GetInventoryResponse::Response { inventory, .. } => { - break inventory - } - GetInventoryResponse::Unavailable => { - // Keep polling wicketd until it receives its first results from MGS. - tokio::time::sleep(Duration::from_millis(100)).await; + // Ensure that the SP state is populated -- if it's not, + // then the `configured-bootstrap-sleds` command below + // might return an empty list. + let sp_state_none: Vec<_> = inventory + .sps + .iter() + .filter(|sp| sp.state.is_none()) + .collect(); + if sp_state_none.is_empty() { + break inventory; + } + + warn!( + wicketd_testctx.log(), + "SP state not yet populated for some SPs, retrying"; + "sps" => ?sp_state_none + ) } + GetInventoryResponse::Unavailable => {} } + + // Keep polling wicketd until it receives its first results from MGS. + tokio::time::sleep(Duration::from_millis(100)).await; } }; let inventory = @@ -46,6 +63,8 @@ async fn test_inventory() { .await .expect("get_inventory completed within 10 seconds"); + info!(wicketd_testctx.log(), "inventory returned"; "inventory" => ?inventory); + // 4 SPs attached to the inventory. assert_eq!(inventory.sps.len(), 4); @@ -70,17 +89,17 @@ async fn test_inventory() { serde_json::from_slice(&stdout).expect("stdout is valid JSON"); // This only tests the case that we get sleds back with no current - // bootstrap IP. This does provide svalue: it check that the command - // exists, accesses data within wicket, and returns it in the schema we - // expect. But it does not test the case where a sled does have a - // bootstrap IP. + // bootstrap IP. This does provide some value: it checks that the + // command exists, accesses data within wicket, and returns it in the + // schema we expect. But it does not test the case where a sled does + // have a bootstrap IP. // // Unfortunately, that's a difficult thing to test today. Wicket gets // that information by enumerating the IPs on the bootstrap network and // reaching out to the bootstrap_agent on them directly to ask them who // they are. Our testing setup does not have a way to provide such an // IP, or run a bootstrap_agent on an IP to respond. We should update - // this test when we do have that capabilitiy. + // this test when we do have that capability. assert_eq!( response, vec![ From dd853311ff9a654537973fa3b830b70894cb50fa Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 27 Aug 2024 14:44:17 -0700 Subject: [PATCH 22/22] Qorb integration as connection pool for database (#5876) Replaces all usage of bb8 with a new connection pooling library called [qorb](https://github.com/oxidecomputer/qorb). 
qorb, detailed in RFD 477, provides the following benefits over bb8:

- It allows lookup of multiple backends via DNS SRV records
- It dynamically adjusts the number of connections to each backend based on
  their health, and prioritizes vending out connections to healthy backends
- It should be re-usable for both our database and progenitor clients (using
  a different "backend connector", but the same core library and DNS
  resolution mechanism).

Fixes https://github.com/oxidecomputer/omicron/issues/4192
Part of https://github.com/oxidecomputer/omicron/issues/3763 (fixes CRDB portion)
---
 Cargo.lock | 210 ++++++++++++++----
 Cargo.toml | 4 +-
 dev-tools/omdb/src/bin/omdb/db.rs | 13 +-
 nexus-config/src/postgres_config.rs | 24 ++
 nexus/db-queries/Cargo.toml | 6 +-
 nexus/db-queries/src/db/collection_attach.rs | 28 ++-
 nexus/db-queries/src/db/collection_detach.rs | 20 +-
 .../src/db/collection_detach_many.rs | 24 +-
 nexus/db-queries/src/db/collection_insert.rs | 12 +-
 .../src/db/datastore/db_metadata.rs | 10 +-
 .../db-queries/src/db/datastore/inventory.rs | 2 +-
 nexus/db-queries/src/db/datastore/mod.rs | 15 +-
 nexus/db-queries/src/db/datastore/probe.rs | 34 +--
 .../src/db/datastore/pub_test_utils.rs | 2 +-
 nexus/db-queries/src/db/explain.rs | 11 +-
 nexus/db-queries/src/db/pagination.rs | 12 +-
 nexus/db-queries/src/db/pool.rs | 203 ++++++++++-------
 nexus/db-queries/src/db/pool_connection.rs | 133 +++++++++--
 .../db-queries/src/db/queries/external_ip.rs | 3 +-
 nexus/db-queries/src/db/queries/next_item.rs | 12 +-
 .../src/db/queries/region_allocation.rs | 4 +-
 .../virtual_provisioning_collection_update.rs | 16 +-
 nexus/db-queries/src/db/queries/vpc_subnet.rs | 11 +-
 .../src/app/background/tasks/saga_recovery.rs | 2 +-
 nexus/src/bin/schema-updater.rs | 2 +-
 nexus/src/context.rs | 88 +++-----
 nexus/src/populate.rs | 16 +-
 nexus/tests/integration_tests/schema.rs | 4 +-
 workspace-hack/Cargo.toml | 4 +-
 29 files changed, 591 insertions(+), 334 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4b8f8cdf6f..a3a963d030 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -256,12 +256,14 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
 [[package]]
 name = "async-bb8-diesel"
-version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=ed7ab5ef0513ba303d33efd41d3e9e381169d59b#ed7ab5ef0513ba303d33efd41d3e9e381169d59b"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc03a2806f66f36513d65e0a7f34200382230250cadcf8a8397cfbe3f26b795"
 dependencies = [
  "async-trait",
  "bb8",
  "diesel",
+ "futures",
  "thiserror",
  "tokio",
 ]
@@ -703,7 +705,7 @@ dependencies = [
 name = "bootstrap-agent-api"
 version = "0.1.0"
 dependencies = [
- "dropshot",
+ "dropshot 0.10.2-dev",
 "nexus-client",
 "omicron-common",
 "omicron-uuid-kinds",
@@ -973,7 +975,7 @@
 version = "0.1.0"
 dependencies = [
 "anyhow",
 "clap",
- "dropshot",
+ "dropshot 0.10.2-dev",
 "futures",
 "libc",
 "omicron-rpaths",
@@ -1117,7 +1119,7 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
 name = "clickhouse-admin-api"
 version = "0.1.0"
 dependencies = [
- "dropshot",
+ "dropshot 0.10.2-dev",
 "omicron-common",
 "omicron-uuid-kinds",
 "omicron-workspace-hack",
@@ -1160,7 +1162,7 @@
 name = "cockroach-admin-api"
 version = "0.1.0"
 dependencies = [
 "cockroach-admin-types",
- "dropshot",
+ "dropshot 0.10.2-dev",
 "omicron-common",
 "omicron-uuid-kinds",
 "omicron-workspace-hack",
@@ -1387,7 +1389,7 @@
 name = "crdb-seed"
 version = "0.1.0"
 dependencies = [
 "anyhow",
-
"dropshot", + "dropshot 0.10.2-dev", "omicron-test-utils", "omicron-workspace-hack", "slog", @@ -1539,7 +1541,7 @@ dependencies = [ "anyhow", "atty", "crucible-workspace-hack", - "dropshot", + "dropshot 0.10.2-dev", "nix 0.28.0", "rusqlite", "rustls-pemfile 1.0.4", @@ -2104,7 +2106,7 @@ dependencies = [ "clap", "dns-server-api", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "hickory-client", "hickory-proto", @@ -2137,7 +2139,7 @@ name = "dns-server-api" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.10.2-dev", "omicron-workspace-hack", "schemars", "serde", @@ -2210,6 +2212,52 @@ dependencies = [ "uuid", ] +[[package]] +name = "dropshot" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a391eeedf8a75a188eb670327c704b7ab10eb2bb890e2ec0880dd21d609fb6e8" +dependencies = [ + "async-stream", + "async-trait", + "base64 0.22.1", + "bytes", + "camino", + "chrono", + "debug-ignore", + "dropshot_endpoint 0.10.1", + "form_urlencoded", + "futures", + "hostname 0.4.0", + "http 0.2.12", + "hyper 0.14.30", + "indexmap 2.4.0", + "multer", + "openapiv3", + "paste", + "percent-encoding", + "rustls 0.22.4", + "rustls-pemfile 2.1.3", + "schemars", + "scopeguard", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sha1", + "slog", + "slog-async", + "slog-bunyan", + "slog-json", + "slog-term", + "tokio", + "tokio-rustls 0.25.0", + "toml 0.8.19", + "uuid", + "version_check", + "waitgroup", +] + [[package]] name = "dropshot" version = "0.10.2-dev" @@ -2222,7 +2270,7 @@ dependencies = [ "camino", "chrono", "debug-ignore", - "dropshot_endpoint", + "dropshot_endpoint 0.10.2-dev", "form_urlencoded", "futures", "hostname 0.4.0", @@ -2256,6 +2304,19 @@ dependencies = [ "waitgroup", ] +[[package]] +name = "dropshot_endpoint" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9058c9c7e4a6b378cd12e71dc155bb15d0d4f8e1e6039ce2cf0a7c0c81043e33" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_tokenstream", + "syn 2.0.74", +] + [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" @@ -2875,7 +2936,7 @@ dependencies = [ name = "gateway-api" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.10.2-dev", "gateway-types", "omicron-common", "omicron-uuid-kinds", @@ -2979,7 +3040,7 @@ name = "gateway-test-utils" version = "0.1.0" dependencies = [ "camino", - "dropshot", + "dropshot 0.10.2-dev", "gateway-messages", "gateway-types", "omicron-gateway", @@ -3982,7 +4043,7 @@ name = "installinator-api" version = "0.1.0" dependencies = [ "anyhow", - "dropshot", + "dropshot 0.10.2-dev", "hyper 0.14.30", "installinator-common", "omicron-common", @@ -4050,7 +4111,7 @@ dependencies = [ "chrono", "dns-server", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "hickory-resolver", @@ -4077,7 +4138,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot", + "dropshot 0.10.2-dev", "hickory-resolver", "internal-dns", "omicron-common", @@ -4912,7 +4973,7 @@ dependencies = [ "base64 0.22.1", "chrono", "cookie 0.18.1", - "dropshot", + "dropshot 0.10.2-dev", "futures", "headers", "http 0.2.12", @@ -4969,7 +5030,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "libc", "omicron-common", @@ -5052,7 +5113,6 @@ dependencies = [ "assert_matches", "async-bb8-diesel", "async-trait", - "bb8", "camino", "camino-tempfile", "chrono", @@ 
-5060,7 +5120,7 @@ dependencies = [ "db-macros", "diesel", "diesel-dtrace", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "gateway-client", @@ -5097,6 +5157,7 @@ dependencies = [ "pq-sys", "predicates", "pretty_assertions", + "qorb", "rand", "rcgen", "ref-cast", @@ -5118,6 +5179,7 @@ dependencies = [ "term", "thiserror", "tokio", + "url", "usdt", "uuid", ] @@ -5139,7 +5201,7 @@ dependencies = [ name = "nexus-internal-api" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.10.2-dev", "nexus-types", "omicron-common", "omicron-uuid-kinds", @@ -5395,7 +5457,7 @@ dependencies = [ "crucible-agent-client", "dns-server", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "futures", "gateway-messages", "gateway-test-utils", @@ -5453,7 +5515,7 @@ dependencies = [ "derive-where", "derive_more", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "futures", "gateway-client", "http 0.2.12", @@ -5778,7 +5840,7 @@ dependencies = [ "chrono", "clap", "clickhouse-admin-api", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "http 0.2.12", "illumos-utils", @@ -5815,7 +5877,7 @@ dependencies = [ "cockroach-admin-api", "cockroach-admin-types", "csv", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "http 0.2.12", "illumos-utils", @@ -5857,7 +5919,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "hex", @@ -5925,7 +5987,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "libc", @@ -5965,7 +6027,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "gateway-api", @@ -6029,7 +6091,7 @@ dependencies = [ "dns-server", "dns-service-client", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "fatfs", "futures", @@ -6152,7 +6214,7 @@ dependencies = [ "crucible-agent-client", "csv", "diesel", - "dropshot", + "dropshot 0.10.2-dev", "dyn-clone", "expectorate", "futures", @@ -6315,7 +6377,7 @@ dependencies = [ "dns-server", "dns-service-client", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "flate2", "flume", @@ -6400,7 +6462,7 @@ dependencies = [ "atomicwrites", "camino", "camino-tempfile", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "filetime", "gethostname", @@ -6636,7 +6698,7 @@ dependencies = [ "clickhouse-admin-api", "cockroach-admin-api", "dns-server-api", - "dropshot", + "dropshot 0.10.2-dev", "fs-err", "gateway-api", "indent_write", @@ -6860,7 +6922,7 @@ name = "oximeter-api" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.10.2-dev", "omicron-common", "omicron-workspace-hack", "schemars", @@ -6891,7 +6953,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "hyper 0.14.30", @@ -6939,7 +7001,7 @@ dependencies = [ "clap", "clickward", "crossterm 0.28.1", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "highway", @@ -6981,7 +7043,7 @@ version = "0.1.0" dependencies = [ "cfg-if", "chrono", - "dropshot", + "dropshot 0.10.2-dev", "futures", "http 0.2.12", "hyper 0.14.30", @@ -7017,7 +7079,7 @@ dependencies = [ "anyhow", "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "internal-dns", "nexus-client", "omicron-common", @@ -8012,7 +8074,7 @@ dependencies = [ "atty", "base64 0.21.7", "clap", - "dropshot", + "dropshot 0.10.2-dev", "futures", "hyper 0.14.30", "progenitor", @@ -8099,6 +8161,29 @@ 
dependencies = [ "psl-types", ] +[[package]] +name = "qorb" +version = "0.0.1" +source = "git+https://github.com/oxidecomputer/qorb?branch=master#163a77838a3cfe8f7741d32e443f76d995b89df3" +dependencies = [ + "anyhow", + "async-trait", + "debug-ignore", + "derive-where", + "dropshot 0.10.1", + "futures", + "hickory-resolver", + "rand", + "schemars", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-stream", + "tokio-tungstenite 0.23.1", + "tracing", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -8272,7 +8357,7 @@ dependencies = [ "camino-tempfile", "clap", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "humantime", "indexmap 2.4.0", @@ -9528,7 +9613,7 @@ name = "sled-agent-api" version = "0.1.0" dependencies = [ "camino", - "dropshot", + "dropshot 0.10.2-dev", "nexus-sled-agent-shared", "omicron-common", "omicron-uuid-kinds", @@ -9898,7 +9983,7 @@ dependencies = [ "anyhow", "async-trait", "clap", - "dropshot", + "dropshot 0.10.2-dev", "futures", "gateway-messages", "gateway-types", @@ -10715,6 +10800,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -10741,6 +10827,18 @@ dependencies = [ "tungstenite 0.21.0", ] +[[package]] +name = "tokio-tungstenite" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6989540ced10490aaf14e6bad2e3d33728a2813310a0c71d1574304c49631cd" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.23.0", +] + [[package]] name = "tokio-util" version = "0.7.11" @@ -11092,6 +11190,24 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e2ce1e47ed2994fd43b04c8f618008d4cabdd5ee34027cf14f9d918edd9c8" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.1.0", + "httparse", + "log", + "rand", + "sha1", + "thiserror", + "utf-8", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -11293,7 +11409,7 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dropshot", + "dropshot 0.10.2-dev", "futures", "hex", "hubtools", @@ -11760,7 +11876,7 @@ version = "0.1.0" dependencies = [ "anyhow", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "gateway-client", "maplit", "omicron-common", @@ -11816,7 +11932,7 @@ dependencies = [ "debug-ignore", "display-error-chain", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "either", "expectorate", "flate2", @@ -11883,7 +11999,7 @@ name = "wicketd-api" version = "0.1.0" dependencies = [ "bootstrap-agent-client", - "dropshot", + "dropshot 0.10.2-dev", "gateway-client", "omicron-common", "omicron-passwords", @@ -12339,7 +12455,7 @@ dependencies = [ "anyhow", "camino", "clap", - "dropshot", + "dropshot 0.10.2-dev", "illumos-utils", "omicron-common", "omicron-sled-agent", diff --git a/Cargo.toml b/Cargo.toml index 2c3902f7bc..6565265c1a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -282,13 +282,12 @@ api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.16" -async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } +async-bb8-diesel = "0.2" async-trait = "0.1.81" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.22.1" -bb8 = "0.8.5" bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } @@ -497,6 +496,7 @@ 
bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c76b6a63961ecef76acb1516b6e66c5c9" } propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c76b6a63961ecef76acb1516b6e66c5c9" } proptest = "1.5.0" +qorb = { git = "https://github.com/oxidecomputer/qorb", branch = "master" } quote = "1.0" rand = "0.8.5" rand_core = "0.6.4" diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 9ce4c66a80..48f5137698 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -246,7 +246,8 @@ impl DbUrlOptions { eprintln!("note: using database URL {}", &db_url); let db_config = db::Config { url: db_url.clone() }; - let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); + let pool = + Arc::new(db::Pool::new_single_host(&log.clone(), &db_config)); // Being a dev tool, we want to try this operation even if the schema // doesn't match what we expect. So we use `DataStore::new_unchecked()` @@ -4224,7 +4225,7 @@ async fn cmd_db_inventory( } async fn cmd_db_inventory_baseboard_ids( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4261,7 +4262,7 @@ async fn cmd_db_inventory_baseboard_ids( } async fn cmd_db_inventory_cabooses( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4302,7 +4303,7 @@ async fn cmd_db_inventory_cabooses( } async fn cmd_db_inventory_physical_disks( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, args: InvPhysicalDisksArgs, ) -> Result<(), anyhow::Error> { @@ -4359,7 +4360,7 @@ async fn cmd_db_inventory_physical_disks( } async fn cmd_db_inventory_rot_pages( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4394,7 +4395,7 @@ async fn cmd_db_inventory_rot_pages( } async fn cmd_db_inventory_collections_list( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] diff --git a/nexus-config/src/postgres_config.rs b/nexus-config/src/postgres_config.rs index 2509ae4fca..0c72d2ba9e 100644 --- a/nexus-config/src/postgres_config.rs +++ b/nexus-config/src/postgres_config.rs @@ -5,6 +5,7 @@ //! Common objects used for configuration use std::fmt; +use std::net::SocketAddr; use std::ops::Deref; use std::str::FromStr; @@ -32,6 +33,29 @@ impl PostgresConfigWithUrl { pub fn url(&self) -> String { self.url_raw.clone() } + + /// Accesses the first ip / port pair within the URL. + /// + /// # Panics + /// + /// This method makes the assumption that the hostname has at least one + /// "host IP / port" pair which can be extracted. If the supplied URL + /// does not have such a pair, this function will panic. + // Yes, panicking in the above scenario sucks. But this type is already + // pretty ubiquitous within Omicron, and integration with the qorb + // connection pooling library requires access to database by SocketAddr. 
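As a rough sketch (not part of the patch itself) of how the accessor introduced just below behaves, assuming a URL whose host is a literal IP address; the values here are illustrative:

    use std::str::FromStr;
    use nexus_config::PostgresConfigWithUrl;

    // A literal IP host parses cleanly, and address() returns the first
    // host/port pair from the URL.
    let cfg = PostgresConfigWithUrl::from_str(
        "postgresql://root@[fd00:1122:3344:103::3]:32221/omicron?sslmode=disable",
    )
    .unwrap();
    assert_eq!(cfg.address().port(), 32221);

    // By contrast, a symbolic hostname (e.g. "postgresql://root@db.local/omicron")
    // would panic inside address(), since the host cannot be parsed as an IpAddr.
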
+ pub fn address(&self) -> SocketAddr { + let tokio_postgres::config::Host::Tcp(host) = + &self.config.get_hosts()[0] + else { + panic!("Non-TCP hostname"); + }; + let ip: std::net::IpAddr = + host.parse().expect("Failed to parse host as IP address"); + + let port = self.config.get_ports()[0]; + SocketAddr::new(ip, port) + } } impl FromStr for PostgresConfigWithUrl { diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 5192528944..c6c5caab6a 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -14,7 +14,6 @@ omicron-rpaths.workspace = true anyhow.workspace = true async-bb8-diesel.workspace = true async-trait.workspace = true -bb8.workspace = true camino.workspace = true chrono.workspace = true const_format.workspace = true @@ -22,6 +21,7 @@ diesel.workspace = true diesel-dtrace.workspace = true dropshot.workspace = true futures.workspace = true +internal-dns.workspace = true ipnetwork.workspace = true macaddr.workspace = true once_cell.workspace = true @@ -29,6 +29,7 @@ oxnet.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" +qorb = { workspace = true, features = [ "qtop" ] } rand.workspace = true ref-cast.workspace = true schemars.workspace = true @@ -45,8 +46,9 @@ strum.workspace = true swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } -uuid.workspace = true +url.workspace = true usdt.workspace = true +uuid.workspace = true db-macros.workspace = true nexus-auth.workspace = true diff --git a/nexus/db-queries/src/db/collection_attach.rs b/nexus/db-queries/src/db/collection_attach.rs index 95e6afeb4b..c009d60483 100644 --- a/nexus/db-queries/src/db/collection_attach.rs +++ b/nexus/db-queries/src/db/collection_attach.rs @@ -578,9 +578,7 @@ where mod test { use super::*; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -617,8 +615,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -873,7 +871,7 @@ mod test { dev::test_setup_log("test_attach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -902,7 +900,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_missing_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -939,7 +937,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -987,7 +985,7 
@@ mod test { let logctx = dev::test_setup_log("test_attach_once_synchronous"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1036,7 +1034,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_multiple_times"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1092,7 +1090,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_beyond_capacity_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1156,7 +1154,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_while_already_attached"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1263,7 +1261,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1318,7 +1316,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_deleted_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1363,7 +1361,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_without_update_filter"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_detach.rs b/nexus/db-queries/src/db/collection_detach.rs index 03e09d41ca..bc547d5127 100644 --- a/nexus/db-queries/src/db/collection_detach.rs +++ b/nexus/db-queries/src/db/collection_detach.rs @@ -482,9 +482,7 @@ mod test { use super::*; use crate::db::collection_attach::DatastoreAttachTarget; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -521,8 +519,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -786,7 +784,7 @@ mod test { dev::test_setup_log("test_detach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; 
let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -814,7 +812,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_missing_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -850,7 +848,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -890,7 +888,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_while_already_detached"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -954,7 +952,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_deleted_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -998,7 +996,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_without_update_filter"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_detach_many.rs b/nexus/db-queries/src/db/collection_detach_many.rs index 986cfb70b7..36755599d4 100644 --- a/nexus/db-queries/src/db/collection_detach_many.rs +++ b/nexus/db-queries/src/db/collection_detach_many.rs @@ -480,9 +480,7 @@ mod test { use super::*; use crate::db::collection_attach::DatastoreAttachTarget; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -519,8 +517,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -778,7 +776,7 @@ mod test { dev::test_setup_log("test_detach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -808,7 +806,7 @@ mod test { dev::test_setup_log("test_detach_missing_resource_succeeds"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = 
db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -849,7 +847,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -892,7 +890,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once_synchronous"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -937,7 +935,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_while_already_detached"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -993,7 +991,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_filter_collection"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1044,7 +1042,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_deleted_resource"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1102,7 +1100,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_many"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_insert.rs b/nexus/db-queries/src/db/collection_insert.rs index 69906e6498..3aaea6aeb1 100644 --- a/nexus/db-queries/src/db/collection_insert.rs +++ b/nexus/db-queries/src/db/collection_insert.rs @@ -406,9 +406,7 @@ where mod test { use super::*; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::{DateTime, Utc}; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -443,8 +441,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -560,7 +558,7 @@ mod test { let logctx = dev::test_setup_log("test_collection_not_present"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -590,7 +588,7 @@ mod test { let logctx = dev::test_setup_log("test_collection_present"); let mut db = 
test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
 
         let conn = setup_db(&pool).await;
 
diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs
index 4169cc06bd..b997bf384f 100644
--- a/nexus/db-queries/src/db/datastore/db_metadata.rs
+++ b/nexus/db-queries/src/db/datastore/db_metadata.rs
@@ -511,7 +511,7 @@ mod test {
         let mut crdb = test_db::test_setup_database(&logctx.log).await;
 
         let cfg = db::Config { url: crdb.pg_config().clone() };
-        let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
+        let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
         let datastore =
             Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
 
@@ -559,8 +559,8 @@ mod test {
         let mut crdb = test_db::test_setup_database(&logctx.log).await;
 
         let cfg = db::Config { url: crdb.pg_config().clone() };
-        let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
-        let conn = pool.pool().get().await.unwrap();
+        let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
+        let conn = pool.claim().await.unwrap();
 
         // Mimic the layout of "schema/crdb".
         let config_dir = Utf8TempDir::new().unwrap();
@@ -671,8 +671,8 @@ mod test {
         let mut crdb = test_db::test_setup_database(&logctx.log).await;
 
         let cfg = db::Config { url: crdb.pg_config().clone() };
-        let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
-        let conn = pool.pool().get().await.unwrap();
+        let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
+        let conn = pool.claim().await.unwrap();
 
         // Mimic the layout of "schema/crdb".
         let config_dir = Utf8TempDir::new().unwrap();
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 1774a25c48..8888f2caaa 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -2164,7 +2164,7 @@ mod test {
     }
 
     impl CollectionCounts {
-        async fn new(conn: &DataStoreConnection<'_>) -> anyhow::Result<Self> {
+        async fn new(conn: &DataStoreConnection) -> anyhow::Result<Self> {
             conn.transaction_async(|conn| async move {
                 conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL)
                     .await
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 2cd21754f8..d424e08b61 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -27,7 +27,7 @@ use crate::db::{
     error::{public_error_from_diesel, ErrorHandler},
 };
 use ::oximeter::types::ProducerRegistry;
-use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionManager};
+use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::pg::Pg;
 use diesel::prelude::*;
 use diesel::query_builder::{QueryFragment, QueryId};
@@ -174,8 +174,8 @@ impl<U, T> RunnableQuery<U> for T where
 {
 }
 
-pub type DataStoreConnection<'a> =
-    bb8::PooledConnection<'a, ConnectionManager<DbConnection>>;
+pub type DataStoreConnection =
+    qorb::claim::Handle<async_bb8_diesel::Connection<DbConnection>>;
 
 pub struct DataStore {
     log: Logger,
@@ -279,8 +279,7 @@ impl DataStore {
         opctx: &OpContext,
     ) -> Result<DataStoreConnection, Error> {
         opctx.authorize(authz::Action::Query, &authz::DATABASE).await?;
-        let pool = self.pool.pool();
-        let connection = pool.get().await.map_err(|err| {
+        let connection = self.pool.claim().await.map_err(|err| {
             Error::unavail(&format!("Failed to access DB connection: {err}"))
         })?;
         Ok(connection)
@@ -294,7 +293,7 @@ impl DataStore {
     pub(super) async fn pool_connection_unauthorized(
         &self,
     ) -> Result<DataStoreConnection, Error> {
-        let connection = self.pool.pool().get().await.map_err(|err| {
+        let connection = self.pool.claim().await.map_err(|err| {
             Error::unavail(&format!("Failed to access DB connection: {err}"))
         })?;
         Ok(connection)
@@ -1587,7 +1586,7 @@ mod test {
             dev::test_setup_log("test_queries_do_not_require_full_table_scan");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
         let datastore =
             DataStore::new(&logctx.log, Arc::new(pool), None).await.unwrap();
         let conn = datastore.pool_connection_for_tests().await.unwrap();
@@ -1632,7 +1631,7 @@ mod test {
         let logctx = dev::test_setup_log("test_sled_ipv6_address_allocation");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
+        let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
         let datastore =
             Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
         let opctx = OpContext::for_tests(
diff --git a/nexus/db-queries/src/db/datastore/probe.rs b/nexus/db-queries/src/db/datastore/probe.rs
index f3e0614552..434bf25760 100644
--- a/nexus/db-queries/src/db/datastore/probe.rs
+++ b/nexus/db-queries/src/db/datastore/probe.rs
@@ -62,7 +62,7 @@ impl super::DataStore {
         use db::schema::probe::dsl;
         use db::schema::vpc_subnet::dsl as vpc_subnet_dsl;
 
-        let pool = self.pool_connection_authorized(opctx).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
         let probes = match pagparams {
             PaginatedBy::Id(pagparams) => {
@@ -77,7 +77,7 @@ impl super::DataStore {
         .filter(dsl::project_id.eq(authz_project.id()))
         .filter(dsl::time_deleted.is_null())
         .select(Probe::as_select())
-        .load_async(&*pool)
+        .load_async(&*conn)
         .await
         .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
@@ -99,7 +99,7 @@ impl super::DataStore {
             let db_subnet = vpc_subnet_dsl::vpc_subnet
                 .filter(vpc_subnet_dsl::id.eq(interface.subnet_id))
                 .select(VpcSubnet::as_select())
-                .first_async(&*pool)
+                .first_async(&*conn)
                 .await
                 .map_err(|e| {
                     public_error_from_diesel(e, ErrorHandler::Server)
@@ -126,7 +126,7 @@ impl super::DataStore {
         &self,
         opctx: &OpContext,
         probe: &Probe,
-        pool: &DataStoreConnection<'_>,
+        conn: &DataStoreConnection,
     ) -> LookupResult<ProbeInfo> {
         use db::schema::vpc_subnet::dsl as vpc_subnet_dsl;
 
@@ -143,7 +143,7 @@ impl super::DataStore {
         let db_subnet = vpc_subnet_dsl::vpc_subnet
             .filter(vpc_subnet_dsl::id.eq(interface.subnet_id))
             .select(VpcSubnet::as_select())
-            .first_async(&**pool)
+            .first_async(&**conn)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
@@ -172,20 +172,20 @@ impl super::DataStore {
     ) -> ListResultVec<ProbeInfo> {
         use db::schema::probe::dsl;
 
-        let pool = self.pool_connection_authorized(opctx).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
         let probes = paginated(dsl::probe, dsl::id, pagparams)
             .filter(dsl::time_deleted.is_null())
             .filter(dsl::sled.eq(sled))
             .select(Probe::as_select())
-            .load_async(&*pool)
+            .load_async(&*conn)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
         let mut result = Vec::with_capacity(probes.len());
 
         for probe in probes.into_iter() {
-            result.push(self.resolve_probe_info(opctx, &probe, &pool).await?);
+            result.push(self.resolve_probe_info(opctx, &probe, &conn).await?);
         }
 
         Ok(result)
@@ -200,7 +200,7 @@ impl super::DataStore {
     ) -> LookupResult<ProbeInfo> {
         use db::schema::probe;
         use db::schema::probe::dsl;
-        let pool = self.pool_connection_authorized(opctx).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
         let name_or_id = name_or_id.clone();
 
@@ -211,7 +211,7 @@ impl super::DataStore {
                 .filter(probe::project_id.eq(authz_project.id()))
                 .select(Probe::as_select())
                 .limit(1)
-                .first_async::<Probe>(&*pool)
+                .first_async::<Probe>(&*conn)
                 .await
                 .map_err(|e| {
                     public_error_from_diesel(
@@ -227,7 +227,7 @@ impl super::DataStore {
                 .filter(probe::project_id.eq(authz_project.id()))
                 .select(Probe::as_select())
                 .limit(1)
-                .first_async::<Probe>(&*pool)
+                .first_async::<Probe>(&*conn)
                 .await
                 .map_err(|e| {
                     public_error_from_diesel(
@@ -240,7 +240,7 @@ impl super::DataStore {
             }),
         }?;
 
-        self.resolve_probe_info(opctx, &probe, &pool).await
+        self.resolve_probe_info(opctx, &probe, &conn).await
     }
 
     /// Add a probe to the data store.
@@ -253,7 +253,7 @@ impl super::DataStore {
     ) -> CreateResult<Probe> {
         //TODO in transaction
         use db::schema::probe::dsl;
-        let pool = self.pool_connection_authorized(opctx).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
         let _eip = self
             .allocate_probe_ephemeral_ip(
@@ -306,7 +306,7 @@ impl super::DataStore {
         let result = diesel::insert_into(dsl::probe)
             .values(probe.clone())
             .returning(Probe::as_returning())
-            .get_result_async(&*pool)
+            .get_result_async(&*conn)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
@@ -322,7 +322,7 @@ impl super::DataStore {
     ) -> DeleteResult {
         use db::schema::probe;
         use db::schema::probe::dsl;
-        let pool = self.pool_connection_authorized(opctx).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
 
         let name_or_id = name_or_id.clone();
 
@@ -334,7 +334,7 @@ impl super::DataStore {
                 .filter(probe::project_id.eq(authz_project.id()))
                 .select(probe::id)
                 .limit(1)
-                .first_async::<Uuid>(&*pool)
+                .first_async::<Uuid>(&*conn)
                 .await
                 .map_err(|e| {
                     public_error_from_diesel(e, ErrorHandler::Server)
@@ -350,7 +350,7 @@ impl super::DataStore {
             .filter(dsl::id.eq(id))
             .filter(dsl::project_id.eq(authz_project.id()))
             .set(dsl::time_deleted.eq(Utc::now()))
-            .execute_async(&*pool)
+            .execute_async(&*conn)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
 
diff --git a/nexus/db-queries/src/db/datastore/pub_test_utils.rs b/nexus/db-queries/src/db/datastore/pub_test_utils.rs
index 93a172bd15..bcf6a6c80f 100644
--- a/nexus/db-queries/src/db/datastore/pub_test_utils.rs
+++ b/nexus/db-queries/src/db/datastore/pub_test_utils.rs
@@ -29,7 +29,7 @@ pub async fn datastore_test(
     use crate::authn;
 
     let cfg = db::Config { url: db.pg_config().clone() };
-    let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
+    let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
     let datastore =
         Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
 
diff --git a/nexus/db-queries/src/db/explain.rs b/nexus/db-queries/src/db/explain.rs
index 24fd993040..52844c204f 100644
--- a/nexus/db-queries/src/db/explain.rs
+++ b/nexus/db-queries/src/db/explain.rs
@@ -124,8 +124,7 @@ mod test {
     }
 
     async fn create_schema(pool: &db::Pool) {
-        pool.pool()
-            .get()
+        pool.claim()
             .await
             .unwrap()
             .batch_execute_async(
@@ -145,8 +144,8 @@ mod test {
         let logctx = dev::test_setup_log("test_explain_async");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
-        let conn = pool.pool().get().await.unwrap();
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
+        let conn = pool.claim().await.unwrap();
 
         create_schema(&pool).await;
 
@@ -170,8 +169,8 @@ mod test {
         let logctx = dev::test_setup_log("test_explain_full_table_scan");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
-        let conn = pool.pool().get().await.unwrap();
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
+        let conn = pool.claim().await.unwrap();
 
         create_schema(&pool).await;
 
diff --git a/nexus/db-queries/src/db/pagination.rs b/nexus/db-queries/src/db/pagination.rs
index 4fc1cf5966..9920440ade 100644
--- a/nexus/db-queries/src/db/pagination.rs
+++ b/nexus/db-queries/src/db/pagination.rs
@@ -354,7 +354,7 @@ mod test {
     async fn populate_users(pool: &db::Pool, values: &Vec<(i64, i64)>) {
         use schema::test_users::dsl;
 
-        let conn = pool.pool().get().await.unwrap();
+        let conn = pool.claim().await.unwrap();
 
         // The indexes here work around the check that prevents full table
         // scans.
@@ -392,7 +392,7 @@ mod test {
         pool: &db::Pool,
         query: BoxedQuery,
     ) -> Vec<User> {
-        let conn = pool.pool().get().await.unwrap();
+        let conn = pool.claim().await.unwrap();
         query.select(User::as_select()).load_async(&*conn).await.unwrap()
     }
 
@@ -402,7 +402,7 @@ mod test {
             dev::test_setup_log("test_paginated_single_column_ascending");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
 
         use schema::test_users::dsl;
 
@@ -437,7 +437,7 @@ mod test {
             dev::test_setup_log("test_paginated_single_column_descending");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
 
         use schema::test_users::dsl;
 
@@ -472,7 +472,7 @@ mod test {
             dev::test_setup_log("test_paginated_multicolumn_ascending");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
 
         use schema::test_users::dsl;
 
@@ -526,7 +526,7 @@ mod test {
             dev::test_setup_log("test_paginated_multicolumn_descending");
         let mut db = test_setup_database(&logctx.log).await;
         let cfg = db::Config { url: db.pg_config().clone() };
-        let pool = db::Pool::new(&logctx.log, &cfg);
+        let pool = db::Pool::new_single_host(&logctx.log, &cfg);
 
         use schema::test_users::dsl;
 
diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs
index 497c8d97c5..dccee6fa3f 100644
--- a/nexus/db-queries/src/db/pool.rs
+++ b/nexus/db-queries/src/db/pool.rs
@@ -3,108 +3,155 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 //! Database connection pooling
-// This whole thing is a placeholder for prototyping.
-//
-// TODO-robustness TODO-resilience We will want to carefully think about the
-// connection pool that we use and its parameters.  It's not clear from the
-// survey so far whether an existing module is suitable for our purposes.  See
-// the Cueball Internals document for details on the sorts of behaviors we'd
-// like here.  Even if by luck we stick with bb8, we definitely want to think
-// through the various parameters.
-//
-// Notes about bb8's behavior:
-// * When the database is completely offline, and somebody wants a connection,
-//   it still waits for the connection timeout before giving up.  That seems
-//   like not what we want.  (To be clear, this is a failure mode where we know
-//   the database is offline, not one where it's partitioned and we can't tell.)
-// * Although the `build_unchecked()` builder allows the pool to start up with
-//   no connections established (good), it also _seems_ to not establish any
-//   connections even when it could, resulting in a latency bubble for the first
-//   operation after startup.  That's not what we're looking for.
-//
 // TODO-design Need TLS support (the types below hardcode NoTls).
 
 use super::Config as DbConfig;
-use async_bb8_diesel::ConnectionError;
-use async_bb8_diesel::ConnectionManager;
+use crate::db::pool_connection::{DieselPgConnector, DieselPgConnectorArgs};
+
+use qorb::backend;
+use qorb::policy::Policy;
+use qorb::resolver::{AllBackends, Resolver};
+use qorb::resolvers::dns::{DnsResolver, DnsResolverConfig};
+use qorb::service;
+use slog::Logger;
+use std::collections::BTreeMap;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tokio::sync::watch;
 
 pub use super::pool_connection::DbConnection;
 
+type QorbConnection = async_bb8_diesel::Connection<DbConnection>;
+type QorbPool = qorb::pool::Pool<QorbConnection>;
+
 /// Wrapper around a database connection pool.
 ///
 /// Expected to be used as the primary interface to the database.
 pub struct Pool {
-    pool: bb8::Pool<ConnectionManager<DbConnection>>,
+    inner: QorbPool,
 }
 
-impl Pool {
-    pub fn new(log: &slog::Logger, db_config: &DbConfig) -> Self {
-        // Make sure diesel-dtrace's USDT probes are enabled.
-        usdt::register_probes().expect("Failed to register USDT DTrace probes");
-        Self::new_builder(log, db_config, bb8::Builder::new())
-    }
+// Provides an alternative to the DNS resolver for cases where we want to
+// contact the database without performing resolution.
+struct SingleHostResolver {
+    tx: watch::Sender<AllBackends>,
+}
 
-    pub fn new_failfast_for_tests(
-        log: &slog::Logger,
-        db_config: &DbConfig,
-    ) -> Self {
-        Self::new_builder(
-            log,
-            db_config,
-            bb8::Builder::new()
-                .connection_timeout(std::time::Duration::from_millis(1)),
-        )
+impl SingleHostResolver {
+    fn new(config: &DbConfig) -> Self {
+        let backends = Arc::new(BTreeMap::from([(
+            backend::Name::new("singleton"),
+            backend::Backend { address: config.url.address() },
+        )]));
+        let (tx, _rx) = watch::channel(backends.clone());
+        Self { tx }
     }
+}
 
-    fn new_builder(
-        log: &slog::Logger,
-        db_config: &DbConfig,
-        builder: bb8::Builder<ConnectionManager<DbConnection>>,
-    ) -> Self {
-        let url = db_config.url.url();
-        let log = log.new(o!(
-            "database_url" => url.clone(),
-            "component" => "db::Pool"
-        ));
-        info!(&log, "database connection pool");
-        let error_sink = LoggingErrorSink::new(log);
-        let manager = ConnectionManager::<DbConnection>::new(url);
-        let pool = builder
-            .connection_customizer(Box::new(
-                super::pool_connection::ConnectionCustomizer::new(),
-            ))
-            .error_sink(Box::new(error_sink))
-            .build_unchecked(manager);
-        Pool { pool }
+impl Resolver for SingleHostResolver {
+    fn monitor(&mut self) -> watch::Receiver<AllBackends> {
+        self.tx.subscribe()
     }
+}
 
-    /// Returns a reference to the underlying pool.
-    pub fn pool(&self) -> &bb8::Pool<ConnectionManager<DbConnection>> {
-        &self.pool
-    }
+fn make_dns_resolver(
+    bootstrap_dns: Vec<SocketAddr>,
+) -> qorb::resolver::BoxedResolver {
+    Box::new(DnsResolver::new(
+        service::Name(internal_dns::ServiceName::Cockroach.srv_name()),
+        bootstrap_dns,
+        DnsResolverConfig {
+            hardcoded_ttl: Some(tokio::time::Duration::MAX),
+            ..Default::default()
+        },
+    ))
 }
 
-#[derive(Clone, Debug)]
-struct LoggingErrorSink {
-    log: slog::Logger,
+fn make_single_host_resolver(
+    config: &DbConfig,
+) -> qorb::resolver::BoxedResolver {
+    Box::new(SingleHostResolver::new(config))
 }
 
-impl LoggingErrorSink {
-    fn new(log: slog::Logger) -> LoggingErrorSink {
-        LoggingErrorSink { log }
-    }
+fn make_postgres_connector(
+    log: &Logger,
+) -> qorb::backend::SharedConnector<QorbConnection> {
+    // Create postgres connections.
+    //
+    // We're currently relying on the DieselPgConnector doing the following:
+    // - Disallowing full table scans in its implementation of "on_acquire"
+    // - Creating async_bb8_diesel connections that also wrap DTraceConnections.
+    let user = "root";
+    let db = "omicron";
+    let args = vec![("sslmode", "disable")];
+    Arc::new(DieselPgConnector::new(
+        log,
+        DieselPgConnectorArgs { user, db, args },
+    ))
 }
 
-impl bb8::ErrorSink<ConnectionError> for LoggingErrorSink {
-    fn sink(&self, error: ConnectionError) {
-        error!(
-            &self.log,
-            "database connection error";
-            "error_message" => #%error
-        );
+impl Pool {
+    /// Creates a new qorb-backed connection pool to the database.
+    ///
+    /// Creating this pool does not necessarily wait for connections to become
+    /// available, as backends may shift over time.
+    pub fn new(log: &Logger, bootstrap_dns: Vec<SocketAddr>) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_dns_resolver(bootstrap_dns);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy::default();
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
+    }
+
+    /// Creates a new qorb-backed connection pool to a single instance of the
+    /// database.
+    ///
+    /// This is intended for tests that want to skip DNS resolution, relying
+    /// on a single instance of the database.
+    ///
+    /// In production, [Self::new] should be preferred.
+    pub fn new_single_host(log: &Logger, db_config: &DbConfig) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_single_host_resolver(db_config);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy::default();
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
+    }
+
+    /// Creates a new qorb-backed connection pool which returns an error
+    /// if claims are not available within one millisecond.
+    ///
+    /// This is intended for test-only usage, in particular for tests where
+    /// claim requests should rapidly return errors when a backend has been
+    /// intentionally disabled.
+    #[cfg(any(test, feature = "testing"))]
+    pub fn new_single_host_failfast(
+        log: &Logger,
+        db_config: &DbConfig,
+    ) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_single_host_resolver(db_config);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy {
+            claim_timeout: tokio::time::Duration::from_millis(1),
+            ..Default::default()
+        };
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
     }
 
-    fn boxed_clone(&self) -> Box<dyn bb8::ErrorSink<ConnectionError>> {
-        Box::new(self.clone())
+    /// Returns a connection from the pool
+    pub async fn claim(
+        &self,
+    ) -> anyhow::Result<qorb::claim::Handle<QorbConnection>> {
+        Ok(self.inner.claim().await?)
     }
 }
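A usage sketch of the resulting Pool API (hypothetical call site; the bootstrap DNS address is illustrative):

    // Production path: hand the pool the bootstrap DNS servers and let qorb's
    // DNS resolver track CockroachDB backends as they move over time.
    let bootstrap_dns: Vec<std::net::SocketAddr> =
        vec!["[fd00:1122:3344:3::1]:53".parse().unwrap()];
    let pool = db::Pool::new(&log, bootstrap_dns);

    // Test path: pin the pool to a single known database instance instead.
    //     let pool = db::Pool::new_single_host(&log, &db::Config { url });

    // Either way, callers obtain connections through claim(), which yields a
    // qorb Handle that dereferences to the async connection.
    let conn = pool.claim().await?;

Note that dropping the Handle returns the claim to the pool; there is no longer an inner bb8 pool for callers to reach into.
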
diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs
index dae6a0ee51..9a33370a5a 100644
--- a/nexus/db-queries/src/db/pool_connection.rs
+++ b/nexus/db-queries/src/db/pool_connection.rs
@@ -4,46 +4,139 @@
 
 //! Customization that happens on each connection as they're acquired.
 
+use anyhow::anyhow;
+use async_bb8_diesel::AsyncR2D2Connection;
 use async_bb8_diesel::AsyncSimpleConnection;
-use async_bb8_diesel::Connection;
-use async_bb8_diesel::ConnectionError;
 use async_trait::async_trait;
-use bb8::CustomizeConnection;
+use diesel::Connection;
 use diesel::PgConnection;
 use diesel_dtrace::DTraceConnection;
+use qorb::backend::{self, Backend, Error};
+use slog::Logger;
+use url::Url;
 
 pub type DbConnection = DTraceConnection<PgConnection>;
 
 pub const DISALLOW_FULL_TABLE_SCAN_SQL: &str =
     "set disallow_full_table_scans = on; set large_full_scan_rows = 0;";
 
-/// A customizer for all new connections made to CockroachDB, from Diesel.
-#[derive(Debug)]
-pub(crate) struct ConnectionCustomizer {}
+/// A [backend::Connector] which provides access to [PgConnection].
+pub(crate) struct DieselPgConnector {
+    log: Logger,
+    user: String,
+    db: String,
+    args: Vec<(String, String)>,
+}
+
+pub(crate) struct DieselPgConnectorArgs<'a> {
+    pub(crate) user: &'a str,
+    pub(crate) db: &'a str,
+    pub(crate) args: Vec<(&'a str, &'a str)>,
+}
 
-impl ConnectionCustomizer {
-    pub(crate) fn new() -> Self {
-        Self {}
+impl DieselPgConnector {
+    /// Creates a new "connector" to a database, which
+    /// swaps out the IP address at runtime depending on the selected backend.
+    ///
+    /// Format of the url is:
+    ///
+    /// - postgresql://{user}@{address}/{db}
+    ///
+    /// Or, if arguments are supplied:
+    ///
+    /// - postgresql://{user}@{address}/{db}?{args}
+    pub(crate) fn new(log: &Logger, args: DieselPgConnectorArgs<'_>) -> Self {
+        let DieselPgConnectorArgs { user, db, args } = args;
+        Self {
+            log: log.clone(),
+            user: user.to_string(),
+            db: db.to_string(),
+            args: args
+                .into_iter()
+                .map(|(k, v)| (k.to_string(), v.to_string()))
+                .collect(),
+        }
     }
 
-    async fn disallow_full_table_scans(
+    fn to_url(
         &self,
-        conn: &mut Connection<DbConnection>,
-    ) -> Result<(), ConnectionError> {
-        conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await?;
-        Ok(())
+        address: std::net::SocketAddr,
+    ) -> Result<String, anyhow::Error> {
+        let user = &self.user;
+        let db = &self.db;
+        let mut url =
+            Url::parse(&format!("postgresql://{user}@{address}/{db}"))?;
+
+        for (k, v) in &self.args {
+            url.query_pairs_mut().append_pair(k, v);
+        }
+
+        Ok(url.as_str().to_string())
     }
 }
 
 #[async_trait]
-impl CustomizeConnection<Connection<DbConnection>, ConnectionError>
-    for ConnectionCustomizer
-{
+impl backend::Connector for DieselPgConnector {
+    type Connection = async_bb8_diesel::Connection<DbConnection>;
+
+    async fn connect(
+        &self,
+        backend: &Backend,
+    ) -> Result<Self::Connection, Error> {
+        let url = self.to_url(backend.address).map_err(Error::Other)?;
+
+        let conn = tokio::task::spawn_blocking(move || {
+            let pg_conn = DbConnection::establish(&url)
+                .map_err(|e| Error::Other(anyhow!(e)))?;
+            Ok::<_, Error>(async_bb8_diesel::Connection::new(pg_conn))
+        })
+        .await
+        .expect("Task panicked establishing connection")
+        .map_err(|e| {
+            warn!(
+                self.log,
+                "Failed to make connection";
+                "error" => e.to_string(),
+                "backend" => backend.address,
+            );
+            e
+        })?;
+        Ok(conn)
+    }
+
     async fn on_acquire(
         &self,
-        conn: &mut Connection<DbConnection>,
-    ) -> Result<(), ConnectionError> {
-        self.disallow_full_table_scans(conn).await?;
+        conn: &mut Self::Connection,
+    ) -> Result<(), Error> {
+        conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await.map_err(
+            |e| {
+                warn!(
+                    self.log,
+                    "Failed on_acquire execution";
+                    "error" => e.to_string()
+                );
+                Error::Other(anyhow!(e))
+            },
+        )?;
         Ok(())
     }
+
+    async fn is_valid(&self, conn: &mut Self::Connection) -> Result<(), Error> {
+        let is_broken = conn.is_broken_async().await;
+        if is_broken {
+            warn!(
+                self.log,
+                "Failed is_valid check; connection known to be broken"
+            );
+            return Err(Error::Other(anyhow!("Connection broken")));
+        }
+        conn.ping_async().await.map_err(|e| {
+            warn!(
+                self.log,
+                "Failed is_valid check; connection failed ping";
+                "error" => e.to_string()
+            );
+            Error::Other(anyhow!(e))
+        })
+    }
 }
diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs
index 7ea44b33fb..4d752d451b 100644
--- a/nexus/db-queries/src/db/queries/external_ip.rs
+++ b/nexus/db-queries/src/db/queries/external_ip.rs
@@ -918,7 +918,8 @@ mod tests {
             crate::db::datastore::test_utils::datastore_test(&logctx, &db)
                 .await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg));
+        let pool =
+            Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg));
         let db_datastore = Arc::new(
             crate::db::DataStore::new(&logctx.log, Arc::clone(&pool), None)
                 .await
diff --git a/nexus/db-queries/src/db/queries/next_item.rs b/nexus/db-queries/src/db/queries/next_item.rs
index 769c891349..658d151a5b 100644
--- a/nexus/db-queries/src/db/queries/next_item.rs
+++ b/nexus/db-queries/src/db/queries/next_item.rs
@@ -616,7 +616,7 @@ mod tests {
     }
 
     async fn
setup_test_schema(pool: &db::Pool) { - let connection = pool.pool().get().await.unwrap(); + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -708,8 +708,9 @@ mod tests { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // We're going to operate on a separate table, for simplicity. setup_test_schema(&pool).await; @@ -770,8 +771,9 @@ mod tests { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // We're going to operate on a separate table, for simplicity. setup_test_schema(&pool).await; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 7cf378d53b..dbf37fda2e 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -507,8 +507,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let volume_id = Uuid::new_v4(); let params = RegionParameters { diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 902d955a79..9d2ed04c85 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -568,8 +568,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = Uuid::nil(); let project_id = Uuid::nil(); @@ -597,8 +597,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = Uuid::nil(); let project_id = Uuid::nil(); @@ -624,8 +624,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = 
InstanceUuid::nil();
         let project_id = Uuid::nil();
 
@@ -650,8 +650,8 @@ mod test {
         let log = logctx.log.new(o!());
         let mut db = test_setup_database(&log).await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
-        let pool = crate::db::Pool::new(&logctx.log, &cfg);
-        let conn = pool.pool().get().await.unwrap();
+        let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg);
+        let conn = pool.claim().await.unwrap();
 
         let id = InstanceUuid::nil();
         let project_id = Uuid::nil();
diff --git a/nexus/db-queries/src/db/queries/vpc_subnet.rs b/nexus/db-queries/src/db/queries/vpc_subnet.rs
index 8cbf4495ca..85c771c050 100644
--- a/nexus/db-queries/src/db/queries/vpc_subnet.rs
+++ b/nexus/db-queries/src/db/queries/vpc_subnet.rs
@@ -313,8 +313,9 @@ mod test {
         let log = logctx.log.new(o!());
         let mut db = test_setup_database(&log).await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg));
-        let conn = pool.pool().get().await.unwrap();
+        let pool =
+            Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg));
+        let conn = pool.claim().await.unwrap();
         let explain = query.explain_async(&conn).await.unwrap();
         println!("{explain}");
         db.cleanup().await.unwrap();
@@ -352,7 +353,8 @@ mod test {
         let log = logctx.log.new(o!());
         let mut db = test_setup_database(&log).await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg));
+        let pool =
+            Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg));
         let db_datastore = Arc::new(
             crate::db::DataStore::new(&log, Arc::clone(&pool), None)
                 .await
@@ -544,7 +546,8 @@ mod test {
         let log = logctx.log.new(o!());
         let mut db = test_setup_database(&log).await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg));
+        let pool =
+            Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg));
         let db_datastore = Arc::new(
             crate::db::DataStore::new(&log, Arc::clone(&pool), None)
                 .await
diff --git a/nexus/src/app/background/tasks/saga_recovery.rs b/nexus/src/app/background/tasks/saga_recovery.rs
index 7b0fe1b331..42069ac4ed 100644
--- a/nexus/src/app/background/tasks/saga_recovery.rs
+++ b/nexus/src/app/background/tasks/saga_recovery.rs
@@ -517,7 +517,7 @@ mod test {
     ) -> (dev::db::CockroachInstance, Arc<DataStore>) {
         let db = test_setup_database(&log).await;
         let cfg = nexus_db_queries::db::Config { url: db.pg_config().clone() };
-        let pool = Arc::new(db::Pool::new(log, &cfg));
+        let pool = Arc::new(db::Pool::new_single_host(log, &cfg));
         let db_datastore = Arc::new(
             db::DataStore::new(&log, Arc::clone(&pool), None).await.unwrap(),
         );
diff --git a/nexus/src/bin/schema-updater.rs b/nexus/src/bin/schema-updater.rs
index 7fe1ed84a4..4a43698f00 100644
--- a/nexus/src/bin/schema-updater.rs
+++ b/nexus/src/bin/schema-updater.rs
@@ -71,7 +71,7 @@ async fn main() -> anyhow::Result<()> {
     let log = Logger::root(drain, slog::o!("unit" => "schema_updater"));
 
     let crdb_cfg = db::Config { url: args.url };
-    let pool = Arc::new(db::Pool::new(&log, &crdb_cfg));
+    let pool = Arc::new(db::Pool::new_single_host(&log, &crdb_cfg));
     let schema_config = SchemaConfig { schema_dir: args.schema_directory };
 
     let all_versions = AllSchemaVersions::load(&schema_config.schema_dir)?;
diff --git a/nexus/src/context.rs b/nexus/src/context.rs
index 95d69e0c88..8cb696c62f 100644
--- a/nexus/src/context.rs
+++ b/nexus/src/context.rs
@@ -11,9 +11,7 @@ use authn::external::token::HttpAuthnToken;
 use authn::external::HttpAuthnScheme;
 use camino::Utf8PathBuf;
 use chrono::Duration;
-use internal_dns::ServiceName;
 use nexus_config::NexusConfig;
-use nexus_config::PostgresConfigWithUrl;
 use nexus_config::SchemeName;
 use nexus_db_queries::authn::external::session_cookie::SessionStore;
 use nexus_db_queries::authn::ConsoleSessionWithSiloId;
@@ -25,7 +23,6 @@ use oximeter::types::ProducerRegistry;
 use oximeter_instruments::http::{HttpService, LatencyTracker};
 use slog::Logger;
 use std::env;
-use std::str::FromStr;
 use std::sync::Arc;
 use uuid::Uuid;
 
@@ -210,7 +207,7 @@ impl ServerContext {
         // nexus in dev for everyone
 
         // Set up DNS Client
-        let resolver = match config.deployment.internal_dns {
+        let (resolver, dns_addrs) = match config.deployment.internal_dns {
             nexus_config::InternalDns::FromSubnet { subnet } => {
                 let az_subnet =
                     Ipv6Subnet::<AZ_PREFIX>::new(subnet.net().addr());
@@ -219,11 +216,21 @@ impl ServerContext {
                     "Setting up resolver using DNS servers for subnet: {:?}",
                     az_subnet
                 );
-                internal_dns::resolver::Resolver::new_from_subnet(
-                    log.new(o!("component" => "DnsResolver")),
-                    az_subnet,
+                let resolver =
+                    internal_dns::resolver::Resolver::new_from_subnet(
+                        log.new(o!("component" => "DnsResolver")),
+                        az_subnet,
+                    )
+                    .map_err(|e| {
+                        format!("Failed to create DNS resolver: {}", e)
+                    })?;
+
+                (
+                    resolver,
+                    internal_dns::resolver::Resolver::servers_from_subnet(
+                        az_subnet,
+                    ),
                 )
-                .map_err(|e| format!("Failed to create DNS resolver: {}", e))?
             }
             nexus_config::InternalDns::FromAddress { address } => {
                 info!(
@@ -231,56 +238,33 @@ impl ServerContext {
                     "Setting up resolver using DNS address: {:?}",
                     address
                 );
-                internal_dns::resolver::Resolver::new_from_addrs(
-                    log.new(o!("component" => "DnsResolver")),
-                    &[address],
-                )
-                .map_err(|e| format!("Failed to create DNS resolver: {}", e))?
+                let resolver =
+                    internal_dns::resolver::Resolver::new_from_addrs(
+                        log.new(o!("component" => "DnsResolver")),
+                        &[address],
+                    )
+                    .map_err(|e| {
+                        format!("Failed to create DNS resolver: {}", e)
+                    })?;
+
+                (resolver, vec![address])
             }
         };
 
-        // Set up DB pool
-        let url = match &config.deployment.database {
-            nexus_config::Database::FromUrl { url } => url.clone(),
+        let pool = match &config.deployment.database {
+            nexus_config::Database::FromUrl { url } => {
+                info!(log, "Setting up qorb pool from a single host"; "url" => #?url);
+                db::Pool::new_single_host(
+                    &log,
+                    &db::Config { url: url.clone() },
+                )
+            }
             nexus_config::Database::FromDns => {
-                info!(log, "Accessing DB url from DNS");
-                // It's been requested but unfortunately not supported to
-                // directly connect using SRV based lookup.
-                // TODO-robustness: the set of cockroachdb hosts we'll use will
-                // be fixed to whatever we got back from DNS at Nexus start.
-                // This means a new cockroachdb instance won't picked up until
-                // Nexus restarts.
-                let addrs = loop {
-                    match resolver
-                        .lookup_all_socket_v6(ServiceName::Cockroach)
-                        .await
-                    {
-                        Ok(addrs) => break addrs,
-                        Err(e) => {
-                            warn!(
-                                log,
-                                "Failed to lookup cockroach addresses: {e}"
-                            );
-                            tokio::time::sleep(std::time::Duration::from_secs(
-                                1,
-                            ))
-                            .await;
-                        }
-                    }
-                };
-                let addrs_str = addrs
-                    .iter()
-                    .map(ToString::to_string)
-                    .collect::<Vec<_>>()
-                    .join(",");
-                info!(log, "DB addresses: {}", addrs_str);
-                PostgresConfigWithUrl::from_str(&format!(
-                    "postgresql://root@{addrs_str}/omicron?sslmode=disable",
-                ))
-                .map_err(|e| format!("Cannot parse Postgres URL: {}", e))?
+ info!(log, "Setting up qorb pool from DNS"; "dns_addrs" => #?dns_addrs); + db::Pool::new(&log, dns_addrs) } }; - let pool = db::Pool::new(&log, &db::Config { url }); + let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 4fcb126356..f026b1b504 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -380,7 +380,7 @@ mod test { let logctx = dev::test_setup_log("test_populator"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); let datastore = Arc::new( db::DataStore::new(&logctx.log, pool, None).await.unwrap(), ); @@ -422,19 +422,13 @@ mod test { }) .unwrap(); - // Test again with the database offline. In principle we could do this - // immediately without creating a new pool and datastore. However, the - // pool's default behavior is to wait 30 seconds for a connection, which - // makes this test take a long time. (See the note in - // nexus/src/db/pool.rs about this.) So let's create a pool with an - // arbitrarily short timeout now. (We wouldn't want to do this above - // because we do want to wait a bit when we expect things to work, in - // case the test system is busy.) + // Test again with the database offline. In principle we could do this + // immediately without creating a new pool and datastore. // - // Anyway, if we try again with a broken database, we should get a + // If we try again with a broken database, we should get a // ServiceUnavailable error, which indicates a transient failure. let pool = - Arc::new(db::Pool::new_failfast_for_tests(&logctx.log, &cfg)); + Arc::new(db::Pool::new_single_host_failfast(&logctx.log, &cfg)); // We need to create the datastore before tearing down the database, as // it verifies the schema version of the DB while booting. let datastore = Arc::new( diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index bf73855ea7..5201b5c971 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -954,12 +954,12 @@ async fn dbinit_equals_sum_of_all_up() { // Create a connection pool after we apply the first schema version but // before applying the rest, and grab a connection from that pool. We'll use // it for an extra check later. - let pool = nexus_db_queries::db::Pool::new( + let pool = nexus_db_queries::db::Pool::new_single_host( log, &nexus_db_queries::db::Config { url: crdb.pg_config().clone() }, ); let conn_from_pool = - pool.pool().get().await.expect("failed to get pooled connection"); + pool.claim().await.expect("failed to get pooled connection"); // Go from the second version to the latest version. 
for version in all_versions.iter_versions().skip(1) { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index edb92c8c77..746a0bd3ab 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -109,7 +109,7 @@ syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } -tokio-stream = { version = "0.1.15", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net", "sync"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } @@ -219,7 +219,7 @@ time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } -tokio-stream = { version = "0.1.15", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net", "sync"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] }
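Taken together, the call-site migration in the test suites above follows one mechanical pattern; a before/after sketch of the recurring test-setup shape:

    // Before (bb8): the inner pool was exposed, and connections were checked
    // out from it directly.
    //     let pool = db::Pool::new(&logctx.log, &cfg);
    //     let conn = pool.pool().get().await.unwrap();

    // After (qorb): single-host construction is explicit, and claims go
    // through the pool itself.
    let pool = db::Pool::new_single_host(&logctx.log, &cfg);
    let conn = pool.claim().await.unwrap();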