From a4e12168c6c418317f980c16dea7801660781d7c Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 29 Nov 2023 11:57:01 -0800
Subject: [PATCH 01/10] [nexus] Make 'update_and_check' CTE explicitly request
 columns (#4572)

Related to https://github.com/oxidecomputer/omicron/issues/4570 , but not
a direct fix for it

This PR removes a usage of ".*" from a SQL query. Using ".*" in SQL queries
is somewhat risky -- it makes an implicit dependency on column order, and can
make backwards compatibility difficult in certain circumstances.

Instead, this PR provides a `ColumnWalker`, for converting a tuple of columns
to an iterator, and requests the expected columns explicitly.
---
 nexus/db-queries/src/db/column_walker.rs    | 112 ++++++++++++++++++++
 nexus/db-queries/src/db/mod.rs              |   1 +
 nexus/db-queries/src/db/update_and_check.rs |  48 +++++----
 3 files changed, 141 insertions(+), 20 deletions(-)
 create mode 100644 nexus/db-queries/src/db/column_walker.rs

diff --git a/nexus/db-queries/src/db/column_walker.rs b/nexus/db-queries/src/db/column_walker.rs
new file mode 100644
index 0000000000..64c3b450c8
--- /dev/null
+++ b/nexus/db-queries/src/db/column_walker.rs
@@ -0,0 +1,112 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! CTE utility for iterating over all columns in a table.
+
+use diesel::prelude::*;
+use std::marker::PhantomData;
+
+/// Used to iterate over a tuple of columns ("T").
+///
+/// Diesel exposes "AllColumns" as a tuple, which is difficult to iterate over
+/// -- after all, all the types are distinct. However, each of these types
+/// implements "Column", so we can use a macro to provide a
+/// "conversion-to-iterator" implementation for our expected tuples.
+pub(crate) struct ColumnWalker<T> {
+    remaining: PhantomData<T>,
+}
+
+impl<T> ColumnWalker<T> {
+    pub fn new() -> Self {
+        Self { remaining: PhantomData }
+    }
+}
+
+macro_rules! impl_column_walker {
+    ( $len:literal $($column:ident)+ ) => (
+        impl<$($column: Column),+> IntoIterator for ColumnWalker<($($column,)+)> {
+            type Item = &'static str;
+            type IntoIter = std::array::IntoIter<Self::Item, $len>;
+
+            fn into_iter(self) -> Self::IntoIter {
+                [$($column::NAME,)+].into_iter()
+            }
+        }
+    );
+}
+
+// implementations for 1 - 32 columns
+impl_column_walker! { 1 A }
+impl_column_walker! { 2 A B }
+impl_column_walker! { 3 A B C }
+impl_column_walker! { 4 A B C D }
+impl_column_walker! { 5 A B C D E }
+impl_column_walker! { 6 A B C D E F }
+impl_column_walker! { 7 A B C D E F G }
+impl_column_walker! { 8 A B C D E F G H }
+impl_column_walker! { 9 A B C D E F G H I }
+impl_column_walker! { 10 A B C D E F G H I J }
+impl_column_walker! { 11 A B C D E F G H I J K }
+impl_column_walker! { 12 A B C D E F G H I J K L }
+impl_column_walker! { 13 A B C D E F G H I J K L M }
+impl_column_walker! { 14 A B C D E F G H I J K L M N }
+impl_column_walker! { 15 A B C D E F G H I J K L M N O }
+impl_column_walker! { 16 A B C D E F G H I J K L M N O P }
+impl_column_walker! { 17 A B C D E F G H I J K L M N O P Q }
+impl_column_walker! { 18 A B C D E F G H I J K L M N O P Q R }
+impl_column_walker! { 19 A B C D E F G H I J K L M N O P Q R S }
+impl_column_walker! { 20 A B C D E F G H I J K L M N O P Q R S T }
+impl_column_walker! { 21 A B C D E F G H I J K L M N O P Q R S T U }
+impl_column_walker! { 22 A B C D E F G H I J K L M N O P Q R S T U V }
+impl_column_walker! { 23 A B C D E F G H I J K L M N O P Q R S T U V W }
+impl_column_walker! { 24 A B C D E F G H I J K L M N O P Q R S T U V W X }
+impl_column_walker! { 25 A B C D E F G H I J K L M N O P Q R S T U V W X Y }
+impl_column_walker! { 26 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z }
+impl_column_walker! { 27 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 }
+impl_column_walker! { 28 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 }
+impl_column_walker! { 29 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 }
+impl_column_walker! { 30 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 }
+impl_column_walker! { 31 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 E1 }
+impl_column_walker! { 32 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A1 B1 C1 D1 E1 F1 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    table! {
+        test_schema.test_table (id) {
+            id -> Uuid,
+            value -> Int4,
+            time_deleted -> Nullable<Timestamptz>,
+        }
+    }
+
+    // We can convert all of a table's columns into an iterable format.
+    #[test]
+    fn test_walk_table() {
+        let all_columns =
+            ColumnWalker::<<test_table::table as Table>::AllColumns>::new();
+
+        let mut iter = all_columns.into_iter();
+        assert_eq!(iter.next(), Some("id"));
+        assert_eq!(iter.next(), Some("value"));
+        assert_eq!(iter.next(), Some("time_deleted"));
+        assert_eq!(iter.next(), None);
+    }
+
+    // We can, if we want to, also make a ColumnWalker out of an arbitrary
+    // tuple of columns.
+    #[test]
+    fn test_walk_columns() {
+        let all_columns = ColumnWalker::<(
+            test_table::columns::id,
+            test_table::columns::value,
+        )>::new();
+
+        let mut iter = all_columns.into_iter();
+        assert_eq!(iter.next(), Some("id"));
+        assert_eq!(iter.next(), Some("value"));
+        assert_eq!(iter.next(), None);
+    }
+}
diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs
index 8b7424a056..b7c7079b54 100644
--- a/nexus/db-queries/src/db/mod.rs
+++ b/nexus/db-queries/src/db/mod.rs
@@ -12,6 +12,7 @@ pub mod collection_attach;
 pub mod collection_detach;
 pub mod collection_detach_many;
 pub mod collection_insert;
+mod column_walker;
 mod config;
 mod cte_utils;
 // This is marked public for use by the integration tests
diff --git a/nexus/db-queries/src/db/update_and_check.rs b/nexus/db-queries/src/db/update_and_check.rs
index d6bf14c083..fed79d5254 100644
--- a/nexus/db-queries/src/db/update_and_check.rs
+++ b/nexus/db-queries/src/db/update_and_check.rs
@@ -4,6 +4,7 @@
 
 //! CTE implementation for "UPDATE with extended return status".
 
+use super::column_walker::ColumnWalker;
 use super::pool::DbConnection;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::associations::HasTable;
@@ -21,7 +22,7 @@ use std::marker::PhantomData;
 /// allows referencing generics with names (and extending usage
 /// without re-stating those generic parameters everywhere).
 pub trait UpdateStatementExt {
-    type Table: QuerySource;
+    type Table: Table + QuerySource;
     type WhereClause;
     type Changeset;
@@ -32,7 +33,7 @@ pub trait UpdateStatementExt
 impl<T, U, V> UpdateStatementExt for UpdateStatement<T, U, V>
 where
-    T: QuerySource,
+    T: Table + QuerySource,
 {
     type Table = T;
     type WhereClause = U;
     type Changeset = V;
@@ -201,11 +202,11 @@
 ///
 /// ```text
 /// // WITH found   AS (SELECT <primary key> FROM T WHERE <primary key filter>)
-/// //      updated AS (UPDATE T SET <values> RETURNING *)
+/// //      updated AS (UPDATE T SET <values> RETURNING <primary key>)
 /// // SELECT
 /// //        found.<primary key>
 /// //        updated.<primary key>
-/// //        found.*
+/// //        found.<all columns>
 /// // FROM
 /// //        found
 /// // LEFT JOIN
 /// //        updated
 /// // ON
@@ -217,41 +218,48 @@ impl<US, PrimaryKey> QueryFragment<Pg> for UpdateAndQueryStatement<US, PrimaryKey>
 where
     US: UpdateStatementExt,
     US::Table: HasTable + Table,
+    ColumnWalker<<<US as UpdateStatementExt>::Table as Table>::AllColumns>:
+        IntoIterator<Item = &'static str>,
     PrimaryKey: diesel::Column,
     UpdateStatement<US::Table, US::WhereClause, US::Changeset>: QueryFragment<Pg>,
 {
     fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> {
+        let primary_key = <PrimaryKey as Column>::NAME;
+
         out.push_sql("WITH found AS (");
         self.find_subquery.walk_ast(out.reborrow())?;
         out.push_sql("), updated AS (");
         self.update_statement.walk_ast(out.reborrow())?;
-        // TODO: Only need primary? Or would we actually want
-        // to pass the returned rows back through the result?
-        out.push_sql(" RETURNING *) ");
+        out.push_sql(" RETURNING ");
+        out.push_identifier(primary_key)?;
+        out.push_sql(") ");

         out.push_sql("SELECT");

-        let name = <PrimaryKey as Column>::NAME;
         out.push_sql(" found.");
-        out.push_identifier(name)?;
+        out.push_identifier(primary_key)?;
         out.push_sql(", updated.");
-        out.push_identifier(name)?;
-
-        // TODO: I'd prefer to list all columns explicitly. But how?
-        // The types exist within Table::AllColumns, and each one
-        // has a name as "<C as Column>::Name".
-        // But Table::AllColumns is a tuple, which makes iteration
-        // a pain.
-        //
-        // TODO: Technically, we're repeating the PK here.
-        out.push_sql(", found.*");
+        out.push_identifier(primary_key)?;
+
+        // List all the "found" columns explicitly.
+        // This admittedly repeats the primary key, but that keeps the query
+        // "simple" since it returns all columns in the same order as
+        // AllColumns.
+        let all_columns = ColumnWalker::<
+            <<US as UpdateStatementExt>::Table as Table>::AllColumns,
+        >::new();
+        for column in all_columns.into_iter() {
+            out.push_sql(", found.");
+            out.push_identifier(column)?;
+        }

         out.push_sql(" FROM found LEFT JOIN updated ON");
         out.push_sql(" found.");
-        out.push_identifier(name)?;
+        out.push_identifier(primary_key)?;
         out.push_sql(" = ");
         out.push_sql("updated.");
-        out.push_identifier(name)?;
+        out.push_identifier(primary_key)?;

         Ok(())
     }

From 22a70e489db5c91f1215535463abed10aa0e9db2 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 29 Nov 2023 11:58:22 -0800
Subject: [PATCH 02/10] Stop panicking when our accounting is wrong (#4568)

Prefer to return a 500 error instead of panicking. Since this function is
already called from a transactional context, we can rely on the rollback
mechanism to "undo" the deletion.
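The rollback reasoning can be sketched in miniature. The following
self-contained toy uses a snapshot-based transaction helper with illustrative
names only (the real code relies on diesel's transaction machinery, not this
helper) to show why returning `Err` is enough: a failed body restores the
prior state, so the DELETE is undone rather than left half-applied.

    #[derive(Debug)]
    struct Error(String);

    // Toy transaction: snapshot the state, run the body, and restore the
    // snapshot if the body returns Err -- the same net effect a database
    // rollback has on the real deletion.
    fn transaction<T>(
        state: &mut Vec<&'static str>,
        body: impl FnOnce(&mut Vec<&'static str>) -> Result<T, Error>,
    ) -> Result<T, Error> {
        let snapshot = state.clone();
        match body(&mut *state) {
            Ok(v) => Ok(v),
            Err(e) => {
                *state = snapshot; // roll back on error
                Err(e)
            }
        }
    }

    fn main() {
        let mut rows = vec!["collection", "child-resource"];
        let result = transaction(&mut rows, |rows| {
            rows.retain(|r| *r != "collection"); // the DELETE
            // Accounting is wrong: surface an error instead of panicking.
            if rows.contains(&"child-resource") {
                return Err(Error("collection deleted while non-empty".into()));
            }
            Ok(())
        });
        if let Err(Error(msg)) = &result {
            assert_eq!(msg, "collection deleted while non-empty");
        }
        // The Err rolled the deletion back; nothing was half-applied.
        assert_eq!(rows, vec!["collection", "child-resource"]);
    }

A panic, by contrast, would take down the whole process without giving the
transaction a chance to report the inconsistency to the caller.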
Fixes https://github.com/oxidecomputer/omicron/issues/3870 --- .../db/datastore/virtual_provisioning_collection.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 83856e10c7..c5c2751723 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -124,10 +124,12 @@ impl DataStore { .get_result_async(conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - assert!( - collection.is_empty(), - "Collection deleted while non-empty: {collection:?}" - ); + + if !collection.is_empty() { + return Err(Error::internal_error(&format!( + "Collection deleted while non-empty: {collection:?}" + ))); + } Ok(()) } From f24447b0d93d339e70904fccb2f0a2c421db01e0 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 29 Nov 2023 12:03:48 -0800 Subject: [PATCH 03/10] Improve oximeter self-stat tests (#4577) Reduces the tick interval in calls to `tokio::time::advance()` to ensure all timers complete reliably. See #4566 for context. --- oximeter/collector/src/agent.rs | 51 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index f6da172909..365527ef08 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -659,6 +659,24 @@ mod tests { use tokio::time::Instant; use uuid::Uuid; + // Interval on which oximeter collects from producers in these tests. + const COLLECTION_INTERVAL: Duration = Duration::from_secs(1); + + // Interval in calls to `tokio::time::advance`. This must be sufficiently + // small relative to `COLLECTION_INTERVAL` to ensure all ticks of internal + // timers complete as expected. + const TICK_INTERVAL: Duration = Duration::from_millis(10); + + // Total number of collection attempts. + const N_COLLECTIONS: u64 = 5; + + // Period these tests wait using `tokio::time::advance()` before checking + // their test conditions. + const TEST_WAIT_PERIOD: Duration = Duration::from_millis( + COLLECTION_INTERVAL.as_millis() as u64 * N_COLLECTIONS + + COLLECTION_INTERVAL.as_millis() as u64 / 2, + ); + // Test that we count successful collections from a target correctly. #[tokio::test] async fn test_self_stat_collection_count() { @@ -692,13 +710,12 @@ mod tests { let _task = tokio::task::spawn(server); // Register the dummy producer. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), address, base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -708,10 +725,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. 
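The timing pattern above generalizes beyond oximeter. A self-contained sketch
of it follows -- a hypothetical test, not the actual agent code, assuming
tokio with the "test-util" and "macros" features enabled: keep the
virtual-time step much smaller than the timer period so every expiration is
observed in order.

    use std::sync::atomic::{AtomicU64, Ordering};
    use std::sync::Arc;
    use std::time::Duration;

    #[tokio::test(start_paused = true)]
    async fn small_time_steps_fire_every_timer() {
        const PERIOD: Duration = Duration::from_secs(1);
        // Much smaller than PERIOD, so no deadline is skipped in one jump.
        const TICK: Duration = Duration::from_millis(10);

        let count = Arc::new(AtomicU64::new(0));
        let counter = count.clone();
        tokio::spawn(async move {
            let mut timer = tokio::time::interval(PERIOD);
            timer.tick().await; // the first tick completes immediately
            loop {
                timer.tick().await;
                counter.fetch_add(1, Ordering::SeqCst);
            }
        });

        // Advance virtual time in small increments, exactly like the test
        // loops above, so the spawned task observes each expiration.
        let start = tokio::time::Instant::now();
        while start.elapsed() < PERIOD * 5 + PERIOD / 2 {
            tokio::time::advance(TICK).await;
        }
        assert_eq!(count.load(Ordering::SeqCst), 5);
    }

Advancing by a large fraction of the period in one call is what made the
original tests flaky: a single jump can race with how the runtime wakes the
timer tasks, so some iterations were not counted.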
@@ -729,7 +744,7 @@ mod tests { .await .expect("failed to request statistics from task"); let stats = rx.await.expect("failed to receive statistics from task"); - assert_eq!(stats.collections.datum.value(), N_COLLECTIONS as u64); + assert_eq!(stats.collections.datum.value(), N_COLLECTIONS); assert!(stats.failed_collections.is_empty()); logctx.cleanup_successful(); } @@ -751,7 +766,6 @@ mod tests { // Register a bogus producer, which is equivalent to a producer that is // unreachable. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), @@ -762,7 +776,7 @@ mod tests { 0, )), base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -772,10 +786,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. @@ -801,7 +813,7 @@ mod tests { .unwrap() .datum .value(), - N_COLLECTIONS as u64 + N_COLLECTIONS, ); assert_eq!(stats.failed_collections.len(), 1); logctx.cleanup_successful(); @@ -840,13 +852,12 @@ mod tests { let _task = tokio::task::spawn(server); // Register the rather flaky producer. - let interval = Duration::from_secs(1); let endpoint = ProducerEndpoint { id: Uuid::new_v4(), kind: Some(ProducerKind::Service), address, base_route: String::from("/"), - interval, + interval: COLLECTION_INTERVAL, }; collector .register_producer(endpoint) @@ -856,10 +867,8 @@ mod tests { // Step time until there has been exactly `N_COLLECTIONS` collections. tokio::time::pause(); let now = Instant::now(); - const N_COLLECTIONS: usize = 5; - let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; - while now.elapsed() < wait_for { - tokio::time::advance(interval / 10).await; + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; } // Request the statistics from the task itself. @@ -885,7 +894,7 @@ mod tests { .unwrap() .datum .value(), - N_COLLECTIONS as u64 + N_COLLECTIONS, ); assert_eq!(stats.failed_collections.len(), 1); logctx.cleanup_successful(); From 75ccdad5cbe7213c4be70c56376dac95a424d882 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 29 Nov 2023 14:26:36 -0800 Subject: [PATCH 04/10] Make oximeter producer kind required (#4571) - Pulls in updated Dendrite, Propolis, and Crucible deps, which include the new producer kind enum in metric registration requests. From their perspective, this is still an optional parameter, but it is supplied. - Make the kind a required field in API requests. - Make the kind a required column in the database, and remove any rows with a NULL value. - Update OpenAPI documents and internal consumers to reflect the required parameter. 
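As the last item in the list above notes, `kind` is now required on the wire.
The practical effect on old clients can be sketched as follows, with stand-in
types rather than the omicron definitions (assumes the serde and serde_json
crates): a registration body that omits `kind` is now rejected outright at
deserialization.

    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "snake_case")]
    enum ProducerKind {
        Service,
        SledAgent,
    }

    // Stand-in for the registration body; `kind` used to be an Option.
    #[derive(Debug, Deserialize)]
    struct ProducerEndpoint {
        id: String,
        kind: ProducerKind,
        address: String,
        base_route: String,
    }

    fn main() {
        let missing_kind = r#"{
            "id": "6f7408ea-afc1-4177-a971-2e8ea03b9db5",
            "address": "[::1]:12345",
            "base_route": "/collect"
        }"#;
        // An old client that never sent `kind` now gets a deserialization
        // error ("missing field `kind`") instead of silently registering.
        let result: Result<ProducerEndpoint, _> =
            serde_json::from_str(missing_kind);
        assert!(result.is_err());
    }

This is why the migration below deletes any NULL-kind rows before adding the
NOT NULL constraint: producers re-register on restart, so no data is lost.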
--- clients/nexus-client/src/lib.rs | 2 +- clients/oximeter-client/src/lib.rs | 2 +- common/src/api/internal/nexus.rs | 2 +- nexus/db-model/src/producer_endpoint.rs | 4 ++-- nexus/db-model/src/schema.rs | 2 +- nexus/src/app/oximeter.rs | 6 ++---- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/oximeter.rs | 2 +- openapi/nexus-internal.json | 4 ++-- openapi/oximeter.json | 4 ++-- oximeter/collector/src/agent.rs | 6 +++--- oximeter/producer/examples/producer.rs | 2 +- package-manifest.toml | 24 +++++++++++------------ schema/crdb/15.0.0/up01.sql | 14 +++++++++++++ schema/crdb/15.0.0/up02.sql | 4 ++++ schema/crdb/dbinit.sql | 2 +- sled-agent/src/sim/disk.rs | 2 +- sled-agent/src/sled_agent.rs | 2 +- tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +++--- 20 files changed, 55 insertions(+), 39 deletions(-) create mode 100644 schema/crdb/15.0.0/up01.sql create mode 100644 schema/crdb/15.0.0/up02.sql diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 6667f759e4..3ecba7e710 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -225,7 +225,7 @@ impl From<&omicron_common::api::internal::nexus::ProducerEndpoint> address: s.address.to_string(), base_route: s.base_route.clone(), id: s.id, - kind: s.kind.map(Into::into), + kind: s.kind.into(), interval: s.interval.into(), } } diff --git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs index 8a03304e06..11aa1452f8 100644 --- a/clients/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -43,7 +43,7 @@ impl From<&omicron_common::api::internal::nexus::ProducerEndpoint> address: s.address.to_string(), base_route: s.base_route.clone(), id: s.id, - kind: s.kind.map(Into::into), + kind: s.kind.into(), interval: s.interval.into(), } } diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 1daa85dbe7..780e60b1a2 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -103,7 +103,7 @@ pub struct ProducerEndpoint { /// A unique ID for this producer. pub id: Uuid, /// The kind of producer. - pub kind: Option, + pub kind: ProducerKind, /// The IP address and port at which `oximeter` can collect metrics from the /// producer. pub address: SocketAddr, diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 52a69e0508..f282f6f08f 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -52,7 +52,7 @@ pub struct ProducerEndpoint { #[diesel(embed)] identity: ProducerEndpointIdentity, - pub kind: Option, + pub kind: ProducerKind, pub ip: ipnetwork::IpNetwork, pub port: SqlU16, pub interval: f64, @@ -69,7 +69,7 @@ impl ProducerEndpoint { ) -> Self { Self { identity: ProducerEndpointIdentity::new(endpoint.id), - kind: endpoint.kind.map(Into::into), + kind: endpoint.kind.into(), ip: endpoint.address.ip().into(), port: endpoint.address.port().into(), base_route: endpoint.base_route.clone(), diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 6527da3637..5b97bd10a9 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -399,7 +399,7 @@ table! 
{ id -> Uuid, time_created -> Timestamptz, time_modified -> Timestamptz, - kind -> Nullable, + kind -> crate::ProducerKindEnum, ip -> Inet, port -> Int4, interval -> Float8, diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 66f39a32b6..a168b35293 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -127,9 +127,7 @@ impl super::Nexus { for producer in producers.into_iter() { let producer_info = oximeter_client::types::ProducerEndpoint { id: producer.id(), - kind: producer - .kind - .map(|kind| nexus::ProducerKind::from(kind).into()), + kind: nexus::ProducerKind::from(producer.kind).into(), address: SocketAddr::new( producer.ip.ip(), producer.port.try_into().unwrap(), @@ -152,7 +150,7 @@ impl super::Nexus { pub(crate) async fn register_as_producer(&self, address: SocketAddr) { let producer_endpoint = nexus::ProducerEndpoint { id: self.id, - kind: Some(nexus::ProducerKind::Service), + kind: nexus::ProducerKind::Service, address, base_route: String::from("/metrics/collect"), interval: Duration::from_secs(10), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 1e7de6132b..52ff8910f9 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1093,7 +1093,7 @@ pub async fn start_producer_server( let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_secs(1), diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index e97f36daf4..7dc453d713 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -361,7 +361,7 @@ async fn test_oximeter_collector_reregistration_gets_all_assignments() { ids.insert(id); let info = ProducerEndpoint { id, - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 12345), base_route: String::from("/collect"), interval: Duration::from_secs(1), diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index c358b4109b..e0580e7c13 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4343,7 +4343,6 @@ ] }, "kind": { - "nullable": true, "description": "The kind of producer.", "allOf": [ { @@ -4356,7 +4355,8 @@ "address", "base_route", "id", - "interval" + "interval", + "kind" ] }, "ProducerKind": { diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f7e534c95d..f5c78d53cd 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -212,7 +212,6 @@ ] }, "kind": { - "nullable": true, "description": "The kind of producer.", "allOf": [ { @@ -225,7 +224,8 @@ "address", "base_route", "id", - "interval" + "interval", + "kind" ] }, "ProducerEndpointResultsPage": { diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 365527ef08..4135125a48 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -712,7 +712,7 @@ mod tests { // Register the dummy producer. let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval: COLLECTION_INTERVAL, @@ -768,7 +768,7 @@ mod tests { // unreachable. 
let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, 0, @@ -854,7 +854,7 @@ mod tests { // Register the rather flaky producer. let endpoint = ProducerEndpoint { id: Uuid::new_v4(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address, base_route: String::from("/"), interval: COLLECTION_INTERVAL, diff --git a/oximeter/producer/examples/producer.rs b/oximeter/producer/examples/producer.rs index baa4f57bf7..8dbe0b6ad9 100644 --- a/oximeter/producer/examples/producer.rs +++ b/oximeter/producer/examples/producer.rs @@ -125,7 +125,7 @@ async fn main() -> anyhow::Result<()> { registry.register_producer(producer).unwrap(); let server_info = ProducerEndpoint { id: registry.producer_id(), - kind: Some(ProducerKind::Service), + kind: ProducerKind::Service, address: args.address, base_route: "/collect".to_string(), interval: Duration::from_secs(10), diff --git a/package-manifest.toml b/package-manifest.toml index 26c45f0ff7..3bce4aafee 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -384,10 +384,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" +source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "897d0fd6c0b82db42256a63a13c228152e1117434afa2681f649b291e3c6f46d" +source.sha256 = "f8c23cbf89fd0bbd928d8e3db1357bbea6e6b50560e221f873da5b56ed9d7527" output.type = "zone" [package.crucible-pantry] @@ -395,10 +395,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" +source.commit = "945f040d259ca8013d3fb26f510453da7cd7b1a6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "fe545de7ac4f15454d7827927149c5f0fc68ce9545b4f1ef96aac9ac8039805a" +source.sha256 = "a25b31c81798eb65564dbe259858fdd9715784d212d3508791b1ef0cf6d17da6" output.type = "zone" # Refer to @@ -409,10 +409,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "54398875a2125227d13827d4236dce943c019b1c" +source.commit = "3e1d129151c3621d28ead5c6e5760693ba6e7fec" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "01b8563db6626f90ee3fb6d97e7921b0a680373d843c1bea7ebf46fcea4f7b28" +source.sha256 = "cd341409eb2ffc3d8bec89fd20cad61d170f89d3adf926f6104eb01f4f4da881" output.type = "zone" [package.mg-ddm-gz] @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. 
Copy dendrite.tar.gz from dendrite/out to omicron/out
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
-source.sha256 = "c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd"
+source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36"
+source.sha256 = "dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1"
 output.type = "zone"
 output.intermediate_only = true
@@ -501,8 +501,8 @@ only_for_targets.image = "standard"
 # 2. Copy the output zone image from dendrite/out to omicron/out
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
-source.sha256 = "428cce1e9aa399b1b49c04e7fd0bc1cb0e3f3fae6fda96055892a42e010c9d6f"
+source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36"
+source.sha256 = "c34b10d47fa3eb9f9f6b3655ea4ed8a726f93399ea177efea79f5c89f2ab5a1e"
 output.type = "zone"
 output.intermediate_only = true
@@ -519,8 +519,8 @@ only_for_targets.image = "standard"
 # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
-source.sha256 = "5dd3534bec5eb4f857d0bf3994b26650288f650d409eec6aaa29860a2f481c37"
+source.commit = "2af6adea85c62ac37e451148b84e5eb0ef005f36"
+source.sha256 = "ce7065227c092ee82704f39a966b7441e3ae82d75eedb6eb281bd8b3e5873e32"
 output.type = "zone"
 output.intermediate_only = true
diff --git a/schema/crdb/15.0.0/up01.sql b/schema/crdb/15.0.0/up01.sql
new file mode 100644
index 0000000000..f9806c5917
--- /dev/null
+++ b/schema/crdb/15.0.0/up01.sql
@@ -0,0 +1,14 @@
+/*
+ * Previous commits added the optional kind of a producer. In this version,
+ * we're making the value required and not nullable. We'll first delete all
+ * records with a NULL kind -- there should not be any, since all producers
+ * both in and out of tree have been updated. Nonetheless, this is safe
+ * because currently we're updating offline, and all producers should
+ * re-register when they are restarted.
+ *
+ * NOTE: Full table scans are disallowed, however we don't have an index on
+ * producer kind (and don't currently need one). Allow full table scans for the
+ * context of this one statement.
+ */
+SET LOCAL disallow_full_table_scans = off;
+DELETE FROM omicron.public.metric_producer WHERE kind IS NULL;
diff --git a/schema/crdb/15.0.0/up02.sql b/schema/crdb/15.0.0/up02.sql
new file mode 100644
index 0000000000..9c1ad2ea47
--- /dev/null
+++ b/schema/crdb/15.0.0/up02.sql
@@ -0,0 +1,4 @@
+/*
+ * Next, we make the field itself required in the database.
+ */ +ALTER TABLE IF EXISTS omicron.public.metric_producer ALTER COLUMN kind SET NOT NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 178c7af913..053bc0bcfb 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1182,7 +1182,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.metric_producer ( id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, - kind omicron.public.producer_kind, + kind omicron.public.producer_kind NOT NULL, ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, interval FLOAT NOT NULL, diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index f131fd2bff..fc388f6ce2 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -169,7 +169,7 @@ impl SimDisk { let producer_address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 0); let server_info = ProducerEndpoint { id, - kind: Some(ProducerKind::SledAgent), + kind: ProducerKind::SledAgent, address: producer_address, base_route: "/collect".to_string(), interval: Duration::from_millis(200), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f5b71106cd..9f8d31b3c5 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -507,7 +507,7 @@ impl SledAgent { // Nexus. This should not block progress here. let endpoint = ProducerEndpoint { id: request.body.id, - kind: Some(ProducerKind::SledAgent), + kind: ProducerKind::SledAgent, address: sled_address.into(), base_route: String::from("/metrics/collect"), interval: crate::metrics::METRIC_COLLECTION_INTERVAL, diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index ba4b5a5722..c2dda4dbd0 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="8ff834e7d0a6adb263240edd40537f2c0768f1a4" +COMMIT="2af6adea85c62ac37e451148b84e5eb0ef005f36" SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 619a6bf287..77ee198fc5 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd" -CIDL_SHA256_LINUX_DPD="b5d829b4628759ac374106f3c56c29074b29577fd0ff72f61c3b8289fea430fe" -CIDL_SHA256_LINUX_SWADM="afc68828f54dc57b32dc1556fc588baeab12341c30e96cc0fadb49f401b4b48f" +CIDL_SHA256_ILLUMOS="dc93b671cce54e83ed55faaa267f81ba9e65abcd6714aa559d68a8783d73b1c1" +CIDL_SHA256_LINUX_DPD="b13b391a085ba6bf16fdd99774f64c9d53cd7220ad518d5839c8558fb925c40c" +CIDL_SHA256_LINUX_SWADM="6bfa4e367eb2b0be89f1588ac458026a186314597a4feb9fee6cea60101c7ebe" From 3555b5d2f4827269a61608ad012f258b74d676fb Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 29 Nov 2023 14:35:10 -0800 Subject: [PATCH 05/10] Various network fixes (#4564) --- common/src/api/internal/shared.rs | 7 +- nexus/db-model/src/schema.rs | 3 +- nexus/db-model/src/switch_port.rs | 3 + .../src/db/datastore/switch_port.rs | 97 ++++++----- nexus/src/app/rack.rs | 162 +++++++++++++++++- .../app/sagas/switch_port_settings_common.rs | 3 +- nexus/tests/integration_tests/switch_port.rs | 23 +-- nexus/types/src/external_api/params.rs | 56 +++++- openapi/bootstrap-agent.json | 5 + openapi/nexus-internal.json | 5 + openapi/nexus.json | 21 ++- openapi/sled-agent.json | 5 + openapi/wicketd.json | 5 + schema/crdb/16.0.0/up1.sql | 1 + schema/crdb/dbinit.sql | 4 +- schema/rss-sled-plan.json | 5 + 
 sled-agent/src/bootstrap/early_networking.rs  |  14 +-
 sled-agent/src/rack_setup/service.rs          |  11 +-
 .../gimlet-standalone/config-rss.toml         |   2 +
 smf/sled-agent/non-gimlet/config-rss.toml     |   4 +-
 tools/generate-wicketd-api.sh                 |   3 +
 .../src/cli/rack_setup/config_template.toml   |   3 +
 wicket/src/cli/rack_setup/config_toml.rs      |   8 +
 wicketd/src/preflight_check/uplink.rs         |   6 +-
 wicketd/src/rss_config.rs                     |   1 +
 25 files changed, 367 insertions(+), 90 deletions(-)
 create mode 100644 schema/crdb/16.0.0/up1.sql
 create mode 100755 tools/generate-wicketd-api.sh

diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs
index 155fbf971b..15ab4c66ce 100644
--- a/common/src/api/internal/shared.rs
+++ b/common/src/api/internal/shared.rs
@@ -140,6 +140,8 @@ pub struct PortConfigV1 {
     pub uplink_port_fec: PortFec,
     /// BGP peers on this port
     pub bgp_peers: Vec<BgpPeerConfig>,
+    /// Whether or not to set autonegotiation
+    pub autoneg: bool,
 }

 impl From<UplinkConfig> for PortConfigV1 {
@@ -155,6 +157,7 @@ impl From<UplinkConfig> for PortConfigV1 {
             uplink_port_speed: value.uplink_port_speed,
             uplink_port_fec: value.uplink_port_fec,
             bgp_peers: vec![],
+            autoneg: false,
         }
     }
 }
@@ -260,7 +263,7 @@ pub enum ExternalPortDiscovery {
 }

 /// Switchport Speed options
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
+#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
 #[serde(rename_all = "snake_case")]
 pub enum PortSpeed {
     #[serde(alias = "0G")]
@@ -284,7 +287,7 @@ pub enum PortSpeed {
 }

 /// Switchport FEC options
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
+#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
 #[serde(rename_all = "snake_case")]
 pub enum PortFec {
     Firecode,
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 5b97bd10a9..7d4ae241aa 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -146,6 +146,7 @@ table! {
         mtu -> Int4,
         fec -> crate::SwitchLinkFecEnum,
         speed -> crate::SwitchLinkSpeedEnum,
+        autoneg -> Bool,
     }
 }

@@ -1300,7 +1301,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed.
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(16, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index 44588899b6..6ff8612d2f 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -355,6 +355,7 @@ pub struct SwitchPortLinkConfig { pub mtu: SqlU16, pub fec: SwitchLinkFec, pub speed: SwitchLinkSpeed, + pub autoneg: bool, } impl SwitchPortLinkConfig { @@ -365,6 +366,7 @@ impl SwitchPortLinkConfig { mtu: u16, fec: SwitchLinkFec, speed: SwitchLinkSpeed, + autoneg: bool, ) -> Self { Self { port_settings_id, @@ -372,6 +374,7 @@ impl SwitchPortLinkConfig { link_name, fec, speed, + autoneg, mtu: mtu.into(), } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index d7319347f0..6bd4e61f70 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -234,6 +234,7 @@ impl DataStore { c.mtu, c.fec.into(), c.speed.into(), + c.autoneg, )); } result.link_lldp = @@ -304,39 +305,41 @@ impl DataStore { .await?; let mut bgp_peer_config = Vec::new(); - for (interface_name, p) in ¶ms.bgp_peers { - use db::schema::bgp_config; - let bgp_config_id = match &p.bgp_config { - NameOrId::Id(id) => *id, - NameOrId::Name(name) => { - let name = name.to_string(); - bgp_config_dsl::bgp_config - .filter(bgp_config::time_deleted.is_null()) - .filter(bgp_config::name.eq(name)) - .select(bgp_config::id) - .limit(1) - .first_async::(&conn) - .await - .map_err(|_| - TxnError::CustomError( - SwitchPortSettingsCreateError::BgpConfigNotFound, - ) - )? - } - }; + for (interface_name, peer_config) in ¶ms.bgp_peers { + for p in &peer_config.peers { + use db::schema::bgp_config; + let bgp_config_id = match &p.bgp_config { + NameOrId::Id(id) => *id, + NameOrId::Name(name) => { + let name = name.to_string(); + bgp_config_dsl::bgp_config + .filter(bgp_config::time_deleted.is_null()) + .filter(bgp_config::name.eq(name)) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|_| + TxnError::CustomError( + SwitchPortSettingsCreateError::BgpConfigNotFound, + ) + )? 
+ } + }; - bgp_peer_config.push(SwitchPortBgpPeerConfig::new( - psid, - bgp_config_id, - interface_name.clone(), - p.addr.into(), - p.hold_time.into(), - p.idle_hold_time.into(), - p.delay_open.into(), - p.connect_retry.into(), - p.keepalive.into(), - )); + bgp_peer_config.push(SwitchPortBgpPeerConfig::new( + psid, + bgp_config_id, + interface_name.clone(), + p.addr.into(), + p.hold_time.into(), + p.idle_hold_time.into(), + p.delay_open.into(), + p.connect_retry.into(), + p.keepalive.into(), + )); + } } result.bgp_peers = diesel::insert_into( @@ -1152,8 +1155,8 @@ mod test { use crate::db::datastore::{datastore_test, UpdatePrecondition}; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params::{ - BgpAnnounceSetCreate, BgpConfigCreate, BgpPeerConfig, SwitchPortConfig, - SwitchPortGeometry, SwitchPortSettingsCreate, + BgpAnnounceSetCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, + SwitchPortConfig, SwitchPortGeometry, SwitchPortSettingsCreate, }; use omicron_common::api::external::{ IdentityMetadataCreateParams, Name, NameOrId, @@ -1217,19 +1220,21 @@ mod test { bgp_peers: HashMap::from([( "phy0".into(), BgpPeerConfig { - bgp_announce_set: NameOrId::Name( - "test-announce-set".parse().unwrap(), - ), - bgp_config: NameOrId::Name( - "test-bgp-config".parse().unwrap(), - ), - interface_name: "qsfp0".into(), - addr: "192.168.1.1".parse().unwrap(), - hold_time: 0, - idle_hold_time: 0, - delay_open: 0, - connect_retry: 0, - keepalive: 0, + peers: vec![BgpPeer { + bgp_announce_set: NameOrId::Name( + "test-announce-set".parse().unwrap(), + ), + bgp_config: NameOrId::Name( + "test-bgp-config".parse().unwrap(), + ), + interface_name: "qsfp0".into(), + addr: "192.168.1.1".parse().unwrap(), + hold_time: 0, + idle_hold_time: 0, + delay_open: 0, + connect_retry: 0, + keepalive: 0, + }], }, )]), addresses: HashMap::new(), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 984ece2d0c..95283faa1c 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -23,10 +23,16 @@ use nexus_db_queries::db::lookup::LookupPath; use nexus_types::external_api::params::Address; use nexus_types::external_api::params::AddressConfig; use nexus_types::external_api::params::AddressLotBlockCreate; +use nexus_types::external_api::params::BgpAnnounceSetCreate; +use nexus_types::external_api::params::BgpAnnouncementCreate; +use nexus_types::external_api::params::BgpConfigCreate; +use nexus_types::external_api::params::BgpPeer; +use nexus_types::external_api::params::LinkConfig; +use nexus_types::external_api::params::LldpServiceConfig; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfig; use nexus_types::external_api::params::{ - AddressLotCreate, LoopbackAddressCreate, Route, SiloCreate, + AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, SwitchPortSettingsCreate, }; use nexus_types::external_api::shared::Baseboard; @@ -51,8 +57,8 @@ use sled_agent_client::types::EarlyNetworkConfigBody; use sled_agent_client::types::StartSledAgentRequest; use sled_agent_client::types::StartSledAgentRequestBody; use sled_agent_client::types::{ - BgpConfig, BgpPeerConfig, EarlyNetworkConfig, PortConfigV1, - RackNetworkConfigV1, RouteConfig as SledRouteConfig, + BgpConfig, BgpPeerConfig as SledBgpPeerConfig, EarlyNetworkConfig, + PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, }; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -406,6 +412,108 @@ impl super::Nexus { 
Error::internal_error(&format!("unable to retrieve authz_address_lot for infra address_lot: {e}")) })?; + let mut bgp_configs = HashMap::new(); + + for bgp_config in &rack_network_config.bgp { + bgp_configs.insert(bgp_config.asn, bgp_config.clone()); + + let bgp_config_name: Name = + format!("as{}", bgp_config.asn).parse().unwrap(); + + let announce_set_name: Name = + format!("as{}-announce", bgp_config.asn).parse().unwrap(); + + let address_lot_name: Name = + format!("as{}-lot", bgp_config.asn).parse().unwrap(); + + self.db_datastore + .address_lot_create( + &opctx, + &AddressLotCreate { + identity: IdentityMetadataCreateParams { + name: address_lot_name, + description: format!( + "Address lot for announce set in as {}", + bgp_config.asn + ), + }, + kind: AddressLotKind::Infra, + blocks: bgp_config + .originate + .iter() + .map(|o| AddressLotBlockCreate { + first_address: o.network().into(), + last_address: o.broadcast().into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create address lot for BGP as {}: {}", + bgp_config.asn, e + )) + })?; + + self.db_datastore + .bgp_create_announce_set( + &opctx, + &BgpAnnounceSetCreate { + identity: IdentityMetadataCreateParams { + name: announce_set_name.clone(), + description: format!( + "Announce set for AS {}", + bgp_config.asn + ), + }, + announcement: bgp_config + .originate + .iter() + .map(|x| BgpAnnouncementCreate { + address_lot_block: NameOrId::Name( + format!("as{}", bgp_config.asn) + .parse() + .unwrap(), + ), + network: IpNetwork::from(*x).into(), + }) + .collect(), + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to create bgp announce set for as {}: {}", + bgp_config.asn, e + )) + })?; + + self.db_datastore + .bgp_config_set( + &opctx, + &BgpConfigCreate { + identity: IdentityMetadataCreateParams { + name: bgp_config_name, + description: format!( + "BGP config for AS {}", + bgp_config.asn + ), + }, + asn: bgp_config.asn, + bgp_announce_set_id: announce_set_name.into(), + vrf: None, + }, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "unable to set bgp config for as {}: {}", + bgp_config.asn, e + )) + })?; + } + for (idx, uplink_config) in rack_network_config.ports.iter().enumerate() { @@ -503,6 +611,43 @@ impl super::Nexus { .routes .insert("phy0".to_string(), RouteConfig { routes }); + let peers: Vec = uplink_config + .bgp_peers + .iter() + .map(|r| BgpPeer { + bgp_announce_set: NameOrId::Name( + format!("as{}-announce", r.asn).parse().unwrap(), + ), + bgp_config: NameOrId::Name( + format!("as{}", r.asn).parse().unwrap(), + ), + interface_name: "phy0".into(), + addr: r.addr.into(), + hold_time: r.hold_time.unwrap_or(6) as u32, + idle_hold_time: r.idle_hold_time.unwrap_or(3) as u32, + delay_open: r.delay_open.unwrap_or(0) as u32, + connect_retry: r.connect_retry.unwrap_or(3) as u32, + keepalive: r.keepalive.unwrap_or(2) as u32, + }) + .collect(); + + port_settings_params + .bgp_peers + .insert("phy0".to_string(), BgpPeerConfig { peers }); + + let link = LinkConfig { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 + lldp: LldpServiceConfig { + enabled: false, + lldp_config: None, + }, + fec: uplink_config.uplink_port_fec.into(), + speed: uplink_config.uplink_port_speed.into(), + autoneg: uplink_config.autoneg, + }; + + port_settings_params.links.insert("phy".to_string(), link); + match self .db_datastore .switch_port_settings_create( @@ -658,7 +803,7 @@ impl super::Nexus { addresses: 
info.addresses.iter().map(|a| a.address).collect(), bgp_peers: peer_info .iter() - .map(|(p, asn, addr)| BgpPeerConfig { + .map(|(p, asn, addr)| SledBgpPeerConfig { addr: *addr, asn: *asn, port: port.port_name.clone(), @@ -673,16 +818,21 @@ impl super::Nexus { port: port.port_name.clone(), uplink_port_fec: info .links - .get(0) //TODO breakout support + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 .map(|l| l.fec) .unwrap_or(SwitchLinkFec::None) .into(), uplink_port_speed: info .links - .get(0) //TODO breakout support + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), + autoneg: info + .links + .get(0) //TODO breakout support + .map(|l| l.autoneg) + .unwrap_or(false), }; ports.push(p); diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs index b328c6d1ac..9132645782 100644 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -55,7 +55,7 @@ pub(crate) fn api_to_dpd_port_settings( link_id.to_string(), LinkSettings { params: LinkCreate { - autoneg: false, + autoneg: l.autoneg, lane: Some(LinkId(0)), kr: false, fec: match l.fec { @@ -251,6 +251,7 @@ pub(crate) async fn bootstore_update( .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), + autoneg: settings.links.get(0).map(|l| l.autoneg).unwrap_or(false), bgp_peers: peer_info .iter() .filter_map(|(p, asn)| { diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index d163fc6b06..df4d96c6d1 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -10,7 +10,7 @@ use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::{ Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate, - BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, + BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, LinkConfig, LinkFec, LinkSpeed, LldpServiceConfig, Route, RouteConfig, SwitchInterfaceConfig, SwitchInterfaceKind, SwitchPortApplySettings, SwitchPortSettingsCreate, @@ -118,6 +118,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { lldp: LldpServiceConfig { enabled: false, lldp_config: None }, fec: LinkFec::None, speed: LinkSpeed::Speed100G, + autoneg: false, }, ); // interfaces @@ -252,15 +253,17 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.bgp_peers.insert( "phy0".into(), BgpPeerConfig { - bgp_config: NameOrId::Name("as47".parse().unwrap()), //TODO - bgp_announce_set: NameOrId::Name("instances".parse().unwrap()), //TODO - interface_name: "phy0".to_string(), - addr: "1.2.3.4".parse().unwrap(), - hold_time: 6, - idle_hold_time: 6, - delay_open: 0, - connect_retry: 3, - keepalive: 2, + peers: vec![BgpPeer { + bgp_config: NameOrId::Name("as47".parse().unwrap()), + bgp_announce_set: NameOrId::Name("instances".parse().unwrap()), + interface_name: "phy0".to_string(), + addr: "1.2.3.4".parse().unwrap(), + hold_time: 6, + idle_hold_time: 6, + delay_open: 0, + connect_retry: 3, + keepalive: 2, + }], }, ); let _created: SwitchPortSettingsView = NexusRequest::objects_post( diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a5f1f3f874..3303d38367 100644 --- 
a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1354,6 +1354,18 @@ pub enum LinkFec { Rs, } +impl From for LinkFec { + fn from(x: omicron_common::api::internal::shared::PortFec) -> LinkFec { + match x { + omicron_common::api::internal::shared::PortFec::Firecode => { + Self::Firecode + } + omicron_common::api::internal::shared::PortFec::None => Self::None, + omicron_common::api::internal::shared::PortFec::Rs => Self::Rs, + } + } +} + /// The speed of a link. #[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] @@ -1378,6 +1390,40 @@ pub enum LinkSpeed { Speed400G, } +impl From for LinkSpeed { + fn from(x: omicron_common::api::internal::shared::PortSpeed) -> Self { + match x { + omicron_common::api::internal::shared::PortSpeed::Speed0G => { + Self::Speed0G + } + omicron_common::api::internal::shared::PortSpeed::Speed1G => { + Self::Speed1G + } + omicron_common::api::internal::shared::PortSpeed::Speed10G => { + Self::Speed10G + } + omicron_common::api::internal::shared::PortSpeed::Speed25G => { + Self::Speed25G + } + omicron_common::api::internal::shared::PortSpeed::Speed40G => { + Self::Speed40G + } + omicron_common::api::internal::shared::PortSpeed::Speed50G => { + Self::Speed50G + } + omicron_common::api::internal::shared::PortSpeed::Speed100G => { + Self::Speed100G + } + omicron_common::api::internal::shared::PortSpeed::Speed200G => { + Self::Speed200G + } + omicron_common::api::internal::shared::PortSpeed::Speed400G => { + Self::Speed400G + } + } + } +} + /// Switch link configuration. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct LinkConfig { @@ -1392,6 +1438,9 @@ pub struct LinkConfig { /// The speed of the link. pub speed: LinkSpeed, + + /// Whether or not to set autonegotiation + pub autoneg: bool, } /// The LLDP configuration associated with a port. LLDP may be either enabled or @@ -1479,12 +1528,17 @@ pub struct BgpConfigListSelector { pub name_or_id: Option, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct BgpPeerConfig { + pub peers: Vec, +} + /// A BGP peer configuration for an interface. Includes the set of announcements /// that will be advertised to the peer identified by `addr`. The `bgp_config` /// parameter is a reference to global BGP parameters. The `interface_name` /// indicates what interface the peer should be contacted on. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct BgpPeerConfig { +pub struct BgpPeer { /// The set of announcements advertised by the peer. 
pub bgp_announce_set: NameOrId, diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 2c7ffbc337..efd9c05fa9 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -510,6 +510,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -555,6 +559,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index e0580e7c13..82c799b78d 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4240,6 +4240,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -4285,6 +4289,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/nexus.json b/openapi/nexus.json index 08e6cd7149..15e75f93ff 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -7865,7 +7865,7 @@ "switch" ] }, - "BgpPeerConfig": { + "BgpPeer": { "description": "A BGP peer configuration for an interface. Includes the set of announcements that will be advertised to the peer identified by `addr`. The `bgp_config` parameter is a reference to global BGP parameters. The `interface_name` indicates what interface the peer should be contacted on.", "type": "object", "properties": { @@ -7937,6 +7937,20 @@ "keepalive" ] }, + "BgpPeerConfig": { + "type": "object", + "properties": { + "peers": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpPeer" + } + } + }, + "required": [ + "peers" + ] + }, "BgpPeerState": { "description": "The current state of a BGP peer.", "oneOf": [ @@ -11938,6 +11952,10 @@ "description": "Switch link configuration.", "type": "object", "properties": { + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "fec": { "description": "The forward error correction mode of the link.", "allOf": [ @@ -11970,6 +11988,7 @@ } }, "required": [ + "autoneg", "fec", "lldp", "mtu", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ed202ddbdb..22216b9571 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5037,6 +5037,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -5082,6 +5086,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 60ad9a42df..32e3b70de2 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1545,6 +1545,10 @@ "$ref": "#/components/schemas/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", @@ -1590,6 +1594,7 @@ }, "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", diff --git a/schema/crdb/16.0.0/up1.sql b/schema/crdb/16.0.0/up1.sql new file mode 100644 index 0000000000..d28d5ca4b5 --- /dev/null +++ b/schema/crdb/16.0.0/up1.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT 
false; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 053bc0bcfb..8a34c09bc1 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3000,6 +3000,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( CHECK (singleton = true) ); +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3007,7 +3009,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '15.0.0', NULL) + ( TRUE, NOW(), NOW(), '16.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 2ce8ae3bdc..5086c38a9c 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -366,6 +366,7 @@ "type": "object", "required": [ "addresses", + "autoneg", "bgp_peers", "port", "routes", @@ -381,6 +382,10 @@ "$ref": "#/definitions/IpNetwork" } }, + "autoneg": { + "description": "Whether or not to set autonegotiation", + "type": "boolean" + }, "bgp_peers": { "description": "BGP peers on this port", "type": "array", diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index bec309dc27..cb411a2546 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -548,23 +548,20 @@ impl<'a> EarlyNetworkSetup<'a> { let mut addrs = Vec::new(); for a in &port_config.addresses { + // TODO We're discarding the `uplink_cidr.prefix()` here and only using + // the IP address; at some point we probably need to give the full CIDR + // to dendrite? addrs.push(a.ip()); } - // TODO We're discarding the `uplink_cidr.prefix()` here and only using - // the IP address; at some point we probably need to give the full CIDR - // to dendrite? let link_settings = LinkSettings { - // TODO Allow user to configure link properties - // https://github.com/oxidecomputer/omicron/issues/3061 params: LinkCreate { - autoneg: false, - kr: false, + autoneg: port_config.autoneg, + kr: false, //NOTE: kr does not apply to user configurable links. 
fec: convert_fec(&port_config.uplink_port_fec), speed: convert_speed(&port_config.uplink_port_speed), lane: Some(LinkId(0)), }, - //addrs: vec![addr], addrs, }; dpd_port_settings.links.insert(link_id.to_string(), link_settings); @@ -866,6 +863,7 @@ mod tests { port: uplink.uplink_port, uplink_port_speed: uplink.uplink_port_speed, uplink_port_fec: uplink.uplink_port_fec, + autoneg: false, bgp_peers: vec![], }], bgp: vec![], diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 7dcbfa7045..0b1eadf464 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -598,14 +598,9 @@ impl ServiceInner { .collect(), addresses: config.addresses.clone(), switch: config.switch.into(), - uplink_port_speed: config - .uplink_port_speed - .clone() - .into(), - uplink_port_fec: config - .uplink_port_fec - .clone() - .into(), + uplink_port_speed: config.uplink_port_speed.into(), + uplink_port_fec: config.uplink_port_fec.into(), + autoneg: config.autoneg, bgp_peers: config .bgp_peers .iter() diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 29a7a79eba..f7a93260e3 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -110,6 +110,8 @@ port = "qsfp0" uplink_port_speed = "40G" # The forward error correction mode for this port. uplink_port_fec="none" +# Do not use autonegotiation +autoneg = false # Switch to use for the uplink. For single-rack deployments this can be # "switch0" (upper slot) or "switch1" (lower slot). For single-node softnpu # and dendrite stub environments, use "switch0" diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index fea3cfa5d8..fdc81c0f8f 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -109,7 +109,9 @@ port = "qsfp0" # The speed of this port. uplink_port_speed = "40G" # The forward error correction mode for this port. -uplink_port_fec="none" +uplink_port_fec = "none" +# Do not use autonegotiation +autoneg = false # Switch to use for the uplink. For single-rack deployments this can be # "switch0" (upper slot) or "switch1" (lower slot). 
For single-node softnpu # and dendrite stub environments, use "switch0" diff --git a/tools/generate-wicketd-api.sh b/tools/generate-wicketd-api.sh new file mode 100755 index 0000000000..f1af33aecc --- /dev/null +++ b/tools/generate-wicketd-api.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./target/debug/wicketd openapi > openapi/wicketd.json diff --git a/wicket/src/cli/rack_setup/config_template.toml b/wicket/src/cli/rack_setup/config_template.toml index 617b61fadc..2886fa01d7 100644 --- a/wicket/src/cli/rack_setup/config_template.toml +++ b/wicket/src/cli/rack_setup/config_template.toml @@ -65,6 +65,9 @@ uplink_port_speed = "" # `none`, `firecode`, or `rs` uplink_port_fec = "" +# `true` or `false` +autoneg = "" + # A list of bgp peers # { addr = "1.7.0.1", asn = 47, port = "qsfp0" } bgp_peers = [] diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 9b1a25a50e..5a8e8a560e 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -229,6 +229,12 @@ fn populate_network_table( ); _last_key = Some(property); } + uplink.insert( + "autoneg", + Item::Value(Value::Boolean(Formatted::new( + cfg.autoneg, + ))), + ); let mut routes = Array::new(); for r in &cfg.routes { @@ -449,6 +455,7 @@ mod tests { PortFec::None => InternalPortFec::None, PortFec::Rs => InternalPortFec::Rs, }, + autoneg: config.autoneg, switch: match config.switch { SwitchLocation::Switch0 => { InternalSwitchLocation::Switch0 @@ -529,6 +536,7 @@ mod tests { }], uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + autoneg: true, port: "port0".into(), switch: SwitchLocation::Switch0, }], diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index d94baf1995..25411f17a5 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -775,10 +775,8 @@ fn build_port_settings( LinkSettings { addrs, params: LinkCreate { - // TODO we should take these parameters too - // https://github.com/oxidecomputer/omicron/issues/3061 - autoneg: false, - kr: false, + autoneg: uplink.autoneg, + kr: false, //NOTE: kr does not apply to user configurable links fec, speed, lane: Some(LinkId(0)), diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index 0aaea427f3..f654597d81 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -548,6 +548,7 @@ fn validate_rack_network_config( PortFec::None => BaPortFec::None, PortFec::Rs => BaPortFec::Rs, }, + autoneg: config.autoneg, }) .collect(), bgp: config From a04f5e387d74ef445db9c217f54cead1f5668cf1 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 29 Nov 2023 18:23:01 -0800 Subject: [PATCH 06/10] fix schema collision (#4580) --- nexus/db-model/src/schema.rs | 2 +- schema/crdb/{15.0.0 => 16.0.0}/up01.sql | 0 schema/crdb/{15.0.0 => 16.0.0}/up02.sql | 0 schema/crdb/{16.0.0 => 17.0.0}/up1.sql | 0 schema/crdb/dbinit.sql | 2 +- 5 files changed, 2 insertions(+), 2 deletions(-) rename schema/crdb/{15.0.0 => 16.0.0}/up01.sql (100%) rename schema/crdb/{15.0.0 => 16.0.0}/up02.sql (100%) rename schema/crdb/{16.0.0 => 17.0.0}/up1.sql (100%) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 7d4ae241aa..be345032ac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1301,7 +1301,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(16, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(17, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/schema/crdb/15.0.0/up01.sql b/schema/crdb/16.0.0/up01.sql similarity index 100% rename from schema/crdb/15.0.0/up01.sql rename to schema/crdb/16.0.0/up01.sql diff --git a/schema/crdb/15.0.0/up02.sql b/schema/crdb/16.0.0/up02.sql similarity index 100% rename from schema/crdb/15.0.0/up02.sql rename to schema/crdb/16.0.0/up02.sql diff --git a/schema/crdb/16.0.0/up1.sql b/schema/crdb/17.0.0/up1.sql similarity index 100% rename from schema/crdb/16.0.0/up1.sql rename to schema/crdb/17.0.0/up1.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 8a34c09bc1..f4caa2a4e6 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3009,7 +3009,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '16.0.0', NULL) + ( TRUE, NOW(), NOW(), '17.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 25fd21b0de9b0b7eb773019bd446eea33a33d691 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 05:21:07 +0000 Subject: [PATCH 07/10] Update taiki-e/install-action digest to 6b385b7 (#4583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`f7c663c` -> `6b385b7`](https://togithub.com/taiki-e/install-action/compare/f7c663c...6b385b7) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 1805da8ad8..afc56f40ca 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@f7c663c03b51ed0d93e9cec22a575d3f02175989 # v2 + uses: taiki-e/install-action@6b385b7509c65e9d1b7d6b72244f7e275a7f5cef # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 9023e15bf7c407ce8e1e350de2a2e8cde4a1ac3e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 30 Nov 2023 08:53:36 -0800 Subject: [PATCH 08/10] [nexus] Add `HostPhase1Updater` (#4548) Continues the work from #4427 and #4502 adding types to Nexus that can deliver updates to components managed by the SP, and contains a similar amount of duplication from those two.
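All three updater types (`SpUpdater`, `RotUpdater`, and the new `HostPhase1Updater`) share one calling convention: construct the updater, then hand it a `MgsClients` to drive the update. A rough sketch of the host phase 1 case, mirroring the integration tests added here (the MGS URL, slot numbers, and `anyhow` error handling are illustrative placeholders, not part of this change):

    use gateway_client::types::SpType;
    use omicron_nexus::app::test_interfaces::{HostPhase1Updater, MgsClients};
    use uuid::Uuid;

    async fn update_host_phase1(
        mgs_url: &str,        // e.g. the address of one MGS instance
        phase1_data: Vec<u8>, // phase 1 image bytes, obtained elsewhere
        log: &slog::Logger,
    ) -> anyhow::Result<()> {
        // One or more MGS clients; on a communication error the updater
        // fails over to the next client in the list.
        let mut mgs_clients = MgsClients::from_clients([
            gateway_client::Client::new(mgs_url, log.clone()),
        ]);
        let updater = HostPhase1Updater::new(
            SpType::Sled,
            0,              // target SP (sled) slot
            0,              // target host boot flash slot (0 or 1)
            Uuid::new_v4(), // ID for this update attempt
            phase1_data,
            log,
        );
        updater.update(&mut mgs_clients).await?;
        Ok(())
    }

(`progress_watcher()` is also available for surfacing `UpdateProgress`; note that the updater waits for all progress receivers to be dropped before `update()` returns.)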
Non-duplication changes are mostly refactoring: * Removed component-specific error types in favor of `SpComponentUpdateError` * Extracted the "start the update and poll MGS until it's done" logic into a new `common_sp_update` module The `test_host_phase1_updater_delivers_progress` is subtly different from the RoT/SP versions of the same test, but not in a way that's particularly interesting (see the "Unlike the SP and RoT cases" comment for details). --- nexus/src/app/test_interfaces.rs | 3 +- nexus/src/app/update/common_sp_update.rs | 239 +++++++ nexus/src/app/update/host_phase1_updater.rs | 177 ++++++ nexus/src/app/update/mgs_clients.rs | 146 ----- nexus/src/app/update/mod.rs | 10 +- nexus/src/app/update/rot_updater.rs | 175 ++---- nexus/src/app/update/sp_updater.rs | 166 ++--- .../integration_tests/host_phase1_updater.rs | 584 ++++++++++++++++++ nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/rot_updater.rs | 46 +- nexus/tests/integration_tests/sp_updater.rs | 46 +- sp-sim/src/gimlet.rs | 18 +- sp-sim/src/lib.rs | 6 + sp-sim/src/sidecar.rs | 8 + sp-sim/src/update.rs | 44 +- 15 files changed, 1226 insertions(+), 443 deletions(-) create mode 100644 nexus/src/app/update/common_sp_update.rs create mode 100644 nexus/src/app/update/host_phase1_updater.rs create mode 100644 nexus/tests/integration_tests/host_phase1_updater.rs diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index 6161a9a1c1..581b9a89bb 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -10,10 +10,9 @@ use sled_agent_client::Client as SledAgentClient; use std::sync::Arc; use uuid::Uuid; +pub use super::update::HostPhase1Updater; pub use super::update::MgsClients; -pub use super::update::RotUpdateError; pub use super::update::RotUpdater; -pub use super::update::SpUpdateError; pub use super::update::SpUpdater; pub use super::update::UpdateProgress; pub use gateway_client::types::SpType; diff --git a/nexus/src/app/update/common_sp_update.rs b/nexus/src/app/update/common_sp_update.rs new file mode 100644 index 0000000000..69a5b132a2 --- /dev/null +++ b/nexus/src/app/update/common_sp_update.rs @@ -0,0 +1,239 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Module containing implementation details shared amongst all MGS-to-SP-driven +//! updates. + +use super::MgsClients; +use super::UpdateProgress; +use gateway_client::types::SpType; +use gateway_client::types::SpUpdateStatus; +use slog::Logger; +use std::time::Duration; +use tokio::sync::watch; +use uuid::Uuid; + +type GatewayClientError = gateway_client::Error; + +/// Error type returned when an update to a component managed by the SP fails. +/// +/// Note that the SP manages itself, as well, so "SP component" here includes +/// the SP. 
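+///
+/// Every updater's `update()` now returns this one error type; an
+/// illustrative (not prescriptive) sketch of handling it at a call site:
+///
+/// ```ignore
+/// match updater.update(&mut mgs_clients).await {
+///     Ok(()) => info!(log, "update complete"),
+///     // every MGS instance was tried and communication still failed
+///     Err(SpComponentUpdateError::MgsCommunication(err)) => {
+///         warn!(log, "giving up on update"; "err" => %err);
+///     }
+///     // the SP reported a lost, aborted, failed, or different update
+///     Err(err) => warn!(log, "update failed on the SP side"; "err" => %err),
+/// }
+/// ```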
+#[derive(Debug, thiserror::Error)] +pub enum SpComponentUpdateError { + #[error("error communicating with MGS")] + MgsCommunication(#[from] GatewayClientError), + #[error("different update is now preparing ({0})")] + DifferentUpdatePreparing(Uuid), + #[error("different update is now in progress ({0})")] + DifferentUpdateInProgress(Uuid), + #[error("different update is now complete ({0})")] + DifferentUpdateComplete(Uuid), + #[error("different update is now aborted ({0})")] + DifferentUpdateAborted(Uuid), + #[error("different update failed ({0})")] + DifferentUpdateFailed(Uuid), + #[error("update status lost (did the SP reset?)")] + UpdateStatusLost, + #[error("update was aborted")] + UpdateAborted, + #[error("update failed (error code {0})")] + UpdateFailedWithCode(u32), + #[error("update failed (error message {0})")] + UpdateFailedWithMessage(String), +} + +pub(super) trait SpComponentUpdater { + /// The target component. + /// + /// Should be produced via `SpComponent::const_as_str()`. + fn component(&self) -> &'static str; + + /// The type of the target SP. + fn target_sp_type(&self) -> SpType; + + /// The slot number of the target SP. + fn target_sp_slot(&self) -> u32; + + /// The target firmware slot for the component. + fn firmware_slot(&self) -> u16; + + /// The ID of this update. + fn update_id(&self) -> Uuid; + + /// The update payload data to send to MGS. + // TODO-performance This has to be convertible into a `reqwest::Body`, so we + // return an owned Vec. That requires all our implementors to clone the data + // at least once; maybe we should use `Bytes` instead (which is cheap to + // clone and also convertible into a reqwest::Body)? + fn update_data(&self) -> Vec; + + /// The sending half of the watch channel to report update progress. + fn progress(&self) -> &watch::Sender>; + + /// Logger to use while performing this update. + fn logger(&self) -> &Logger; +} + +pub(super) async fn deliver_update( + updater: &(dyn SpComponentUpdater + Send + Sync), + mgs_clients: &mut MgsClients, +) -> Result<(), SpComponentUpdateError> { + // How frequently do we poll MGS for the update progress? + const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); + + // Start the update. + mgs_clients + .try_all_serially(updater.logger(), |client| async move { + client + .sp_component_update( + updater.target_sp_type(), + updater.target_sp_slot(), + updater.component(), + updater.firmware_slot(), + &updater.update_id(), + reqwest::Body::from(updater.update_data()), + ) + .await?; + updater.progress().send_replace(Some(UpdateProgress::Started)); + info!( + updater.logger(), "update started"; + "mgs_addr" => client.baseurl(), + ); + Ok(()) + }) + .await?; + + // Wait for the update to complete. + loop { + let status = mgs_clients + .try_all_serially(updater.logger(), |client| async move { + let update_status = client + .sp_component_update_status( + updater.target_sp_type(), + updater.target_sp_slot(), + updater.component(), + ) + .await?; + + debug!( + updater.logger(), "got update status"; + "mgs_addr" => client.baseurl(), + "status" => ?update_status, + ); + + Ok(update_status) + }) + .await?; + + if status_is_complete( + status.into_inner(), + updater.update_id(), + updater.progress(), + updater.logger(), + )? 
{ + updater.progress().send_replace(Some(UpdateProgress::InProgress { + progress: Some(1.0), + })); + return Ok(()); + } + + tokio::time::sleep(STATUS_POLL_INTERVAL).await; + } +} + +fn status_is_complete( + status: SpUpdateStatus, + update_id: Uuid, + progress_tx: &watch::Sender>, + log: &Logger, +) -> Result { + match status { + // For `Preparing` and `InProgress`, we could check the progress + // information returned by these steps and try to check that + // we're still _making_ progress, but every Nexus instance needs + // to do that anyway in case we (or the MGS instance delivering + // the update) crash, so we'll omit that check here. Instead, we + // just sleep and we'll poll again shortly. + SpUpdateStatus::Preparing { id, progress } => { + if id == update_id { + let progress = progress.and_then(|progress| { + if progress.current > progress.total { + warn!( + log, "nonsense preparing progress"; + "current" => progress.current, + "total" => progress.total, + ); + None + } else if progress.total == 0 { + None + } else { + Some( + f64::from(progress.current) + / f64::from(progress.total), + ) + } + }); + progress_tx + .send_replace(Some(UpdateProgress::Preparing { progress })); + Ok(false) + } else { + Err(SpComponentUpdateError::DifferentUpdatePreparing(id)) + } + } + SpUpdateStatus::InProgress { id, bytes_received, total_bytes } => { + if id == update_id { + let progress = if bytes_received > total_bytes { + warn!( + log, "nonsense update progress"; + "bytes_received" => bytes_received, + "total_bytes" => total_bytes, + ); + None + } else if total_bytes == 0 { + None + } else { + Some(f64::from(bytes_received) / f64::from(total_bytes)) + }; + progress_tx.send_replace(Some(UpdateProgress::InProgress { + progress, + })); + Ok(false) + } else { + Err(SpComponentUpdateError::DifferentUpdateInProgress(id)) + } + } + SpUpdateStatus::Complete { id } => { + if id == update_id { + Ok(true) + } else { + Err(SpComponentUpdateError::DifferentUpdateComplete(id)) + } + } + SpUpdateStatus::None => Err(SpComponentUpdateError::UpdateStatusLost), + SpUpdateStatus::Aborted { id } => { + if id == update_id { + Err(SpComponentUpdateError::UpdateAborted) + } else { + Err(SpComponentUpdateError::DifferentUpdateAborted(id)) + } + } + SpUpdateStatus::Failed { code, id } => { + if id == update_id { + Err(SpComponentUpdateError::UpdateFailedWithCode(code)) + } else { + Err(SpComponentUpdateError::DifferentUpdateFailed(id)) + } + } + SpUpdateStatus::RotError { id, message } => { + if id == update_id { + Err(SpComponentUpdateError::UpdateFailedWithMessage(format!( + "rot error: {message}" + ))) + } else { + Err(SpComponentUpdateError::DifferentUpdateFailed(id)) + } + } + } +} diff --git a/nexus/src/app/update/host_phase1_updater.rs b/nexus/src/app/update/host_phase1_updater.rs new file mode 100644 index 0000000000..fb013d0ffe --- /dev/null +++ b/nexus/src/app/update/host_phase1_updater.rs @@ -0,0 +1,177 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Module containing types for updating host OS phase1 images via MGS. 
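+//!
+//! Internally, `HostPhase1Updater::update()` is a two-step flow: it first
+//! marks the target host boot flash slot active via MGS, then streams the
+//! image and polls for completion through the shared `deliver_update`
+//! helper. A condensed, illustrative view (not the literal code):
+//!
+//! ```ignore
+//! // 1. choose which host boot flash slot (0 or 1) receives the image
+//! self.mark_target_slot_active(&client).await?;
+//! // 2. stream the phase 1 image and poll MGS until delivery completes
+//! deliver_update(&mut self, mgs_clients).await?;
+//! ```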
+ +use super::common_sp_update::deliver_update; +use super::common_sp_update::SpComponentUpdater; +use super::MgsClients; +use super::SpComponentUpdateError; +use super::UpdateProgress; +use gateway_client::types::SpComponentFirmwareSlot; +use gateway_client::types::SpType; +use gateway_client::SpComponent; +use slog::Logger; +use tokio::sync::watch; +use uuid::Uuid; + +type GatewayClientError = gateway_client::Error; + +pub struct HostPhase1Updater { + log: Logger, + progress: watch::Sender>, + sp_type: SpType, + sp_slot: u32, + target_host_slot: u16, + update_id: Uuid, + // TODO-clarity maybe a newtype for this? TBD how we get this from + // wherever it's stored, which might give us a stronger type already. + phase1_data: Vec, +} + +impl HostPhase1Updater { + pub fn new( + sp_type: SpType, + sp_slot: u32, + target_host_slot: u16, + update_id: Uuid, + phase1_data: Vec, + log: &Logger, + ) -> Self { + let log = log.new(slog::o!( + "component" => "HostPhase1Updater", + "sp_type" => format!("{sp_type:?}"), + "sp_slot" => sp_slot, + "target_host_slot" => target_host_slot, + "update_id" => format!("{update_id}"), + )); + let progress = watch::Sender::new(None); + Self { + log, + progress, + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data, + } + } + + pub fn progress_watcher(&self) -> watch::Receiver> { + self.progress.subscribe() + } + + /// Drive this host phase 1 update to completion (or failure). + /// + /// Only one MGS instance is required to drive an update; however, if + /// multiple MGS instances are available and passed to this method and an + /// error occurs communicating with one instance, `HostPhase1Updater` will + /// try the remaining instances before failing. + pub async fn update( + mut self, + mgs_clients: &mut MgsClients, + ) -> Result<(), SpComponentUpdateError> { + // The async block below wants a `&self` reference, but we take `self` + // for API clarity (to start a new update, the caller should construct a + // new instance of the updater). Create a `&self` ref that we use + // through the remainder of this method. + let me = &self; + + // Prior to delivering the update, ensure the correct target slot is + // activated. + // + // TODO-correctness Should we be doing this, or should a higher level + // executor set this up before calling us? + mgs_clients + .try_all_serially(&self.log, |client| async move { + me.mark_target_slot_active(&client).await + }) + .await?; + + // Deliver and drive the update to completion + deliver_update(&mut self, mgs_clients).await?; + + // Unlike SP and RoT updates, we have nothing to do after delivery of + // the update completes; signal to any watchers that we're done. + self.progress.send_replace(Some(UpdateProgress::Complete)); + + // wait for any progress watchers to be dropped before we return; + // otherwise, they'll get `RecvError`s when trying to check the current + // status + self.progress.closed().await; + + Ok(()) + } + + async fn mark_target_slot_active( + &self, + client: &gateway_client::Client, + ) -> Result<(), GatewayClientError> { + // TODO-correctness Should we always persist this choice? + let persist = true; + + let slot = self.firmware_slot(); + + // TODO-correctness Until + // https://github.com/oxidecomputer/hubris/issues/1172 is fixed, the + // host must be in A2 for this operation to succeed. After it is fixed, + // there will still be a window while a host is booting where this + // operation can fail. How do we handle this? 
+ client + .sp_component_active_slot_set( + self.sp_type, + self.sp_slot, + self.component(), + persist, + &SpComponentFirmwareSlot { slot }, + ) + .await?; + + // TODO-correctness Should we send some kind of update to + // `self.progress`? We haven't actually started delivering an update + // yet, but it seems weird to give no indication that we have + // successfully (potentially) modified the state of the target sled. + + info!( + self.log, "host phase1 target slot marked active"; + "mgs_addr" => client.baseurl(), + ); + + Ok(()) + } +} + +impl SpComponentUpdater for HostPhase1Updater { + fn component(&self) -> &'static str { + SpComponent::HOST_CPU_BOOT_FLASH.const_as_str() + } + + fn target_sp_type(&self) -> SpType { + self.sp_type + } + + fn target_sp_slot(&self) -> u32 { + self.sp_slot + } + + fn firmware_slot(&self) -> u16 { + self.target_host_slot + } + + fn update_id(&self) -> Uuid { + self.update_id + } + + fn update_data(&self) -> Vec { + self.phase1_data.clone() + } + + fn progress(&self) -> &watch::Sender> { + &self.progress + } + + fn logger(&self) -> &Logger { + &self.log + } +} diff --git a/nexus/src/app/update/mgs_clients.rs b/nexus/src/app/update/mgs_clients.rs index 5915505829..4b200a1819 100644 --- a/nexus/src/app/update/mgs_clients.rs +++ b/nexus/src/app/update/mgs_clients.rs @@ -5,53 +5,14 @@ //! Module providing support for handling failover between multiple MGS clients use futures::Future; -use gateway_client::types::SpType; -use gateway_client::types::SpUpdateStatus; use gateway_client::Client; use slog::Logger; use std::collections::VecDeque; use std::sync::Arc; -use uuid::Uuid; pub(super) type GatewayClientError = gateway_client::Error; -pub(super) enum PollUpdateStatus { - Preparing { progress: Option }, - InProgress { progress: Option }, - Complete, -} - -#[derive(Debug, thiserror::Error)] -pub enum UpdateStatusError { - #[error("different update is now preparing ({0})")] - DifferentUpdatePreparing(Uuid), - #[error("different update is now in progress ({0})")] - DifferentUpdateInProgress(Uuid), - #[error("different update is now complete ({0})")] - DifferentUpdateComplete(Uuid), - #[error("different update is now aborted ({0})")] - DifferentUpdateAborted(Uuid), - #[error("different update failed ({0})")] - DifferentUpdateFailed(Uuid), - #[error("update status lost (did the SP reset?)")] - UpdateStatusLost, - #[error("update was aborted")] - UpdateAborted, - #[error("update failed (error code {0})")] - UpdateFailedWithCode(u32), - #[error("update failed (error message {0})")] - UpdateFailedWithMessage(String), -} - -#[derive(Debug, thiserror::Error)] -pub(super) enum PollUpdateStatusError { - #[error(transparent)] - StatusError(#[from] UpdateStatusError), - #[error(transparent)] - ClientError(#[from] GatewayClientError), -} - #[derive(Debug, Clone)] pub struct MgsClients { clients: VecDeque>, @@ -130,111 +91,4 @@ impl MgsClients { // errors. Return the error from the last MGS we tried. Err(GatewayClientError::CommunicationError(last_err.unwrap())) } - - /// Poll for the status of an expected-to-be-in-progress update. 
- pub(super) async fn poll_update_status( - &mut self, - sp_type: SpType, - sp_slot: u32, - component: &'static str, - update_id: Uuid, - log: &Logger, - ) -> Result { - let update_status = self - .try_all_serially(log, |client| async move { - let update_status = client - .sp_component_update_status(sp_type, sp_slot, component) - .await?; - - debug!( - log, "got update status"; - "mgs_addr" => client.baseurl(), - "status" => ?update_status, - ); - - Ok(update_status) - }) - .await? - .into_inner(); - - match update_status { - SpUpdateStatus::Preparing { id, progress } => { - if id == update_id { - let progress = progress.and_then(|progress| { - if progress.current > progress.total { - warn!( - log, "nonsense preparing progress"; - "current" => progress.current, - "total" => progress.total, - ); - None - } else if progress.total == 0 { - None - } else { - Some( - f64::from(progress.current) - / f64::from(progress.total), - ) - } - }); - Ok(PollUpdateStatus::Preparing { progress }) - } else { - Err(UpdateStatusError::DifferentUpdatePreparing(id).into()) - } - } - SpUpdateStatus::InProgress { id, bytes_received, total_bytes } => { - if id == update_id { - let progress = if bytes_received > total_bytes { - warn!( - log, "nonsense update progress"; - "bytes_received" => bytes_received, - "total_bytes" => total_bytes, - ); - None - } else if total_bytes == 0 { - None - } else { - Some(f64::from(bytes_received) / f64::from(total_bytes)) - }; - Ok(PollUpdateStatus::InProgress { progress }) - } else { - Err(UpdateStatusError::DifferentUpdateInProgress(id).into()) - } - } - SpUpdateStatus::Complete { id } => { - if id == update_id { - Ok(PollUpdateStatus::Complete) - } else { - Err(UpdateStatusError::DifferentUpdateComplete(id).into()) - } - } - SpUpdateStatus::None => { - Err(UpdateStatusError::UpdateStatusLost.into()) - } - SpUpdateStatus::Aborted { id } => { - if id == update_id { - Err(UpdateStatusError::UpdateAborted.into()) - } else { - Err(UpdateStatusError::DifferentUpdateAborted(id).into()) - } - } - SpUpdateStatus::Failed { code, id } => { - if id == update_id { - Err(UpdateStatusError::UpdateFailedWithCode(code).into()) - } else { - Err(UpdateStatusError::DifferentUpdateFailed(id).into()) - } - } - SpUpdateStatus::RotError { id, message } => { - if id == update_id { - Err(UpdateStatusError::UpdateFailedWithMessage(format!( - "rot error: {message}" - )) - .into()) - } else { - Err(UpdateStatusError::DifferentUpdateFailed(id).into()) - } - } - } - } } diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs index 7d5c642822..5075e421ae 100644 --- a/nexus/src/app/update/mod.rs +++ b/nexus/src/app/update/mod.rs @@ -26,13 +26,17 @@ use std::path::Path; use tokio::io::AsyncWriteExt; use uuid::Uuid; +mod common_sp_update; +mod host_phase1_updater; mod mgs_clients; mod rot_updater; mod sp_updater; -pub use mgs_clients::{MgsClients, UpdateStatusError}; -pub use rot_updater::{RotUpdateError, RotUpdater}; -pub use sp_updater::{SpUpdateError, SpUpdater}; +pub use common_sp_update::SpComponentUpdateError; +pub use host_phase1_updater::HostPhase1Updater; +pub use mgs_clients::MgsClients; +pub use rot_updater::RotUpdater; +pub use sp_updater::SpUpdater; #[derive(Debug, PartialEq, Clone)] pub enum UpdateProgress { diff --git a/nexus/src/app/update/rot_updater.rs b/nexus/src/app/update/rot_updater.rs index d7d21e3b3a..12126a7de9 100644 --- a/nexus/src/app/update/rot_updater.rs +++ b/nexus/src/app/update/rot_updater.rs @@ -4,40 +4,21 @@ //! Module containing types for updating RoTs via MGS. 
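+//!
+//! Delivery alone doesn't finish an RoT update: after the shared
+//! `deliver_update` helper streams the image, the target slot still has to
+//! be marked active and the RoT reset into it. A condensed, illustrative
+//! view of what `RotUpdater::update()` does (not the literal code):
+//!
+//! ```ignore
+//! deliver_update(&mut self, mgs_clients).await?;   // stream + poll status
+//! self.mark_target_slot_active(&client).await?;    // persist slot choice
+//! self.finalize_update_via_reset(&client).await?;  // boot the new image
+//! ```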
-use super::mgs_clients::PollUpdateStatusError; +use super::common_sp_update::deliver_update; +use super::common_sp_update::SpComponentUpdater; use super::MgsClients; +use super::SpComponentUpdateError; use super::UpdateProgress; -use super::UpdateStatusError; -use crate::app::update::mgs_clients::PollUpdateStatus; use gateway_client::types::RotSlot; use gateway_client::types::SpComponentFirmwareSlot; use gateway_client::types::SpType; use gateway_client::SpComponent; use slog::Logger; -use std::time::Duration; use tokio::sync::watch; use uuid::Uuid; type GatewayClientError = gateway_client::Error; -#[derive(Debug, thiserror::Error)] -pub enum RotUpdateError { - #[error("error communicating with MGS")] - MgsCommunication(#[from] GatewayClientError), - - #[error("failed checking update status: {0}")] - PollUpdateStatus(#[from] UpdateStatusError), -} - -impl From for RotUpdateError { - fn from(err: PollUpdateStatusError) -> Self { - match err { - PollUpdateStatusError::StatusError(err) => err.into(), - PollUpdateStatusError::ClientError(err) => err.into(), - } - } -} - pub struct RotUpdater { log: Logger, progress: watch::Sender>, @@ -89,9 +70,14 @@ impl RotUpdater { /// error occurs communicating with one instance, `RotUpdater` will try the /// remaining instances before failing. pub async fn update( - self, - mut mgs_clients: MgsClients, - ) -> Result<(), RotUpdateError> { + mut self, + mgs_clients: &mut MgsClients, + ) -> Result<(), SpComponentUpdateError> { + // Deliver and drive the update to "completion" (which isn't really + // complete for the RoT, since we still have to do the steps below after + // the delivery of the update completes). + deliver_update(&mut self, mgs_clients).await?; + // The async blocks below want `&self` references, but we take `self` // for API clarity (to start a new update, the caller should construct a // new updater). Create a `&self` ref that we use through the remainder @@ -100,23 +86,13 @@ impl RotUpdater { mgs_clients .try_all_serially(&self.log, |client| async move { - me.start_update_one_mgs(&client).await - }) - .await?; - - // `wait_for_update_completion` uses `try_all_mgs_clients` internally, - // so we don't wrap it here. - me.wait_for_update_completion(&mut mgs_clients).await?; - - mgs_clients - .try_all_serially(&self.log, |client| async move { - me.mark_target_slot_active_one_mgs(&client).await + me.mark_target_slot_active(&client).await }) .await?; mgs_clients .try_all_serially(&self.log, |client| async move { - me.finalize_update_via_reset_one_mgs(&client).await + me.finalize_update_via_reset(&client).await }) .await?; @@ -128,82 +104,7 @@ impl RotUpdater { Ok(()) } - async fn start_update_one_mgs( - &self, - client: &gateway_client::Client, - ) -> Result<(), GatewayClientError> { - let firmware_slot = self.target_rot_slot.as_u16(); - - // Start the update. - client - .sp_component_update( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - firmware_slot, - &self.update_id, - reqwest::Body::from(self.rot_hubris_archive.clone()), - ) - .await?; - - self.progress.send_replace(Some(UpdateProgress::Started)); - - info!( - self.log, "RoT update started"; - "mgs_addr" => client.baseurl(), - ); - - Ok(()) - } - - async fn wait_for_update_completion( - &self, - mgs_clients: &mut MgsClients, - ) -> Result<(), RotUpdateError> { - // How frequently do we poll MGS for the update progress? 
- const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); - - loop { - let status = mgs_clients - .poll_update_status( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - self.update_id, - &self.log, - ) - .await?; - - // For `Preparing` and `InProgress`, we could check the progress - // information returned by these steps and try to check that - // we're still _making_ progress, but every Nexus instance needs - // to do that anyway in case we (or the MGS instance delivering - // the update) crash, so we'll omit that check here. Instead, we - // just sleep and we'll poll again shortly. - match status { - PollUpdateStatus::Preparing { progress } => { - self.progress.send_replace(Some( - UpdateProgress::Preparing { progress }, - )); - } - PollUpdateStatus::InProgress { progress } => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress }, - )); - } - PollUpdateStatus::Complete => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress: Some(1.0) }, - )); - return Ok(()); - } - } - - tokio::time::sleep(STATUS_POLL_INTERVAL).await; - } - } - - async fn mark_target_slot_active_one_mgs( + async fn mark_target_slot_active( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { @@ -211,13 +112,13 @@ impl RotUpdater { // tell it to persist our choice. let persist = true; - let slot = self.target_rot_slot.as_u16(); + let slot = self.firmware_slot(); client .sp_component_active_slot_set( self.sp_type, self.sp_slot, - SpComponent::ROT.const_as_str(), + self.component(), persist, &SpComponentFirmwareSlot { slot }, ) @@ -236,16 +137,12 @@ impl RotUpdater { Ok(()) } - async fn finalize_update_via_reset_one_mgs( + async fn finalize_update_via_reset( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { client - .sp_component_reset( - self.sp_type, - self.sp_slot, - SpComponent::ROT.const_as_str(), - ) + .sp_component_reset(self.sp_type, self.sp_slot, self.component()) .await?; self.progress.send_replace(Some(UpdateProgress::Complete)); @@ -258,15 +155,39 @@ impl RotUpdater { } } -trait RotSlotAsU16 { - fn as_u16(&self) -> u16; -} +impl SpComponentUpdater for RotUpdater { + fn component(&self) -> &'static str { + SpComponent::ROT.const_as_str() + } + + fn target_sp_type(&self) -> SpType { + self.sp_type + } -impl RotSlotAsU16 for RotSlot { - fn as_u16(&self) -> u16 { - match self { + fn target_sp_slot(&self) -> u32 { + self.sp_slot + } + + fn firmware_slot(&self) -> u16 { + match self.target_rot_slot { RotSlot::A => 0, RotSlot::B => 1, } } + + fn update_id(&self) -> Uuid { + self.update_id + } + + fn update_data(&self) -> Vec { + self.rot_hubris_archive.clone() + } + + fn progress(&self) -> &watch::Sender> { + &self.progress + } + + fn logger(&self) -> &Logger { + &self.log + } } diff --git a/nexus/src/app/update/sp_updater.rs b/nexus/src/app/update/sp_updater.rs index 419a733441..2a6ddc6de6 100644 --- a/nexus/src/app/update/sp_updater.rs +++ b/nexus/src/app/update/sp_updater.rs @@ -4,39 +4,19 @@ //! Module containing types for updating SPs via MGS. 
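+//!
+//! The SP flow is the same shape minus slot selection: the SP's two firmware
+//! banks aren't individually addressable, so delivery always targets
+//! firmware slot 0 ("the inactive slot"), and the update is finalized by
+//! resetting the SP. A condensed, illustrative view of `SpUpdater::update()`
+//! (not the literal code):
+//!
+//! ```ignore
+//! deliver_update(&mut self, mgs_clients).await?;   // always firmware slot 0
+//! self.finalize_update_via_reset(&client).await?;  // reset into the new image
+//! ```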
-use crate::app::update::mgs_clients::PollUpdateStatus; - -use super::mgs_clients::PollUpdateStatusError; +use super::common_sp_update::deliver_update; +use super::common_sp_update::SpComponentUpdater; use super::MgsClients; +use super::SpComponentUpdateError; use super::UpdateProgress; -use super::UpdateStatusError; use gateway_client::types::SpType; use gateway_client::SpComponent; use slog::Logger; -use std::time::Duration; use tokio::sync::watch; use uuid::Uuid; type GatewayClientError = gateway_client::Error; -#[derive(Debug, thiserror::Error)] -pub enum SpUpdateError { - #[error("error communicating with MGS")] - MgsCommunication(#[from] GatewayClientError), - - #[error("failed checking update status: {0}")] - PollUpdateStatus(#[from] UpdateStatusError), -} - -impl From for SpUpdateError { - fn from(err: PollUpdateStatusError) -> Self { - match err { - PollUpdateStatusError::StatusError(err) => err.into(), - PollUpdateStatusError::ClientError(err) => err.into(), - } - } -} - pub struct SpUpdater { log: Logger, progress: watch::Sender>, @@ -77,10 +57,15 @@ impl SpUpdater { /// error occurs communicating with one instance, `SpUpdater` will try the /// remaining instances before failing. pub async fn update( - self, - mut mgs_clients: MgsClients, - ) -> Result<(), SpUpdateError> { - // The async blocks below want `&self` references, but we take `self` + mut self, + mgs_clients: &mut MgsClients, + ) -> Result<(), SpComponentUpdateError> { + // Deliver and drive the update to "completion" (which isn't really + // complete for the SP, since we still have to reset it after the + // delivery of the update completes). + deliver_update(&mut self, mgs_clients).await?; + + // The async block below wants a `&self` reference, but we take `self` // for API clarity (to start a new SP update, the caller should // construct a new `SpUpdater`). Create a `&self` ref that we use // through the remainder of this method. @@ -88,17 +73,7 @@ impl SpUpdater { mgs_clients .try_all_serially(&self.log, |client| async move { - me.start_update_one_mgs(&client).await - }) - .await?; - - // `wait_for_update_completion` uses `try_all_mgs_clients` internally, - // so we don't wrap it here. - me.wait_for_update_completion(&mut mgs_clients).await?; - - mgs_clients - .try_all_serially(&self.log, |client| async move { - me.finalize_update_via_reset_one_mgs(&client).await + me.finalize_update_via_reset(&client).await }) .await?; @@ -110,102 +85,57 @@ impl SpUpdater { Ok(()) } - async fn start_update_one_mgs( + async fn finalize_update_via_reset( &self, client: &gateway_client::Client, ) -> Result<(), GatewayClientError> { - // The SP has two firmware slots, but they're aren't individually - // labled. We always request an update to slot 0, which means "the - // inactive slot". - let firmware_slot = 0; - - // Start the update. client - .sp_component_update( - self.sp_type, - self.sp_slot, - SpComponent::SP_ITSELF.const_as_str(), - firmware_slot, - &self.update_id, - reqwest::Body::from(self.sp_hubris_archive.clone()), - ) + .sp_component_reset(self.sp_type, self.sp_slot, self.component()) .await?; - self.progress.send_replace(Some(UpdateProgress::Started)); - + self.progress.send_replace(Some(UpdateProgress::Complete)); info!( - self.log, "SP update started"; + self.log, "SP update complete"; "mgs_addr" => client.baseurl(), ); Ok(()) } +} - async fn wait_for_update_completion( - &self, - mgs_clients: &mut MgsClients, - ) -> Result<(), SpUpdateError> { - // How frequently do we poll MGS for the update progress? 
- const STATUS_POLL_INTERVAL: Duration = Duration::from_secs(3); - - loop { - let status = mgs_clients - .poll_update_status( - self.sp_type, - self.sp_slot, - SpComponent::SP_ITSELF.const_as_str(), - self.update_id, - &self.log, - ) - .await?; - - // For `Preparing` and `InProgress`, we could check the progress - // information returned by these steps and try to check that - // we're still _making_ progress, but every Nexus instance needs - // to do that anyway in case we (or the MGS instance delivering - // the update) crash, so we'll omit that check here. Instead, we - // just sleep and we'll poll again shortly. - match status { - PollUpdateStatus::Preparing { progress } => { - self.progress.send_replace(Some( - UpdateProgress::Preparing { progress }, - )); - } - PollUpdateStatus::InProgress { progress } => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress }, - )); - } - PollUpdateStatus::Complete => { - self.progress.send_replace(Some( - UpdateProgress::InProgress { progress: Some(1.0) }, - )); - return Ok(()); - } - } - - tokio::time::sleep(STATUS_POLL_INTERVAL).await; - } +impl SpComponentUpdater for SpUpdater { + fn component(&self) -> &'static str { + SpComponent::SP_ITSELF.const_as_str() } - async fn finalize_update_via_reset_one_mgs( - &self, - client: &gateway_client::Client, - ) -> Result<(), GatewayClientError> { - client - .sp_component_reset( - self.sp_type, - self.sp_slot, - SpComponent::SP_ITSELF.const_as_str(), - ) - .await?; + fn target_sp_type(&self) -> SpType { + self.sp_type + } - self.progress.send_replace(Some(UpdateProgress::Complete)); - info!( - self.log, "SP update complete"; - "mgs_addr" => client.baseurl(), - ); + fn target_sp_slot(&self) -> u32 { + self.sp_slot + } - Ok(()) + fn firmware_slot(&self) -> u16 { + // The SP has two firmware slots, but they're aren't individually + // labled. We always request an update to slot 0, which means "the + // inactive slot". + 0 + } + + fn update_id(&self) -> Uuid { + self.update_id + } + + fn update_data(&self) -> Vec { + self.sp_hubris_archive.clone() + } + + fn progress(&self) -> &watch::Sender> { + &self.progress + } + + fn logger(&self) -> &Logger { + &self.log } } diff --git a/nexus/tests/integration_tests/host_phase1_updater.rs b/nexus/tests/integration_tests/host_phase1_updater.rs new file mode 100644 index 0000000000..01d546636e --- /dev/null +++ b/nexus/tests/integration_tests/host_phase1_updater.rs @@ -0,0 +1,584 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests `HostPhase1Updater`'s delivery of updates to host phase 1 flash via +//! MGS to SP. + +use gateway_client::types::SpType; +use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus}; +use gateway_test_utils::setup as mgs_setup; +use omicron_nexus::app::test_interfaces::{ + HostPhase1Updater, MgsClients, UpdateProgress, +}; +use rand::RngCore; +use sp_sim::SimulatedSp; +use std::mem; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::io::AsyncWriteExt; +use tokio::net::TcpListener; +use tokio::net::TcpStream; +use tokio::sync::mpsc; +use uuid::Uuid; + +fn make_fake_host_phase1_image() -> Vec { + let mut image = vec![0; 128]; + rand::thread_rng().fill_bytes(&mut image); + image +} + +#[tokio::test] +async fn test_host_phase1_updater_updates_sled() { + // Start MGS + Sim SP. 
+ let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_updates_sled", + SpPort::One, + ) + .await; + + // Configure an MGS client. + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + for target_host_slot in [0, 1] { + // Configure and instantiate an `HostPhase1Updater`. + let sp_type = SpType::Sled; + let sp_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + // Run the update. + host_phase1_updater + .update(&mut mgs_clients) + .await + .expect("update failed"); + + // Ensure the SP received the complete update. + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + } + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_host_phase1_updater_remembers_successful_mgs_instance() { + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_remembers_successful_mgs_instance", + SpPort::One, + ) + .await; + + // Also start a local TCP server that we will claim is an MGS instance, but + // it will close connections immediately after accepting them. This will + // allow us to count how many connections it receives, while simultaneously + // causing errors in the HostPhase1Updater when it attempts to use this + // "MGS". + let (failing_mgs_task, failing_mgs_addr, failing_mgs_conn_counter) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let conn_count = Arc::new(AtomicUsize::new(0)); + + let task = { + let conn_count = Arc::clone(&conn_count); + tokio::spawn(async move { + loop { + let (mut stream, _peer) = socket.accept().await.unwrap(); + conn_count.fetch_add(1, Ordering::SeqCst); + stream.shutdown().await.unwrap(); + } + }) + }; + + (task, addr, conn_count) + }; + + // Order the MGS clients such that the bogus MGS that immediately closes + // connections comes first. `HostPhase1Updater` should remember that the + // second MGS instance succeeds, and only send subsequent requests to it: we + // should only see a single attempted connection to the bogus MGS, even + // though delivering an update requires a bare minimum of three requests + // (start the update, query the status, reset the SP) and often more (if + // repeated queries are required to wait for completion). 
+ let mut mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new( + &format!("http://{failing_mgs_addr}"), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let target_host_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + host_phase1_updater.update(&mut mgs_clients).await.expect("update failed"); + + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + + // Check that our bogus MGS only received a single connection attempt. + // (After HostPhase1Updater failed to talk to this instance, it should have + // fallen back to the valid one for all further requests.) + assert_eq!( + failing_mgs_conn_counter.load(Ordering::SeqCst), + 1, + "bogus MGS instance didn't receive the expected number of connections" + ); + failing_mgs_task.abort(); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_host_phase1_updater_switches_mgs_instances_on_failure() { + enum MgsProxy { + One(TcpStream), + Two(TcpStream), + } + + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_switches_mgs_instances_on_failure", + SpPort::One, + ) + .await; + let mgs_bind_addr = mgstestctx.client.bind_address; + + let spawn_mgs_proxy_task = |mut stream: TcpStream| { + tokio::spawn(async move { + let mut mgs_stream = TcpStream::connect(mgs_bind_addr) + .await + .expect("failed to connect to MGS"); + tokio::io::copy_bidirectional(&mut stream, &mut mgs_stream) + .await + .expect("failed to proxy connection to MGS"); + }) + }; + + // Start two MGS proxy tasks; when each receives an incoming TCP connection, + // it forwards that `TcpStream` along the `mgs_proxy_connections` channel + // along with a tag of which proxy it is. We'll use this below to flip flop + // between MGS "instances" (really these two proxies). + let (mgs_proxy_connections_tx, mut mgs_proxy_connections_rx) = + mpsc::unbounded_channel(); + let (mgs_proxy_one_task, mgs_proxy_one_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let mgs_proxy_connections_tx = mgs_proxy_connections_tx.clone(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::One(stream)).unwrap(); + } + }); + (task, addr) + }; + let (mgs_proxy_two_task, mgs_proxy_two_addr) = { + let socket = TcpListener::bind("[::1]:0").await.unwrap(); + let addr = socket.local_addr().unwrap(); + let task = tokio::spawn(async move { + loop { + let (stream, _peer) = socket.accept().await.unwrap(); + mgs_proxy_connections_tx.send(MgsProxy::Two(stream)).unwrap(); + } + }); + (task, addr) + }; + + // Disable connection pooling so each request gets a new TCP connection. 
+ let client = + reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); + + // Configure two MGS clients pointed at our two proxy tasks. + let mut mgs_clients = MgsClients::from_clients([ + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_one_addr}"), + client.clone(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), + ), + gateway_client::Client::new_with_client( + &format!("http://{mgs_proxy_two_addr}"), + client, + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient2")), + ), + ]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let target_host_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + // Spawn the actual update task. + let mut update_task = tokio::spawn(async move { + host_phase1_updater.update(&mut mgs_clients).await + }); + + // Loop over incoming requests. We expect this sequence: + // + // 1. Connection arrives on the first proxy + // 2. We spawn a task to service that request, and set `should_swap` + // 3. Connection arrives on the first proxy + // 4. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // 5. Connection arrives on the second proxy + // 6. We spawn a task to service that request, and set `should_swap` + // 7. Connection arrives on the second proxy + // 8. We drop that connection, flip `expected_proxy`, and clear + // `should_swap` + // + // ... repeat until the update is complete. + let mut expected_proxy = 0; + let mut proxy_one_count = 0; + let mut proxy_two_count = 0; + let mut total_requests_handled = 0; + let mut should_swap = false; + loop { + tokio::select! { + Some(proxy_stream) = mgs_proxy_connections_rx.recv() => { + let stream = match proxy_stream { + MgsProxy::One(stream) => { + assert_eq!(expected_proxy, 0); + proxy_one_count += 1; + stream + } + MgsProxy::Two(stream) => { + assert_eq!(expected_proxy, 1); + proxy_two_count += 1; + stream + } + }; + + // Should we trigger `HostPhase1Updater` to swap to the other + // MGS (proxy)? If so, do that by dropping this connection + // (which will cause a client failure) and note that we expect + // the next incoming request to come on the other proxy. + if should_swap { + mem::drop(stream); + expected_proxy ^= 1; + should_swap = false; + } else { + // Otherwise, handle this connection. + total_requests_handled += 1; + spawn_mgs_proxy_task(stream); + should_swap = true; + } + } + + result = &mut update_task => { + match result { + Ok(Ok(())) => { + mgs_proxy_one_task.abort(); + mgs_proxy_two_task.abort(); + break; + } + Ok(Err(err)) => panic!("update failed: {err}"), + Err(err) => panic!("update task panicked: {err}"), + } + } + } + } + + // A host flash update requires a minimum of 3 requests to MGS: set the + // active flash slot, post the update, and check the status. There may be + // more requests if the update is not yet complete when the status is + // checked, but we can just check that each of our proxies received at least + // 2 incoming requests; based on our outline above, if we got the minimum of + // 3 requests, it would look like this: + // + // 1. POST update -> first proxy (success) + // 2. GET status -> first proxy (fail) + // 3. GET status retry -> second proxy (success) + // 4. POST reset -> second proxy (fail) + // 5. 
POST reset -> first proxy (success) + // + // This pattern would repeat if multiple status requests were required, so + // we always expect the first proxy to see exactly one more connection + // attempt than the second (because it went first before they started + // swapping), and the two together should see a total of one less than + // double the number of successful requests required. + assert!(total_requests_handled >= 3); + assert_eq!(proxy_one_count, proxy_two_count + 1); + assert_eq!( + (proxy_one_count + proxy_two_count + 1) / 2, + total_requests_handled + ); + + let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] + .last_host_phase1_update_data(target_host_slot) + .await + .expect("simulated host phase1 did not receive an update"); + + assert_eq!( + phase1_data.as_slice(), + &*last_update_image, + "simulated host phase1 update contents (len {}) \ + do not match test generated fake image (len {})", + last_update_image.len(), + phase1_data.len(), + ); + + mgstestctx.teardown().await; +} + +#[tokio::test] +async fn test_host_phase1_updater_delivers_progress() { + // Start MGS + Sim SP. + let mgstestctx = mgs_setup::test_setup( + "test_host_phase1_updater_delivers_progress", + SpPort::One, + ) + .await; + + // Configure an MGS client. + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); + + let sp_type = SpType::Sled; + let sp_slot = 0; + let target_host_slot = 0; + let update_id = Uuid::new_v4(); + let phase1_data = make_fake_host_phase1_image(); + + let host_phase1_updater = HostPhase1Updater::new( + sp_type, + sp_slot, + target_host_slot, + update_id, + phase1_data.clone(), + &mgstestctx.logctx.log, + ); + + let phase1_data_len = phase1_data.len() as u32; + + // Subscribe to update progress, and check that there is no status yet; we + // haven't started the update. + let mut progress = host_phase1_updater.progress_watcher(); + assert_eq!(*progress.borrow_and_update(), None); + + // Install a semaphore on the requests our target SP will receive so we can + // inspect progress messages without racing. + let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize]; + let sp_accept_sema = target_sp.install_udp_accept_semaphore().await; + let mut sp_responses = target_sp.responses_sent_count().unwrap(); + + // Spawn the update on a background task so we can watch `progress` as it is + // applied. + let do_update_task = tokio::spawn(async move { + host_phase1_updater.update(&mut mgs_clients).await + }); + + // Allow the SP to respond to 2 messages: the message to activate the target + // flash slot and the "prepare update" message that triggers the start of an + // update, then ensure we see the "started" progress. + sp_accept_sema.send(2).unwrap(); + progress.changed().await.unwrap(); + assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started)); + + // Ensure our simulated SP is in the state we expect: it's prepared for an + // update but has not yet received any data. + assert_eq!( + target_sp.current_update_status().await, + UpdateStatus::InProgress(UpdateInProgressStatus { + id: update_id.into(), + bytes_received: 0, + total_size: phase1_data_len, + }) + ); + + // Record the number of responses the SP has sent; we'll use + // `sp_responses.changed()` in the loop below, and want to mark whatever + // value this watch channel currently has as seen.
+ sp_responses.borrow_and_update(); + + // At this point, there are two clients racing each other to talk to our + // simulated SP: + // + // 1. MGS is trying to deliver the update + // 2. `host_phase1_updater` is trying to poll (via MGS) for update status + // + // and we want to ensure that we see any relevant progress reports from + // `host_phase1_updater`. We'll let one MGS -> SP message through at a time + // (waiting until our SP has responded by waiting for a change to + // `sp_responses`) then check its update state: if it changed, the packet we + // let through was data from MGS; otherwise, it was a status request from + // `host_phase1_updater`. + // + // This loop will continue until either: + // + // 1. We see an `UpdateStatus::InProgress` message indicating 100% delivery, + // at which point we break out of the loop + // 2. We time out waiting for the previous step (by timing out for either + // the SP to process a request or `host_phase1_updater` to realize + // there's been progress), at which point we panic and fail this test. + let mut prev_bytes_received = 0; + let mut expect_progress_change = false; + loop { + // Allow the SP to accept and respond to a single UDP packet. + sp_accept_sema.send(1).unwrap(); + + // Wait until the SP has sent a response, with a safety rail that we + // haven't screwed up our untangle-the-race logic: if we don't see the + // SP process any new messages after several seconds, our test is + // broken, so fail. + tokio::time::timeout(Duration::from_secs(10), sp_responses.changed()) + .await + .expect("timeout waiting for SP response count to change") + .expect("sp response count sender dropped"); + + // Inspect the SP's in-memory update state; we expect only `InProgress` + // or `Complete`, and in either case we note whether we expect to see + // status changes from `host_phase1_updater`. + match target_sp.current_update_status().await { + UpdateStatus::InProgress(sp_progress) => { + if sp_progress.bytes_received > prev_bytes_received { + prev_bytes_received = sp_progress.bytes_received; + expect_progress_change = true; + continue; + } + } + UpdateStatus::Complete(_) => { + if prev_bytes_received < phase1_data_len { + break; + } + } + status @ (UpdateStatus::None + | UpdateStatus::Preparing(_) + | UpdateStatus::SpUpdateAuxFlashChckScan { .. } + | UpdateStatus::Aborted(_) + | UpdateStatus::Failed { .. } + | UpdateStatus::RotError { .. }) => { + panic!("unexpected status {status:?}"); + } + } + + // If we get here, the most recent packet did _not_ change the SP's + // internal update state, so it was a status request from + // `host_phase1_updater`. If we expect the updater to see new progress, + // wait for that change here. + if expect_progress_change { + // Safety rail that we haven't screwed up our untangle-the-race + // logic: if we don't see new progress after several seconds, our + // test is broken, so fail. + tokio::time::timeout(Duration::from_secs(10), progress.changed()) + .await + .expect("progress timeout") + .expect("progress watch sender dropped"); + let status = progress.borrow_and_update().clone().unwrap(); + expect_progress_change = false; + + assert!( + matches!(status, UpdateProgress::InProgress { .. }), + "unexpected progress status {status:?}" + ); + } + } + + // We know the SP has received a complete update, but `HostPhase1Updater` + // may still need to request status to realize that; release the socket + // semaphore so the SP can respond.
+    sp_accept_sema.send(usize::MAX).unwrap();
+
+    // Unlike the SP and RoT cases, there are no MGS/SP steps in between the
+    // update completing and `HostPhase1Updater` sending
+    // `UpdateProgress::Complete`. Therefore, it's a race whether we'll see
+    // some number of `InProgress` statuses before `Complete`, but we should
+    // quickly move to `Complete`.
+    loop {
+        tokio::time::timeout(Duration::from_secs(10), progress.changed())
+            .await
+            .expect("progress timeout")
+            .expect("progress watch sender dropped");
+        let status = progress.borrow_and_update().clone().unwrap();
+        match status {
+            UpdateProgress::Complete => break,
+            UpdateProgress::InProgress { .. } => continue,
+            _ => panic!("unexpected progress status {status:?}"),
+        }
+    }
+
+    // Drop our progress receiver so `do_update_task` can complete.
+    mem::drop(progress);
+
+    do_update_task.await.expect("update task panicked").expect("update failed");
+
+    let last_update_image = target_sp
+        .last_host_phase1_update_data(target_host_slot)
+        .await
+        .expect("simulated host phase1 did not receive an update");
+
+    assert_eq!(
+        phase1_data.as_slice(),
+        &*last_update_image,
+        "simulated host phase1 update contents (len {}) \
+        do not match test generated fake image (len {})",
+        last_update_image.len(),
+        phase1_data.len(),
+    );
+
+    mgstestctx.teardown().await;
+}
diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs
index 87c5c74f0f..4d7b41cfa8 100644
--- a/nexus/tests/integration_tests/mod.rs
+++ b/nexus/tests/integration_tests/mod.rs
@@ -12,6 +12,7 @@ mod commands;
 mod console_api;
 mod device_auth;
 mod disks;
+mod host_phase1_updater;
 mod images;
 mod initialization;
 mod instances;
diff --git a/nexus/tests/integration_tests/rot_updater.rs b/nexus/tests/integration_tests/rot_updater.rs
index 750f9571d0..2e6d65f8b1 100644
--- a/nexus/tests/integration_tests/rot_updater.rs
+++ b/nexus/tests/integration_tests/rot_updater.rs
@@ -45,10 +45,11 @@ async fn test_rot_updater_updates_sled() {
     .await;
 
     // Configure an MGS client.
-    let mgs_clients = MgsClients::from_clients([gateway_client::Client::new(
-        &mgstestctx.client.url("/").to_string(),
-        mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
-    )]);
+    let mut mgs_clients =
+        MgsClients::from_clients([gateway_client::Client::new(
+            &mgstestctx.client.url("/").to_string(),
+            mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
+        )]);
 
     // Configure and instantiate an `RotUpdater`.
     let sp_type = SpType::Sled;
@@ -67,7 +68,7 @@ async fn test_rot_updater_updates_sled() {
     );
 
     // Run the update.
-    rot_updater.update(mgs_clients).await.expect("update failed");
+    rot_updater.update(&mut mgs_clients).await.expect("update failed");
 
     // Ensure the RoT received the complete update.
     let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize]
@@ -97,10 +98,11 @@ async fn test_rot_updater_updates_switch() {
     .await;
 
     // Configure an MGS client.
- let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Switch; let sp_slot = 0; @@ -117,7 +119,7 @@ async fn test_rot_updater_updates_switch() { &mgstestctx.logctx.log, ); - rot_updater.update(mgs_clients).await.expect("update failed"); + rot_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] .last_rot_update_data() @@ -177,7 +179,7 @@ async fn test_rot_updater_remembers_successful_mgs_instance() { // delivering an update requires a bare minimum of three requests (start the // update, query the status, reset the RoT) and often more (if repeated // queries are required to wait for completion). - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new( &format!("http://{failing_mgs_addr}"), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), @@ -203,7 +205,7 @@ async fn test_rot_updater_remembers_successful_mgs_instance() { &mgstestctx.logctx.log, ); - rot_updater.update(mgs_clients).await.expect("update failed"); + rot_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] .last_rot_update_data() @@ -295,7 +297,7 @@ async fn test_rot_updater_switches_mgs_instances_on_failure() { reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); // Configure two MGS clients pointed at our two proxy tasks. - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new_with_client( &format!("http://{mgs_proxy_one_addr}"), client.clone(), @@ -324,7 +326,8 @@ async fn test_rot_updater_switches_mgs_instances_on_failure() { ); // Spawn the actual update task. - let mut update_task = tokio::spawn(rot_updater.update(mgs_clients)); + let mut update_task = + tokio::spawn(async move { rot_updater.update(&mut mgs_clients).await }); // Loop over incoming requests. We expect this sequence: // @@ -447,10 +450,11 @@ async fn test_rot_updater_delivers_progress() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Sled; let sp_slot = 0; @@ -483,10 +487,11 @@ async fn test_rot_updater_delivers_progress() { // Spawn the update on a background task so we can watch `progress` as it is // applied. - let do_update_task = tokio::spawn(rot_updater.update(mgs_clients)); + let do_update_task = + tokio::spawn(async move { rot_updater.update(&mut mgs_clients).await }); // Allow the SP to respond to 1 message: the "prepare update" messages that - // trigger the start of an update, then ensure we see the "started" + // triggers the start of an update, then ensure we see the "started" // progress. 
sp_accept_sema.send(1).unwrap(); progress.changed().await.unwrap(); @@ -556,7 +561,6 @@ async fn test_rot_updater_delivers_progress() { UpdateStatus::Complete(_) => { if prev_bytes_received < rot_image_len { prev_bytes_received = rot_image_len; - continue; } } status @ (UpdateStatus::None diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs index 89735ac3d9..1b6764e609 100644 --- a/nexus/tests/integration_tests/sp_updater.rs +++ b/nexus/tests/integration_tests/sp_updater.rs @@ -46,10 +46,11 @@ async fn test_sp_updater_updates_sled() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); // Configure and instantiate an `SpUpdater`. let sp_type = SpType::Sled; @@ -66,7 +67,7 @@ async fn test_sp_updater_updates_sled() { ); // Run the update. - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); // Ensure the SP received the complete update. let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] @@ -96,10 +97,11 @@ async fn test_sp_updater_updates_switch() { .await; // Configure an MGS client. - let mgs_clients = MgsClients::from_clients([gateway_client::Client::new( - &mgstestctx.client.url("/").to_string(), - mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), - )]); + let mut mgs_clients = + MgsClients::from_clients([gateway_client::Client::new( + &mgstestctx.client.url("/").to_string(), + mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")), + )]); let sp_type = SpType::Switch; let sp_slot = 0; @@ -114,7 +116,7 @@ async fn test_sp_updater_updates_switch() { &mgstestctx.logctx.log, ); - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.sidecars[sp_slot as usize] .last_sp_update_data() @@ -174,7 +176,7 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { // delivering an update requires a bare minimum of three requests (start the // update, query the status, reset the SP) and often more (if repeated // queries are required to wait for completion). - let mgs_clients = MgsClients::from_clients([ + let mut mgs_clients = MgsClients::from_clients([ gateway_client::Client::new( &format!("http://{failing_mgs_addr}"), mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient1")), @@ -198,7 +200,7 @@ async fn test_sp_updater_remembers_successful_mgs_instance() { &mgstestctx.logctx.log, ); - sp_updater.update(mgs_clients).await.expect("update failed"); + sp_updater.update(&mut mgs_clients).await.expect("update failed"); let last_update_image = mgstestctx.simrack.gimlets[sp_slot as usize] .last_sp_update_data() @@ -290,7 +292,7 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { reqwest::Client::builder().pool_max_idle_per_host(0).build().unwrap(); // Configure two MGS clients pointed at our two proxy tasks. 
-    let mgs_clients = MgsClients::from_clients([
+    let mut mgs_clients = MgsClients::from_clients([
         gateway_client::Client::new_with_client(
             &format!("http://{mgs_proxy_one_addr}"),
             client.clone(),
@@ -317,7 +319,8 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() {
     );
 
     // Spawn the actual update task.
-    let mut update_task = tokio::spawn(sp_updater.update(mgs_clients));
+    let mut update_task =
+        tokio::spawn(async move { sp_updater.update(&mut mgs_clients).await });
 
     // Loop over incoming requests. We expect this sequence:
     //
@@ -436,10 +439,11 @@ async fn test_sp_updater_delivers_progress() {
     .await;
 
     // Configure an MGS client.
-    let mgs_clients = MgsClients::from_clients([gateway_client::Client::new(
-        &mgstestctx.client.url("/").to_string(),
-        mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
-    )]);
+    let mut mgs_clients =
+        MgsClients::from_clients([gateway_client::Client::new(
+            &mgstestctx.client.url("/").to_string(),
+            mgstestctx.logctx.log.new(slog::o!("component" => "MgsClient")),
+        )]);
 
     let sp_type = SpType::Sled;
     let sp_slot = 0;
@@ -470,10 +474,11 @@ async fn test_sp_updater_delivers_progress() {
 
     // Spawn the update on a background task so we can watch `progress` as it is
     // applied.
-    let do_update_task = tokio::spawn(sp_updater.update(mgs_clients));
+    let do_update_task =
+        tokio::spawn(async move { sp_updater.update(&mut mgs_clients).await });
 
     // Allow the SP to respond to 2 messages: the caboose check and the "prepare
-    // update" messages that trigger the start of an update, then ensure we see
+    // update" messages that triggers the start of an update, then ensure we see
     // the "started" progress.
     sp_accept_sema.send(2).unwrap();
     progress.changed().await.unwrap();
@@ -543,7 +548,6 @@ async fn test_sp_updater_delivers_progress() {
             UpdateStatus::Complete(_) => {
                 if prev_bytes_received < sp_image_len {
                     prev_bytes_received = sp_image_len;
-                    continue;
                 }
             }
             status @ (UpdateStatus::None
diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs
index 635e8fde6b..5cfad94c86 100644
--- a/sp-sim/src/gimlet.rs
+++ b/sp-sim/src/gimlet.rs
@@ -123,6 +123,15 @@ impl SimulatedSp for Gimlet {
         handler.update_state.last_rot_update_data()
     }
 
+    async fn last_host_phase1_update_data(
+        &self,
+        slot: u16,
+    ) -> Option<Box<[u8]>> {
+        let handler = self.handler.as_ref()?;
+        let handler = handler.lock().await;
+        handler.update_state.last_host_phase1_update_data(slot)
+    }
+
     async fn current_update_status(&self) -> gateway_messages::UpdateStatus {
         let Some(handler) = self.handler.as_ref() else {
             return gateway_messages::UpdateStatus::None;
@@ -1188,7 +1197,7 @@ impl SpHandler for Handler {
         port: SpPort,
         component: SpComponent,
     ) -> Result<u16, SpError> {
-        warn!(
+        debug!(
             &self.log,
             "asked for component active slot";
             "sender" => %sender,
             "port" => ?port,
@@ -1211,7 +1220,7 @@ impl SpHandler for Handler {
         slot: u16,
         persist: bool,
     ) -> Result<(), SpError> {
-        warn!(
+        debug!(
             &self.log,
             "asked to set component active slot";
             "sender" => %sender,
             "port" => ?port,
@@ -1222,9 +1231,12 @@ impl SpHandler for Handler {
         if component == SpComponent::ROT {
             self.rot_active_slot = rot_slot_id_from_u16(slot)?;
             Ok(())
+        } else if component == SpComponent::HOST_CPU_BOOT_FLASH {
+            self.update_state.set_active_host_slot(slot);
+            Ok(())
         } else {
             // The real SP returns `RequestUnsupportedForComponent` for anything
-            // other than the RoT, including SP_ITSELF.
+            // other than the RoT and host boot flash, including SP_ITSELF.
             Err(SpError::RequestUnsupportedForComponent)
         }
     }
diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs
index 0958e8a177..87643af9a8 100644
--- a/sp-sim/src/lib.rs
+++ b/sp-sim/src/lib.rs
@@ -68,6 +68,12 @@ pub trait SimulatedSp {
     /// Only returns data after a simulated reset of the RoT.
     async fn last_rot_update_data(&self) -> Option<Box<[u8]>>;
 
+    /// Get the last completed update delivered to the host phase1 flash slot.
+    async fn last_host_phase1_update_data(
+        &self,
+        slot: u16,
+    ) -> Option<Box<[u8]>>;
+
     /// Get the current update status, just as would be returned by an MGS
     /// request to get the update status.
     async fn current_update_status(&self) -> gateway_messages::UpdateStatus;
diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs
index 19e84ffc64..1bd6fe4964 100644
--- a/sp-sim/src/sidecar.rs
+++ b/sp-sim/src/sidecar.rs
@@ -134,6 +134,14 @@ impl SimulatedSp for Sidecar {
         handler.update_state.last_rot_update_data()
     }
 
+    async fn last_host_phase1_update_data(
+        &self,
+        _slot: u16,
+    ) -> Option<Box<[u8]>> {
+        // sidecars do not have attached hosts
+        None
+    }
+
     async fn current_update_status(&self) -> gateway_messages::UpdateStatus {
         let Some(handler) = self.handler.as_ref() else {
             return gateway_messages::UpdateStatus::None;
diff --git a/sp-sim/src/update.rs b/sp-sim/src/update.rs
index 9879a3ecde..0efa730a26 100644
--- a/sp-sim/src/update.rs
+++ b/sp-sim/src/update.rs
@@ -2,6 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+use std::collections::BTreeMap;
 use std::io::Cursor;
 use std::mem;
 
@@ -15,6 +16,8 @@ pub(crate) struct SimSpUpdate {
     state: UpdateState,
     last_sp_update_data: Option<Box<[u8]>>,
     last_rot_update_data: Option<Box<[u8]>>,
+    last_host_phase1_update_data: BTreeMap<u16, Box<[u8]>>,
+    active_host_slot: Option<u16>,
 }
 
 impl Default for SimSpUpdate {
@@ -23,6 +26,13 @@ impl Default for SimSpUpdate {
             state: UpdateState::NotPrepared,
             last_sp_update_data: None,
             last_rot_update_data: None,
+            last_host_phase1_update_data: BTreeMap::new(),
+
+            // In the real SP, there is always _some_ active host slot. We could
+            // emulate that by always defaulting to slot 0, but instead we'll
+            // ensure any tests that expect to read or write a particular slot
+            // set that slot as active first.
+            active_host_slot: None,
         }
     }
 }
@@ -43,9 +53,20 @@ impl SimSpUpdate {
             UpdateState::NotPrepared
             | UpdateState::Aborted(_)
             | UpdateState::Completed { .. } => {
+                let slot = if component == SpComponent::HOST_CPU_BOOT_FLASH {
+                    match self.active_host_slot {
+                        Some(slot) => slot,
+                        None => return Err(SpError::InvalidSlotForComponent),
+                    }
+                } else {
+                    // We don't manage SP or RoT slots, so just use 0
+                    0
+                };
+
                 self.state = UpdateState::Prepared {
                     component,
                     id,
+                    slot,
                     data: Cursor::new(vec![0u8; total_size].into_boxed_slice()),
                 };
                 Ok(())
@@ -63,7 +84,7 @@ impl SimSpUpdate {
         chunk_data: &[u8],
     ) -> Result<(), SpError> {
         match &mut self.state {
-            UpdateState::Prepared { component, id, data } => {
+            UpdateState::Prepared { component, id, slot, data } => {
                 // Ensure that the update ID and target component are correct.
                 if chunk.id != *id || chunk.component != *component {
                     return Err(SpError::InvalidUpdateId { sp_update_id: *id });
@@ -84,10 +105,17 @@
                 if data.position() == data.get_ref().len() as u64 {
                     let mut stolen = Cursor::new(Box::default());
                     mem::swap(data, &mut stolen);
+                    let data = stolen.into_inner();
+
+                    if *component == SpComponent::HOST_CPU_BOOT_FLASH {
+                        self.last_host_phase1_update_data
+                            .insert(*slot, data.clone());
+                    }
+
                     self.state = UpdateState::Completed {
                         component: *component,
                         id: *id,
-                        data: stolen.into_inner(),
+                        data,
                     };
                 }
 
@@ -150,6 +178,17 @@ impl SimSpUpdate {
     pub(crate) fn last_rot_update_data(&self) -> Option<Box<[u8]>> {
         self.last_rot_update_data.clone()
    }
+
+    pub(crate) fn last_host_phase1_update_data(
+        &self,
+        slot: u16,
+    ) -> Option<Box<[u8]>> {
+        self.last_host_phase1_update_data.get(&slot).cloned()
+    }
+
+    pub(crate) fn set_active_host_slot(&mut self, slot: u16) {
+        self.active_host_slot = Some(slot);
+    }
 }
 
 enum UpdateState {
@@ -157,6 +196,7 @@ enum UpdateState {
     Prepared {
         component: SpComponent,
         id: UpdateId,
+        slot: u16,
         // data would ordinarily be a Cursor<Vec<u8>>, but that can grow and
         // reallocate. We want to ensure that we don't receive any more data
        // than originally promised, so use a Cursor<Box<[u8]>> to ensure that

From b6ebaaad31e376fe6c64ff2b9b54e612fddfd91a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 30 Nov 2023 10:00:53 -0800
Subject: [PATCH 09/10] Bump openssl from 0.10.57 to 0.10.60 (#4569)

---
 Cargo.lock | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6580e1de55..5ccaa2c3d1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5099,9 +5099,9 @@ dependencies = [
 
 [[package]]
 name = "openssl"
-version = "0.10.57"
+version = "0.10.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c"
+checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800"
 dependencies = [
  "bitflags 2.4.0",
  "cfg-if 1.0.0",
@@ -5131,9 +5131,9 @@
 checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.93"
+version = "0.9.96"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d"
+checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f"
 dependencies = [
  "cc",
  "libc",

From 7ef3631ed95830a0120fe7832d0a88e2155b2613 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 30 Nov 2023 10:01:20 -0800
Subject: [PATCH 10/10] Update Rust crate url to 2.5.0 (#4584)

---
 Cargo.lock               | 18 +++++++++---------
 tufaceous-lib/Cargo.toml |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5ccaa2c3d1..358ec1f9b4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2276,9 +2276,9 @@
 checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
 
 [[package]]
 name = "form_urlencoded"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
 dependencies = [
  "percent-encoding",
 ]
 
@@ -3040,9 +3040,9 @@ dependencies = [
 
 [[package]]
 name = "idna"
-version = "0.4.0"
+version = "0.5.0"
 source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -5631,9 +5631,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" @@ -9106,12 +9106,12 @@ dependencies = [ [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", - "idna 0.4.0", + "idna 0.5.0", "percent-encoding", ] diff --git a/tufaceous-lib/Cargo.toml b/tufaceous-lib/Cargo.toml index 0df3a33f98..aa9a26e3bb 100644 --- a/tufaceous-lib/Cargo.toml +++ b/tufaceous-lib/Cargo.toml @@ -33,7 +33,7 @@ tar.workspace = true tokio.workspace = true toml.workspace = true tough.workspace = true -url = "2.4.1" +url = "2.5.0" zip.workspace = true omicron-workspace-hack.workspace = true