From 3365c2b636a2e8c955f8dd50ed43def68f13e9f9 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 21 Oct 2023 11:54:30 -0400 Subject: [PATCH 01/24] Update crucible and propolis (#4306) Bump crucible rev to pick up: - Add session_id to crucible logs - Don't check for a snapshot that's gone - Update Rust crate libc to 0.2.149 - Update Rust crate csv to 1.3.0 - Update Rust crate tokio to 1.33 - Add timeout and failure data collection of replay job Bump propolis rev to pick up: - Bump crucible rev to latest Also ran `cargo update -p libc -p tokio` --- Cargo.lock | 40 +++++++++++++++++++-------------------- Cargo.toml | 16 ++++++++-------- package-manifest.toml | 12 ++++++------ workspace-hack/Cargo.toml | 4 ++-- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06d5f2fb70..d99358a9ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -495,7 +495,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "bhyve_api_sys", "libc", @@ -505,7 +505,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "strum", @@ -1235,7 +1235,7 @@ dependencies = [ [[package]] name = "cpuid_profile_config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "propolis", "serde", @@ -1443,7 +1443,7 @@ dependencies = [ [[package]] name = "crucible" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "aes-gcm-siv", "anyhow", @@ -1488,7 +1488,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "chrono", @@ -1504,7 +1504,7 @@ dependencies = [ [[package]] name = "crucible-client-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "base64 0.21.4", "crucible-workspace-hack", @@ -1517,7 +1517,7 @@ dependencies = [ [[package]] name = "crucible-common" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "atty", @@ -1545,7 +1545,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = 
"git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "chrono", @@ -1562,7 +1562,7 @@ dependencies = [ [[package]] name = "crucible-protocol" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "bincode", @@ -1579,7 +1579,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "crucible-workspace-hack", "libc", @@ -2029,7 +2029,7 @@ checksum = "7e1a8646b2c125eeb9a84ef0faa6d2d102ea0d5da60b824ade2743263117b848" [[package]] name = "dladm" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "strum", @@ -6642,7 +6642,7 @@ dependencies = [ [[package]] name = "propolis" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "anyhow", "bhyve_api", @@ -6675,7 +6675,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "async-trait", "base64 0.21.4", @@ -6699,7 +6699,7 @@ dependencies = [ [[package]] name = "propolis-server" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "anyhow", "async-trait", @@ -6751,7 +6751,7 @@ dependencies = [ [[package]] name = "propolis-server-config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "cpuid_profile_config", "serde", @@ -6763,7 +6763,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "schemars", "serde", @@ -9007,9 +9007,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.32.0" +version = "1.33.0" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", @@ -9803,7 +9803,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "viona_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "viona_api_sys", @@ -9812,7 +9812,7 @@ dependencies = [ [[package]] name = "viona_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index e9eea3c4ee..58d57d15b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -165,10 +165,10 @@ cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } +crucible-agent-client = { git = 
"https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } curve25519-dalek = "4" datatest-stable = "0.1.3" display-error-chain = "0.1.1" @@ -284,9 +284,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", default-features = false, features = ["mock-only"] } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", features = [ "generated-migration" ] } +propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", default-features = false, features = ["mock-only"] } proptest = "1.3.1" quote = "1.0" rand = "0.8.5" @@ -354,7 +354,7 @@ textwrap = "0.16.0" test-strategy = "0.2.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.29" +tokio = "1.33.0" tokio-postgres = { version = "0.7", features = [ 
"with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.18" diff --git a/package-manifest.toml b/package-manifest.toml index 3404f5f44f..5fc8114116 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -381,10 +381,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "010281ff5c3a0807c9e770d79264c954816a055aa482988d81e85ed98242e454" +source.sha256 = "572ac3b19e51b4e476266a62c2b7e06eff81c386cb48247c4b9f9b1e2ee81895" output.type = "zone" [package.crucible-pantry] @@ -392,10 +392,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "809936edff2957e761e49667d5477e34b7a862050b4e082a59fdc95096d3bdd5" +source.sha256 = "812269958e18f54d72bc10bb4fb81f26c084cf762da7fd98e63d58c689be9ad1" output.type = "zone" # Refer to @@ -406,10 +406,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source.commit = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = 
"531e0654de94b6e805836c35aa88b8a1ac691184000a03976e2b7825061e904e" +source.sha256 = "aa1d9dc5c9117c100f9636901e8eec6679d7dfbf869c46b7f2873585f94a1b89" output.type = "zone" [package.mg-ddm-gz] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3d9b195ae3..45c6419d2a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -91,7 +91,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.32.0", features = ["full", "test-util"] } +tokio = { version = "1.33.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } @@ -185,7 +185,7 @@ syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.32.0", features = ["full", "test-util"] } +tokio = { version = "1.33.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } From 132efaccb1534bdafb92318f6927002ecdfa68dd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 21 Oct 2023 09:28:25 -0700 Subject: [PATCH 02/24] [nexus] Improve logging for schema changes (#4309) Improves visibility of logging, indicating: - What version are we upgrading into, throughout the 
process - Logging the names of SQL files used for upgrade, as they occur --- .../src/db/datastore/db_metadata.rs | 29 +++++++++++++------ nexus/db-queries/src/db/datastore/mod.rs | 3 +- nexus/tests/integration_tests/schema.rs | 15 +++++++--- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 9e4e8b1a48..0ae61a7c38 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -25,6 +25,17 @@ use std::str::FromStr; pub const EARLIEST_SUPPORTED_VERSION: &'static str = "1.0.0"; +/// Describes a single file containing a schema change, as SQL. +pub struct SchemaUpgradeStep { + pub path: Utf8PathBuf, + pub sql: String, +} + +/// Describes a sequence of files containing schema changes. +pub struct SchemaUpgrade { + pub steps: Vec, +} + /// Reads a "version directory" and reads all SQL changes into /// a result Vec. /// @@ -34,7 +45,7 @@ pub const EARLIEST_SUPPORTED_VERSION: &'static str = "1.0.0"; /// These are sorted lexicographically. 
pub async fn all_sql_for_version_migration>( path: P, -) -> Result, String> { +) -> Result { let target_dir = path.as_ref(); let mut up_sqls = vec![]; let entries = target_dir @@ -54,13 +65,12 @@ pub async fn all_sql_for_version_migration>( } up_sqls.sort(); - let mut result = vec![]; + let mut result = SchemaUpgrade { steps: vec![] }; for path in up_sqls.into_iter() { - result.push( - tokio::fs::read_to_string(&path) - .await - .map_err(|e| format!("Cannot read {path}: {e}"))?, - ); + let sql = tokio::fs::read_to_string(&path) + .await + .map_err(|e| format!("Cannot read {path}: {e}"))?; + result.steps.push(SchemaUpgradeStep { path: path.to_owned(), sql }); } Ok(result) } @@ -187,7 +197,8 @@ impl DataStore { ) .map_err(|e| format!("Invalid schema path: {}", e.display()))?; - let up_sqls = all_sql_for_version_migration(&target_dir).await?; + let schema_change = + all_sql_for_version_migration(&target_dir).await?; // Confirm the current version, set the "target_version" // column to indicate that a schema update is in-progress. @@ -205,7 +216,7 @@ impl DataStore { "target_version" => target_version.to_string(), ); - for sql in &up_sqls { + for SchemaUpgradeStep { path: _, sql } in &schema_change.steps { // Perform the schema change. 
self.apply_schema_update( ¤t_version, diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index f5283e263e..2dc1e69a6f 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -91,7 +91,8 @@ mod zpool; pub use address_lot::AddressLotCreateResult; pub use db_metadata::{ - all_sql_for_version_migration, EARLIEST_SUPPORTED_VERSION, + all_sql_for_version_migration, SchemaUpgrade, SchemaUpgradeStep, + EARLIEST_SUPPORTED_VERSION, }; pub use dns::DnsVersionUpdateBuilder; pub use instance::InstanceAndActiveVmm; diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index e75211b834..d79dd09fc1 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -89,6 +89,7 @@ async fn apply_update_as_transaction( match apply_update_as_transaction_inner(client, sql).await { Ok(()) => break, Err(err) => { + warn!(log, "Failed to apply update as transaction"; "err" => err.to_string()); client .batch_execute("ROLLBACK;") .await @@ -111,7 +112,9 @@ async fn apply_update( version: &str, times_to_apply: usize, ) { - info!(log, "Performing upgrade to {version}"); + let log = log.new(o!("target version" => version.to_string())); + info!(log, "Performing upgrade"); + let client = crdb.connect().await.expect("failed to connect"); // We skip this for the earliest supported version because these tables @@ -126,11 +129,15 @@ async fn apply_update( } let target_dir = Utf8PathBuf::from(SCHEMA_DIR).join(version); - let sqls = all_sql_for_version_migration(&target_dir).await.unwrap(); + let schema_change = + all_sql_for_version_migration(&target_dir).await.unwrap(); for _ in 0..times_to_apply { - for sql in sqls.iter() { - apply_update_as_transaction(log, &client, sql).await; + for nexus_db_queries::db::datastore::SchemaUpgradeStep { path, sql } in + &schema_change.steps + { + info!(log, "Applying sql schema upgrade step"; 
"path" => path.to_string()); + apply_update_as_transaction(&log, &client, sql).await; } } From aef679c4ae8f0921e34c59e480e1400a2775cc7e Mon Sep 17 00:00:00 2001 From: bnaecker Date: Sat, 21 Oct 2023 10:28:17 -0700 Subject: [PATCH 03/24] Limit size of the search range for VNIs when creating VPCs (#4298) - Fixes #4283. - Adds a relatively small limit to the `NextItem` query used for finding a free VNI during VPC creation. This limits the memory consumption to something very reasonable, but is big enough that we should be extremely unlikely to find _no_ available VNIs in the range. - Add an application-level retry loop when inserting _customer_ VPCs, which catches the unlikely event that there really are no VNIs available, and retries a few times. - Adds tests for the computation of the limited search range. - Adds tests for the actual exhaustion-detection and retry behavior. --- Cargo.lock | 1 + nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/datastore/vpc.rs | 376 ++++++++++++++++++++--- nexus/db-queries/src/db/queries/vpc.rs | 215 ++++++++++++- nexus/db-queries/src/lib.rs | 19 ++ 5 files changed, 565 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d99358a9ad..7dbaa3e008 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4464,6 +4464,7 @@ dependencies = [ "serde_with", "sled-agent-client", "slog", + "static_assertions", "steno", "strum", "subprocess", diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index eaf3dc1295..c16c0f5319 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -47,6 +47,7 @@ serde_urlencoded.workspace = true serde_with.workspace = true sled-agent-client.workspace = true slog.workspace = true +static_assertions.workspace = true steno.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 14886ba018..6db99465a3 100644 --- 
a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -34,12 +34,15 @@ use crate::db::model::VpcUpdate; use crate::db::model::{Ipv4Net, Ipv6Net}; use crate::db::pagination::paginated; use crate::db::queries::vpc::InsertVpcQuery; +use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use crate::db::queries::vpc_subnet::SubnetError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::DatabaseErrorKind; +use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -85,18 +88,23 @@ impl DataStore { SERVICES_VPC.clone(), Some(Vni(ExternalVni::SERVICES_VNI)), ); - let authz_vpc = self + let authz_vpc = match self .project_create_vpc_raw(opctx, &authz_project, vpc_query) .await - .map(|(authz_vpc, _)| authz_vpc) - .or_else(|e| match e { - Error::ObjectAlreadyExists { .. } => Ok(authz::Vpc::new( - authz_project.clone(), - *SERVICES_VPC_ID, - LookupType::ByName(SERVICES_VPC.identity.name.to_string()), - )), - _ => Err(e), - })?; + { + Ok(None) => { + let msg = "VNI exhaustion detected when creating built-in VPCs"; + error!(opctx.log, "{}", msg); + Err(Error::internal_error(msg)) + } + Ok(Some((authz_vpc, _))) => Ok(authz_vpc), + Err(Error::ObjectAlreadyExists { .. 
}) => Ok(authz::Vpc::new( + authz_project.clone(), + *SERVICES_VPC_ID, + LookupType::ByName(SERVICES_VPC.identity.name.to_string()), + )), + Err(e) => Err(e), + }?; // Also add the system router and internet gateway route @@ -287,22 +295,65 @@ impl DataStore { &self, opctx: &OpContext, authz_project: &authz::Project, - vpc: IncompleteVpc, + mut vpc: IncompleteVpc, ) -> Result<(authz::Vpc, Vpc), Error> { - self.project_create_vpc_raw( - opctx, - authz_project, - InsertVpcQuery::new(vpc), - ) - .await + // Generate an iterator that allows us to search the entire space of + // VNIs for this VPC, in manageable chunks to limit memory usage. + let vnis = VniSearchIter::new(vpc.vni.0); + for (i, vni) in vnis.enumerate() { + vpc.vni = Vni(vni); + let id = usdt::UniqueId::new(); + crate::probes::vni__search__range__start!(|| { + (&id, u32::from(vni), VniSearchIter::STEP_SIZE) + }); + match self + .project_create_vpc_raw( + opctx, + authz_project, + InsertVpcQuery::new(vpc.clone()), + ) + .await + { + Ok(Some((authz_vpc, vpc))) => { + crate::probes::vni__search__range__found!(|| { + (&id, u32::from(vpc.vni.0)) + }); + return Ok((authz_vpc, vpc)); + } + Err(e) => return Err(e), + Ok(None) => { + crate::probes::vni__search__range__empty!(|| (&id)); + debug!( + opctx.log, + "No VNIs available within current search range, retrying"; + "attempt" => i, + "vpc_name" => %vpc.identity.name, + "start_vni" => ?vni, + ); + } + } + } + + // We've failed to find a VNI after searching the entire range, so we'll + // return a 503 at this point. + error!( + opctx.log, + "failed to find a VNI after searching entire range"; + ); + Err(Error::unavail("Failed to find a free VNI for this VPC")) } + // Internal implementation for creating a VPC. + // + // This returns an optional VPC. If it is None, then we failed to insert a + // VPC specifically because there are no available VNIs. All other errors + // are returned in the `Result::Err` variant. 
async fn project_create_vpc_raw( &self, opctx: &OpContext, authz_project: &authz::Project, vpc_query: InsertVpcQuery, - ) -> Result<(authz::Vpc, Vpc), Error> { + ) -> Result, Error> { use db::schema::vpc::dsl; assert_eq!(authz_project.id(), vpc_query.vpc.project_id); @@ -312,30 +363,48 @@ impl DataStore { let project_id = vpc_query.vpc.project_id; let conn = self.pool_connection_authorized(opctx).await?; - let vpc: Vpc = Project::insert_resource( + let result: Result = Project::insert_resource( project_id, diesel::insert_into(dsl::vpc).values(vpc_query), ) .insert_and_get_result_async(&conn) - .await - .map_err(|e| match e { - AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { - type_name: ResourceType::Project, - lookup_type: LookupType::ById(project_id), - }, - AsyncInsertError::DatabaseError(e) => public_error_from_diesel( - e, - ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), - ), - })?; - Ok(( - authz::Vpc::new( - authz_project.clone(), - vpc.id(), - LookupType::ByName(vpc.name().to_string()), - ), - vpc, - )) + .await; + match result { + Ok(vpc) => Ok(Some(( + authz::Vpc::new( + authz_project.clone(), + vpc.id(), + LookupType::ByName(vpc.name().to_string()), + ), + vpc, + ))), + Err(AsyncInsertError::CollectionNotFound) => { + Err(Error::ObjectNotFound { + type_name: ResourceType::Project, + lookup_type: LookupType::ById(project_id), + }) + } + Err(AsyncInsertError::DatabaseError( + DieselError::DatabaseError( + DatabaseErrorKind::NotNullViolation, + info, + ), + )) if info + .message() + .starts_with("null value in column \"vni\"") => + { + // We failed the non-null check on the VNI column, which means + // we could not find a valid VNI in our search range. Return + // None instead to signal the error. 
+ Ok(None) + } + Err(AsyncInsertError::DatabaseError(e)) => { + Err(public_error_from_diesel( + e, + ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), + )) + } + } } pub async fn project_update_vpc( @@ -1092,3 +1161,234 @@ impl DataStore { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::datastore_test; + use crate::db::model::Project; + use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params; + use omicron_common::api::external; + use omicron_test_utils::dev; + use slog::info; + + // Test that we detect the right error condition and return None when we + // fail to insert a VPC due to VNI exhaustion. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and checking that we get the expected error response back from the + // `project_create_vpc_raw` call. + #[tokio::test] + async fn test_project_create_vpc_raw_returns_none_on_vni_exhaustion() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log( + "test_project_create_vpc_raw_returns_none_on_vni_exhaustion", + ); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. 
+ let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // At this point, we've filled all the VNIs starting from 2048. Let's + // try to allocate one more, also starting from that position. This + // should fail, because we've explicitly filled the entire range we'll + // search above. 
+ let name: external::Name = "dead-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let Ok(None) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + else { + panic!("Expected Ok(None) when creating a VPC without any available VNIs"); + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + // Test that we appropriately retry when there are no available VNIs. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and then check that we correctly retry + #[tokio::test] + async fn test_project_create_vpc_retries() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log("test_project_create_vpc_retries"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. 
+ let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // Similar to the above test, we've fill all available VPCs starting at + // `starting_vni`. Let's attempt to allocate one beginning there, which + // _should_ fail and be internally retried. Note that we're using + // `project_create_vpc()` here instead of the raw version, to check that + // retry logic. 
+ let name: external::Name = "dead-at-first-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + match datastore + .project_create_vpc(&opctx, &authz_project, incomplete_vpc.clone()) + .await + { + Ok((_, vpc)) => { + assert_eq!(vpc.id(), incomplete_vpc.identity.id); + let expected_vni = starting_vni + MAX_VNI_SEARCH_RANGE_SIZE + 1; + assert_eq!(u32::from(vpc.vni.0), expected_vni); + info!(log, "successfully created VPC after retries"; "vpc" => ?vpc); + } + Err(e) => panic!("Unexpected error when inserting VPC: {e}"), + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/vpc.rs b/nexus/db-queries/src/db/queries/vpc.rs index b1ac8fe1e1..c29a51adb0 100644 --- a/nexus/db-queries/src/db/queries/vpc.rs +++ b/nexus/db-queries/src/db/queries/vpc.rs @@ -245,15 +245,7 @@ struct NextVni { impl NextVni { fn new(vni: Vni) -> Self { - let base_u32 = u32::from(vni.0); - // The valid range is [0, 1 << 24], so the maximum shift is whatever - // gets us to 1 << 24, and the minimum is whatever gets us back to the - // minimum guest VNI. 
- let max_shift = i64::from(external::Vni::MAX_VNI - base_u32); - let min_shift = i64::from( - -i32::try_from(base_u32 - external::Vni::MIN_GUEST_VNI) - .expect("Expected a valid VNI at this point"), - ); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); let generator = DefaultShiftGenerator { base: vni, max_shift, min_shift }; let inner = NextItem::new_unscoped(generator); @@ -278,3 +270,208 @@ impl NextVni { } delegate_query_fragment_impl!(NextVni); + +// Helper type to compute the shift for a `NextItem` query to find VNIs. +#[derive(Clone, Copy, Debug, PartialEq)] +struct VniShifts { + // The minimum `ShiftGenerator` shift. + min_shift: i64, + // The maximum `ShiftGenerator` shift. + max_shift: i64, +} + +/// Restrict the search for a VNI to a small range. +/// +/// VNIs are pretty sparsely allocated (the number of VPCs), and the range is +/// quite large (24 bits). To avoid memory issues, we'll restrict a search +/// for an available VNI to a small range starting from the random starting +/// VNI. +// +// NOTE: This is very small for tests, to ensure we can accurately test the +// failure mode where there are no available VNIs. +#[cfg(not(test))] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 2048; +#[cfg(test)] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 10; + +// Ensure that we cannot search a range that extends beyond the valid guest VNI +// range. +static_assertions::const_assert!( + MAX_VNI_SEARCH_RANGE_SIZE + <= (external::Vni::MAX_VNI - external::Vni::MIN_GUEST_VNI) +); + +impl VniShifts { + fn new(vni: Vni) -> Self { + let base_u32 = u32::from(vni.0); + let range_end = base_u32 + MAX_VNI_SEARCH_RANGE_SIZE; + + // Clamp the maximum shift at the distance to the maximum allowed VNI, + // or the maximum of the range. + let max_shift = i64::from( + (external::Vni::MAX_VNI - base_u32).min(MAX_VNI_SEARCH_RANGE_SIZE), + ); + + // And any remaining part of the range wraps around starting at the + // beginning. 
+ let min_shift = -i64::from( + range_end.checked_sub(external::Vni::MAX_VNI).unwrap_or(0), + ); + Self { min_shift, max_shift } + } +} + +/// An iterator yielding sequential starting VNIs. +/// +/// The VPC insertion query requires a search for the next available VNI, using +/// the `NextItem` query. We limit the search for each query to avoid memory +/// issues on any one query. If we fail to find a VNI, we need to search the +/// next range. This iterator yields the starting positions for the `NextItem` +/// query, so that the entire range can be search in chunks until a free VNI is +/// found. +// +// NOTE: It's technically possible for this to lead to searching the very +// initial portion of the range twice. If we end up wrapping around so that the +// last position yielded by this iterator is `start - x`, then we'll end up +// searching from `start - x` to `start + (MAX_VNI_SEARCH_RANGE_SIZE - x)`, and +// so search those first few after `start` again. This is both innocuous and +// really unlikely. +#[derive(Clone, Copy, Debug)] +pub struct VniSearchIter { + start: u32, + current: u32, + has_wrapped: bool, +} + +impl VniSearchIter { + pub const STEP_SIZE: u32 = MAX_VNI_SEARCH_RANGE_SIZE; + + /// Create a search range, starting from the provided VNI. + pub fn new(start: external::Vni) -> Self { + let start = u32::from(start); + Self { start, current: start, has_wrapped: false } + } +} + +impl std::iter::Iterator for VniSearchIter { + type Item = external::Vni; + + fn next(&mut self) -> Option { + // If we've wrapped around and the computed position is beyond where we + // started, then the ite + if self.has_wrapped && self.current > self.start { + return None; + } + + // Compute the next position. + // + // Make sure we wrap around to the mininum guest VNI. Note that we + // consider the end of the range inclusively, so we subtract one in the + // offset below to end up _at_ the min guest VNI. 
+ let mut next = self.current + MAX_VNI_SEARCH_RANGE_SIZE; + if next > external::Vni::MAX_VNI { + next -= external::Vni::MAX_VNI; + next += external::Vni::MIN_GUEST_VNI - 1; + self.has_wrapped = true; + } + let current = self.current; + self.current = next; + Some(external::Vni::try_from(current).unwrap()) + } +} + +#[cfg(test)] +mod tests { + use super::external; + use super::Vni; + use super::VniSearchIter; + use super::VniShifts; + use super::MAX_VNI_SEARCH_RANGE_SIZE; + + // Ensure that when the search range lies entirely within the range of VNIs, + // we search from the start VNI through the maximum allowed range size. + #[test] + fn test_vni_shift_no_wrapping() { + let vni = Vni(external::Vni::try_from(2048).unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, 0); + assert_eq!(max_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } + + // Ensure that we wrap correctly, when the starting VNI happens to land + // quite close to the end of the allowed range. 
+ #[test] + fn test_vni_shift_with_wrapping() { + let offset = 5; + let vni = + Vni(external::Vni::try_from(external::Vni::MAX_VNI - offset) + .unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, -i64::from(MAX_VNI_SEARCH_RANGE_SIZE - offset)); + assert_eq!(max_shift, i64::from(offset)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } + + #[test] + fn test_vni_search_iter_steps() { + let start = external::Vni::try_from(2048).unwrap(); + let mut it = VniSearchIter::new(start); + let next = it.next().unwrap(); + assert_eq!(next, start); + let next = it.next().unwrap(); + assert_eq!( + next, + external::Vni::try_from( + u32::from(start) + MAX_VNI_SEARCH_RANGE_SIZE + ) + .unwrap() + ); + } + + #[test] + fn test_vni_search_iter_full_count() { + let start = + external::Vni::try_from(external::Vni::MIN_GUEST_VNI).unwrap(); + + let last = VniSearchIter::new(start).last().unwrap(); + println!("{:?}", last); + + pub const fn div_ceil(x: u32, y: u32) -> u32 { + let d = x / y; + let r = x % y; + if r > 0 && y > 0 { + d + 1 + } else { + d + } + } + const N_EXPECTED: u32 = div_ceil( + external::Vni::MAX_VNI - external::Vni::MIN_GUEST_VNI, + MAX_VNI_SEARCH_RANGE_SIZE, + ); + let count = u32::try_from(VniSearchIter::new(start).count()).unwrap(); + assert_eq!(count, N_EXPECTED); + } + + #[test] + fn test_vni_search_iter_wrapping() { + // Start from just before the end of the range. + let start = + external::Vni::try_from(external::Vni::MAX_VNI - 1).unwrap(); + let mut it = VniSearchIter::new(start); + + // We should yield that start position first. + let next = it.next().unwrap(); + assert_eq!(next, start); + + // The next value should be wrapped around to the beginning. + // + // Subtract 2 because we _include_ the max VNI in the search range. 
+ let next = it.next().unwrap(); + assert_eq!( + u32::from(next), + external::Vni::MIN_GUEST_VNI + MAX_VNI_SEARCH_RANGE_SIZE - 2 + ); + } +} diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 29c33039ff..a693f7ff42 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -17,3 +17,22 @@ extern crate newtype_derive; #[cfg(test)] #[macro_use] extern crate diesel; + +#[usdt::provider(provider = "nexus__db__queries")] +mod probes { + // Fires before we start a search over a range for a VNI. + // + // Includes the starting VNI and the size of the range being searched. + fn vni__search__range__start( + _: &usdt::UniqueId, + start_vni: u32, + size: u32, + ) { + } + + // Fires when we successfully find a VNI. + fn vni__search__range__found(_: &usdt::UniqueId, vni: u32) {} + + // Fires when we fail to find a VNI in the provided range. + fn vni__search__range__empty(_: &usdt::UniqueId) {} +} From 825e4f39c8efb147a108d13d87def0221c27c8df Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 21 Oct 2023 16:12:26 -0400 Subject: [PATCH 04/24] Make sure project_delete_snapshot does its job (#4308) `project_delete_snapshot`'s job is to soft-delete snapshots but due to incorrectly using `check_if_exists` with `execute_async`, the state a snapshot was in was not being validated. This was causing a snapshot delete request to proceed for a snapshot that was in state Creating, causing a case where a volume was hard deleted before it had its associated resource accounting performed, leading to regions associated with deleted resources that would never be cleaned up. Fix this and ensure expected behaviour via a test. 
--- nexus/db-queries/src/db/datastore/snapshot.rs | 67 ++++++++++++---- nexus/tests/integration_tests/disks.rs | 78 +++++++++++++++++++ nexus/tests/integration_tests/snapshots.rs | 32 +++++++- 3 files changed, 163 insertions(+), 14 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs index 59fb00c84d..7c03e4bd40 100644 --- a/nexus/db-queries/src/db/datastore/snapshot.rs +++ b/nexus/db-queries/src/db/datastore/snapshot.rs @@ -19,6 +19,7 @@ use crate::db::model::Snapshot; use crate::db::model::SnapshotState; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::UpdateStatus; use crate::db::TransactionError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -252,30 +253,70 @@ impl DataStore { use db::schema::snapshot::dsl; - let updated_rows = diesel::update(dsl::snapshot) + let result = diesel::update(dsl::snapshot) .filter(dsl::time_deleted.is_null()) .filter(dsl::gen.eq(gen)) .filter(dsl::id.eq(snapshot_id)) - .filter(dsl::state.eq_any(ok_to_delete_states)) + .filter(dsl::state.eq_any(ok_to_delete_states.clone())) .set(( dsl::time_deleted.eq(now), dsl::state.eq(SnapshotState::Destroyed), )) .check_if_exists::(snapshot_id) - .execute_async(&*self.pool_connection_authorized(&opctx).await?) + .execute_and_check(&*self.pool_connection_authorized(&opctx).await?) 
.await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Snapshot, + LookupType::ById(snapshot_id), + ), + ) + })?; + + match result.status { + UpdateStatus::Updated => { + // snapshot was soft deleted ok + Ok(result.found.id()) + } - if updated_rows == 0 { - // Either: - // - // - the snapshot was already deleted - // - the generation number changed - // - the state of the snapshot isn't one of `ok_to_delete_states` + UpdateStatus::NotUpdatedButExists => { + let snapshot = result.found; - return Err(Error::invalid_request("snapshot cannot be deleted")); - } + // if the snapshot was already deleted, return Ok - this + // function must remain idempotent for the same input. + if snapshot.time_deleted().is_some() + && snapshot.state == SnapshotState::Destroyed + { + Ok(snapshot.id()) + } else { + // if the snapshot was not deleted, figure out why + if !ok_to_delete_states.contains(&snapshot.state) { + Err(Error::invalid_request(&format!( + "snapshot cannot be deleted in state {:?}", + snapshot.state, + ))) + } else if snapshot.gen != gen { + Err(Error::invalid_request(&format!( + "snapshot cannot be deleted: mismatched generation {:?} != {:?}", + gen, + snapshot.gen, + ))) + } else { + error!( + opctx.log, + "snapshot exists but cannot be deleted: {:?} (db_snapshot is {:?}", + snapshot, + db_snapshot, + ); - Ok(snapshot_id) + Err(Error::invalid_request( + "snapshot exists but cannot be deleted", + )) + } + } + } + } } } diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 71a3977192..a5a8339c34 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1672,6 +1672,84 @@ async fn test_disk_create_for_importing(cptestctx: &ControlPlaneTestContext) { disks_eq(&disks[0], &disk); } +#[nexus_test] +async fn test_project_delete_disk_no_auth_idempotent( + cptestctx: 
&ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + DiskTest::new(&cptestctx).await; + create_org_and_project(client).await; + + // Create a disk + let disks_url = get_disks_url(); + + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: DISK_NAME.parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(1), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + + let disk_url = get_disk_url(DISK_NAME); + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Detached); + + // Call project_delete_disk_no_auth twice, ensuring that the disk is either + // there before deleting and not afterwards. 
+ + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + datastore + .project_delete_disk_no_auth( + &disk.identity.id, + &[DiskState::Detached, DiskState::Faulted], + ) + .await + .unwrap(); + + let r = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await; + + assert!(r.is_err()); + + datastore + .project_delete_disk_no_auth( + &disk.identity.id, + &[DiskState::Detached, DiskState::Faulted], + ) + .await + .unwrap(); +} + async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { NexusRequest::object_get(client, disk_url) .authn_as(AuthnMode::PrivilegedUser) diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index b3cf4bb594..1dd32e6769 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -1041,7 +1041,25 @@ async fn test_create_snapshot_record_idempotent( external::Error::ObjectAlreadyExists { .. 
}, )); - // Test project_delete_snapshot is idempotent + // Move snapshot from Creating to Ready + + let (.., authz_snapshot, db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_created_1.id()) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + datastore + .project_snapshot_update_state( + &opctx, + &authz_snapshot, + db_snapshot.gen, + db::model::SnapshotState::Ready, + ) + .await + .unwrap(); + + // Grab the new snapshot (so generation number is updated) let (.., authz_snapshot, db_snapshot) = LookupPath::new(&opctx, &datastore) .snapshot_id(snapshot_created_1.id()) @@ -1049,6 +1067,8 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); + // Test project_delete_snapshot is idempotent for the same input + datastore .project_delete_snapshot( &opctx, @@ -1064,6 +1084,16 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); + { + // Ensure the snapshot is gone + let r = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_created_1.id()) + .fetch_for(authz::Action::Read) + .await; + + assert!(r.is_err()); + } + datastore .project_delete_snapshot( &opctx, From 1cd3314e234533343b63e0d4f9b71a8ce2e4c432 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Sat, 21 Oct 2023 13:14:15 -0700 Subject: [PATCH 05/24] Bump SP versions to 1.0.3 (#4305) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index e2b16d345e..1396af4d60 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -204328b941deab8bfd3d6e34af96ef04489672fa3b0d5419b60456f9b052e789 build-gimlet-c-image-default-v1.0.2.zip -0ebaa9d98c3734420a482160a7a19dd09510ea3bdc573a090a97ec47137bd624 build-gimlet-d-image-default-v1.0.2.zip -39ec8fd0c946b744e59d9c1b89914f354c60f54e974398029d1dea9d31681f05 build-gimlet-e-image-default-v1.0.2.zip 
-fa5dc36a7a70eeb45d4c4b3b314ba54ee820b3d57ffc07defcc3ae07c142329c build-psc-b-image-default-v1.0.2.zip -4a9850100f8b5fcbbdd11d41ccd8d5037059697a9b65c1ba2dba48a6565ba9e7 build-psc-c-image-default-v1.0.2.zip -1bb870d7921c1731ec83dc38b8e3425703ec17efa614d75e92f07a551312f54b build-sidecar-b-image-default-v1.0.2.zip -6aed0e15e0025bb87a22ecea60d75fa71b54b83bea1e213b8cd5bdb02e7ccb2d build-sidecar-c-image-default-v1.0.2.zip +2df01d7dd17423588c99de4361694efdb6bd375e2f54db053320eeead3e07eda build-gimlet-c-image-default-v1.0.3.zip +8ac0eb6d7817825c6318feb8327f5758a33ccd2479512e3e2424f0eb8e290010 build-gimlet-d-image-default-v1.0.3.zip +eeeb72ec81a843fa1f5093096d1e4500aba6ce01c2d21040a2a10a092595d945 build-gimlet-e-image-default-v1.0.3.zip +de0d9028929322f6d5afc4cb52c198b3402c93a38aa15f9d378617ca1d1112c9 build-psc-b-image-default-v1.0.3.zip +11a6235d852bd75548f12d85b0913cb4ccb0aff3c38bf8a92510a2b9c14dad3c build-psc-c-image-default-v1.0.3.zip +3f863d46a462432f19d3fb5a293b8106da6e138de80271f869692bd29abd994b build-sidecar-b-image-default-v1.0.3.zip +2a9feac7f2da61b843d00edf2693c31c118f202c6cd889d1d1758ea1dd95dbca build-sidecar-c-image-default-v1.0.3.zip diff --git a/tools/hubris_version b/tools/hubris_version index 0f1729d791..b00c3286fe 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.2 psc-v1.0.2 sidecar-v1.0.2) +TAGS=(gimlet-v1.0.3 psc-v1.0.3 sidecar-v1.0.3) From 51b6b160c4913ee77685ab95dbfc50047d5503fe Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 23 Oct 2023 18:06:41 +0100 Subject: [PATCH 06/24] Combine host and trampoline OS images into one CI job (#4273) There are two OS build jobs that each check out and build many repositories in order to build a host image. By combining them into one job, we only pay the cost of this checkout and build phase once. It doesn't actually save a lot of wall time because the two jobs run in parallel, but it saves starting a VM and about 20 minutes of compute time. 
--- .github/buildomat/jobs/host-image.sh | 48 ++++++++++++++++--- .github/buildomat/jobs/trampoline-image.sh | 54 ---------------------- .github/buildomat/jobs/tuf-repo.sh | 7 +-- tools/build-host-image.sh | 16 ------- 4 files changed, 44 insertions(+), 81 deletions(-) delete mode 100755 .github/buildomat/jobs/trampoline-image.sh diff --git a/.github/buildomat/jobs/host-image.sh b/.github/buildomat/jobs/host-image.sh index ba0b4e1ac3..2f4d146a48 100755 --- a/.github/buildomat/jobs/host-image.sh +++ b/.github/buildomat/jobs/host-image.sh @@ -1,11 +1,12 @@ #!/bin/bash #: -#: name = "helios / build OS image" +#: name = "helios / build OS images" #: variety = "basic" #: target = "helios-2.0" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "=/work/helios/image/output/os.tar.gz", +#: "=/work/helios/upload/os-host.tar.gz", +#: "=/work/helios/upload/os-trampoline.tar.gz", #: ] #: access_repos = [ #: "oxidecomputer/amd-apcb", @@ -44,14 +45,49 @@ TOP=$PWD source "$TOP/tools/include/force-git-over-https.sh" -# Checkout helios at a pinned commit into /work/helios -git clone https://github.com/oxidecomputer/helios.git /work/helios -cd /work/helios +# Check out helios into /work/helios +HELIOSDIR=/work/helios +git clone https://github.com/oxidecomputer/helios.git "$HELIOSDIR" +cd "$HELIOSDIR" +# Record the branch and commit in the output +git status --branch --porcelain=2 +# Setting BUILD_OS to no makes setup skip repositories we don't need for +# building the OS itself (we are just building an image from already built OS). +BUILD_OS=no gmake setup + +# Commands that "helios-build" would ask us to run (either explicitly or +# implicitly, to avoid an error). +rc=0 +pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? +case $rc in + # `man pkg` notes that exit code 4 means no changes were made because + # there is nothing to do; that's fine. Any other exit code is an error. 
+ 0 | 4) ;; + *) exit $rc ;; +esac + +pfexec zfs create -p "rpool/images/$USER" + # TODO: Consider importing zones here too? cd "$TOP" +OUTPUTDIR="$HELIOSDIR/upload" +mkdir "$OUTPUTDIR" + +banner OS ./tools/build-host-image.sh -B \ -S /input/package/work/zones/switch-asic.tar.gz \ - /work/helios \ + "$HELIOSDIR" \ /input/package/work/global-zone-packages.tar.gz + +mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-host.tar.gz" + +banner Trampoline + +./tools/build-host-image.sh -R \ + "$HELIOSDIR" \ + /input/package/work/trampoline-global-zone-packages.tar.gz + +mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-trampoline.tar.gz" + diff --git a/.github/buildomat/jobs/trampoline-image.sh b/.github/buildomat/jobs/trampoline-image.sh deleted file mode 100755 index 6014d7dca0..0000000000 --- a/.github/buildomat/jobs/trampoline-image.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -#: -#: name = "helios / build trampoline OS image" -#: variety = "basic" -#: target = "helios-2.0" -#: rust_toolchain = "1.72.1" -#: output_rules = [ -#: "=/work/helios/image/output/os.tar.gz", -#: ] -#: access_repos = [ -#: "oxidecomputer/amd-apcb", -#: "oxidecomputer/amd-efs", -#: "oxidecomputer/amd-firmware", -#: "oxidecomputer/amd-flash", -#: "oxidecomputer/amd-host-image-builder", -#: "oxidecomputer/boot-image-tools", -#: "oxidecomputer/chelsio-t6-roms", -#: "oxidecomputer/compliance-pilot", -#: "oxidecomputer/facade", -#: "oxidecomputer/helios", -#: "oxidecomputer/helios-omicron-brand", -#: "oxidecomputer/helios-omnios-build", -#: "oxidecomputer/helios-omnios-extra", -#: "oxidecomputer/nanobl-rs", -#: ] -#: -#: [dependencies.package] -#: job = "helios / package" -#: -#: [[publish]] -#: series = "image" -#: name = "os-trampoline.tar.gz" -#: from_output = "/work/helios/image/output/os.tar.gz" -#: - -set -o errexit -set -o pipefail -set -o xtrace - -cargo --version -rustc --version - -TOP=$PWD - -source "$TOP/tools/include/force-git-over-https.sh" - -# Checkout helios at a pinned 
commit into /work/helios -git clone https://github.com/oxidecomputer/helios.git /work/helios -cd /work/helios - -cd "$TOP" -./tools/build-host-image.sh -R \ - /work/helios \ - /input/package/work/trampoline-global-zone-packages.tar.gz diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index ea25ab5834..29cf7fa85e 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -19,10 +19,7 @@ #: job = "helios / package" #: #: [dependencies.host] -#: job = "helios / build OS image" -#: -#: [dependencies.trampoline] -#: job = "helios / build trampoline OS image" +#: job = "helios / build OS images" #: #: [[publish]] #: series = "rot-all" @@ -139,7 +136,7 @@ name = "$kind" version = "$VERSION" [artifact.$kind.source] kind = "file" -path = "/input/$kind/work/helios/image/output/os.tar.gz" +path = "/input/host/work/helios/upload/os-$kind.tar.gz" EOF done diff --git a/tools/build-host-image.sh b/tools/build-host-image.sh index d492e84b81..c194edb603 100755 --- a/tools/build-host-image.sh +++ b/tools/build-host-image.sh @@ -92,22 +92,6 @@ function main # Move to the helios checkout cd "$HELIOS_PATH" - # Create the "./helios-build" command, which lets us build images - gmake setup - - # Commands that "./helios-build" would ask us to run (either explicitly - # or implicitly, to avoid an error). - rc=0 - pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? - case $rc in - # `man pkg` notes that exit code 4 means no changes were made because - # there is nothing to do; that's fine. Any other exit code is an error. 
- 0 | 4) ;; - *) exit $rc ;; - esac - - pfexec zfs create -p rpool/images/"$USER" - HELIOS_REPO=https://pkg.oxide.computer/helios/2/dev/ # Build an image name that includes the omicron and host OS hashes From 3af87d2e29a9bf1a75f86a2f472cf8a0ec719737 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 23 Oct 2023 15:08:28 -0500 Subject: [PATCH 07/24] Bump web console (utilization table) (#4318) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/oxidecomputer/console/compare/3538c32a...bd65b9da * [bd65b9da](https://github.com/oxidecomputer/console/commit/bd65b9da) loading state for utilization table, use QueryParamTabs * [8e74accf](https://github.com/oxidecomputer/console/commit/8e74accf) handle empty metrics list on silo utilization * [672cb208](https://github.com/oxidecomputer/console/commit/672cb208) bump API spec (BGP endpoints, not used yet) * [a50777c5](https://github.com/oxidecomputer/console/commit/a50777c5) oxidecomputer/console#1795 * [8978e07a](https://github.com/oxidecomputer/console/commit/8978e07a) bump omicron version (no changes) * [992fb1e1](https://github.com/oxidecomputer/console/commit/992fb1e1) oxidecomputer/console#1797 * [e5b8d029](https://github.com/oxidecomputer/console/commit/e5b8d029) remove unnecessary @types/testing-library__jest-dom dep * [e9a78617](https://github.com/oxidecomputer/console/commit/e9a78617) tanstack query 5 went stable --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 218aef576d..7e8d352efd 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="3538c32a5189bd22df8f6a573399dacfbe81efaa" -SHA2="3289989f2cd6c71ea800e73231190455cc8e4e45ae9304293050b925a9fd9423" +COMMIT="bd65b9da7019ad812dd056e7fc182df2cf4ec128" +SHA2="e4d4f33996a6e89b972fac61737acb7f1dbd21943d1f6bef776d4ee9bcccd2b0" From 35034f75d81821bf7f7901485143ff5f72608e9f Mon Sep 
17 00:00:00 2001 From: Luqman Aden Date: Mon, 23 Oct 2023 18:58:58 -0700 Subject: [PATCH 08/24] Re-introduce support for setting a VLAN for an uplink from RSS (#4319) Going from the previous `UplinkConfig` format to the `PortConfigV1` introduced with the BGP work seems to have lost the ability to configure the VLAN ID for reaching a gateway. The actual APIs and functionality is still there so just needed to be wired back up. --- common/src/api/internal/shared.rs | 3 + nexus/src/app/rack.rs | 1 + .../app/sagas/switch_port_settings_apply.rs | 6 +- nexus/tests/integration_tests/switch_port.rs | 31 ++++++--- openapi/bootstrap-agent.json | 7 +++ openapi/nexus-internal.json | 7 +++ openapi/sled-agent.json | 7 +++ openapi/wicketd.json | 7 +++ schema/rss-sled-plan.json | 9 +++ sled-agent/src/bootstrap/early_networking.rs | 63 ++++++++++++++++++- sled-agent/src/rack_setup/service.rs | 1 + wicket/src/rack_setup/config_template.toml | 3 + wicket/src/rack_setup/config_toml.rs | 23 +++++-- wicket/src/ui/panes/rack_setup.rs | 7 ++- wicketd/src/rss_config.rs | 1 + 15 files changed, 161 insertions(+), 15 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 784da8fcc6..971dbbabbf 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -111,6 +111,8 @@ pub struct RouteConfig { pub destination: IpNetwork, /// The nexthop/gateway address. pub nexthop: IpAddr, + /// The VLAN ID the gateway is reachable over. 
+ pub vid: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] @@ -137,6 +139,7 @@ impl From for PortConfigV1 { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), + vid: value.uplink_vid, }], addresses: vec![value.uplink_cidr.into()], switch: value.switch, diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 907c3ffa78..21918d2687 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -663,6 +663,7 @@ impl super::Nexus { .map(|r| SledRouteConfig { destination: r.dst, nexthop: r.gw.ip(), + vid: r.vid.map(Into::into), }) .collect(), addresses: info.addresses.iter().map(|a| a.address).collect(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 93dc45751a..8442080979 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -911,7 +911,11 @@ pub(crate) async fn bootstore_update( routes: settings .routes .iter() - .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) + .map(|r| RouteConfig { + destination: r.dst, + nexthop: r.gw.ip(), + vid: r.vid.map(Into::into), + }) .collect(), addresses: settings.addresses.iter().map(|a| a.address).collect(), switch: switch_location, diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index fada45694d..d4fd10f819 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -128,11 +128,18 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.routes.insert( "phy0".into(), RouteConfig { - routes: vec![Route { - dst: "1.2.3.0/24".parse().unwrap(), - gw: "1.2.3.4".parse().unwrap(), - vid: None, - }], + routes: vec![ + Route { + dst: "1.2.3.0/24".parse().unwrap(), + gw: "1.2.3.4".parse().unwrap(), + vid: None, + }, + Route { + dst: 
"5.6.7.0/24".parse().unwrap(), + gw: "5.6.7.8".parse().unwrap(), + vid: Some(5), + }, + ], }, ); // addresses @@ -159,7 +166,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(created.links.len(), 1); - assert_eq!(created.routes.len(), 1); + assert_eq!(created.routes.len(), 2); assert_eq!(created.addresses.len(), 1); let link0 = &created.links[0]; @@ -178,6 +185,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &created.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); + assert_eq!(route0.vlan_id, None); + let route1 = &created.routes[1]; + assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); + assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); + assert_eq!(route1.vlan_id, Some(5)); let addr0 = &created.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); @@ -195,7 +207,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(roundtrip.links.len(), 1); - assert_eq!(roundtrip.routes.len(), 1); + assert_eq!(roundtrip.routes.len(), 2); assert_eq!(roundtrip.addresses.len(), 1); let link0 = &roundtrip.links[0]; @@ -214,6 +226,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &roundtrip.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); + assert_eq!(route0.vlan_id, None); + let route1 = &roundtrip.routes[1]; + assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); + assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); + assert_eq!(route1.vlan_id, Some(5)); let addr0 = &roundtrip.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 6dcf756737..0a954fff0d 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -908,6 +908,13 
@@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 411c52ddff..1a3be03de1 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4507,6 +4507,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 486662853c..ec070a3a0b 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2669,6 +2669,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 75db82e8e1..9fd05a9ca7 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -2493,6 +2493,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 39a9a68acc..7ba74ffc0a 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -581,6 +581,15 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "description": "The VLAN ID the gateway is reachable over.", + "type": [ + "integer", + "null" + ], + "format": "uint16", + "minimum": 
0.0 } } }, diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 6c19080e9c..c539f6fdfd 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -509,7 +509,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v4_routes.insert( dst.to_string(), - RouteSettingsV4 { link_id: link_id.0, nexthop, vid: None }, + RouteSettingsV4 { link_id: link_id.0, nexthop, vid: r.vid }, ); } if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = @@ -517,7 +517,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v6_routes.insert( dst.to_string(), - RouteSettingsV6 { link_id: link_id.0, nexthop, vid: None }, + RouteSettingsV6 { link_id: link_id.0, nexthop, vid: r.vid }, ); } } @@ -785,6 +785,65 @@ mod tests { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), + vid: None, + }], + addresses: vec![uplink.uplink_cidr.into()], + switch: uplink.switch, + port: uplink.uplink_port, + uplink_port_speed: uplink.uplink_port_speed, + uplink_port_fec: uplink.uplink_port_fec, + bgp_peers: vec![], + }], + bgp: vec![], + }), + }, + }; + + assert_eq!(expected, v1); + } + + #[test] + fn serialized_early_network_config_v0_to_v1_conversion_with_vid() { + let v0 = EarlyNetworkConfigV0 { + generation: 1, + rack_subnet: Ipv6Addr::UNSPECIFIED, + ntp_servers: Vec::new(), + rack_network_config: Some(RackNetworkConfigV0 { + infra_ip_first: Ipv4Addr::UNSPECIFIED, + infra_ip_last: Ipv4Addr::UNSPECIFIED, + uplinks: vec![UplinkConfig { + gateway_ip: Ipv4Addr::UNSPECIFIED, + switch: SwitchLocation::Switch0, + uplink_port: "Port0".to_string(), + uplink_port_speed: PortSpeed::Speed100G, + uplink_port_fec: PortFec::None, + uplink_cidr: "192.168.0.1/16".parse().unwrap(), + uplink_vid: Some(10), + }], + }), + }; + + let v0_serialized = serde_json::to_vec(&v0).unwrap(); + let bootstore_conf = + bootstore::NetworkConfig { generation: 1, blob: 
v0_serialized }; + + let v1 = EarlyNetworkConfig::try_from(bootstore_conf).unwrap(); + let v0_rack_network_config = v0.rack_network_config.unwrap(); + let uplink = v0_rack_network_config.uplinks[0].clone(); + let expected = EarlyNetworkConfig { + generation: 1, + schema_version: 1, + body: EarlyNetworkConfigBody { + ntp_servers: v0.ntp_servers.clone(), + rack_network_config: Some(RackNetworkConfigV1 { + rack_subnet: Ipv6Network::new(v0.rack_subnet, 56).unwrap(), + infra_ip_first: v0_rack_network_config.infra_ip_first, + infra_ip_last: v0_rack_network_config.infra_ip_last, + ports: vec![PortConfigV1 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: uplink.gateway_ip.into(), + vid: Some(10), }], addresses: vec![uplink.uplink_cidr.into()], switch: uplink.switch, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 7f6469d2c0..b54a8f0ba0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -591,6 +591,7 @@ impl ServiceInner { .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), diff --git a/wicket/src/rack_setup/config_template.toml b/wicket/src/rack_setup/config_template.toml index 617b61fadc..da07f42cd4 100644 --- a/wicket/src/rack_setup/config_template.toml +++ b/wicket/src/rack_setup/config_template.toml @@ -47,6 +47,9 @@ infra_ip_last = "" [[rack_network_config.ports]] # Routes associated with this port. # { nexthop = "1.2.3.4", destination = "0.0.0.0/0" } +# Can also optionally specify a VLAN id if the next hop is reachable +# over an 802.1Q tagged L2 segment. +# { nexthop = "5.6.7.8", destination = "5.6.7.0/24", vid = 5 } routes = [] # Addresses associated with this port. 
diff --git a/wicket/src/rack_setup/config_toml.rs b/wicket/src/rack_setup/config_toml.rs index e087c9aa7c..9616939308 100644 --- a/wicket/src/rack_setup/config_toml.rs +++ b/wicket/src/rack_setup/config_toml.rs @@ -245,6 +245,12 @@ fn populate_network_table( r.destination.to_string(), )), ); + if let Some(vid) = r.vid { + route.insert( + "vid", + Value::Integer(Formatted::new(vid.into())), + ); + } routes.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes))); @@ -379,6 +385,7 @@ mod tests { .map(|r| InternalRouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), @@ -478,10 +485,18 @@ mod tests { infra_ip_last: "172.30.0.10".parse().unwrap(), ports: vec![PortConfigV1 { addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "172.30.0.10".parse().unwrap(), - }], + routes: vec![ + RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "172.30.0.10".parse().unwrap(), + vid: None, + }, + RouteConfig { + destination: "10.20.0.0/16".parse().unwrap(), + nexthop: "10.0.0.20".parse().unwrap(), + vid: Some(20), + }, + ], bgp_peers: vec![BgpPeerConfig { asn: 47, addr: "10.2.3.4".parse().unwrap(), diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 086d01ce9d..36febe404f 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -718,7 +718,12 @@ fn rss_config_text<'a>( vec![ Span::styled(" • Route : ", label_style), Span::styled( - format!("{} -> {}", r.destination, r.nexthop), + format!( + "{} -> {} (vid={})", + r.destination, + r.nexthop, + r.vid.unwrap_or(0), + ), ok_style, ), ] diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index a96acc56a0..d446ed71d1 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -511,6 +511,7 @@ fn validate_rack_network_config( .map(|r| 
BaRouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), From 4555620332d0c25f34141dbf069803834470963c Mon Sep 17 00:00:00 2001 From: Luqman Aden Date: Mon, 23 Oct 2023 21:57:54 -0700 Subject: [PATCH 09/24] Revert "Re-introduce support for setting a VLAN for an uplink from RSS (#4319)" (#4323) This reverts commit 35034f75d81821bf7f7901485143ff5f72608e9f. https://github.com/oxidecomputer/omicron/pull/4319#issuecomment-1776439950: Note that setting the VLAN id on routes is going away entirely. The implementation was too costly on the Tofino, and we had to remove it for another feature as it did not wind up getting used. --- common/src/api/internal/shared.rs | 3 - nexus/src/app/rack.rs | 1 - .../app/sagas/switch_port_settings_apply.rs | 6 +- nexus/tests/integration_tests/switch_port.rs | 31 +++------ openapi/bootstrap-agent.json | 7 --- openapi/nexus-internal.json | 7 --- openapi/sled-agent.json | 7 --- openapi/wicketd.json | 7 --- schema/rss-sled-plan.json | 9 --- sled-agent/src/bootstrap/early_networking.rs | 63 +------------------ sled-agent/src/rack_setup/service.rs | 1 - wicket/src/rack_setup/config_template.toml | 3 - wicket/src/rack_setup/config_toml.rs | 23 ++----- wicket/src/ui/panes/rack_setup.rs | 7 +-- wicketd/src/rss_config.rs | 1 - 15 files changed, 15 insertions(+), 161 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 971dbbabbf..784da8fcc6 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -111,8 +111,6 @@ pub struct RouteConfig { pub destination: IpNetwork, /// The nexthop/gateway address. pub nexthop: IpAddr, - /// The VLAN ID the gateway is reachable over. 
- pub vid: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] @@ -139,7 +137,6 @@ impl From for PortConfigV1 { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), - vid: value.uplink_vid, }], addresses: vec![value.uplink_cidr.into()], switch: value.switch, diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 21918d2687..907c3ffa78 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -663,7 +663,6 @@ impl super::Nexus { .map(|r| SledRouteConfig { destination: r.dst, nexthop: r.gw.ip(), - vid: r.vid.map(Into::into), }) .collect(), addresses: info.addresses.iter().map(|a| a.address).collect(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 8442080979..93dc45751a 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -911,11 +911,7 @@ pub(crate) async fn bootstore_update( routes: settings .routes .iter() - .map(|r| RouteConfig { - destination: r.dst, - nexthop: r.gw.ip(), - vid: r.vid.map(Into::into), - }) + .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) .collect(), addresses: settings.addresses.iter().map(|a| a.address).collect(), switch: switch_location, diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index d4fd10f819..fada45694d 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -128,18 +128,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.routes.insert( "phy0".into(), RouteConfig { - routes: vec![ - Route { - dst: "1.2.3.0/24".parse().unwrap(), - gw: "1.2.3.4".parse().unwrap(), - vid: None, - }, - Route { - dst: "5.6.7.0/24".parse().unwrap(), - gw: "5.6.7.8".parse().unwrap(), - vid: Some(5), - }, - ], + routes: vec![Route { + dst: 
"1.2.3.0/24".parse().unwrap(), + gw: "1.2.3.4".parse().unwrap(), + vid: None, + }], }, ); // addresses @@ -166,7 +159,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(created.links.len(), 1); - assert_eq!(created.routes.len(), 2); + assert_eq!(created.routes.len(), 1); assert_eq!(created.addresses.len(), 1); let link0 = &created.links[0]; @@ -185,11 +178,6 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &created.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); - assert_eq!(route0.vlan_id, None); - let route1 = &created.routes[1]; - assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); - assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); - assert_eq!(route1.vlan_id, Some(5)); let addr0 = &created.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); @@ -207,7 +195,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(roundtrip.links.len(), 1); - assert_eq!(roundtrip.routes.len(), 2); + assert_eq!(roundtrip.routes.len(), 1); assert_eq!(roundtrip.addresses.len(), 1); let link0 = &roundtrip.links[0]; @@ -226,11 +214,6 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &roundtrip.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); - assert_eq!(route0.vlan_id, None); - let route1 = &roundtrip.routes[1]; - assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); - assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); - assert_eq!(route1.vlan_id, Some(5)); let addr0 = &roundtrip.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 0a954fff0d..6dcf756737 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -908,13 +908,6 @@ 
"description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 1a3be03de1..411c52ddff 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4507,13 +4507,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ec070a3a0b..486662853c 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2669,13 +2669,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 9fd05a9ca7..75db82e8e1 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -2493,13 +2493,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 7ba74ffc0a..39a9a68acc 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -581,15 +581,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "description": "The VLAN ID the gateway is reachable over.", - "type": [ - "integer", - "null" - ], - "format": "uint16", - "minimum": 0.0 
} } }, diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index c539f6fdfd..6c19080e9c 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -509,7 +509,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v4_routes.insert( dst.to_string(), - RouteSettingsV4 { link_id: link_id.0, nexthop, vid: r.vid }, + RouteSettingsV4 { link_id: link_id.0, nexthop, vid: None }, ); } if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = @@ -517,7 +517,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v6_routes.insert( dst.to_string(), - RouteSettingsV6 { link_id: link_id.0, nexthop, vid: r.vid }, + RouteSettingsV6 { link_id: link_id.0, nexthop, vid: None }, ); } } @@ -785,65 +785,6 @@ mod tests { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), - vid: None, - }], - addresses: vec![uplink.uplink_cidr.into()], - switch: uplink.switch, - port: uplink.uplink_port, - uplink_port_speed: uplink.uplink_port_speed, - uplink_port_fec: uplink.uplink_port_fec, - bgp_peers: vec![], - }], - bgp: vec![], - }), - }, - }; - - assert_eq!(expected, v1); - } - - #[test] - fn serialized_early_network_config_v0_to_v1_conversion_with_vid() { - let v0 = EarlyNetworkConfigV0 { - generation: 1, - rack_subnet: Ipv6Addr::UNSPECIFIED, - ntp_servers: Vec::new(), - rack_network_config: Some(RackNetworkConfigV0 { - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - uplinks: vec![UplinkConfig { - gateway_ip: Ipv4Addr::UNSPECIFIED, - switch: SwitchLocation::Switch0, - uplink_port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - uplink_cidr: "192.168.0.1/16".parse().unwrap(), - uplink_vid: Some(10), - }], - }), - }; - - let v0_serialized = serde_json::to_vec(&v0).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v0_serialized 
}; - - let v1 = EarlyNetworkConfig::try_from(bootstore_conf).unwrap(); - let v0_rack_network_config = v0.rack_network_config.unwrap(); - let uplink = v0_rack_network_config.uplinks[0].clone(); - let expected = EarlyNetworkConfig { - generation: 1, - schema_version: 1, - body: EarlyNetworkConfigBody { - ntp_servers: v0.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV1 { - rack_subnet: Ipv6Network::new(v0.rack_subnet, 56).unwrap(), - infra_ip_first: v0_rack_network_config.infra_ip_first, - infra_ip_last: v0_rack_network_config.infra_ip_last, - ports: vec![PortConfigV1 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: uplink.gateway_ip.into(), - vid: Some(10), }], addresses: vec![uplink.uplink_cidr.into()], switch: uplink.switch, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b54a8f0ba0..7f6469d2c0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -591,7 +591,6 @@ impl ServiceInner { .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), diff --git a/wicket/src/rack_setup/config_template.toml b/wicket/src/rack_setup/config_template.toml index da07f42cd4..617b61fadc 100644 --- a/wicket/src/rack_setup/config_template.toml +++ b/wicket/src/rack_setup/config_template.toml @@ -47,9 +47,6 @@ infra_ip_last = "" [[rack_network_config.ports]] # Routes associated with this port. # { nexthop = "1.2.3.4", destination = "0.0.0.0/0" } -# Can also optionally specify a VLAN id if the next hop is reachable -# over an 802.1Q tagged L2 segment. -# { nexthop = "5.6.7.8", destination = "5.6.7.0/24", vid = 5 } routes = [] # Addresses associated with this port. 
diff --git a/wicket/src/rack_setup/config_toml.rs b/wicket/src/rack_setup/config_toml.rs index 9616939308..e087c9aa7c 100644 --- a/wicket/src/rack_setup/config_toml.rs +++ b/wicket/src/rack_setup/config_toml.rs @@ -245,12 +245,6 @@ fn populate_network_table( r.destination.to_string(), )), ); - if let Some(vid) = r.vid { - route.insert( - "vid", - Value::Integer(Formatted::new(vid.into())), - ); - } routes.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes))); @@ -385,7 +379,6 @@ mod tests { .map(|r| InternalRouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), @@ -485,18 +478,10 @@ mod tests { infra_ip_last: "172.30.0.10".parse().unwrap(), ports: vec![PortConfigV1 { addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![ - RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "172.30.0.10".parse().unwrap(), - vid: None, - }, - RouteConfig { - destination: "10.20.0.0/16".parse().unwrap(), - nexthop: "10.0.0.20".parse().unwrap(), - vid: Some(20), - }, - ], + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "172.30.0.10".parse().unwrap(), + }], bgp_peers: vec![BgpPeerConfig { asn: 47, addr: "10.2.3.4".parse().unwrap(), diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 36febe404f..086d01ce9d 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -718,12 +718,7 @@ fn rss_config_text<'a>( vec![ Span::styled(" • Route : ", label_style), Span::styled( - format!( - "{} -> {} (vid={})", - r.destination, - r.nexthop, - r.vid.unwrap_or(0), - ), + format!("{} -> {}", r.destination, r.nexthop), ok_style, ), ] diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index d446ed71d1..a96acc56a0 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -511,7 +511,6 @@ fn validate_rack_network_config( .map(|r| 
BaRouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), From 0f51441b8fc5264ee6ab7f9118722b18e4a1198f Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 23 Oct 2023 22:30:07 -0700 Subject: [PATCH 10/24] bump version to 1.0.3 in package.sh (#4324) dogfood wants a 1.0.3 Co-authored-by: Alan Hanson --- .github/buildomat/jobs/package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index c1cb04124d..5cfb6fcfd7 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -37,7 +37,7 @@ rustc --version # trampoline global zone images. # COMMIT=$(git rev-parse HEAD) -VERSION="1.0.2-0.ci+git${COMMIT:0:11}" +VERSION="1.0.3-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From 40d0981d60afc389f109e0a6cc6eddb8e5c76aa3 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:21:52 -0700 Subject: [PATCH 11/24] chore(deps): update rust crate semver to 1.0.20 (#4242) --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7dbaa3e008..84d908140a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -866,7 +866,7 @@ checksum = "fb9ac64500cc83ce4b9f8dafa78186aa008c8dea77a09b94cd307fd0cd5022a8" dependencies = [ "camino", "cargo-platform", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "thiserror", @@ -4388,7 +4388,7 @@ dependencies = [ "rand 0.8.5", "ref-cast", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "sled-agent-client", @@ -4911,7 +4911,7 @@ dependencies = [ "reqwest", "ring", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "serde_human_bytes", @@ 
-4953,7 +4953,7 @@ dependencies = [ "reqwest", "ring", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "serde_human_bytes", @@ -5149,7 +5149,7 @@ dependencies = [ "rustls", "samael", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "serde_urlencoded", @@ -5237,7 +5237,7 @@ dependencies = [ "rayon", "reqwest", "ring", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "sled-hardware", @@ -5350,7 +5350,7 @@ dependencies = [ "rcgen", "reqwest", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "serial_test", @@ -5483,7 +5483,7 @@ dependencies = [ "ring", "rustix 0.38.9", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "sha2", @@ -5529,7 +5529,7 @@ dependencies = [ "flate2", "futures-util", "reqwest", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "tar", @@ -7466,7 +7466,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.18", + "semver 1.0.20", ] [[package]] @@ -7750,9 +7750,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] @@ -9505,8 +9505,8 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", - "rand 0.4.6", + "cfg-if 1.0.0", + "rand 0.8.5", "static_assertions", ] @@ -10047,7 +10047,7 @@ dependencies = [ "ratatui", "reqwest", "rpassword", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "shell-words", diff --git a/Cargo.toml b/Cargo.toml index 
58d57d15b8..d006d9524a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -305,7 +305,7 @@ rustls = "0.21.7" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.12" secrecy = "0.8.0" -semver = { version = "1.0.18", features = ["std", "serde"] } +semver = { version = "1.0.20", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 45c6419d2a..1c3a0b0c73 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -77,7 +77,7 @@ regex-syntax = { version = "0.7.5" } reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.18", features = ["serde"] } +semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } sha2 = { version = "0.10.7", features = ["oid"] } @@ -170,7 +170,7 @@ regex-syntax = { version = "0.7.5" } reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.18", features = ["serde"] } +semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } sha2 = { version = "0.10.7", features = ["oid"] } From 672883e3282aceb91166d9497d6301b20e2c1719 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:22:18 -0700 Subject: [PATCH 12/24] chore(deps): update rust crate tokio-util to 0.7.9 (#4268) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84d908140a..395eeb1e79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9119,9 +9119,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" dependencies = [ "bytes", "futures-core", diff --git a/Cargo.toml b/Cargo.toml index d006d9524a..0b1dee821e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -358,7 +358,7 @@ tokio = "1.33.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.18" -tokio-util = "0.7.8" +tokio-util = "0.7.9" toml = "0.7.8" toml_edit = "0.19.15" topological-sort = "0.2.2" From b6ec68f38873198cd6e9e8638d306a98f6d882d5 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 24 Oct 2023 12:22:40 -0500 Subject: [PATCH 13/24] [nexus] [minor] Bring BGP endpoint doc comments in line with conventions (#4329) I noticed there were periods in the docs sidebar. Also used the word "list" as appropriate. 
--- nexus/src/external_api/http_entrypoints.rs | 16 ++++++++-------- openapi/nexus.json | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 990704904a..48be2de6b0 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2609,7 +2609,7 @@ async fn networking_loopback_address_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Get loopback addresses, optionally filtering by id +/// List loopback addresses #[endpoint { method = GET, path = "/v1/system/networking/loopback-address", @@ -2824,7 +2824,7 @@ async fn networking_switch_port_clear_settings( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Create a new BGP configuration. +/// Create a new BGP configuration #[endpoint { method = POST, path = "/v1/system/networking/bgp", @@ -2845,7 +2845,7 @@ async fn networking_bgp_config_create( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Get BGP configurations. +/// List BGP configurations #[endpoint { method = GET, path = "/v1/system/networking/bgp", @@ -2900,7 +2900,7 @@ async fn networking_bgp_status( } //TODO pagination? the normal by-name/by-id stuff does not work here -/// Get imported IPv4 BGP routes. +/// Get imported IPv4 BGP routes #[endpoint { method = GET, path = "/v1/system/networking/bgp-routes-ipv4", @@ -2921,7 +2921,7 @@ async fn networking_bgp_imported_routes_ipv4( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete a BGP configuration. +/// Delete a BGP configuration #[endpoint { method = DELETE, path = "/v1/system/networking/bgp", @@ -2942,7 +2942,7 @@ async fn networking_bgp_config_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Create a new BGP announce set. 
+/// Create a new BGP announce set #[endpoint { method = POST, path = "/v1/system/networking/bgp-announce", @@ -2964,7 +2964,7 @@ async fn networking_bgp_announce_set_create( } //TODO pagination? the normal by-name/by-id stuff does not work here -/// Get originated routes for a given BGP configuration. +/// Get originated routes for a BGP configuration #[endpoint { method = GET, path = "/v1/system/networking/bgp-announce", @@ -2990,7 +2990,7 @@ async fn networking_bgp_announce_set_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete a BGP announce set. +/// Delete a BGP announce set #[endpoint { method = DELETE, path = "/v1/system/networking/bgp-announce", diff --git a/openapi/nexus.json b/openapi/nexus.json index 456f2aebd6..f1bfa4351f 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -5170,7 +5170,7 @@ "tags": [ "system/networking" ], - "summary": "Get BGP configurations.", + "summary": "List BGP configurations", "operationId": "networking_bgp_config_list", "parameters": [ { @@ -5235,7 +5235,7 @@ "tags": [ "system/networking" ], - "summary": "Create a new BGP configuration.", + "summary": "Create a new BGP configuration", "operationId": "networking_bgp_config_create", "requestBody": { "content": { @@ -5270,7 +5270,7 @@ "tags": [ "system/networking" ], - "summary": "Delete a BGP configuration.", + "summary": "Delete a BGP configuration", "operationId": "networking_bgp_config_delete", "parameters": [ { @@ -5301,7 +5301,7 @@ "tags": [ "system/networking" ], - "summary": "Get originated routes for a given BGP configuration.", + "summary": "Get originated routes for a BGP configuration", "operationId": "networking_bgp_announce_set_list", "parameters": [ { @@ -5341,7 +5341,7 @@ "tags": [ "system/networking" ], - "summary": "Create a new BGP announce set.", + "summary": "Create a new BGP announce set", "operationId": "networking_bgp_announce_set_create", "requestBody": { "content": { @@ -5376,7 +5376,7 @@ "tags": [ 
"system/networking" ], - "summary": "Delete a BGP announce set.", + "summary": "Delete a BGP announce set", "operationId": "networking_bgp_announce_set_delete", "parameters": [ { @@ -5407,7 +5407,7 @@ "tags": [ "system/networking" ], - "summary": "Get imported IPv4 BGP routes.", + "summary": "Get imported IPv4 BGP routes", "operationId": "networking_bgp_imported_routes_ipv4", "parameters": [ { @@ -5482,7 +5482,7 @@ "tags": [ "system/networking" ], - "summary": "Get loopback addresses, optionally filtering by id", + "summary": "List loopback addresses", "operationId": "networking_loopback_address_list", "parameters": [ { From a0fccaff7f36a64d8c270896720fb75027c73ca8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:22:50 -0700 Subject: [PATCH 14/24] chore(deps): update rust crate flate2 to 1.0.28 (#4280) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 395eeb1e79..4429a5031f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2520,9 +2520,9 @@ checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499" [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", diff --git a/Cargo.toml b/Cargo.toml index 0b1dee821e..64f4d5af61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,7 +187,7 @@ either = "1.9.0" expectorate = "1.1.0" fatfs = "0.3.6" filetime = "0.2.22" -flate2 = "1.0.27" +flate2 = "1.0.28" flume = "0.11.0" foreign-types = "0.3.2" fs-err = "2.9.0" diff --git a/workspace-hack/Cargo.toml 
b/workspace-hack/Cargo.toml index 1c3a0b0c73..28aa7a6110 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -35,7 +35,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr diesel = { version = "2.1.1", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } -flate2 = { version = "1.0.27" } +flate2 = { version = "1.0.28" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } @@ -128,7 +128,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr diesel = { version = "2.1.1", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } -flate2 = { version = "1.0.27" } +flate2 = { version = "1.0.28" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } From e14b7379089e6b7b6fafe43306ff8c1ba5b0b45e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:24:56 -0700 Subject: [PATCH 15/24] chore(deps): update rust crate bcs to 0.1.6 (#4279) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4429a5031f..dd4fa4d69b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -484,9 +484,9 @@ dependencies = [ [[package]] name = "bcs" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd3ffe8b19a604421a5d461d4a70346223e535903fbc3067138bddbebddcf77" 
+checksum = "85b6598a2f5d564fb7855dc6b06fd1c38cff5a72bd8b863a4d021938497b440a" dependencies = [ "serde", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 64f4d5af61..62d6712d7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.21.4" bb8 = "0.8.1" -bcs = "0.1.5" +bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } From c3c968d362f4a8da8ca32220db297727f3f8b7e0 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:29:15 -0700 Subject: [PATCH 16/24] chore(deps): update rust crate openssl-probe to 0.1.5 (#4253) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 62d6712d7d..7eb0373d6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -259,7 +259,7 @@ openapiv3 = "1.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -openssl-probe = "0.1.2" +openssl-probe = "0.1.5" opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" } oso = "0.26" owo-colors = "3.5.0" From 35a7f447c8be0b867f2bd923835d87c541b3f809 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:30:23 -0700 Subject: [PATCH 17/24] chore(deps): update rust crate sha2 to 0.10.8 (#4267) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd4fa4d69b..191db4715d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8008,9 +8008,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.7" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if 1.0.0", "cpufeatures", diff --git a/Cargo.toml b/Cargo.toml index 7eb0373d6c..73f89d3bdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -315,7 +315,7 @@ serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" serde_with = "2.3.3" serial_test = "0.10" -sha2 = "0.10.7" +sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 28aa7a6110..884fa14431 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -80,7 +80,7 @@ schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } -sha2 = { version = "0.10.7", features = ["oid"] } +sha2 = { version = "0.10.8", features = ["oid"] } signature = { version 
= "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -173,7 +173,7 @@ schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } -sha2 = { version = "0.10.7", features = ["oid"] } +sha2 = { version = "0.10.8", features = ["oid"] } signature = { version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 4bd19c83048b56568208c3b7b159a697653f99fc Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 24 Oct 2023 10:54:31 -0700 Subject: [PATCH 18/24] [wicketd] Fall back to old behavior if SP is too old to support reading RoT CMPA/CFPA (#4326) We hit this on rack3 but did not hit it on dogfood due to more frequent updates to dogfood's SP/RoT. Prior to this PR, wicketd expected to be able to ask an SP for its RoT's CMPA/CFPA pages, but if a rack is jumping from the 1.0.2 release to current master, its SPs are too old to understand that message. With this change, we will fall back to wicketd's previous behavior of requiring exactly 1 RoT archive if we fail to fetch the CMPA from the target component. 
--- wicketd/src/update_tracker.rs | 193 +++++++++++++-------- wicketd/tests/integration_tests/updates.rs | 5 +- 2 files changed, 123 insertions(+), 75 deletions(-) diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index e968d65a30..18b692703c 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -1608,7 +1608,13 @@ impl UpdateContext { page.copy_from_slice(&output_buf[..n]); Ok(page) }; - let cmpa = self + + // We may be talking to an SP / RoT that doesn't yet know how to give us + // its CMPA. If we hit a protocol version error here, we can fall back + // to our old behavior of requiring exactly 1 RoT artifact. + let mut artifact_to_apply = None; + + let cmpa = match self .mgs_client .sp_rot_cmpa_get( self.sp.type_, @@ -1616,82 +1622,125 @@ impl UpdateContext { SpComponent::ROT.const_as_str(), ) .await - .map_err(|err| UpdateTerminalError::GetRotCmpaFailed { - error: err.into(), - }) - .and_then(|response| { + { + Ok(response) => { let data = response.into_inner().base64_data; - base64_decode_rot_page(data).map_err(|error| { + Some(base64_decode_rot_page(data).map_err(|error| { UpdateTerminalError::GetRotCmpaFailed { error } - }) - })?; - let cfpa = self - .mgs_client - .sp_rot_cfpa_get( - self.sp.type_, - self.sp.slot, - SpComponent::ROT.const_as_str(), - &gateway_client::types::GetCfpaParams { - slot: RotCfpaSlot::Active, - }, - ) - .await - .map_err(|err| UpdateTerminalError::GetRotCfpaFailed { - error: err.into(), - }) - .and_then(|response| { - let data = response.into_inner().base64_data; - base64_decode_rot_page(data).map_err(|error| { - UpdateTerminalError::GetRotCfpaFailed { error } - }) - })?; - - // Loop through our possible artifacts and find the first (we only - // expect one!) that verifies against the RoT's CMPA/CFPA. 
- let mut artifact_to_apply = None; - for artifact in available_artifacts { - let image = artifact - .data - .reader_stream() - .and_then(|stream| async { - let mut buf = Vec::with_capacity(artifact.data.file_size()); - StreamReader::new(stream) - .read_to_end(&mut buf) - .await - .context("I/O error reading extracted archive")?; - Ok(buf) - }) - .await - .map_err(|error| { - UpdateTerminalError::FailedFindingSignedRotImage { error } - })?; - let archive = RawHubrisArchive::from_vec(image).map_err(|err| { - UpdateTerminalError::FailedFindingSignedRotImage { - error: anyhow::Error::new(err).context(format!( - "failed to read hubris archive for {:?}", - artifact.id - )), - } - })?; - match archive.verify(&cmpa, &cfpa) { - Ok(()) => { + })?) + } + // TODO is there a better way to check the _specific_ error response + // we get here? We only have a couple of strings; we could check the + // error string contents for something like "WrongVersion", but + // that's pretty fragile. Instead we'll treat any error response + // here as a "fallback to previous behavior". + Err(err @ gateway_client::Error::ErrorResponse(_)) => { + if available_artifacts.len() == 1 { info!( - self.log, "RoT archive verification success"; - "name" => artifact.id.name.as_str(), - "version" => %artifact.id.version, - "kind" => ?artifact.id.kind, + self.log, + "Failed to get RoT CMPA page; \ + using only available RoT artifact"; + "err" => %err, ); - artifact_to_apply = Some(artifact.clone()); - break; - } - Err(err) => { - // We log this but don't fail - we want to continue looking - // for a verifiable artifact. 
- info!( - self.log, "RoT archive verification failed"; - "artifact" => ?artifact.id, - "err" => %DisplayErrorChain::new(&err), + artifact_to_apply = Some(available_artifacts[0].clone()); + None + } else { + error!( + self.log, + "Failed to get RoT CMPA; unable to choose from \ + multiple available RoT artifacts"; + "err" => %err, + "num_rot_artifacts" => available_artifacts.len(), ); + return Err(UpdateTerminalError::GetRotCmpaFailed { + error: err.into(), + }); + } + } + // For any other error (e.g., comms failures), just fail as normal. + Err(err) => { + return Err(UpdateTerminalError::GetRotCmpaFailed { + error: err.into(), + }); + } + }; + + // If we were able to fetch the CMPA, we also need to fetch the CFPA and + // then find the correct RoT artifact. If we weren't able to fetch the + // CMPA, we either already set `artifact_to_apply` to the one-and-only + // RoT artifact available, or we returned a terminal error. + if let Some(cmpa) = cmpa { + let cfpa = self + .mgs_client + .sp_rot_cfpa_get( + self.sp.type_, + self.sp.slot, + SpComponent::ROT.const_as_str(), + &gateway_client::types::GetCfpaParams { + slot: RotCfpaSlot::Active, + }, + ) + .await + .map_err(|err| UpdateTerminalError::GetRotCfpaFailed { + error: err.into(), + }) + .and_then(|response| { + let data = response.into_inner().base64_data; + base64_decode_rot_page(data).map_err(|error| { + UpdateTerminalError::GetRotCfpaFailed { error } + }) + })?; + + // Loop through our possible artifacts and find the first (we only + // expect one!) that verifies against the RoT's CMPA/CFPA. 
+ for artifact in available_artifacts { + let image = artifact + .data + .reader_stream() + .and_then(|stream| async { + let mut buf = + Vec::with_capacity(artifact.data.file_size()); + StreamReader::new(stream) + .read_to_end(&mut buf) + .await + .context("I/O error reading extracted archive")?; + Ok(buf) + }) + .await + .map_err(|error| { + UpdateTerminalError::FailedFindingSignedRotImage { + error, + } + })?; + let archive = + RawHubrisArchive::from_vec(image).map_err(|err| { + UpdateTerminalError::FailedFindingSignedRotImage { + error: anyhow::Error::new(err).context(format!( + "failed to read hubris archive for {:?}", + artifact.id + )), + } + })?; + match archive.verify(&cmpa, &cfpa) { + Ok(()) => { + info!( + self.log, "RoT archive verification success"; + "name" => artifact.id.name.as_str(), + "version" => %artifact.id.version, + "kind" => ?artifact.id.kind, + ); + artifact_to_apply = Some(artifact.clone()); + break; + } + Err(err) => { + // We log this but don't fail - we want to continue + // looking for a verifiable artifact. + info!( + self.log, "RoT archive verification failed"; + "artifact" => ?artifact.id, + "err" => %DisplayErrorChain::new(&err), + ); + } } } } diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index af743190c2..a9be9d4747 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -168,9 +168,8 @@ async fn test_updates() { match terminal_event.kind { StepEventKind::ExecutionFailed { failed_step, .. } => { // TODO: obviously we shouldn't stop here, get past more of the - // update process in this test. We currently fail when attempting to - // look up the RoT's CMPA/CFPA. - assert_eq!(failed_step.info.component, UpdateComponent::Rot); + // update process in this test. 
+ assert_eq!(failed_step.info.component, UpdateComponent::Sp); } other => { panic!("unexpected terminal event kind: {other:?}"); From ca1b0ba216ed8af485e46a014399e3adb00abb90 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 24 Oct 2023 12:01:30 -0700 Subject: [PATCH 19/24] [renovate] use actions/pin checked into our global renovate-config (#4320) See https://github.com/oxidecomputer/renovate-config/blob/main/actions/pin.json. --- .github/renovate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/renovate.json b/.github/renovate.json index 405a3e282b..606be44c87 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -4,6 +4,6 @@ "local>oxidecomputer/renovate-config", "local>oxidecomputer/renovate-config//rust/autocreate", "local>oxidecomputer/renovate-config:post-upgrade", - "helpers:pinGitHubActionDigests" + "local>oxidecomputer/renovate-config//actions/pin" ] } From 9daf99b870865044ed781107605791752f0eec94 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Tue, 24 Oct 2023 14:46:00 -0700 Subject: [PATCH 20/24] Sort fields when extracting timeseries schema (#4312) - Fields are reported in a sample via the implementations of `Target` and `Metric`, which may or may not be sorted. They'll be in declaration order, if folks derive the traits. When deriving a schema from the sample, collect fields into a set to ignore order. - Convert between a `DbFieldList` and `BTreeSet` when inserting / reading fields from the nested tables in ClickHouse. - Add sanity test that we're sorting field schema correctly. - Errors for schema mismatches report entire schema, not just fields. 
--- oximeter/db/src/client.rs | 169 ++++++++++++++++++++++++-------------- oximeter/db/src/lib.rs | 140 +++++++++++++++++++++++++++++-- oximeter/db/src/model.rs | 18 ++-- oximeter/db/src/query.rs | 29 ++++--- 4 files changed, 272 insertions(+), 84 deletions(-) diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index ffa5d97d52..c2b7c820a8 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -35,7 +35,7 @@ use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::SocketAddr; use std::num::NonZeroU32; -use std::sync::Mutex; +use tokio::sync::Mutex; use uuid::Uuid; #[usdt::provider(provider = "clickhouse__client")] @@ -208,16 +208,12 @@ impl Client { &self, name: &TimeseriesName, ) -> Result, Error> { - { - let map = self.schema.lock().unwrap(); - if let Some(s) = map.get(name) { - return Ok(Some(s.clone())); - } + let mut schema = self.schema.lock().await; + if let Some(s) = schema.get(name) { + return Ok(Some(s.clone())); } - // `get_schema` acquires the lock internally, so the above scope is required to avoid - // deadlock. - self.get_schema().await?; - Ok(self.schema.lock().unwrap().get(name).map(Clone::clone)) + self.get_schema_locked(&mut schema).await?; + Ok(schema.get(name).map(Clone::clone)) } /// List timeseries schema, paginated. @@ -384,30 +380,48 @@ impl Client { &self, sample: &Sample, ) -> Result, Error> { - let schema = model::schema_for(sample); - let name = schema.timeseries_name.clone(); - let maybe_new_schema = match self.schema.lock().unwrap().entry(name) { - Entry::Vacant(entry) => Ok(Some(entry.insert(schema).clone())), + let sample_schema = model::schema_for(sample); + let name = sample_schema.timeseries_name.clone(); + let mut schema = self.schema.lock().await; + + // We've taken the lock before we do any checks for schema. First, we + // check if we've already got one in the cache. If not, we update all + // the schema from the database, and then check the map again. 
If we + // find a schema (which now either came from the cache or the latest + // read of the DB), then we check that the derived schema matches. If + // not, we can insert it in the cache and the DB. + if !schema.contains_key(&name) { + self.get_schema_locked(&mut schema).await?; + } + match schema.entry(name) { Entry::Occupied(entry) => { let existing_schema = entry.get(); - if existing_schema == &schema { + if existing_schema == &sample_schema { Ok(None) } else { - let err = - error_for_schema_mismatch(&schema, &existing_schema); error!( self.log, - "timeseries schema mismatch, sample will be skipped: {}", - err + "timeseries schema mismatch, sample will be skipped"; + "expected" => ?existing_schema, + "actual" => ?sample_schema, + "sample" => ?sample, ); - Err(err) + Err(Error::SchemaMismatch { + expected: existing_schema.clone(), + actual: sample_schema, + }) } } - }?; - Ok(maybe_new_schema.map(|schema| { - serde_json::to_string(&model::DbTimeseriesSchema::from(schema)) - .expect("Failed to convert schema to DB model") - })) + Entry::Vacant(entry) => { + entry.insert(sample_schema.clone()); + Ok(Some( + serde_json::to_string(&model::DbTimeseriesSchema::from( + sample_schema, + )) + .expect("Failed to convert schema to DB model"), + )) + } + } } // Select the timeseries, including keys and field values, that match the given field-selection @@ -503,10 +517,15 @@ impl Client { response } - async fn get_schema(&self) -> Result<(), Error> { + // Get timeseries schema from the database. + // + // Can only be called after acquiring the lock around `self.schema`. 
+ async fn get_schema_locked( + &self, + schema: &mut BTreeMap, + ) -> Result<(), Error> { debug!(self.log, "retrieving timeseries schema from database"); let sql = { - let schema = self.schema.lock().unwrap(); if schema.is_empty() { format!( "SELECT * FROM {db_name}.timeseries_schema FORMAT JSONEachRow;", @@ -545,7 +564,7 @@ impl Client { ); (schema.timeseries_name.clone(), schema) }); - self.schema.lock().unwrap().extend(new); + schema.extend(new); } Ok(()) } @@ -593,7 +612,7 @@ impl DbWrite for Client { } Ok(schema) => { if let Some(schema) = schema { - debug!(self.log, "new timeseries schema: {:?}", schema); + debug!(self.log, "new timeseries schema"; "schema" => ?schema); new_schema.push(schema); } } @@ -730,28 +749,6 @@ async fn handle_db_response( } } -// Generate an error describing a schema mismatch -fn error_for_schema_mismatch( - schema: &TimeseriesSchema, - existing_schema: &TimeseriesSchema, -) -> Error { - let expected = existing_schema - .field_schema - .iter() - .map(|field| (field.name.clone(), field.ty)) - .collect(); - let actual = schema - .field_schema - .iter() - .map(|field| (field.name.clone(), field.ty)) - .collect(); - Error::SchemaMismatch { - name: schema.timeseries_name.to_string(), - expected, - actual, - } -} - #[cfg(test)] mod tests { use super::*; @@ -1599,7 +1596,7 @@ mod tests { ); // Clear the internal caches of seen schema - client.schema.lock().unwrap().clear(); + client.schema.lock().await.clear(); // Insert the new sample client.insert_samples(&[sample.clone()]).await.unwrap(); @@ -1611,7 +1608,7 @@ mod tests { let expected_schema = client .schema .lock() - .unwrap() + .await .get(×eries_name) .expect( "After inserting a new sample, its schema should be included", @@ -2484,13 +2481,13 @@ mod tests { #[tokio::test] async fn test_get_schema_no_new_values() { let (mut db, client, _) = setup_filter_testcase().await; - let schema = &client.schema.lock().unwrap().clone(); - client.get_schema().await.expect("Failed to get timeseries 
schema"); - assert_eq!( - schema, - &*client.schema.lock().unwrap(), - "Schema shouldn't change" - ); + let original_schema = client.schema.lock().await.clone(); + let mut schema = client.schema.lock().await; + client + .get_schema_locked(&mut schema) + .await + .expect("Failed to get timeseries schema"); + assert_eq!(&original_schema, &*schema, "Schema shouldn't change"); db.cleanup().await.expect("Failed to cleanup database"); } @@ -2585,4 +2582,56 @@ mod tests { ); db.cleanup().await.expect("Failed to cleanup database"); } + + #[tokio::test] + async fn test_update_schema_cache_on_new_sample() { + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_update_schema_cache_on_new_sample"); + let log = &logctx.log; + + // Let the OS assign a port and discover it after ClickHouse starts + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + let samples = [test_util::make_sample()]; + client.insert_samples(&samples).await.unwrap(); + + // Get the count of schema directly from the DB, which should have just + // one. + let response = client.execute_with_body( + "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; + ").await.unwrap(); + assert_eq!(response.lines().count(), 1, "Expected exactly 1 schema"); + assert_eq!(client.schema.lock().await.len(), 1); + + // Clear the internal cache, and insert the sample again. + // + // This should cause us to look up the schema in the DB again, but _not_ + // insert a new one. + client.schema.lock().await.clear(); + assert!(client.schema.lock().await.is_empty()); + + client.insert_samples(&samples).await.unwrap(); + + // Get the count of schema directly from the DB, which should still have + // only the one schema. 
+ let response = client.execute_with_body( + "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; + ").await.unwrap(); + assert_eq!( + response.lines().count(), + 1, + "Expected exactly 1 schema again" + ); + assert_eq!(client.schema.lock().await.len(), 1); + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index c878b8ff2a..11ecbeddc8 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -4,7 +4,7 @@ //! Tools for interacting with the control plane telemetry database. -// Copyright 2021 Oxide Computer Company +// Copyright 2023 Oxide Computer Company use crate::query::StringFieldSelector; use chrono::{DateTime, Utc}; @@ -13,6 +13,7 @@ pub use oximeter::{DatumType, Field, FieldType, Measurement, Sample}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::convert::TryFrom; use std::num::NonZeroU32; use thiserror::Error; @@ -36,12 +37,8 @@ pub enum Error { Database(String), /// A schema provided when collecting samples did not match the expected schema - #[error("Schema mismatch for timeseries '{name}', expected fields {expected:?} found fields {actual:?}")] - SchemaMismatch { - name: String, - expected: BTreeMap, - actual: BTreeMap, - }, + #[error("Schema mismatch for timeseries '{0}'", expected.timeseries_name)] + SchemaMismatch { expected: TimeseriesSchema, actual: TimeseriesSchema }, #[error("Timeseries not found for: {0}")] TimeseriesNotFound(String), @@ -153,6 +150,13 @@ impl std::convert::TryFrom for TimeseriesName { } } +impl std::str::FromStr for TimeseriesName { + type Err = Error; + fn from_str(s: &str) -> Result { + s.try_into() + } +} + impl PartialEq for TimeseriesName where T: AsRef, @@ -177,7 +181,7 @@ fn validate_timeseries_name(s: &str) -> Result<&str, Error> { #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub 
struct TimeseriesSchema { pub timeseries_name: TimeseriesName, - pub field_schema: Vec, + pub field_schema: BTreeSet, pub datum_type: DatumType, pub created: DateTime, } @@ -398,6 +402,8 @@ const TIMESERIES_NAME_REGEX: &str = #[cfg(test)] mod tests { use super::*; + use crate::model::DbFieldList; + use crate::model::DbTimeseriesSchema; use std::convert::TryFrom; use uuid::Uuid; @@ -505,4 +511,122 @@ mod tests { &output.join("\n"), ); } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. + // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. 
+ #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + ty: FieldType::U64, + source: FieldSource::Target, + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + ty: FieldType::U64, + source: FieldSource::Metric, + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + field_schema, + datum_type, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = model::schema_for(&sample); + assert_eq!(derived_schema, expected_schema); + } + + #[test] + fn test_unsorted_db_fields_are_sorted_on_read() { + let target_field = FieldSchema { + name: String::from("later"), + ty: FieldType::U64, + source: FieldSource::Target, + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + ty: FieldType::U64, + source: FieldSource::Metric, + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name: timeseries_name.clone(), + field_schema, + datum_type, + created: Utc::now(), + }; + + // The fields here are sorted by target and then metric, which is how we + // used to insert them into the DB. We're checking that they are totally + // sorted when we read them out of the DB, even though they are not in + // the extracted model type. 
+ let db_fields = DbFieldList { + names: vec![target_field.name.clone(), metric_field.name.clone()], + types: vec![target_field.ty.into(), metric_field.ty.into()], + sources: vec![ + target_field.source.into(), + metric_field.source.into(), + ], + }; + let db_schema = DbTimeseriesSchema { + timeseries_name: timeseries_name.to_string(), + field_schema: db_fields, + datum_type: datum_type.into(), + created: expected_schema.created, + }; + assert_eq!(expected_schema, TimeseriesSchema::from(db_schema)); + } + + #[test] + fn test_field_schema_ordering() { + let mut fields = BTreeSet::new(); + fields.insert(FieldSchema { + name: String::from("second"), + ty: FieldType::U64, + source: FieldSource::Target, + }); + fields.insert(FieldSchema { + name: String::from("first"), + ty: FieldType::U64, + source: FieldSource::Target, + }); + let mut iter = fields.iter(); + assert_eq!(iter.next().unwrap().name, "first"); + assert_eq!(iter.next().unwrap().name, "second"); + assert!(iter.next().is_none()); + } } diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 1314c5c649..7f5b150b46 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -30,6 +30,7 @@ use oximeter::types::Sample; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -107,7 +108,7 @@ pub(crate) struct DbFieldList { pub sources: Vec, } -impl From for Vec { +impl From for BTreeSet { fn from(list: DbFieldList) -> Self { list.names .into_iter() @@ -122,8 +123,8 @@ impl From for Vec { } } -impl From> for DbFieldList { - fn from(list: Vec) -> Self { +impl From> for DbFieldList { + fn from(list: BTreeSet) -> Self { let mut names = Vec::with_capacity(list.len()); let mut types = Vec::with_capacity(list.len()); let mut sources = Vec::with_capacity(list.len()); @@ -914,6 +915,9 @@ pub(crate) fn unroll_measurement_row(sample: &Sample) -> (String, String) { /// 
Return the schema for a `Sample`. pub(crate) fn schema_for(sample: &Sample) -> TimeseriesSchema { + // The fields are iterated through whatever order the `Target` or `Metric` + // impl chooses. We'll store in a set ordered by field name, to ignore the + // declaration order. let created = Utc::now(); let field_schema = sample .target_fields() @@ -1403,7 +1407,7 @@ mod tests { sources: vec![DbFieldSource::Target, DbFieldSource::Metric], }; - let list = vec![ + let list: BTreeSet<_> = [ FieldSchema { name: String::from("field0"), ty: FieldType::I64, @@ -1414,11 +1418,13 @@ mod tests { ty: FieldType::IpAddr, source: FieldSource::Metric, }, - ]; + ] + .into_iter() + .collect(); assert_eq!(DbFieldList::from(list.clone()), db_list); assert_eq!(db_list, list.clone().into()); - let round_trip: Vec = + let round_trip: BTreeSet = DbFieldList::from(list.clone()).into(); assert_eq!(round_trip, list); } diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index e9e1600739..6a55d3f518 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -721,6 +721,7 @@ mod tests { use crate::FieldSource; use crate::TimeseriesName; use chrono::NaiveDateTime; + use std::collections::BTreeSet; use std::convert::TryFrom; #[test] @@ -774,7 +775,7 @@ mod tests { fn test_select_query_builder_filter_raw() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -785,7 +786,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -905,7 +908,7 @@ mod tests { fn test_select_query_builder_no_fields() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![], + field_schema: BTreeSet::new(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -927,7 +930,7 @@ mod tests 
{ fn test_select_query_builder_limit_offset() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![], + field_schema: BTreeSet::new(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -996,7 +999,7 @@ mod tests { fn test_select_query_builder_no_selectors() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1007,7 +1010,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -1057,7 +1062,7 @@ mod tests { fn test_select_query_builder_field_selectors() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1068,7 +1073,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -1106,7 +1113,7 @@ mod tests { fn test_select_query_builder_full() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1117,7 +1124,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; From 8ca3e674f8249bd9f1415426293f08150759fd36 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 03:06:54 +0000 Subject: [PATCH 21/24] chore(deps): update actions/checkout digest to b4ffde6 (#4313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following 
updates: | Package | Type | Update | Change | |---|---|---|---| | [actions/checkout](https://togithub.com/actions/checkout) | action | digest | [`8ade135` -> `b4ffde6`](https://togithub.com/actions/checkout/compare/8ade135...b4ffde6) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. â™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index df4cbc9b59..e85a67f1e0 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -17,7 +17,7 @@ jobs: env: RUSTFLAGS: -D warnings steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable From cb63320216e5d57a1a73d12eb40605477b485b38 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 03:07:21 +0000 Subject: [PATCH 22/24] chore(deps): update taiki-e/install-action digest to ac89944 (#4275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | 
[`e659bf8` -> `ac89944`](https://togithub.com/taiki-e/install-action/compare/e659bf8...ac89944) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. â™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index e85a67f1e0..ebf881b1f2 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -22,7 +22,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@e659bf85ee986e37e35cc1c53bfeebe044d8133e # v2 + uses: taiki-e/install-action@ac89944b5b150d78567ab6c02badfbe48b0b55aa # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From d45058098f9b782e9fa25b1739d3a5fca73a0486 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 06:23:47 +0000 Subject: [PATCH 23/24] chore(deps): update actions/checkout action to v3.6.0 (#4344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [actions/checkout](https://togithub.com/actions/checkout) | action | minor | [`v3.5.0` -> `v3.6.0`](https://togithub.com/actions/checkout/compare/v3.5.0...v3.6.0) | --- ### Release Notes
actions/checkout (actions/checkout) ### [`v3.6.0`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v360) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.3...v3.6.0) - [Fix: Mark test scripts with Bash'isms to be run via Bash](https://togithub.com/actions/checkout/pull/1377) - [Add option to fetch tags even if fetch-depth > 0](https://togithub.com/actions/checkout/pull/579) ### [`v3.5.3`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v353) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.2...v3.5.3) - [Fix: Checkout fail in self-hosted runners when faulty submodule are checked-in](https://togithub.com/actions/checkout/pull/1196) - [Fix typos found by codespell](https://togithub.com/actions/checkout/pull/1287) - [Add support for sparse checkouts](https://togithub.com/actions/checkout/pull/1369) ### [`v3.5.2`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v352) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.1...v3.5.2) - [Fix api endpoint for GHES](https://togithub.com/actions/checkout/pull/1289) ### [`v3.5.1`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v351) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.0...v3.5.1) - [Fix slow checkout on Windows](https://togithub.com/actions/checkout/pull/1246)
--- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/check-opte-ver.yml | 2 +- .github/workflows/check-workspace-deps.yml | 2 +- .github/workflows/rust.yml | 8 ++++---- .github/workflows/update-dendrite.yml | 2 +- .github/workflows/update-maghemite.yml | 2 +- .github/workflows/validate-openapi-spec.yml | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index a8e18f080e..9fc390277b 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -9,7 +9,7 @@ jobs: check-opte-ver: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 521afa7359..7ba0c66566 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -10,6 +10,6 @@ jobs: check-workspace-deps: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Check Workspace Dependencies run: cargo xtask check-workspace-deps 
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 873b316e16..8cc98f192f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,7 @@ jobs: check-style: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -27,7 +27,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -53,7 +53,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -79,7 +79,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: 
Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version diff --git a/.github/workflows/update-dendrite.yml b/.github/workflows/update-dendrite.yml index 10d8ef7618..9d79dfc8f9 100644 --- a/.github/workflows/update-dendrite.yml +++ b/.github/workflows/update-dendrite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index 7aa2b8b6c8..e2512dc6ce 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 1d6c152296..9eab5ccaa1 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -10,7 +10,7 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: actions/setup-node@64ed1c7eab4cce3362f8c340dee64e5eaeef8f7c # v3.6.0 with: node-version: '18' From 6f4923e63d39e1a530db992ed7f9c26976559846 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 25 Oct 2023 10:11:39 -0700 Subject: [PATCH 24/24] Add new datum types to timeseries schema table (#4349) - Fixes https://github.com/oxidecomputer/omicron/issues/4336 
- Adds regression test making sure a query selecting all datum types from the schema table succeeds, even if it returns zero results. This uses an iterator over the datum type enum, so it should catch future changes to the type. - Adds new datum types to schema table - Bumps oximeter database version number --- oximeter/db/src/client.rs | 41 +++++++++++++++++++++++++ oximeter/db/src/db-replicated-init.sql | 20 +++++++++++- oximeter/db/src/db-single-node-init.sql | 20 +++++++++++- oximeter/db/src/model.rs | 2 +- oximeter/oximeter/src/types.rs | 1 + 5 files changed, 81 insertions(+), 3 deletions(-) diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index c2b7c820a8..2d563045f2 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -2634,4 +2634,45 @@ mod tests { db.cleanup().await.expect("Failed to cleanup ClickHouse server"); logctx.cleanup_successful(); } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/4336. + // + // This tests that we can successfully query all extant datum types from the + // schema table. There may be no such values, but the query itself should + // succeed. + #[tokio::test] + async fn test_select_all_datum_types() { + use strum::IntoEnumIterator; + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_select_all_datum_types"); + let log = &logctx.log; + + // Let the OS assign a port and discover it after ClickHouse starts + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + + // Attempt to select all schema with each datum type. 
+ for ty in oximeter::DatumType::iter() { + let sql = format!( + "SELECT COUNT() \ + FROM {}.timeseries_schema WHERE \ + datum_type = '{:?}'", + crate::DATABASE_NAME, + crate::model::DbDatumType::from(ty), + ); + let res = client.execute_with_body(sql).await.unwrap(); + let count = res.trim().parse::().unwrap(); + assert_eq!(count, 0); + } + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/db-replicated-init.sql b/oximeter/db/src/db-replicated-init.sql index 7b756f4b0d..3b734c113d 100644 --- a/oximeter/db/src/db-replicated-init.sql +++ b/oximeter/db/src/db-replicated-init.sql @@ -652,7 +652,25 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluste 'CumulativeI64' = 6, 'CumulativeF64' = 7, 'HistogramI64' = 8, - 'HistogramF64' = 9 + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 ), created DateTime64(9, 'UTC') ) diff --git a/oximeter/db/src/db-single-node-init.sql b/oximeter/db/src/db-single-node-init.sql index 1f648fd5d5..2fb5c36397 100644 --- a/oximeter/db/src/db-single-node-init.sql +++ b/oximeter/db/src/db-single-node-init.sql @@ -476,7 +476,25 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema 'CumulativeI64' = 6, 'CumulativeF64' = 7, 'HistogramI64' = 8, - 'HistogramF64' = 9 + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 ), created 
DateTime64(9, 'UTC') ) diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 7f5b150b46..41c7ab9d24 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -42,7 +42,7 @@ use uuid::Uuid; /// /// TODO(#4271): The current implementation of versioning will wipe the metrics /// database if this number is incremented. -pub const OXIMETER_VERSION: u64 = 1; +pub const OXIMETER_VERSION: u64 = 2; // Wrapper type to represent a boolean in the database. // diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index d3f1b9e746..0cc3299ec4 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -275,6 +275,7 @@ pub struct Field { JsonSchema, Serialize, Deserialize, + strum::EnumIter, )] #[serde(rename_all = "snake_case")] pub enum DatumType {