From 3365c2b636a2e8c955f8dd50ed43def68f13e9f9 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 21 Oct 2023 11:54:30 -0400 Subject: [PATCH 01/24] Update crucible and propolis (#4306) Bump crucible rev to pick up: - Add session_id to crucible logs - Don't check for a snapshot that's gone - Update Rust crate libc to 0.2.149 - Update Rust crate csv to 1.3.0 - Update Rust crate tokio to 1.33 - Add timeout and failure data collection of replay job Bump propolis rev to pick up: - Bump crucible rev to latest Also ran `cargo update -p libc -p tokio` --- Cargo.lock | 40 +++++++++++++++++++-------------------- Cargo.toml | 16 ++++++++-------- package-manifest.toml | 12 ++++++------ workspace-hack/Cargo.toml | 4 ++-- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 06d5f2fb70..d99358a9ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -495,7 +495,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "bhyve_api_sys", "libc", @@ -505,7 +505,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "strum", @@ -1235,7 +1235,7 @@ dependencies = [ [[package]] name = "cpuid_profile_config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "propolis", "serde", @@ -1443,7 +1443,7 @@ dependencies = [ [[package]] name = "crucible" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "aes-gcm-siv", "anyhow", @@ -1488,7 +1488,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "chrono", @@ -1504,7 +1504,7 @@ dependencies = [ [[package]] name = "crucible-client-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "base64 0.21.4", "crucible-workspace-hack", @@ -1517,7 +1517,7 @@ dependencies = [ [[package]] name = "crucible-common" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "atty", @@ -1545,7 +1545,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = 
"git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "chrono", @@ -1562,7 +1562,7 @@ dependencies = [ [[package]] name = "crucible-protocol" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "anyhow", "bincode", @@ -1579,7 +1579,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" dependencies = [ "crucible-workspace-hack", "libc", @@ -2029,7 +2029,7 @@ checksum = "7e1a8646b2c125eeb9a84ef0faa6d2d102ea0d5da60b824ade2743263117b848" [[package]] name = "dladm" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "strum", @@ -6642,7 +6642,7 @@ dependencies = [ [[package]] name = "propolis" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "anyhow", "bhyve_api", @@ -6675,7 +6675,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "async-trait", "base64 0.21.4", @@ -6699,7 +6699,7 @@ dependencies = [ [[package]] name = "propolis-server" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "anyhow", "async-trait", @@ -6751,7 +6751,7 @@ dependencies = [ [[package]] name = "propolis-server-config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "cpuid_profile_config", "serde", @@ -6763,7 +6763,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "schemars", "serde", @@ -9007,9 +9007,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.32.0" +version = "1.33.0" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", @@ -9803,7 +9803,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "viona_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", "viona_api_sys", @@ -9812,7 +9812,7 @@ dependencies = [ [[package]] name = "viona_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index e9eea3c4ee..58d57d15b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -165,10 +165,10 @@ cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } +crucible-agent-client = { git = 
"https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } curve25519-dalek = "4" datatest-stable = "0.1.3" display-error-chain = "0.1.1" @@ -284,9 +284,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", default-features = false, features = ["mock-only"] } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", features = [ "generated-migration" ] } +propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", default-features = false, features = ["mock-only"] } proptest = "1.3.1" quote = "1.0" rand = "0.8.5" @@ -354,7 +354,7 @@ textwrap = "0.16.0" test-strategy = "0.2.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.29" +tokio = "1.33.0" tokio-postgres = { version = "0.7", features = [ 
"with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.18" diff --git a/package-manifest.toml b/package-manifest.toml index 3404f5f44f..5fc8114116 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -381,10 +381,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "010281ff5c3a0807c9e770d79264c954816a055aa482988d81e85ed98242e454" +source.sha256 = "572ac3b19e51b4e476266a62c2b7e06eff81c386cb48247c4b9f9b1e2ee81895" output.type = "zone" [package.crucible-pantry] @@ -392,10 +392,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" +source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "809936edff2957e761e49667d5477e34b7a862050b4e082a59fdc95096d3bdd5" +source.sha256 = "812269958e18f54d72bc10bb4fb81f26c084cf762da7fd98e63d58c689be9ad1" output.type = "zone" # Refer to @@ -406,10 +406,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" +source.commit = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = 
"531e0654de94b6e805836c35aa88b8a1ac691184000a03976e2b7825061e904e" +source.sha256 = "aa1d9dc5c9117c100f9636901e8eec6679d7dfbf869c46b7f2873585f94a1b89" output.type = "zone" [package.mg-ddm-gz] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3d9b195ae3..45c6419d2a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -91,7 +91,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.32.0", features = ["full", "test-util"] } +tokio = { version = "1.33.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } @@ -185,7 +185,7 @@ syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.32.0", features = ["full", "test-util"] } +tokio = { version = "1.33.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } From 132efaccb1534bdafb92318f6927002ecdfa68dd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 21 Oct 2023 09:28:25 -0700 Subject: [PATCH 02/24] [nexus] Improve logging for schema changes (#4309) Improves visibility of logging, indicating: - What version are we upgrading into, throughout the 
process - Logging the names of SQL files used for upgrade, as they occur --- .../src/db/datastore/db_metadata.rs | 29 +++++++++++++------ nexus/db-queries/src/db/datastore/mod.rs | 3 +- nexus/tests/integration_tests/schema.rs | 15 +++++++--- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 9e4e8b1a48..0ae61a7c38 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -25,6 +25,17 @@ use std::str::FromStr; pub const EARLIEST_SUPPORTED_VERSION: &'static str = "1.0.0"; +/// Describes a single file containing a schema change, as SQL. +pub struct SchemaUpgradeStep { + pub path: Utf8PathBuf, + pub sql: String, +} + +/// Describes a sequence of files containing schema changes. +pub struct SchemaUpgrade { + pub steps: Vec, +} + /// Reads a "version directory" and reads all SQL changes into /// a result Vec. /// @@ -34,7 +45,7 @@ pub const EARLIEST_SUPPORTED_VERSION: &'static str = "1.0.0"; /// These are sorted lexicographically. 
pub async fn all_sql_for_version_migration>( path: P, -) -> Result, String> { +) -> Result { let target_dir = path.as_ref(); let mut up_sqls = vec![]; let entries = target_dir @@ -54,13 +65,12 @@ pub async fn all_sql_for_version_migration>( } up_sqls.sort(); - let mut result = vec![]; + let mut result = SchemaUpgrade { steps: vec![] }; for path in up_sqls.into_iter() { - result.push( - tokio::fs::read_to_string(&path) - .await - .map_err(|e| format!("Cannot read {path}: {e}"))?, - ); + let sql = tokio::fs::read_to_string(&path) + .await + .map_err(|e| format!("Cannot read {path}: {e}"))?; + result.steps.push(SchemaUpgradeStep { path: path.to_owned(), sql }); } Ok(result) } @@ -187,7 +197,8 @@ impl DataStore { ) .map_err(|e| format!("Invalid schema path: {}", e.display()))?; - let up_sqls = all_sql_for_version_migration(&target_dir).await?; + let schema_change = + all_sql_for_version_migration(&target_dir).await?; // Confirm the current version, set the "target_version" // column to indicate that a schema update is in-progress. @@ -205,7 +216,7 @@ impl DataStore { "target_version" => target_version.to_string(), ); - for sql in &up_sqls { + for SchemaUpgradeStep { path: _, sql } in &schema_change.steps { // Perform the schema change. 
self.apply_schema_update( ¤t_version, diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index f5283e263e..2dc1e69a6f 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -91,7 +91,8 @@ mod zpool; pub use address_lot::AddressLotCreateResult; pub use db_metadata::{ - all_sql_for_version_migration, EARLIEST_SUPPORTED_VERSION, + all_sql_for_version_migration, SchemaUpgrade, SchemaUpgradeStep, + EARLIEST_SUPPORTED_VERSION, }; pub use dns::DnsVersionUpdateBuilder; pub use instance::InstanceAndActiveVmm; diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index e75211b834..d79dd09fc1 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -89,6 +89,7 @@ async fn apply_update_as_transaction( match apply_update_as_transaction_inner(client, sql).await { Ok(()) => break, Err(err) => { + warn!(log, "Failed to apply update as transaction"; "err" => err.to_string()); client .batch_execute("ROLLBACK;") .await @@ -111,7 +112,9 @@ async fn apply_update( version: &str, times_to_apply: usize, ) { - info!(log, "Performing upgrade to {version}"); + let log = log.new(o!("target version" => version.to_string())); + info!(log, "Performing upgrade"); + let client = crdb.connect().await.expect("failed to connect"); // We skip this for the earliest supported version because these tables @@ -126,11 +129,15 @@ async fn apply_update( } let target_dir = Utf8PathBuf::from(SCHEMA_DIR).join(version); - let sqls = all_sql_for_version_migration(&target_dir).await.unwrap(); + let schema_change = + all_sql_for_version_migration(&target_dir).await.unwrap(); for _ in 0..times_to_apply { - for sql in sqls.iter() { - apply_update_as_transaction(log, &client, sql).await; + for nexus_db_queries::db::datastore::SchemaUpgradeStep { path, sql } in + &schema_change.steps + { + info!(log, "Applying sql schema upgrade step"; 
"path" => path.to_string()); + apply_update_as_transaction(&log, &client, sql).await; } } From aef679c4ae8f0921e34c59e480e1400a2775cc7e Mon Sep 17 00:00:00 2001 From: bnaecker Date: Sat, 21 Oct 2023 10:28:17 -0700 Subject: [PATCH 03/24] Limit size of the search range for VNIs when creating VPCs (#4298) - Fixes #4283. - Adds a relatively small limit to the `NextItem` query used for finding a free VNI during VPC creation. This limits the memory consumption to something very reasonable, but is big enough that we should be extremely unlikely to find _no_ available VNIs in the range. - Add an application-level retry loop when inserting _customer_ VPCs, which catches the unlikely event that there really are no VNIs available, and retries a few times. - Adds tests for the computation of the limited search range. - Adds tests for the actual exhaustion-detection and retry behavior. --- Cargo.lock | 1 + nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/datastore/vpc.rs | 376 ++++++++++++++++++++--- nexus/db-queries/src/db/queries/vpc.rs | 215 ++++++++++++- nexus/db-queries/src/lib.rs | 19 ++ 5 files changed, 565 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d99358a9ad..7dbaa3e008 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4464,6 +4464,7 @@ dependencies = [ "serde_with", "sled-agent-client", "slog", + "static_assertions", "steno", "strum", "subprocess", diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index eaf3dc1295..c16c0f5319 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -47,6 +47,7 @@ serde_urlencoded.workspace = true serde_with.workspace = true sled-agent-client.workspace = true slog.workspace = true +static_assertions.workspace = true steno.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 14886ba018..6db99465a3 100644 --- 
a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -34,12 +34,15 @@ use crate::db::model::VpcUpdate; use crate::db::model::{Ipv4Net, Ipv6Net}; use crate::db::pagination::paginated; use crate::db::queries::vpc::InsertVpcQuery; +use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use crate::db::queries::vpc_subnet::SubnetError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::DatabaseErrorKind; +use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -85,18 +88,23 @@ impl DataStore { SERVICES_VPC.clone(), Some(Vni(ExternalVni::SERVICES_VNI)), ); - let authz_vpc = self + let authz_vpc = match self .project_create_vpc_raw(opctx, &authz_project, vpc_query) .await - .map(|(authz_vpc, _)| authz_vpc) - .or_else(|e| match e { - Error::ObjectAlreadyExists { .. } => Ok(authz::Vpc::new( - authz_project.clone(), - *SERVICES_VPC_ID, - LookupType::ByName(SERVICES_VPC.identity.name.to_string()), - )), - _ => Err(e), - })?; + { + Ok(None) => { + let msg = "VNI exhaustion detected when creating built-in VPCs"; + error!(opctx.log, "{}", msg); + Err(Error::internal_error(msg)) + } + Ok(Some((authz_vpc, _))) => Ok(authz_vpc), + Err(Error::ObjectAlreadyExists { .. 
}) => Ok(authz::Vpc::new( + authz_project.clone(), + *SERVICES_VPC_ID, + LookupType::ByName(SERVICES_VPC.identity.name.to_string()), + )), + Err(e) => Err(e), + }?; // Also add the system router and internet gateway route @@ -287,22 +295,65 @@ impl DataStore { &self, opctx: &OpContext, authz_project: &authz::Project, - vpc: IncompleteVpc, + mut vpc: IncompleteVpc, ) -> Result<(authz::Vpc, Vpc), Error> { - self.project_create_vpc_raw( - opctx, - authz_project, - InsertVpcQuery::new(vpc), - ) - .await + // Generate an iterator that allows us to search the entire space of + // VNIs for this VPC, in manageable chunks to limit memory usage. + let vnis = VniSearchIter::new(vpc.vni.0); + for (i, vni) in vnis.enumerate() { + vpc.vni = Vni(vni); + let id = usdt::UniqueId::new(); + crate::probes::vni__search__range__start!(|| { + (&id, u32::from(vni), VniSearchIter::STEP_SIZE) + }); + match self + .project_create_vpc_raw( + opctx, + authz_project, + InsertVpcQuery::new(vpc.clone()), + ) + .await + { + Ok(Some((authz_vpc, vpc))) => { + crate::probes::vni__search__range__found!(|| { + (&id, u32::from(vpc.vni.0)) + }); + return Ok((authz_vpc, vpc)); + } + Err(e) => return Err(e), + Ok(None) => { + crate::probes::vni__search__range__empty!(|| (&id)); + debug!( + opctx.log, + "No VNIs available within current search range, retrying"; + "attempt" => i, + "vpc_name" => %vpc.identity.name, + "start_vni" => ?vni, + ); + } + } + } + + // We've failed to find a VNI after searching the entire range, so we'll + // return a 503 at this point. + error!( + opctx.log, + "failed to find a VNI after searching entire range"; + ); + Err(Error::unavail("Failed to find a free VNI for this VPC")) } + // Internal implementation for creating a VPC. + // + // This returns an optional VPC. If it is None, then we failed to insert a + // VPC specifically because there are no available VNIs. All other errors + // are returned in the `Result::Err` variant. 
async fn project_create_vpc_raw( &self, opctx: &OpContext, authz_project: &authz::Project, vpc_query: InsertVpcQuery, - ) -> Result<(authz::Vpc, Vpc), Error> { + ) -> Result, Error> { use db::schema::vpc::dsl; assert_eq!(authz_project.id(), vpc_query.vpc.project_id); @@ -312,30 +363,48 @@ impl DataStore { let project_id = vpc_query.vpc.project_id; let conn = self.pool_connection_authorized(opctx).await?; - let vpc: Vpc = Project::insert_resource( + let result: Result = Project::insert_resource( project_id, diesel::insert_into(dsl::vpc).values(vpc_query), ) .insert_and_get_result_async(&conn) - .await - .map_err(|e| match e { - AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { - type_name: ResourceType::Project, - lookup_type: LookupType::ById(project_id), - }, - AsyncInsertError::DatabaseError(e) => public_error_from_diesel( - e, - ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), - ), - })?; - Ok(( - authz::Vpc::new( - authz_project.clone(), - vpc.id(), - LookupType::ByName(vpc.name().to_string()), - ), - vpc, - )) + .await; + match result { + Ok(vpc) => Ok(Some(( + authz::Vpc::new( + authz_project.clone(), + vpc.id(), + LookupType::ByName(vpc.name().to_string()), + ), + vpc, + ))), + Err(AsyncInsertError::CollectionNotFound) => { + Err(Error::ObjectNotFound { + type_name: ResourceType::Project, + lookup_type: LookupType::ById(project_id), + }) + } + Err(AsyncInsertError::DatabaseError( + DieselError::DatabaseError( + DatabaseErrorKind::NotNullViolation, + info, + ), + )) if info + .message() + .starts_with("null value in column \"vni\"") => + { + // We failed the non-null check on the VNI column, which means + // we could not find a valid VNI in our search range. Return + // None instead to signal the error. 
+ Ok(None) + } + Err(AsyncInsertError::DatabaseError(e)) => { + Err(public_error_from_diesel( + e, + ErrorHandler::Conflict(ResourceType::Vpc, name.as_str()), + )) + } + } } pub async fn project_update_vpc( @@ -1092,3 +1161,234 @@ impl DataStore { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::datastore_test; + use crate::db::model::Project; + use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params; + use omicron_common::api::external; + use omicron_test_utils::dev; + use slog::info; + + // Test that we detect the right error condition and return None when we + // fail to insert a VPC due to VNI exhaustion. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and checking that we get the expected error response back from the + // `project_create_vpc_raw` call. + #[tokio::test] + async fn test_project_create_vpc_raw_returns_none_on_vni_exhaustion() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log( + "test_project_create_vpc_raw_returns_none_on_vni_exhaustion", + ); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. 
+ let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // At this point, we've filled all the VNIs starting from 2048. Let's + // try to allocate one more, also starting from that position. This + // should fail, because we've explicitly filled the entire range we'll + // search above. 
+ let name: external::Name = "dead-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let Ok(None) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + else { + panic!("Expected Ok(None) when creating a VPC without any available VNIs"); + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + // Test that we appropriately retry when there are no available VNIs. + // + // This is a bit awkward, but we'll test this by inserting a bunch of VPCs, + // and then check that we correctly retry + #[tokio::test] + async fn test_project_create_vpc_retries() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log("test_project_create_vpc_retries"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a project. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(Uuid::new_v4(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let starting_vni = 2048; + let description = String::from("test vpc"); + for vni in 0..=MAX_VNI_SEARCH_RANGE_SIZE { + // Create an incomplete VPC and make sure it has the next available + // VNI. 
+ let name: external::Name = format!("vpc{vni}").parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = + Vni(external::Vni::try_from(starting_vni + vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "index" => vni, + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (_, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + } + + // Similar to the above test, we've fill all available VPCs starting at + // `starting_vni`. Let's attempt to allocate one beginning there, which + // _should_ fail and be internally retried. Note that we're using + // `project_create_vpc()` here instead of the raw version, to check that + // retry logic. 
+ let name: external::Name = "dead-at-first-vpc".parse().unwrap(); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(starting_vni).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating VPC when all VNIs are allocated"; + "vni" => ?this_vni, + ); + match datastore + .project_create_vpc(&opctx, &authz_project, incomplete_vpc.clone()) + .await + { + Ok((_, vpc)) => { + assert_eq!(vpc.id(), incomplete_vpc.identity.id); + let expected_vni = starting_vni + MAX_VNI_SEARCH_RANGE_SIZE + 1; + assert_eq!(u32::from(vpc.vni.0), expected_vni); + info!(log, "successfully created VPC after retries"; "vpc" => ?vpc); + } + Err(e) => panic!("Unexpected error when inserting VPC: {e}"), + }; + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/vpc.rs b/nexus/db-queries/src/db/queries/vpc.rs index b1ac8fe1e1..c29a51adb0 100644 --- a/nexus/db-queries/src/db/queries/vpc.rs +++ b/nexus/db-queries/src/db/queries/vpc.rs @@ -245,15 +245,7 @@ struct NextVni { impl NextVni { fn new(vni: Vni) -> Self { - let base_u32 = u32::from(vni.0); - // The valid range is [0, 1 << 24], so the maximum shift is whatever - // gets us to 1 << 24, and the minimum is whatever gets us back to the - // minimum guest VNI. 
- let max_shift = i64::from(external::Vni::MAX_VNI - base_u32); - let min_shift = i64::from( - -i32::try_from(base_u32 - external::Vni::MIN_GUEST_VNI) - .expect("Expected a valid VNI at this point"), - ); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); let generator = DefaultShiftGenerator { base: vni, max_shift, min_shift }; let inner = NextItem::new_unscoped(generator); @@ -278,3 +270,208 @@ impl NextVni { } delegate_query_fragment_impl!(NextVni); + +// Helper type to compute the shift for a `NextItem` query to find VNIs. +#[derive(Clone, Copy, Debug, PartialEq)] +struct VniShifts { + // The minimum `ShiftGenerator` shift. + min_shift: i64, + // The maximum `ShiftGenerator` shift. + max_shift: i64, +} + +/// Restrict the search for a VNI to a small range. +/// +/// VNIs are pretty sparsely allocated (the number of VPCs), and the range is +/// quite large (24 bits). To avoid memory issues, we'll restrict a search +/// for an available VNI to a small range starting from the random starting +/// VNI. +// +// NOTE: This is very small for tests, to ensure we can accurately test the +// failure mode where there are no available VNIs. +#[cfg(not(test))] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 2048; +#[cfg(test)] +pub const MAX_VNI_SEARCH_RANGE_SIZE: u32 = 10; + +// Ensure that we cannot search a range that extends beyond the valid guest VNI +// range. +static_assertions::const_assert!( + MAX_VNI_SEARCH_RANGE_SIZE + <= (external::Vni::MAX_VNI - external::Vni::MIN_GUEST_VNI) +); + +impl VniShifts { + fn new(vni: Vni) -> Self { + let base_u32 = u32::from(vni.0); + let range_end = base_u32 + MAX_VNI_SEARCH_RANGE_SIZE; + + // Clamp the maximum shift at the distance to the maximum allowed VNI, + // or the maximum of the range. + let max_shift = i64::from( + (external::Vni::MAX_VNI - base_u32).min(MAX_VNI_SEARCH_RANGE_SIZE), + ); + + // And any remaining part of the range wraps around starting at the + // beginning. 
+ let min_shift = -i64::from( + range_end.checked_sub(external::Vni::MAX_VNI).unwrap_or(0), + ); + Self { min_shift, max_shift } + } +} + +/// An iterator yielding sequential starting VNIs. +/// +/// The VPC insertion query requires a search for the next available VNI, using +/// the `NextItem` query. We limit the search for each query to avoid memory +/// issues on any one query. If we fail to find a VNI, we need to search the +/// next range. This iterator yields the starting positions for the `NextItem` +/// query, so that the entire range can be search in chunks until a free VNI is +/// found. +// +// NOTE: It's technically possible for this to lead to searching the very +// initial portion of the range twice. If we end up wrapping around so that the +// last position yielded by this iterator is `start - x`, then we'll end up +// searching from `start - x` to `start + (MAX_VNI_SEARCH_RANGE_SIZE - x)`, and +// so search those first few after `start` again. This is both innocuous and +// really unlikely. +#[derive(Clone, Copy, Debug)] +pub struct VniSearchIter { + start: u32, + current: u32, + has_wrapped: bool, +} + +impl VniSearchIter { + pub const STEP_SIZE: u32 = MAX_VNI_SEARCH_RANGE_SIZE; + + /// Create a search range, starting from the provided VNI. + pub fn new(start: external::Vni) -> Self { + let start = u32::from(start); + Self { start, current: start, has_wrapped: false } + } +} + +impl std::iter::Iterator for VniSearchIter { + type Item = external::Vni; + + fn next(&mut self) -> Option { + // If we've wrapped around and the computed position is beyond where we + // started, then the ite + if self.has_wrapped && self.current > self.start { + return None; + } + + // Compute the next position. + // + // Make sure we wrap around to the mininum guest VNI. Note that we + // consider the end of the range inclusively, so we subtract one in the + // offset below to end up _at_ the min guest VNI. 
+ let mut next = self.current + MAX_VNI_SEARCH_RANGE_SIZE; + if next > external::Vni::MAX_VNI { + next -= external::Vni::MAX_VNI; + next += external::Vni::MIN_GUEST_VNI - 1; + self.has_wrapped = true; + } + let current = self.current; + self.current = next; + Some(external::Vni::try_from(current).unwrap()) + } +} + +#[cfg(test)] +mod tests { + use super::external; + use super::Vni; + use super::VniSearchIter; + use super::VniShifts; + use super::MAX_VNI_SEARCH_RANGE_SIZE; + + // Ensure that when the search range lies entirely within the range of VNIs, + // we search from the start VNI through the maximum allowed range size. + #[test] + fn test_vni_shift_no_wrapping() { + let vni = Vni(external::Vni::try_from(2048).unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, 0); + assert_eq!(max_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } + + // Ensure that we wrap correctly, when the starting VNI happens to land + // quite close to the end of the allowed range. 
+ #[test] + fn test_vni_shift_with_wrapping() { + let offset = 5; + let vni = + Vni(external::Vni::try_from(external::Vni::MAX_VNI - offset) + .unwrap()); + let VniShifts { min_shift, max_shift } = VniShifts::new(vni); + assert_eq!(min_shift, -i64::from(MAX_VNI_SEARCH_RANGE_SIZE - offset)); + assert_eq!(max_shift, i64::from(offset)); + assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); + } + + #[test] + fn test_vni_search_iter_steps() { + let start = external::Vni::try_from(2048).unwrap(); + let mut it = VniSearchIter::new(start); + let next = it.next().unwrap(); + assert_eq!(next, start); + let next = it.next().unwrap(); + assert_eq!( + next, + external::Vni::try_from( + u32::from(start) + MAX_VNI_SEARCH_RANGE_SIZE + ) + .unwrap() + ); + } + + #[test] + fn test_vni_search_iter_full_count() { + let start = + external::Vni::try_from(external::Vni::MIN_GUEST_VNI).unwrap(); + + let last = VniSearchIter::new(start).last().unwrap(); + println!("{:?}", last); + + pub const fn div_ceil(x: u32, y: u32) -> u32 { + let d = x / y; + let r = x % y; + if r > 0 && y > 0 { + d + 1 + } else { + d + } + } + const N_EXPECTED: u32 = div_ceil( + external::Vni::MAX_VNI - external::Vni::MIN_GUEST_VNI, + MAX_VNI_SEARCH_RANGE_SIZE, + ); + let count = u32::try_from(VniSearchIter::new(start).count()).unwrap(); + assert_eq!(count, N_EXPECTED); + } + + #[test] + fn test_vni_search_iter_wrapping() { + // Start from just before the end of the range. + let start = + external::Vni::try_from(external::Vni::MAX_VNI - 1).unwrap(); + let mut it = VniSearchIter::new(start); + + // We should yield that start position first. + let next = it.next().unwrap(); + assert_eq!(next, start); + + // The next value should be wrapped around to the beginning. + // + // Subtract 2 because we _include_ the max VNI in the search range. 
+ let next = it.next().unwrap(); + assert_eq!( + u32::from(next), + external::Vni::MIN_GUEST_VNI + MAX_VNI_SEARCH_RANGE_SIZE - 2 + ); + } +} diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 29c33039ff..a693f7ff42 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -17,3 +17,22 @@ extern crate newtype_derive; #[cfg(test)] #[macro_use] extern crate diesel; + +#[usdt::provider(provider = "nexus__db__queries")] +mod probes { + // Fires before we start a search over a range for a VNI. + // + // Includes the starting VNI and the size of the range being searched. + fn vni__search__range__start( + _: &usdt::UniqueId, + start_vni: u32, + size: u32, + ) { + } + + // Fires when we successfully find a VNI. + fn vni__search__range__found(_: &usdt::UniqueId, vni: u32) {} + + // Fires when we fail to find a VNI in the provided range. + fn vni__search__range__empty(_: &usdt::UniqueId) {} +} From 825e4f39c8efb147a108d13d87def0221c27c8df Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Sat, 21 Oct 2023 16:12:26 -0400 Subject: [PATCH 04/24] Make sure project_delete_snapshot does its job (#4308) `project_delete_snapshot`'s job is to soft-delete snapshots but due to incorrectly using `check_if_exists` with `execute_async`, the state a snapshot was in was not being validated. This was causing a snapshot delete request to proceed for a snapshot that was in state Creating, causing a case where a volume was hard deleted before it had its associated resource accounting performed, leading to regions associated with deleted resources that would never be cleaned up. Fix this and ensure expected behaviour via a test. 
--- nexus/db-queries/src/db/datastore/snapshot.rs | 67 ++++++++++++---- nexus/tests/integration_tests/disks.rs | 78 +++++++++++++++++++ nexus/tests/integration_tests/snapshots.rs | 32 +++++++- 3 files changed, 163 insertions(+), 14 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs index 59fb00c84d..7c03e4bd40 100644 --- a/nexus/db-queries/src/db/datastore/snapshot.rs +++ b/nexus/db-queries/src/db/datastore/snapshot.rs @@ -19,6 +19,7 @@ use crate::db::model::Snapshot; use crate::db::model::SnapshotState; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::UpdateStatus; use crate::db::TransactionError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -252,30 +253,70 @@ impl DataStore { use db::schema::snapshot::dsl; - let updated_rows = diesel::update(dsl::snapshot) + let result = diesel::update(dsl::snapshot) .filter(dsl::time_deleted.is_null()) .filter(dsl::gen.eq(gen)) .filter(dsl::id.eq(snapshot_id)) - .filter(dsl::state.eq_any(ok_to_delete_states)) + .filter(dsl::state.eq_any(ok_to_delete_states.clone())) .set(( dsl::time_deleted.eq(now), dsl::state.eq(SnapshotState::Destroyed), )) .check_if_exists::(snapshot_id) - .execute_async(&*self.pool_connection_authorized(&opctx).await?) + .execute_and_check(&*self.pool_connection_authorized(&opctx).await?) 
.await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Snapshot, + LookupType::ById(snapshot_id), + ), + ) + })?; + + match result.status { + UpdateStatus::Updated => { + // snapshot was soft deleted ok + Ok(result.found.id()) + } - if updated_rows == 0 { - // Either: - // - // - the snapshot was already deleted - // - the generation number changed - // - the state of the snapshot isn't one of `ok_to_delete_states` + UpdateStatus::NotUpdatedButExists => { + let snapshot = result.found; - return Err(Error::invalid_request("snapshot cannot be deleted")); - } + // if the snapshot was already deleted, return Ok - this + // function must remain idempotent for the same input. + if snapshot.time_deleted().is_some() + && snapshot.state == SnapshotState::Destroyed + { + Ok(snapshot.id()) + } else { + // if the snapshot was not deleted, figure out why + if !ok_to_delete_states.contains(&snapshot.state) { + Err(Error::invalid_request(&format!( + "snapshot cannot be deleted in state {:?}", + snapshot.state, + ))) + } else if snapshot.gen != gen { + Err(Error::invalid_request(&format!( + "snapshot cannot be deleted: mismatched generation {:?} != {:?}", + gen, + snapshot.gen, + ))) + } else { + error!( + opctx.log, + "snapshot exists but cannot be deleted: {:?} (db_snapshot is {:?}", + snapshot, + db_snapshot, + ); - Ok(snapshot_id) + Err(Error::invalid_request( + "snapshot exists but cannot be deleted", + )) + } + } + } + } } } diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 71a3977192..a5a8339c34 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1672,6 +1672,84 @@ async fn test_disk_create_for_importing(cptestctx: &ControlPlaneTestContext) { disks_eq(&disks[0], &disk); } +#[nexus_test] +async fn test_project_delete_disk_no_auth_idempotent( + cptestctx: 
&ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + DiskTest::new(&cptestctx).await; + create_org_and_project(client).await; + + // Create a disk + let disks_url = get_disks_url(); + + let new_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: DISK_NAME.parse().unwrap(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(1), + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&new_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + + let disk_url = get_disk_url(DISK_NAME); + let disk = disk_get(&client, &disk_url).await; + assert_eq!(disk.state, DiskState::Detached); + + // Call project_delete_disk_no_auth twice, ensuring that the disk is either + // there before deleting and not afterwards. 
+ + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await + .unwrap(); + + assert_eq!(db_disk.id(), disk.identity.id); + + datastore + .project_delete_disk_no_auth( + &disk.identity.id, + &[DiskState::Detached, DiskState::Faulted], + ) + .await + .unwrap(); + + let r = LookupPath::new(&opctx, &datastore) + .disk_id(disk.identity.id) + .fetch() + .await; + + assert!(r.is_err()); + + datastore + .project_delete_disk_no_auth( + &disk.identity.id, + &[DiskState::Detached, DiskState::Faulted], + ) + .await + .unwrap(); +} + async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { NexusRequest::object_get(client, disk_url) .authn_as(AuthnMode::PrivilegedUser) diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index b3cf4bb594..1dd32e6769 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -1041,7 +1041,25 @@ async fn test_create_snapshot_record_idempotent( external::Error::ObjectAlreadyExists { .. 
}, )); - // Test project_delete_snapshot is idempotent + // Move snapshot from Creating to Ready + + let (.., authz_snapshot, db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_created_1.id()) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + datastore + .project_snapshot_update_state( + &opctx, + &authz_snapshot, + db_snapshot.gen, + db::model::SnapshotState::Ready, + ) + .await + .unwrap(); + + // Grab the new snapshot (so generation number is updated) let (.., authz_snapshot, db_snapshot) = LookupPath::new(&opctx, &datastore) .snapshot_id(snapshot_created_1.id()) @@ -1049,6 +1067,8 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); + // Test project_delete_snapshot is idempotent for the same input + datastore .project_delete_snapshot( &opctx, @@ -1064,6 +1084,16 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); + { + // Ensure the snapshot is gone + let r = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_created_1.id()) + .fetch_for(authz::Action::Read) + .await; + + assert!(r.is_err()); + } + datastore .project_delete_snapshot( &opctx, From 1cd3314e234533343b63e0d4f9b71a8ce2e4c432 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Sat, 21 Oct 2023 13:14:15 -0700 Subject: [PATCH 05/24] Bump SP versions to 1.0.3 (#4305) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index e2b16d345e..1396af4d60 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -204328b941deab8bfd3d6e34af96ef04489672fa3b0d5419b60456f9b052e789 build-gimlet-c-image-default-v1.0.2.zip -0ebaa9d98c3734420a482160a7a19dd09510ea3bdc573a090a97ec47137bd624 build-gimlet-d-image-default-v1.0.2.zip -39ec8fd0c946b744e59d9c1b89914f354c60f54e974398029d1dea9d31681f05 build-gimlet-e-image-default-v1.0.2.zip 
-fa5dc36a7a70eeb45d4c4b3b314ba54ee820b3d57ffc07defcc3ae07c142329c build-psc-b-image-default-v1.0.2.zip -4a9850100f8b5fcbbdd11d41ccd8d5037059697a9b65c1ba2dba48a6565ba9e7 build-psc-c-image-default-v1.0.2.zip -1bb870d7921c1731ec83dc38b8e3425703ec17efa614d75e92f07a551312f54b build-sidecar-b-image-default-v1.0.2.zip -6aed0e15e0025bb87a22ecea60d75fa71b54b83bea1e213b8cd5bdb02e7ccb2d build-sidecar-c-image-default-v1.0.2.zip +2df01d7dd17423588c99de4361694efdb6bd375e2f54db053320eeead3e07eda build-gimlet-c-image-default-v1.0.3.zip +8ac0eb6d7817825c6318feb8327f5758a33ccd2479512e3e2424f0eb8e290010 build-gimlet-d-image-default-v1.0.3.zip +eeeb72ec81a843fa1f5093096d1e4500aba6ce01c2d21040a2a10a092595d945 build-gimlet-e-image-default-v1.0.3.zip +de0d9028929322f6d5afc4cb52c198b3402c93a38aa15f9d378617ca1d1112c9 build-psc-b-image-default-v1.0.3.zip +11a6235d852bd75548f12d85b0913cb4ccb0aff3c38bf8a92510a2b9c14dad3c build-psc-c-image-default-v1.0.3.zip +3f863d46a462432f19d3fb5a293b8106da6e138de80271f869692bd29abd994b build-sidecar-b-image-default-v1.0.3.zip +2a9feac7f2da61b843d00edf2693c31c118f202c6cd889d1d1758ea1dd95dbca build-sidecar-c-image-default-v1.0.3.zip diff --git a/tools/hubris_version b/tools/hubris_version index 0f1729d791..b00c3286fe 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.2 psc-v1.0.2 sidecar-v1.0.2) +TAGS=(gimlet-v1.0.3 psc-v1.0.3 sidecar-v1.0.3) From 51b6b160c4913ee77685ab95dbfc50047d5503fe Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 23 Oct 2023 18:06:41 +0100 Subject: [PATCH 06/24] Combine host and trampoline OS images into one CI job (#4273) There are two OS build jobs that each check out and build many repositories in order to build a host image. By combining them into one job, we only pay the cost of this checkout and build phase once. It doesn't actually save a lot of wall time because the two jobs run in parallel, but it saves starting a VM and about 20 minutes of compute time. 
--- .github/buildomat/jobs/host-image.sh | 48 ++++++++++++++++--- .github/buildomat/jobs/trampoline-image.sh | 54 ---------------------- .github/buildomat/jobs/tuf-repo.sh | 7 +-- tools/build-host-image.sh | 16 ------- 4 files changed, 44 insertions(+), 81 deletions(-) delete mode 100755 .github/buildomat/jobs/trampoline-image.sh diff --git a/.github/buildomat/jobs/host-image.sh b/.github/buildomat/jobs/host-image.sh index ba0b4e1ac3..2f4d146a48 100755 --- a/.github/buildomat/jobs/host-image.sh +++ b/.github/buildomat/jobs/host-image.sh @@ -1,11 +1,12 @@ #!/bin/bash #: -#: name = "helios / build OS image" +#: name = "helios / build OS images" #: variety = "basic" #: target = "helios-2.0" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "=/work/helios/image/output/os.tar.gz", +#: "=/work/helios/upload/os-host.tar.gz", +#: "=/work/helios/upload/os-trampoline.tar.gz", #: ] #: access_repos = [ #: "oxidecomputer/amd-apcb", @@ -44,14 +45,49 @@ TOP=$PWD source "$TOP/tools/include/force-git-over-https.sh" -# Checkout helios at a pinned commit into /work/helios -git clone https://github.com/oxidecomputer/helios.git /work/helios -cd /work/helios +# Check out helios into /work/helios +HELIOSDIR=/work/helios +git clone https://github.com/oxidecomputer/helios.git "$HELIOSDIR" +cd "$HELIOSDIR" +# Record the branch and commit in the output +git status --branch --porcelain=2 +# Setting BUILD_OS to no makes setup skip repositories we don't need for +# building the OS itself (we are just building an image from already built OS). +BUILD_OS=no gmake setup + +# Commands that "helios-build" would ask us to run (either explicitly or +# implicitly, to avoid an error). +rc=0 +pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? +case $rc in + # `man pkg` notes that exit code 4 means no changes were made because + # there is nothing to do; that's fine. Any other exit code is an error. 
+ 0 | 4) ;; + *) exit $rc ;; +esac + +pfexec zfs create -p "rpool/images/$USER" + # TODO: Consider importing zones here too? cd "$TOP" +OUTPUTDIR="$HELIOSDIR/upload" +mkdir "$OUTPUTDIR" + +banner OS ./tools/build-host-image.sh -B \ -S /input/package/work/zones/switch-asic.tar.gz \ - /work/helios \ + "$HELIOSDIR" \ /input/package/work/global-zone-packages.tar.gz + +mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-host.tar.gz" + +banner Trampoline + +./tools/build-host-image.sh -R \ + "$HELIOSDIR" \ + /input/package/work/trampoline-global-zone-packages.tar.gz + +mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-trampoline.tar.gz" + diff --git a/.github/buildomat/jobs/trampoline-image.sh b/.github/buildomat/jobs/trampoline-image.sh deleted file mode 100755 index 6014d7dca0..0000000000 --- a/.github/buildomat/jobs/trampoline-image.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -#: -#: name = "helios / build trampoline OS image" -#: variety = "basic" -#: target = "helios-2.0" -#: rust_toolchain = "1.72.1" -#: output_rules = [ -#: "=/work/helios/image/output/os.tar.gz", -#: ] -#: access_repos = [ -#: "oxidecomputer/amd-apcb", -#: "oxidecomputer/amd-efs", -#: "oxidecomputer/amd-firmware", -#: "oxidecomputer/amd-flash", -#: "oxidecomputer/amd-host-image-builder", -#: "oxidecomputer/boot-image-tools", -#: "oxidecomputer/chelsio-t6-roms", -#: "oxidecomputer/compliance-pilot", -#: "oxidecomputer/facade", -#: "oxidecomputer/helios", -#: "oxidecomputer/helios-omicron-brand", -#: "oxidecomputer/helios-omnios-build", -#: "oxidecomputer/helios-omnios-extra", -#: "oxidecomputer/nanobl-rs", -#: ] -#: -#: [dependencies.package] -#: job = "helios / package" -#: -#: [[publish]] -#: series = "image" -#: name = "os-trampoline.tar.gz" -#: from_output = "/work/helios/image/output/os.tar.gz" -#: - -set -o errexit -set -o pipefail -set -o xtrace - -cargo --version -rustc --version - -TOP=$PWD - -source "$TOP/tools/include/force-git-over-https.sh" - -# Checkout helios at a pinned 
commit into /work/helios -git clone https://github.com/oxidecomputer/helios.git /work/helios -cd /work/helios - -cd "$TOP" -./tools/build-host-image.sh -R \ - /work/helios \ - /input/package/work/trampoline-global-zone-packages.tar.gz diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index ea25ab5834..29cf7fa85e 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -19,10 +19,7 @@ #: job = "helios / package" #: #: [dependencies.host] -#: job = "helios / build OS image" -#: -#: [dependencies.trampoline] -#: job = "helios / build trampoline OS image" +#: job = "helios / build OS images" #: #: [[publish]] #: series = "rot-all" @@ -139,7 +136,7 @@ name = "$kind" version = "$VERSION" [artifact.$kind.source] kind = "file" -path = "/input/$kind/work/helios/image/output/os.tar.gz" +path = "/input/host/work/helios/upload/os-$kind.tar.gz" EOF done diff --git a/tools/build-host-image.sh b/tools/build-host-image.sh index d492e84b81..c194edb603 100755 --- a/tools/build-host-image.sh +++ b/tools/build-host-image.sh @@ -92,22 +92,6 @@ function main # Move to the helios checkout cd "$HELIOS_PATH" - # Create the "./helios-build" command, which lets us build images - gmake setup - - # Commands that "./helios-build" would ask us to run (either explicitly - # or implicitly, to avoid an error). - rc=0 - pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? - case $rc in - # `man pkg` notes that exit code 4 means no changes were made because - # there is nothing to do; that's fine. Any other exit code is an error. 
- 0 | 4) ;; - *) exit $rc ;; - esac - - pfexec zfs create -p rpool/images/"$USER" - HELIOS_REPO=https://pkg.oxide.computer/helios/2/dev/ # Build an image name that includes the omicron and host OS hashes From 3af87d2e29a9bf1a75f86a2f472cf8a0ec719737 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 23 Oct 2023 15:08:28 -0500 Subject: [PATCH 07/24] Bump web console (utilization table) (#4318) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/oxidecomputer/console/compare/3538c32a...bd65b9da * [bd65b9da](https://github.com/oxidecomputer/console/commit/bd65b9da) loading state for utilization table, use QueryParamTabs * [8e74accf](https://github.com/oxidecomputer/console/commit/8e74accf) handle empty metrics list on silo utilization * [672cb208](https://github.com/oxidecomputer/console/commit/672cb208) bump API spec (BGP endpoints, not used yet) * [a50777c5](https://github.com/oxidecomputer/console/commit/a50777c5) oxidecomputer/console#1795 * [8978e07a](https://github.com/oxidecomputer/console/commit/8978e07a) bump omicron version (no changes) * [992fb1e1](https://github.com/oxidecomputer/console/commit/992fb1e1) oxidecomputer/console#1797 * [e5b8d029](https://github.com/oxidecomputer/console/commit/e5b8d029) remove unnecessary @types/testing-library__jest-dom dep * [e9a78617](https://github.com/oxidecomputer/console/commit/e9a78617) tanstack query 5 went stable --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 218aef576d..7e8d352efd 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="3538c32a5189bd22df8f6a573399dacfbe81efaa" -SHA2="3289989f2cd6c71ea800e73231190455cc8e4e45ae9304293050b925a9fd9423" +COMMIT="bd65b9da7019ad812dd056e7fc182df2cf4ec128" +SHA2="e4d4f33996a6e89b972fac61737acb7f1dbd21943d1f6bef776d4ee9bcccd2b0" From 35034f75d81821bf7f7901485143ff5f72608e9f Mon Sep 
17 00:00:00 2001 From: Luqman Aden Date: Mon, 23 Oct 2023 18:58:58 -0700 Subject: [PATCH 08/24] Re-introduce support for setting a VLAN for an uplink from RSS (#4319) Going from the previous `UplinkConfig` format to the `PortConfigV1` introduced with the BGP work seems to have lost the ability to configure the VLAN ID for reaching a gateway. The actual APIs and functionality is still there so just needed to be wired back up. --- common/src/api/internal/shared.rs | 3 + nexus/src/app/rack.rs | 1 + .../app/sagas/switch_port_settings_apply.rs | 6 +- nexus/tests/integration_tests/switch_port.rs | 31 ++++++--- openapi/bootstrap-agent.json | 7 +++ openapi/nexus-internal.json | 7 +++ openapi/sled-agent.json | 7 +++ openapi/wicketd.json | 7 +++ schema/rss-sled-plan.json | 9 +++ sled-agent/src/bootstrap/early_networking.rs | 63 ++++++++++++++++++- sled-agent/src/rack_setup/service.rs | 1 + wicket/src/rack_setup/config_template.toml | 3 + wicket/src/rack_setup/config_toml.rs | 23 +++++-- wicket/src/ui/panes/rack_setup.rs | 7 ++- wicketd/src/rss_config.rs | 1 + 15 files changed, 161 insertions(+), 15 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 784da8fcc6..971dbbabbf 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -111,6 +111,8 @@ pub struct RouteConfig { pub destination: IpNetwork, /// The nexthop/gateway address. pub nexthop: IpAddr, + /// The VLAN ID the gateway is reachable over. 
+ pub vid: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] @@ -137,6 +139,7 @@ impl From for PortConfigV1 { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), + vid: value.uplink_vid, }], addresses: vec![value.uplink_cidr.into()], switch: value.switch, diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 907c3ffa78..21918d2687 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -663,6 +663,7 @@ impl super::Nexus { .map(|r| SledRouteConfig { destination: r.dst, nexthop: r.gw.ip(), + vid: r.vid.map(Into::into), }) .collect(), addresses: info.addresses.iter().map(|a| a.address).collect(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 93dc45751a..8442080979 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -911,7 +911,11 @@ pub(crate) async fn bootstore_update( routes: settings .routes .iter() - .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) + .map(|r| RouteConfig { + destination: r.dst, + nexthop: r.gw.ip(), + vid: r.vid.map(Into::into), + }) .collect(), addresses: settings.addresses.iter().map(|a| a.address).collect(), switch: switch_location, diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index fada45694d..d4fd10f819 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -128,11 +128,18 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.routes.insert( "phy0".into(), RouteConfig { - routes: vec![Route { - dst: "1.2.3.0/24".parse().unwrap(), - gw: "1.2.3.4".parse().unwrap(), - vid: None, - }], + routes: vec![ + Route { + dst: "1.2.3.0/24".parse().unwrap(), + gw: "1.2.3.4".parse().unwrap(), + vid: None, + }, + Route { + dst: 
"5.6.7.0/24".parse().unwrap(), + gw: "5.6.7.8".parse().unwrap(), + vid: Some(5), + }, + ], }, ); // addresses @@ -159,7 +166,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(created.links.len(), 1); - assert_eq!(created.routes.len(), 1); + assert_eq!(created.routes.len(), 2); assert_eq!(created.addresses.len(), 1); let link0 = &created.links[0]; @@ -178,6 +185,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &created.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); + assert_eq!(route0.vlan_id, None); + let route1 = &created.routes[1]; + assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); + assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); + assert_eq!(route1.vlan_id, Some(5)); let addr0 = &created.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); @@ -195,7 +207,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(roundtrip.links.len(), 1); - assert_eq!(roundtrip.routes.len(), 1); + assert_eq!(roundtrip.routes.len(), 2); assert_eq!(roundtrip.addresses.len(), 1); let link0 = &roundtrip.links[0]; @@ -214,6 +226,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &roundtrip.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); + assert_eq!(route0.vlan_id, None); + let route1 = &roundtrip.routes[1]; + assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); + assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); + assert_eq!(route1.vlan_id, Some(5)); let addr0 = &roundtrip.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 6dcf756737..0a954fff0d 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -908,6 +908,13 
@@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 411c52ddff..1a3be03de1 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4507,6 +4507,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 486662853c..ec070a3a0b 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2669,6 +2669,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 75db82e8e1..9fd05a9ca7 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -2493,6 +2493,13 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "nullable": true, + "description": "The VLAN ID the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 39a9a68acc..7ba74ffc0a 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -581,6 +581,15 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" + }, + "vid": { + "description": "The VLAN ID the gateway is reachable over.", + "type": [ + "integer", + "null" + ], + "format": "uint16", + "minimum": 
0.0 } } }, diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 6c19080e9c..c539f6fdfd 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -509,7 +509,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v4_routes.insert( dst.to_string(), - RouteSettingsV4 { link_id: link_id.0, nexthop, vid: None }, + RouteSettingsV4 { link_id: link_id.0, nexthop, vid: r.vid }, ); } if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = @@ -517,7 +517,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v6_routes.insert( dst.to_string(), - RouteSettingsV6 { link_id: link_id.0, nexthop, vid: None }, + RouteSettingsV6 { link_id: link_id.0, nexthop, vid: r.vid }, ); } } @@ -785,6 +785,65 @@ mod tests { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), + vid: None, + }], + addresses: vec![uplink.uplink_cidr.into()], + switch: uplink.switch, + port: uplink.uplink_port, + uplink_port_speed: uplink.uplink_port_speed, + uplink_port_fec: uplink.uplink_port_fec, + bgp_peers: vec![], + }], + bgp: vec![], + }), + }, + }; + + assert_eq!(expected, v1); + } + + #[test] + fn serialized_early_network_config_v0_to_v1_conversion_with_vid() { + let v0 = EarlyNetworkConfigV0 { + generation: 1, + rack_subnet: Ipv6Addr::UNSPECIFIED, + ntp_servers: Vec::new(), + rack_network_config: Some(RackNetworkConfigV0 { + infra_ip_first: Ipv4Addr::UNSPECIFIED, + infra_ip_last: Ipv4Addr::UNSPECIFIED, + uplinks: vec![UplinkConfig { + gateway_ip: Ipv4Addr::UNSPECIFIED, + switch: SwitchLocation::Switch0, + uplink_port: "Port0".to_string(), + uplink_port_speed: PortSpeed::Speed100G, + uplink_port_fec: PortFec::None, + uplink_cidr: "192.168.0.1/16".parse().unwrap(), + uplink_vid: Some(10), + }], + }), + }; + + let v0_serialized = serde_json::to_vec(&v0).unwrap(); + let bootstore_conf = + bootstore::NetworkConfig { generation: 1, blob: 
v0_serialized }; + + let v1 = EarlyNetworkConfig::try_from(bootstore_conf).unwrap(); + let v0_rack_network_config = v0.rack_network_config.unwrap(); + let uplink = v0_rack_network_config.uplinks[0].clone(); + let expected = EarlyNetworkConfig { + generation: 1, + schema_version: 1, + body: EarlyNetworkConfigBody { + ntp_servers: v0.ntp_servers.clone(), + rack_network_config: Some(RackNetworkConfigV1 { + rack_subnet: Ipv6Network::new(v0.rack_subnet, 56).unwrap(), + infra_ip_first: v0_rack_network_config.infra_ip_first, + infra_ip_last: v0_rack_network_config.infra_ip_last, + ports: vec![PortConfigV1 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: uplink.gateway_ip.into(), + vid: Some(10), }], addresses: vec![uplink.uplink_cidr.into()], switch: uplink.switch, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 7f6469d2c0..b54a8f0ba0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -591,6 +591,7 @@ impl ServiceInner { .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), diff --git a/wicket/src/rack_setup/config_template.toml b/wicket/src/rack_setup/config_template.toml index 617b61fadc..da07f42cd4 100644 --- a/wicket/src/rack_setup/config_template.toml +++ b/wicket/src/rack_setup/config_template.toml @@ -47,6 +47,9 @@ infra_ip_last = "" [[rack_network_config.ports]] # Routes associated with this port. # { nexthop = "1.2.3.4", destination = "0.0.0.0/0" } +# Can also optionally specify a VLAN id if the next hop is reachable +# over an 802.1Q tagged L2 segment. +# { nexthop = "5.6.7.8", destination = "5.6.7.0/24", vid = 5 } routes = [] # Addresses associated with this port. 
diff --git a/wicket/src/rack_setup/config_toml.rs b/wicket/src/rack_setup/config_toml.rs index e087c9aa7c..9616939308 100644 --- a/wicket/src/rack_setup/config_toml.rs +++ b/wicket/src/rack_setup/config_toml.rs @@ -245,6 +245,12 @@ fn populate_network_table( r.destination.to_string(), )), ); + if let Some(vid) = r.vid { + route.insert( + "vid", + Value::Integer(Formatted::new(vid.into())), + ); + } routes.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes))); @@ -379,6 +385,7 @@ mod tests { .map(|r| InternalRouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), @@ -478,10 +485,18 @@ mod tests { infra_ip_last: "172.30.0.10".parse().unwrap(), ports: vec![PortConfigV1 { addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "172.30.0.10".parse().unwrap(), - }], + routes: vec![ + RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "172.30.0.10".parse().unwrap(), + vid: None, + }, + RouteConfig { + destination: "10.20.0.0/16".parse().unwrap(), + nexthop: "10.0.0.20".parse().unwrap(), + vid: Some(20), + }, + ], bgp_peers: vec![BgpPeerConfig { asn: 47, addr: "10.2.3.4".parse().unwrap(), diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 086d01ce9d..36febe404f 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -718,7 +718,12 @@ fn rss_config_text<'a>( vec![ Span::styled(" • Route : ", label_style), Span::styled( - format!("{} -> {}", r.destination, r.nexthop), + format!( + "{} -> {} (vid={})", + r.destination, + r.nexthop, + r.vid.unwrap_or(0), + ), ok_style, ), ] diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index a96acc56a0..d446ed71d1 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -511,6 +511,7 @@ fn validate_rack_network_config( .map(|r| 
BaRouteConfig { destination: r.destination, nexthop: r.nexthop, + vid: r.vid, }) .collect(), addresses: config.addresses.clone(), From 4555620332d0c25f34141dbf069803834470963c Mon Sep 17 00:00:00 2001 From: Luqman Aden Date: Mon, 23 Oct 2023 21:57:54 -0700 Subject: [PATCH 09/24] Revert "Re-introduce support for setting a VLAN for an uplink from RSS (#4319)" (#4323) This reverts commit 35034f75d81821bf7f7901485143ff5f72608e9f. https://github.com/oxidecomputer/omicron/pull/4319#issuecomment-1776439950: Note that setting the VLAN id on routes is going away entirely. The implementation was too costly on the Tofino, and we had to remove it for another feature as it did not wind up getting used. --- common/src/api/internal/shared.rs | 3 - nexus/src/app/rack.rs | 1 - .../app/sagas/switch_port_settings_apply.rs | 6 +- nexus/tests/integration_tests/switch_port.rs | 31 +++------ openapi/bootstrap-agent.json | 7 --- openapi/nexus-internal.json | 7 --- openapi/sled-agent.json | 7 --- openapi/wicketd.json | 7 --- schema/rss-sled-plan.json | 9 --- sled-agent/src/bootstrap/early_networking.rs | 63 +------------------ sled-agent/src/rack_setup/service.rs | 1 - wicket/src/rack_setup/config_template.toml | 3 - wicket/src/rack_setup/config_toml.rs | 23 ++----- wicket/src/ui/panes/rack_setup.rs | 7 +-- wicketd/src/rss_config.rs | 1 - 15 files changed, 15 insertions(+), 161 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 971dbbabbf..784da8fcc6 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -111,8 +111,6 @@ pub struct RouteConfig { pub destination: IpNetwork, /// The nexthop/gateway address. pub nexthop: IpAddr, - /// The VLAN ID the gateway is reachable over. 
- pub vid: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] @@ -139,7 +137,6 @@ impl From for PortConfigV1 { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), - vid: value.uplink_vid, }], addresses: vec![value.uplink_cidr.into()], switch: value.switch, diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 21918d2687..907c3ffa78 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -663,7 +663,6 @@ impl super::Nexus { .map(|r| SledRouteConfig { destination: r.dst, nexthop: r.gw.ip(), - vid: r.vid.map(Into::into), }) .collect(), addresses: info.addresses.iter().map(|a| a.address).collect(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 8442080979..93dc45751a 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -911,11 +911,7 @@ pub(crate) async fn bootstore_update( routes: settings .routes .iter() - .map(|r| RouteConfig { - destination: r.dst, - nexthop: r.gw.ip(), - vid: r.vid.map(Into::into), - }) + .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) .collect(), addresses: settings.addresses.iter().map(|a| a.address).collect(), switch: switch_location, diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index d4fd10f819..fada45694d 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -128,18 +128,11 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { settings.routes.insert( "phy0".into(), RouteConfig { - routes: vec![ - Route { - dst: "1.2.3.0/24".parse().unwrap(), - gw: "1.2.3.4".parse().unwrap(), - vid: None, - }, - Route { - dst: "5.6.7.0/24".parse().unwrap(), - gw: "5.6.7.8".parse().unwrap(), - vid: Some(5), - }, - ], + routes: vec![Route { + dst: 
"1.2.3.0/24".parse().unwrap(), + gw: "1.2.3.4".parse().unwrap(), + vid: None, + }], }, ); // addresses @@ -166,7 +159,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(created.links.len(), 1); - assert_eq!(created.routes.len(), 2); + assert_eq!(created.routes.len(), 1); assert_eq!(created.addresses.len(), 1); let link0 = &created.links[0]; @@ -185,11 +178,6 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &created.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); - assert_eq!(route0.vlan_id, None); - let route1 = &created.routes[1]; - assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); - assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); - assert_eq!(route1.vlan_id, Some(5)); let addr0 = &created.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); @@ -207,7 +195,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { .unwrap(); assert_eq!(roundtrip.links.len(), 1); - assert_eq!(roundtrip.routes.len(), 2); + assert_eq!(roundtrip.routes.len(), 1); assert_eq!(roundtrip.addresses.len(), 1); let link0 = &roundtrip.links[0]; @@ -226,11 +214,6 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { let route0 = &roundtrip.routes[0]; assert_eq!(route0.dst, "1.2.3.0/24".parse().unwrap()); assert_eq!(route0.gw, "1.2.3.4".parse().unwrap()); - assert_eq!(route0.vlan_id, None); - let route1 = &roundtrip.routes[1]; - assert_eq!(route1.dst, "5.6.7.0/24".parse().unwrap()); - assert_eq!(route1.gw, "5.6.7.8".parse().unwrap()); - assert_eq!(route1.vlan_id, Some(5)); let addr0 = &roundtrip.addresses[0]; assert_eq!(addr0.address, "203.0.113.10/24".parse().unwrap()); diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 0a954fff0d..6dcf756737 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -908,13 +908,6 @@ 
"description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 1a3be03de1..411c52ddff 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4507,13 +4507,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ec070a3a0b..486662853c 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2669,13 +2669,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 9fd05a9ca7..75db82e8e1 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -2493,13 +2493,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "nullable": true, - "description": "The VLAN ID the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 } }, "required": [ diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 7ba74ffc0a..39a9a68acc 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -581,15 +581,6 @@ "description": "The nexthop/gateway address.", "type": "string", "format": "ip" - }, - "vid": { - "description": "The VLAN ID the gateway is reachable over.", - "type": [ - "integer", - "null" - ], - "format": "uint16", - "minimum": 0.0 
} } }, diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index c539f6fdfd..6c19080e9c 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -509,7 +509,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v4_routes.insert( dst.to_string(), - RouteSettingsV4 { link_id: link_id.0, nexthop, vid: r.vid }, + RouteSettingsV4 { link_id: link_id.0, nexthop, vid: None }, ); } if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = @@ -517,7 +517,7 @@ impl<'a> EarlyNetworkSetup<'a> { { dpd_port_settings.v6_routes.insert( dst.to_string(), - RouteSettingsV6 { link_id: link_id.0, nexthop, vid: r.vid }, + RouteSettingsV6 { link_id: link_id.0, nexthop, vid: None }, ); } } @@ -785,65 +785,6 @@ mod tests { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), - vid: None, - }], - addresses: vec![uplink.uplink_cidr.into()], - switch: uplink.switch, - port: uplink.uplink_port, - uplink_port_speed: uplink.uplink_port_speed, - uplink_port_fec: uplink.uplink_port_fec, - bgp_peers: vec![], - }], - bgp: vec![], - }), - }, - }; - - assert_eq!(expected, v1); - } - - #[test] - fn serialized_early_network_config_v0_to_v1_conversion_with_vid() { - let v0 = EarlyNetworkConfigV0 { - generation: 1, - rack_subnet: Ipv6Addr::UNSPECIFIED, - ntp_servers: Vec::new(), - rack_network_config: Some(RackNetworkConfigV0 { - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - uplinks: vec![UplinkConfig { - gateway_ip: Ipv4Addr::UNSPECIFIED, - switch: SwitchLocation::Switch0, - uplink_port: "Port0".to_string(), - uplink_port_speed: PortSpeed::Speed100G, - uplink_port_fec: PortFec::None, - uplink_cidr: "192.168.0.1/16".parse().unwrap(), - uplink_vid: Some(10), - }], - }), - }; - - let v0_serialized = serde_json::to_vec(&v0).unwrap(); - let bootstore_conf = - bootstore::NetworkConfig { generation: 1, blob: v0_serialized 
}; - - let v1 = EarlyNetworkConfig::try_from(bootstore_conf).unwrap(); - let v0_rack_network_config = v0.rack_network_config.unwrap(); - let uplink = v0_rack_network_config.uplinks[0].clone(); - let expected = EarlyNetworkConfig { - generation: 1, - schema_version: 1, - body: EarlyNetworkConfigBody { - ntp_servers: v0.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV1 { - rack_subnet: Ipv6Network::new(v0.rack_subnet, 56).unwrap(), - infra_ip_first: v0_rack_network_config.infra_ip_first, - infra_ip_last: v0_rack_network_config.infra_ip_last, - ports: vec![PortConfigV1 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: uplink.gateway_ip.into(), - vid: Some(10), }], addresses: vec![uplink.uplink_cidr.into()], switch: uplink.switch, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b54a8f0ba0..7f6469d2c0 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -591,7 +591,6 @@ impl ServiceInner { .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), diff --git a/wicket/src/rack_setup/config_template.toml b/wicket/src/rack_setup/config_template.toml index da07f42cd4..617b61fadc 100644 --- a/wicket/src/rack_setup/config_template.toml +++ b/wicket/src/rack_setup/config_template.toml @@ -47,9 +47,6 @@ infra_ip_last = "" [[rack_network_config.ports]] # Routes associated with this port. # { nexthop = "1.2.3.4", destination = "0.0.0.0/0" } -# Can also optionally specify a VLAN id if the next hop is reachable -# over an 802.1Q tagged L2 segment. -# { nexthop = "5.6.7.8", destination = "5.6.7.0/24", vid = 5 } routes = [] # Addresses associated with this port. 
diff --git a/wicket/src/rack_setup/config_toml.rs b/wicket/src/rack_setup/config_toml.rs index 9616939308..e087c9aa7c 100644 --- a/wicket/src/rack_setup/config_toml.rs +++ b/wicket/src/rack_setup/config_toml.rs @@ -245,12 +245,6 @@ fn populate_network_table( r.destination.to_string(), )), ); - if let Some(vid) = r.vid { - route.insert( - "vid", - Value::Integer(Formatted::new(vid.into())), - ); - } routes.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes))); @@ -385,7 +379,6 @@ mod tests { .map(|r| InternalRouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), @@ -485,18 +478,10 @@ mod tests { infra_ip_last: "172.30.0.10".parse().unwrap(), ports: vec![PortConfigV1 { addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![ - RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: "172.30.0.10".parse().unwrap(), - vid: None, - }, - RouteConfig { - destination: "10.20.0.0/16".parse().unwrap(), - nexthop: "10.0.0.20".parse().unwrap(), - vid: Some(20), - }, - ], + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "172.30.0.10".parse().unwrap(), + }], bgp_peers: vec![BgpPeerConfig { asn: 47, addr: "10.2.3.4".parse().unwrap(), diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 36febe404f..086d01ce9d 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -718,12 +718,7 @@ fn rss_config_text<'a>( vec![ Span::styled(" • Route : ", label_style), Span::styled( - format!( - "{} -> {} (vid={})", - r.destination, - r.nexthop, - r.vid.unwrap_or(0), - ), + format!("{} -> {}", r.destination, r.nexthop), ok_style, ), ] diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index d446ed71d1..a96acc56a0 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -511,7 +511,6 @@ fn validate_rack_network_config( .map(|r| 
BaRouteConfig { destination: r.destination, nexthop: r.nexthop, - vid: r.vid, }) .collect(), addresses: config.addresses.clone(), From 0f51441b8fc5264ee6ab7f9118722b18e4a1198f Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 23 Oct 2023 22:30:07 -0700 Subject: [PATCH 10/24] bump version to 1.0.3 in package.sh (#4324) dogfood wants a 1.0.3 Co-authored-by: Alan Hanson --- .github/buildomat/jobs/package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index c1cb04124d..5cfb6fcfd7 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -37,7 +37,7 @@ rustc --version # trampoline global zone images. # COMMIT=$(git rev-parse HEAD) -VERSION="1.0.2-0.ci+git${COMMIT:0:11}" +VERSION="1.0.3-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From 40d0981d60afc389f109e0a6cc6eddb8e5c76aa3 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:21:52 -0700 Subject: [PATCH 11/24] chore(deps): update rust crate semver to 1.0.20 (#4242) --- Cargo.lock | 30 +++++++++++++++--------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7dbaa3e008..84d908140a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -866,7 +866,7 @@ checksum = "fb9ac64500cc83ce4b9f8dafa78186aa008c8dea77a09b94cd307fd0cd5022a8" dependencies = [ "camino", "cargo-platform", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "thiserror", @@ -4388,7 +4388,7 @@ dependencies = [ "rand 0.8.5", "ref-cast", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "sled-agent-client", @@ -4911,7 +4911,7 @@ dependencies = [ "reqwest", "ring", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "serde_human_bytes", @@ 
-4953,7 +4953,7 @@ dependencies = [ "reqwest", "ring", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "serde_human_bytes", @@ -5149,7 +5149,7 @@ dependencies = [ "rustls", "samael", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "serde_urlencoded", @@ -5237,7 +5237,7 @@ dependencies = [ "rayon", "reqwest", "ring", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "sled-hardware", @@ -5350,7 +5350,7 @@ dependencies = [ "rcgen", "reqwest", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "serial_test", @@ -5483,7 +5483,7 @@ dependencies = [ "ring", "rustix 0.38.9", "schemars", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "sha2", @@ -5529,7 +5529,7 @@ dependencies = [ "flate2", "futures-util", "reqwest", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_derive", "tar", @@ -7466,7 +7466,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.18", + "semver 1.0.20", ] [[package]] @@ -7750,9 +7750,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] @@ -9505,8 +9505,8 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", - "rand 0.4.6", + "cfg-if 1.0.0", + "rand 0.8.5", "static_assertions", ] @@ -10047,7 +10047,7 @@ dependencies = [ "ratatui", "reqwest", "rpassword", - "semver 1.0.18", + "semver 1.0.20", "serde", "serde_json", "shell-words", diff --git a/Cargo.toml b/Cargo.toml index 
58d57d15b8..d006d9524a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -305,7 +305,7 @@ rustls = "0.21.7" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.12" secrecy = "0.8.0" -semver = { version = "1.0.18", features = ["std", "serde"] } +semver = { version = "1.0.20", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 45c6419d2a..1c3a0b0c73 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -77,7 +77,7 @@ regex-syntax = { version = "0.7.5" } reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.18", features = ["serde"] } +semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } sha2 = { version = "0.10.7", features = ["oid"] } @@ -170,7 +170,7 @@ regex-syntax = { version = "0.7.5" } reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } -semver = { version = "1.0.18", features = ["serde"] } +semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } sha2 = { version = "0.10.7", features = ["oid"] } From 672883e3282aceb91166d9497d6301b20e2c1719 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:22:18 -0700 Subject: [PATCH 12/24] chore(deps): update rust crate tokio-util to 0.7.9 (#4268) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84d908140a..395eeb1e79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9119,9 +9119,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" dependencies = [ "bytes", "futures-core", diff --git a/Cargo.toml b/Cargo.toml index d006d9524a..0b1dee821e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -358,7 +358,7 @@ tokio = "1.33.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.14" tokio-tungstenite = "0.18" -tokio-util = "0.7.8" +tokio-util = "0.7.9" toml = "0.7.8" toml_edit = "0.19.15" topological-sort = "0.2.2" From b6ec68f38873198cd6e9e8638d306a98f6d882d5 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 24 Oct 2023 12:22:40 -0500 Subject: [PATCH 13/24] [nexus] [minor] Bring BGP endpoint doc comments in line with conventions (#4329) I noticed there were periods in the docs sidebar. Also used the word "list" as appropriate. 
--- nexus/src/external_api/http_entrypoints.rs | 16 ++++++++-------- openapi/nexus.json | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 990704904a..48be2de6b0 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -2609,7 +2609,7 @@ async fn networking_loopback_address_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Get loopback addresses, optionally filtering by id +/// List loopback addresses #[endpoint { method = GET, path = "/v1/system/networking/loopback-address", @@ -2824,7 +2824,7 @@ async fn networking_switch_port_clear_settings( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Create a new BGP configuration. +/// Create a new BGP configuration #[endpoint { method = POST, path = "/v1/system/networking/bgp", @@ -2845,7 +2845,7 @@ async fn networking_bgp_config_create( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Get BGP configurations. +/// List BGP configurations #[endpoint { method = GET, path = "/v1/system/networking/bgp", @@ -2900,7 +2900,7 @@ async fn networking_bgp_status( } //TODO pagination? the normal by-name/by-id stuff does not work here -/// Get imported IPv4 BGP routes. +/// Get imported IPv4 BGP routes #[endpoint { method = GET, path = "/v1/system/networking/bgp-routes-ipv4", @@ -2921,7 +2921,7 @@ async fn networking_bgp_imported_routes_ipv4( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete a BGP configuration. +/// Delete a BGP configuration #[endpoint { method = DELETE, path = "/v1/system/networking/bgp", @@ -2942,7 +2942,7 @@ async fn networking_bgp_config_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Create a new BGP announce set. 
+/// Create a new BGP announce set #[endpoint { method = POST, path = "/v1/system/networking/bgp-announce", @@ -2964,7 +2964,7 @@ async fn networking_bgp_announce_set_create( } //TODO pagination? the normal by-name/by-id stuff does not work here -/// Get originated routes for a given BGP configuration. +/// Get originated routes for a BGP configuration #[endpoint { method = GET, path = "/v1/system/networking/bgp-announce", @@ -2990,7 +2990,7 @@ async fn networking_bgp_announce_set_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Delete a BGP announce set. +/// Delete a BGP announce set #[endpoint { method = DELETE, path = "/v1/system/networking/bgp-announce", diff --git a/openapi/nexus.json b/openapi/nexus.json index 456f2aebd6..f1bfa4351f 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -5170,7 +5170,7 @@ "tags": [ "system/networking" ], - "summary": "Get BGP configurations.", + "summary": "List BGP configurations", "operationId": "networking_bgp_config_list", "parameters": [ { @@ -5235,7 +5235,7 @@ "tags": [ "system/networking" ], - "summary": "Create a new BGP configuration.", + "summary": "Create a new BGP configuration", "operationId": "networking_bgp_config_create", "requestBody": { "content": { @@ -5270,7 +5270,7 @@ "tags": [ "system/networking" ], - "summary": "Delete a BGP configuration.", + "summary": "Delete a BGP configuration", "operationId": "networking_bgp_config_delete", "parameters": [ { @@ -5301,7 +5301,7 @@ "tags": [ "system/networking" ], - "summary": "Get originated routes for a given BGP configuration.", + "summary": "Get originated routes for a BGP configuration", "operationId": "networking_bgp_announce_set_list", "parameters": [ { @@ -5341,7 +5341,7 @@ "tags": [ "system/networking" ], - "summary": "Create a new BGP announce set.", + "summary": "Create a new BGP announce set", "operationId": "networking_bgp_announce_set_create", "requestBody": { "content": { @@ -5376,7 +5376,7 @@ "tags": [ 
"system/networking" ], - "summary": "Delete a BGP announce set.", + "summary": "Delete a BGP announce set", "operationId": "networking_bgp_announce_set_delete", "parameters": [ { @@ -5407,7 +5407,7 @@ "tags": [ "system/networking" ], - "summary": "Get imported IPv4 BGP routes.", + "summary": "Get imported IPv4 BGP routes", "operationId": "networking_bgp_imported_routes_ipv4", "parameters": [ { @@ -5482,7 +5482,7 @@ "tags": [ "system/networking" ], - "summary": "Get loopback addresses, optionally filtering by id", + "summary": "List loopback addresses", "operationId": "networking_loopback_address_list", "parameters": [ { From a0fccaff7f36a64d8c270896720fb75027c73ca8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:22:50 -0700 Subject: [PATCH 14/24] chore(deps): update rust crate flate2 to 1.0.28 (#4280) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 395eeb1e79..4429a5031f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2520,9 +2520,9 @@ checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499" [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", diff --git a/Cargo.toml b/Cargo.toml index 0b1dee821e..64f4d5af61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,7 +187,7 @@ either = "1.9.0" expectorate = "1.1.0" fatfs = "0.3.6" filetime = "0.2.22" -flate2 = "1.0.27" +flate2 = "1.0.28" flume = "0.11.0" foreign-types = "0.3.2" fs-err = "2.9.0" diff --git a/workspace-hack/Cargo.toml 
b/workspace-hack/Cargo.toml index 1c3a0b0c73..28aa7a6110 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -35,7 +35,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr diesel = { version = "2.1.1", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } -flate2 = { version = "1.0.27" } +flate2 = { version = "1.0.28" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } @@ -128,7 +128,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr diesel = { version = "2.1.1", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.9.0" } -flate2 = { version = "1.0.27" } +flate2 = { version = "1.0.28" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } From e14b7379089e6b7b6fafe43306ff8c1ba5b0b45e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:24:56 -0700 Subject: [PATCH 15/24] chore(deps): update rust crate bcs to 0.1.6 (#4279) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4429a5031f..dd4fa4d69b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -484,9 +484,9 @@ dependencies = [ [[package]] name = "bcs" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd3ffe8b19a604421a5d461d4a70346223e535903fbc3067138bddbebddcf77" 
+checksum = "85b6598a2f5d564fb7855dc6b06fd1c38cff5a72bd8b863a4d021938497b440a" dependencies = [ "serde", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 64f4d5af61..62d6712d7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.21.4" bb8 = "0.8.1" -bcs = "0.1.5" +bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } From c3c968d362f4a8da8ca32220db297727f3f8b7e0 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:29:15 -0700 Subject: [PATCH 16/24] chore(deps): update rust crate openssl-probe to 0.1.5 (#4253) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 62d6712d7d..7eb0373d6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -259,7 +259,7 @@ openapiv3 = "1.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -openssl-probe = "0.1.2" +openssl-probe = "0.1.5" opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "258a8b59902dd36fc7ee5425e6b1fb5fc80d4649" } oso = "0.26" owo-colors = "3.5.0" From 35a7f447c8be0b867f2bd923835d87c541b3f809 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 10:30:23 -0700 Subject: [PATCH 17/24] chore(deps): update rust crate sha2 to 0.10.8 (#4267) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd4fa4d69b..191db4715d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8008,9 +8008,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.7" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if 1.0.0", "cpufeatures", diff --git a/Cargo.toml b/Cargo.toml index 7eb0373d6c..73f89d3bdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -315,7 +315,7 @@ serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" serde_with = "2.3.3" serial_test = "0.10" -sha2 = "0.10.7" +sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 28aa7a6110..884fa14431 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -80,7 +80,7 @@ schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } -sha2 = { version = "0.10.7", features = ["oid"] } +sha2 = { version = "0.10.8", features = ["oid"] } signature = { version 
= "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -173,7 +173,7 @@ schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.107", features = ["raw_value"] } -sha2 = { version = "0.10.7", features = ["oid"] } +sha2 = { version = "0.10.8", features = ["oid"] } signature = { version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2.2.1", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 4bd19c83048b56568208c3b7b159a697653f99fc Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 24 Oct 2023 10:54:31 -0700 Subject: [PATCH 18/24] [wicketd] Fall back to old behavior if SP is too old to support reading RoT CMPA/CFPA (#4326) We hit this on rack3 but did not hit it on dogfood due to more frequent updates to dogfood's SP/RoT. Prior to this PR, wicketd expected to be able to ask an SP for its RoT's CMPA/CFPA pages, but if a rack is jumping from the 1.0.2 release to current master, its SPs are too old to understand that message. With this change, we will fall back to wicketd's previous behavior of requiring exactly 1 RoT archive if we fail to fetch the CMPA from the target component. 
--- wicketd/src/update_tracker.rs | 193 +++++++++++++-------- wicketd/tests/integration_tests/updates.rs | 5 +- 2 files changed, 123 insertions(+), 75 deletions(-) diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index e968d65a30..18b692703c 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -1608,7 +1608,13 @@ impl UpdateContext { page.copy_from_slice(&output_buf[..n]); Ok(page) }; - let cmpa = self + + // We may be talking to an SP / RoT that doesn't yet know how to give us + // its CMPA. If we hit a protocol version error here, we can fall back + // to our old behavior of requiring exactly 1 RoT artifact. + let mut artifact_to_apply = None; + + let cmpa = match self .mgs_client .sp_rot_cmpa_get( self.sp.type_, @@ -1616,82 +1622,125 @@ impl UpdateContext { SpComponent::ROT.const_as_str(), ) .await - .map_err(|err| UpdateTerminalError::GetRotCmpaFailed { - error: err.into(), - }) - .and_then(|response| { + { + Ok(response) => { let data = response.into_inner().base64_data; - base64_decode_rot_page(data).map_err(|error| { + Some(base64_decode_rot_page(data).map_err(|error| { UpdateTerminalError::GetRotCmpaFailed { error } - }) - })?; - let cfpa = self - .mgs_client - .sp_rot_cfpa_get( - self.sp.type_, - self.sp.slot, - SpComponent::ROT.const_as_str(), - &gateway_client::types::GetCfpaParams { - slot: RotCfpaSlot::Active, - }, - ) - .await - .map_err(|err| UpdateTerminalError::GetRotCfpaFailed { - error: err.into(), - }) - .and_then(|response| { - let data = response.into_inner().base64_data; - base64_decode_rot_page(data).map_err(|error| { - UpdateTerminalError::GetRotCfpaFailed { error } - }) - })?; - - // Loop through our possible artifacts and find the first (we only - // expect one!) that verifies against the RoT's CMPA/CFPA. 
- let mut artifact_to_apply = None; - for artifact in available_artifacts { - let image = artifact - .data - .reader_stream() - .and_then(|stream| async { - let mut buf = Vec::with_capacity(artifact.data.file_size()); - StreamReader::new(stream) - .read_to_end(&mut buf) - .await - .context("I/O error reading extracted archive")?; - Ok(buf) - }) - .await - .map_err(|error| { - UpdateTerminalError::FailedFindingSignedRotImage { error } - })?; - let archive = RawHubrisArchive::from_vec(image).map_err(|err| { - UpdateTerminalError::FailedFindingSignedRotImage { - error: anyhow::Error::new(err).context(format!( - "failed to read hubris archive for {:?}", - artifact.id - )), - } - })?; - match archive.verify(&cmpa, &cfpa) { - Ok(()) => { + })?) + } + // TODO is there a better way to check the _specific_ error response + // we get here? We only have a couple of strings; we could check the + // error string contents for something like "WrongVersion", but + // that's pretty fragile. Instead we'll treat any error response + // here as a "fallback to previous behavior". + Err(err @ gateway_client::Error::ErrorResponse(_)) => { + if available_artifacts.len() == 1 { info!( - self.log, "RoT archive verification success"; - "name" => artifact.id.name.as_str(), - "version" => %artifact.id.version, - "kind" => ?artifact.id.kind, + self.log, + "Failed to get RoT CMPA page; \ + using only available RoT artifact"; + "err" => %err, ); - artifact_to_apply = Some(artifact.clone()); - break; - } - Err(err) => { - // We log this but don't fail - we want to continue looking - // for a verifiable artifact. 
- info!( - self.log, "RoT archive verification failed"; - "artifact" => ?artifact.id, - "err" => %DisplayErrorChain::new(&err), + artifact_to_apply = Some(available_artifacts[0].clone()); + None + } else { + error!( + self.log, + "Failed to get RoT CMPA; unable to choose from \ + multiple available RoT artifacts"; + "err" => %err, + "num_rot_artifacts" => available_artifacts.len(), ); + return Err(UpdateTerminalError::GetRotCmpaFailed { + error: err.into(), + }); + } + } + // For any other error (e.g., comms failures), just fail as normal. + Err(err) => { + return Err(UpdateTerminalError::GetRotCmpaFailed { + error: err.into(), + }); + } + }; + + // If we were able to fetch the CMPA, we also need to fetch the CFPA and + // then find the correct RoT artifact. If we weren't able to fetch the + // CMPA, we either already set `artifact_to_apply` to the one-and-only + // RoT artifact available, or we returned a terminal error. + if let Some(cmpa) = cmpa { + let cfpa = self + .mgs_client + .sp_rot_cfpa_get( + self.sp.type_, + self.sp.slot, + SpComponent::ROT.const_as_str(), + &gateway_client::types::GetCfpaParams { + slot: RotCfpaSlot::Active, + }, + ) + .await + .map_err(|err| UpdateTerminalError::GetRotCfpaFailed { + error: err.into(), + }) + .and_then(|response| { + let data = response.into_inner().base64_data; + base64_decode_rot_page(data).map_err(|error| { + UpdateTerminalError::GetRotCfpaFailed { error } + }) + })?; + + // Loop through our possible artifacts and find the first (we only + // expect one!) that verifies against the RoT's CMPA/CFPA. 
+ for artifact in available_artifacts { + let image = artifact + .data + .reader_stream() + .and_then(|stream| async { + let mut buf = + Vec::with_capacity(artifact.data.file_size()); + StreamReader::new(stream) + .read_to_end(&mut buf) + .await + .context("I/O error reading extracted archive")?; + Ok(buf) + }) + .await + .map_err(|error| { + UpdateTerminalError::FailedFindingSignedRotImage { + error, + } + })?; + let archive = + RawHubrisArchive::from_vec(image).map_err(|err| { + UpdateTerminalError::FailedFindingSignedRotImage { + error: anyhow::Error::new(err).context(format!( + "failed to read hubris archive for {:?}", + artifact.id + )), + } + })?; + match archive.verify(&cmpa, &cfpa) { + Ok(()) => { + info!( + self.log, "RoT archive verification success"; + "name" => artifact.id.name.as_str(), + "version" => %artifact.id.version, + "kind" => ?artifact.id.kind, + ); + artifact_to_apply = Some(artifact.clone()); + break; + } + Err(err) => { + // We log this but don't fail - we want to continue + // looking for a verifiable artifact. + info!( + self.log, "RoT archive verification failed"; + "artifact" => ?artifact.id, + "err" => %DisplayErrorChain::new(&err), + ); + } } } } diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index af743190c2..a9be9d4747 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -168,9 +168,8 @@ async fn test_updates() { match terminal_event.kind { StepEventKind::ExecutionFailed { failed_step, .. } => { // TODO: obviously we shouldn't stop here, get past more of the - // update process in this test. We currently fail when attempting to - // look up the RoT's CMPA/CFPA. - assert_eq!(failed_step.info.component, UpdateComponent::Rot); + // update process in this test. 
+ assert_eq!(failed_step.info.component, UpdateComponent::Sp); } other => { panic!("unexpected terminal event kind: {other:?}"); From ca1b0ba216ed8af485e46a014399e3adb00abb90 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 24 Oct 2023 12:01:30 -0700 Subject: [PATCH 19/24] [renovate] use actions/pin checked into our global renovate-config (#4320) See https://github.com/oxidecomputer/renovate-config/blob/main/actions/pin.json. --- .github/renovate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/renovate.json b/.github/renovate.json index 405a3e282b..606be44c87 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -4,6 +4,6 @@ "local>oxidecomputer/renovate-config", "local>oxidecomputer/renovate-config//rust/autocreate", "local>oxidecomputer/renovate-config:post-upgrade", - "helpers:pinGitHubActionDigests" + "local>oxidecomputer/renovate-config//actions/pin" ] } From 9daf99b870865044ed781107605791752f0eec94 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Tue, 24 Oct 2023 14:46:00 -0700 Subject: [PATCH 20/24] Sort fields when extracting timeseries schema (#4312) - Fields are reported in a sample via the implementations of `Target` and `Metric`, which may or may not be sorted. They'll be in declaration order, if folks derive the traits. When deriving a schema from the sample, collect fields into a set to ignore order. - Convert between a `DbFieldList` and `BTreeSet` when inserting / reading fields from the nested tables in ClickHouse. - Add sanity test that we're sorting field schema correctly. - Errors for schema mismatches report entire schema, not just fields. 
--- oximeter/db/src/client.rs | 169 ++++++++++++++++++++++++-------------- oximeter/db/src/lib.rs | 140 +++++++++++++++++++++++++++++-- oximeter/db/src/model.rs | 18 ++-- oximeter/db/src/query.rs | 29 ++++--- 4 files changed, 272 insertions(+), 84 deletions(-) diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index ffa5d97d52..c2b7c820a8 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -35,7 +35,7 @@ use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::SocketAddr; use std::num::NonZeroU32; -use std::sync::Mutex; +use tokio::sync::Mutex; use uuid::Uuid; #[usdt::provider(provider = "clickhouse__client")] @@ -208,16 +208,12 @@ impl Client { &self, name: &TimeseriesName, ) -> Result, Error> { - { - let map = self.schema.lock().unwrap(); - if let Some(s) = map.get(name) { - return Ok(Some(s.clone())); - } + let mut schema = self.schema.lock().await; + if let Some(s) = schema.get(name) { + return Ok(Some(s.clone())); } - // `get_schema` acquires the lock internally, so the above scope is required to avoid - // deadlock. - self.get_schema().await?; - Ok(self.schema.lock().unwrap().get(name).map(Clone::clone)) + self.get_schema_locked(&mut schema).await?; + Ok(schema.get(name).map(Clone::clone)) } /// List timeseries schema, paginated. @@ -384,30 +380,48 @@ impl Client { &self, sample: &Sample, ) -> Result, Error> { - let schema = model::schema_for(sample); - let name = schema.timeseries_name.clone(); - let maybe_new_schema = match self.schema.lock().unwrap().entry(name) { - Entry::Vacant(entry) => Ok(Some(entry.insert(schema).clone())), + let sample_schema = model::schema_for(sample); + let name = sample_schema.timeseries_name.clone(); + let mut schema = self.schema.lock().await; + + // We've taken the lock before we do any checks for schema. First, we + // check if we've already got one in the cache. If not, we update all + // the schema from the database, and then check the map again. 
If we + // find a schema (which now either came from the cache or the latest + // read of the DB), then we check that the derived schema matches. If + // not, we can insert it in the cache and the DB. + if !schema.contains_key(&name) { + self.get_schema_locked(&mut schema).await?; + } + match schema.entry(name) { Entry::Occupied(entry) => { let existing_schema = entry.get(); - if existing_schema == &schema { + if existing_schema == &sample_schema { Ok(None) } else { - let err = - error_for_schema_mismatch(&schema, &existing_schema); error!( self.log, - "timeseries schema mismatch, sample will be skipped: {}", - err + "timeseries schema mismatch, sample will be skipped"; + "expected" => ?existing_schema, + "actual" => ?sample_schema, + "sample" => ?sample, ); - Err(err) + Err(Error::SchemaMismatch { + expected: existing_schema.clone(), + actual: sample_schema, + }) } } - }?; - Ok(maybe_new_schema.map(|schema| { - serde_json::to_string(&model::DbTimeseriesSchema::from(schema)) - .expect("Failed to convert schema to DB model") - })) + Entry::Vacant(entry) => { + entry.insert(sample_schema.clone()); + Ok(Some( + serde_json::to_string(&model::DbTimeseriesSchema::from( + sample_schema, + )) + .expect("Failed to convert schema to DB model"), + )) + } + } } // Select the timeseries, including keys and field values, that match the given field-selection @@ -503,10 +517,15 @@ impl Client { response } - async fn get_schema(&self) -> Result<(), Error> { + // Get timeseries schema from the database. + // + // Can only be called after acquiring the lock around `self.schema`. 
+ async fn get_schema_locked( + &self, + schema: &mut BTreeMap, + ) -> Result<(), Error> { debug!(self.log, "retrieving timeseries schema from database"); let sql = { - let schema = self.schema.lock().unwrap(); if schema.is_empty() { format!( "SELECT * FROM {db_name}.timeseries_schema FORMAT JSONEachRow;", @@ -545,7 +564,7 @@ impl Client { ); (schema.timeseries_name.clone(), schema) }); - self.schema.lock().unwrap().extend(new); + schema.extend(new); } Ok(()) } @@ -593,7 +612,7 @@ impl DbWrite for Client { } Ok(schema) => { if let Some(schema) = schema { - debug!(self.log, "new timeseries schema: {:?}", schema); + debug!(self.log, "new timeseries schema"; "schema" => ?schema); new_schema.push(schema); } } @@ -730,28 +749,6 @@ async fn handle_db_response( } } -// Generate an error describing a schema mismatch -fn error_for_schema_mismatch( - schema: &TimeseriesSchema, - existing_schema: &TimeseriesSchema, -) -> Error { - let expected = existing_schema - .field_schema - .iter() - .map(|field| (field.name.clone(), field.ty)) - .collect(); - let actual = schema - .field_schema - .iter() - .map(|field| (field.name.clone(), field.ty)) - .collect(); - Error::SchemaMismatch { - name: schema.timeseries_name.to_string(), - expected, - actual, - } -} - #[cfg(test)] mod tests { use super::*; @@ -1599,7 +1596,7 @@ mod tests { ); // Clear the internal caches of seen schema - client.schema.lock().unwrap().clear(); + client.schema.lock().await.clear(); // Insert the new sample client.insert_samples(&[sample.clone()]).await.unwrap(); @@ -1611,7 +1608,7 @@ mod tests { let expected_schema = client .schema .lock() - .unwrap() + .await .get(×eries_name) .expect( "After inserting a new sample, its schema should be included", @@ -2484,13 +2481,13 @@ mod tests { #[tokio::test] async fn test_get_schema_no_new_values() { let (mut db, client, _) = setup_filter_testcase().await; - let schema = &client.schema.lock().unwrap().clone(); - client.get_schema().await.expect("Failed to get timeseries 
schema"); - assert_eq!( - schema, - &*client.schema.lock().unwrap(), - "Schema shouldn't change" - ); + let original_schema = client.schema.lock().await.clone(); + let mut schema = client.schema.lock().await; + client + .get_schema_locked(&mut schema) + .await + .expect("Failed to get timeseries schema"); + assert_eq!(&original_schema, &*schema, "Schema shouldn't change"); db.cleanup().await.expect("Failed to cleanup database"); } @@ -2585,4 +2582,56 @@ mod tests { ); db.cleanup().await.expect("Failed to cleanup database"); } + + #[tokio::test] + async fn test_update_schema_cache_on_new_sample() { + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_update_schema_cache_on_new_sample"); + let log = &logctx.log; + + // Let the OS assign a port and discover it after ClickHouse starts + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + let samples = [test_util::make_sample()]; + client.insert_samples(&samples).await.unwrap(); + + // Get the count of schema directly from the DB, which should have just + // one. + let response = client.execute_with_body( + "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; + ").await.unwrap(); + assert_eq!(response.lines().count(), 1, "Expected exactly 1 schema"); + assert_eq!(client.schema.lock().await.len(), 1); + + // Clear the internal cache, and insert the sample again. + // + // This should cause us to look up the schema in the DB again, but _not_ + // insert a new one. + client.schema.lock().await.clear(); + assert!(client.schema.lock().await.is_empty()); + + client.insert_samples(&samples).await.unwrap(); + + // Get the count of schema directly from the DB, which should still have + // only the one schema. 
+ let response = client.execute_with_body( + "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; + ").await.unwrap(); + assert_eq!( + response.lines().count(), + 1, + "Expected exactly 1 schema again" + ); + assert_eq!(client.schema.lock().await.len(), 1); + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index c878b8ff2a..11ecbeddc8 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -4,7 +4,7 @@ //! Tools for interacting with the control plane telemetry database. -// Copyright 2021 Oxide Computer Company +// Copyright 2023 Oxide Computer Company use crate::query::StringFieldSelector; use chrono::{DateTime, Utc}; @@ -13,6 +13,7 @@ pub use oximeter::{DatumType, Field, FieldType, Measurement, Sample}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::convert::TryFrom; use std::num::NonZeroU32; use thiserror::Error; @@ -36,12 +37,8 @@ pub enum Error { Database(String), /// A schema provided when collecting samples did not match the expected schema - #[error("Schema mismatch for timeseries '{name}', expected fields {expected:?} found fields {actual:?}")] - SchemaMismatch { - name: String, - expected: BTreeMap, - actual: BTreeMap, - }, + #[error("Schema mismatch for timeseries '{0}'", expected.timeseries_name)] + SchemaMismatch { expected: TimeseriesSchema, actual: TimeseriesSchema }, #[error("Timeseries not found for: {0}")] TimeseriesNotFound(String), @@ -153,6 +150,13 @@ impl std::convert::TryFrom for TimeseriesName { } } +impl std::str::FromStr for TimeseriesName { + type Err = Error; + fn from_str(s: &str) -> Result { + s.try_into() + } +} + impl PartialEq for TimeseriesName where T: AsRef, @@ -177,7 +181,7 @@ fn validate_timeseries_name(s: &str) -> Result<&str, Error> { #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub 
struct TimeseriesSchema { pub timeseries_name: TimeseriesName, - pub field_schema: Vec, + pub field_schema: BTreeSet, pub datum_type: DatumType, pub created: DateTime, } @@ -398,6 +402,8 @@ const TIMESERIES_NAME_REGEX: &str = #[cfg(test)] mod tests { use super::*; + use crate::model::DbFieldList; + use crate::model::DbTimeseriesSchema; use std::convert::TryFrom; use uuid::Uuid; @@ -505,4 +511,122 @@ mod tests { &output.join("\n"), ); } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. + // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. 
+ #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + ty: FieldType::U64, + source: FieldSource::Target, + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + ty: FieldType::U64, + source: FieldSource::Metric, + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + field_schema, + datum_type, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = model::schema_for(&sample); + assert_eq!(derived_schema, expected_schema); + } + + #[test] + fn test_unsorted_db_fields_are_sorted_on_read() { + let target_field = FieldSchema { + name: String::from("later"), + ty: FieldType::U64, + source: FieldSource::Target, + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + ty: FieldType::U64, + source: FieldSource::Metric, + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name: timeseries_name.clone(), + field_schema, + datum_type, + created: Utc::now(), + }; + + // The fields here are sorted by target and then metric, which is how we + // used to insert them into the DB. We're checking that they are totally + // sorted when we read them out of the DB, even though they are not in + // the extracted model type. 
+ let db_fields = DbFieldList { + names: vec![target_field.name.clone(), metric_field.name.clone()], + types: vec![target_field.ty.into(), metric_field.ty.into()], + sources: vec![ + target_field.source.into(), + metric_field.source.into(), + ], + }; + let db_schema = DbTimeseriesSchema { + timeseries_name: timeseries_name.to_string(), + field_schema: db_fields, + datum_type: datum_type.into(), + created: expected_schema.created, + }; + assert_eq!(expected_schema, TimeseriesSchema::from(db_schema)); + } + + #[test] + fn test_field_schema_ordering() { + let mut fields = BTreeSet::new(); + fields.insert(FieldSchema { + name: String::from("second"), + ty: FieldType::U64, + source: FieldSource::Target, + }); + fields.insert(FieldSchema { + name: String::from("first"), + ty: FieldType::U64, + source: FieldSource::Target, + }); + let mut iter = fields.iter(); + assert_eq!(iter.next().unwrap().name, "first"); + assert_eq!(iter.next().unwrap().name, "second"); + assert!(iter.next().is_none()); + } } diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 1314c5c649..7f5b150b46 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -30,6 +30,7 @@ use oximeter::types::Sample; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -107,7 +108,7 @@ pub(crate) struct DbFieldList { pub sources: Vec, } -impl From for Vec { +impl From for BTreeSet { fn from(list: DbFieldList) -> Self { list.names .into_iter() @@ -122,8 +123,8 @@ impl From for Vec { } } -impl From> for DbFieldList { - fn from(list: Vec) -> Self { +impl From> for DbFieldList { + fn from(list: BTreeSet) -> Self { let mut names = Vec::with_capacity(list.len()); let mut types = Vec::with_capacity(list.len()); let mut sources = Vec::with_capacity(list.len()); @@ -914,6 +915,9 @@ pub(crate) fn unroll_measurement_row(sample: &Sample) -> (String, String) { /// 
Return the schema for a `Sample`. pub(crate) fn schema_for(sample: &Sample) -> TimeseriesSchema { + // The fields are iterated through whatever order the `Target` or `Metric` + // impl chooses. We'll store in a set ordered by field name, to ignore the + // declaration order. let created = Utc::now(); let field_schema = sample .target_fields() @@ -1403,7 +1407,7 @@ mod tests { sources: vec![DbFieldSource::Target, DbFieldSource::Metric], }; - let list = vec![ + let list: BTreeSet<_> = [ FieldSchema { name: String::from("field0"), ty: FieldType::I64, @@ -1414,11 +1418,13 @@ mod tests { ty: FieldType::IpAddr, source: FieldSource::Metric, }, - ]; + ] + .into_iter() + .collect(); assert_eq!(DbFieldList::from(list.clone()), db_list); assert_eq!(db_list, list.clone().into()); - let round_trip: Vec = + let round_trip: BTreeSet = DbFieldList::from(list.clone()).into(); assert_eq!(round_trip, list); } diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index e9e1600739..6a55d3f518 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -721,6 +721,7 @@ mod tests { use crate::FieldSource; use crate::TimeseriesName; use chrono::NaiveDateTime; + use std::collections::BTreeSet; use std::convert::TryFrom; #[test] @@ -774,7 +775,7 @@ mod tests { fn test_select_query_builder_filter_raw() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -785,7 +786,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -905,7 +908,7 @@ mod tests { fn test_select_query_builder_no_fields() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![], + field_schema: BTreeSet::new(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -927,7 +930,7 @@ mod tests 
{ fn test_select_query_builder_limit_offset() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![], + field_schema: BTreeSet::new(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -996,7 +999,7 @@ mod tests { fn test_select_query_builder_no_selectors() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1007,7 +1010,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -1057,7 +1062,7 @@ mod tests { fn test_select_query_builder_field_selectors() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1068,7 +1073,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; @@ -1106,7 +1113,7 @@ mod tests { fn test_select_query_builder_full() { let schema = TimeseriesSchema { timeseries_name: TimeseriesName::try_from("foo:bar").unwrap(), - field_schema: vec![ + field_schema: [ FieldSchema { name: "f0".to_string(), ty: FieldType::I64, @@ -1117,7 +1124,9 @@ mod tests { ty: FieldType::Bool, source: FieldSource::Target, }, - ], + ] + .into_iter() + .collect(), datum_type: DatumType::I64, created: Utc::now(), }; From 8ca3e674f8249bd9f1415426293f08150759fd36 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 03:06:54 +0000 Subject: [PATCH 21/24] chore(deps): update actions/checkout digest to b4ffde6 (#4313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following 
updates: | Package | Type | Update | Change | |---|---|---|---| | [actions/checkout](https://togithub.com/actions/checkout) | action | digest | [`8ade135` -> `b4ffde6`](https://togithub.com/actions/checkout/compare/8ade135...b4ffde6) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. â™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index df4cbc9b59..e85a67f1e0 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -17,7 +17,7 @@ jobs: env: RUSTFLAGS: -D warnings steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable From cb63320216e5d57a1a73d12eb40605477b485b38 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 03:07:21 +0000 Subject: [PATCH 22/24] chore(deps): update taiki-e/install-action digest to ac89944 (#4275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | 
[`e659bf8` -> `ac89944`](https://togithub.com/taiki-e/install-action/compare/e659bf8...ac89944) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. â™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index e85a67f1e0..ebf881b1f2 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -22,7 +22,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@e659bf85ee986e37e35cc1c53bfeebe044d8133e # v2 + uses: taiki-e/install-action@ac89944b5b150d78567ab6c02badfbe48b0b55aa # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From d45058098f9b782e9fa25b1739d3a5fca73a0486 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 06:23:47 +0000 Subject: [PATCH 23/24] chore(deps): update actions/checkout action to v3.6.0 (#4344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [actions/checkout](https://togithub.com/actions/checkout) | action | minor | [`v3.5.0` -> `v3.6.0`](https://togithub.com/actions/checkout/compare/v3.5.0...v3.6.0) | --- ### Release Notes
actions/checkout (actions/checkout) ### [`v3.6.0`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v360) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.3...v3.6.0) - [Fix: Mark test scripts with Bash'isms to be run via Bash](https://togithub.com/actions/checkout/pull/1377) - [Add option to fetch tags even if fetch-depth > 0](https://togithub.com/actions/checkout/pull/579) ### [`v3.5.3`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v353) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.2...v3.5.3) - [Fix: Checkout fail in self-hosted runners when faulty submodule are checked-in](https://togithub.com/actions/checkout/pull/1196) - [Fix typos found by codespell](https://togithub.com/actions/checkout/pull/1287) - [Add support for sparse checkouts](https://togithub.com/actions/checkout/pull/1369) ### [`v3.5.2`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v352) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.1...v3.5.2) - [Fix api endpoint for GHES](https://togithub.com/actions/checkout/pull/1289) ### [`v3.5.1`](https://togithub.com/actions/checkout/blob/HEAD/CHANGELOG.md#v351) [Compare Source](https://togithub.com/actions/checkout/compare/v3.5.0...v3.5.1) - [Fix slow checkout on Windows](https://togithub.com/actions/checkout/pull/1246)
--- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/check-opte-ver.yml | 2 +- .github/workflows/check-workspace-deps.yml | 2 +- .github/workflows/rust.yml | 8 ++++---- .github/workflows/update-dendrite.yml | 2 +- .github/workflows/update-maghemite.yml | 2 +- .github/workflows/validate-openapi-spec.yml | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index a8e18f080e..9fc390277b 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -9,7 +9,7 @@ jobs: check-opte-ver: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 521afa7359..7ba0c66566 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -10,6 +10,6 @@ jobs: check-workspace-deps: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Check Workspace Dependencies run: cargo xtask check-workspace-deps 
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 873b316e16..8cc98f192f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,7 @@ jobs: check-style: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -27,7 +27,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -53,7 +53,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -79,7 +79,7 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: 
Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version diff --git a/.github/workflows/update-dendrite.yml b/.github/workflows/update-dendrite.yml index 10d8ef7618..9d79dfc8f9 100644 --- a/.github/workflows/update-dendrite.yml +++ b/.github/workflows/update-dendrite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index 7aa2b8b6c8..e2512dc6ce 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 1d6c152296..9eab5ccaa1 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -10,7 +10,7 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - uses: actions/setup-node@64ed1c7eab4cce3362f8c340dee64e5eaeef8f7c # v3.6.0 with: node-version: '18' From 6f4923e63d39e1a530db992ed7f9c26976559846 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 25 Oct 2023 10:11:39 -0700 Subject: [PATCH 24/24] Add new datum types to timeseries schema table (#4349) - Fixes https://github.com/oxidecomputer/omicron/issues/4336 
- Adds regression test making sure a query selecting all datum types from the schema table succeeds, even if it returns zero results. This uses an iterator over the datum type enum, so it should catch future changes to the type. - Adds new datum types to schema table - Bumps oximeter database version number --- oximeter/db/src/client.rs | 41 +++++++++++++++++++++++++ oximeter/db/src/db-replicated-init.sql | 20 +++++++++++- oximeter/db/src/db-single-node-init.sql | 20 +++++++++++- oximeter/db/src/model.rs | 2 +- oximeter/oximeter/src/types.rs | 1 + 5 files changed, 81 insertions(+), 3 deletions(-) diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index c2b7c820a8..2d563045f2 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -2634,4 +2634,45 @@ mod tests { db.cleanup().await.expect("Failed to cleanup ClickHouse server"); logctx.cleanup_successful(); } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/4336. + // + // This tests that we can successfully query all extant datum types from the + // schema table. There may be no such values, but the query itself should + // succeed. + #[tokio::test] + async fn test_select_all_datum_types() { + use strum::IntoEnumIterator; + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_select_all_datum_types"); + let log = &logctx.log; + + // Let the OS assign a port and discover it after ClickHouse starts + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + + // Attempt to select all schema with each datum type. 
+ for ty in oximeter::DatumType::iter() { + let sql = format!( + "SELECT COUNT() \ + FROM {}.timeseries_schema WHERE \ + datum_type = '{:?}'", + crate::DATABASE_NAME, + crate::model::DbDatumType::from(ty), + ); + let res = client.execute_with_body(sql).await.unwrap(); + let count = res.trim().parse::().unwrap(); + assert_eq!(count, 0); + } + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/db-replicated-init.sql b/oximeter/db/src/db-replicated-init.sql index 7b756f4b0d..3b734c113d 100644 --- a/oximeter/db/src/db-replicated-init.sql +++ b/oximeter/db/src/db-replicated-init.sql @@ -652,7 +652,25 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluste 'CumulativeI64' = 6, 'CumulativeF64' = 7, 'HistogramI64' = 8, - 'HistogramF64' = 9 + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 ), created DateTime64(9, 'UTC') ) diff --git a/oximeter/db/src/db-single-node-init.sql b/oximeter/db/src/db-single-node-init.sql index 1f648fd5d5..2fb5c36397 100644 --- a/oximeter/db/src/db-single-node-init.sql +++ b/oximeter/db/src/db-single-node-init.sql @@ -476,7 +476,25 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema 'CumulativeI64' = 6, 'CumulativeF64' = 7, 'HistogramI64' = 8, - 'HistogramF64' = 9 + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 ), created 
DateTime64(9, 'UTC') ) diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 7f5b150b46..41c7ab9d24 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -42,7 +42,7 @@ use uuid::Uuid; /// /// TODO(#4271): The current implementation of versioning will wipe the metrics /// database if this number is incremented. -pub const OXIMETER_VERSION: u64 = 1; +pub const OXIMETER_VERSION: u64 = 2; // Wrapper type to represent a boolean in the database. // diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index d3f1b9e746..0cc3299ec4 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -275,6 +275,7 @@ pub struct Field { JsonSchema, Serialize, Deserialize, + strum::EnumIter, )] #[serde(rename_all = "snake_case")] pub enum DatumType {