From 4090983f94c7a65b9ef515af85ea3078513a46f0 Mon Sep 17 00:00:00 2001
From: James MacMahon <james@oxide.computer>
Date: Thu, 23 May 2024 17:34:40 -0400
Subject: [PATCH 01/28] [#3886 1/4] Region replacement models and queries
 (#5791)

Splitting up #5683 first by separating out the DB models, queries, and
schema changes required:

1. region replacement records

This commit adds a Region Replacement record, which is a request to
replace a region in a volume. It transitions through the following
states:

        Requested   <--
                      |
            |         |
            v         |
                      |
        Allocating  --

            |
            v

         Running    <--
                      |
            |         |
            v         |
                      |
         Driving    --

            |
            v

     ReplacementDone  <--
                        |
            |           |
            v           |
                        |
        Completing    --

            |
            v

        Completed

which are captured in the `RegionReplacementState` enum. Transitioning
from Requested to Running is the responsibility of the "start" saga,
iterating between Running and Driving is the responsibility of the
"drive" saga, and transitioning from ReplacementDone to Completed is the
responsibility of the "finish" saga. All of these will come in
subsequent PRs.
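
As a rough sketch of that split (illustrative only - the background tasks
that do this dispatching land in later PRs, and this helper is not part of
this change), a task deciding which saga to request for a record it finds
might look like:

    use nexus_db_model::RegionReplacementState;

    /// Which replacement saga (if any) to request for a record found in the
    /// given state. In-flight states already have an operating saga and need
    /// nothing triggered.
    fn saga_to_trigger(state: RegionReplacementState) -> Option<&'static str> {
        match state {
            RegionReplacementState::Requested => Some("start"),
            RegionReplacementState::Running => Some("drive"),
            RegionReplacementState::ReplacementDone => Some("finish"),
            RegionReplacementState::Allocating
            | RegionReplacementState::Driving
            | RegionReplacementState::Completing
            | RegionReplacementState::Complete => None,
        }
    }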

The state transitions themselves are performed by these sagas and all
involve a query that:

- checks that the starting state (and other values as required) make
sense
- updates the state while setting a unique `operating_saga_id` (and
any other fields as appropriate)

Multiple background tasks will be waking up, checking to see which sagas
need to be triggered, and requesting that these region replacement sagas
run. Setting a unique operating saga id is meant to block multiple sagas
from running against the same request at the same time, in an effort to cut
down on interference - most will unwind at the first step instead of
somewhere in the middle.
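
As a concrete sketch of that guarded-update pattern (simplified and
synchronous here; the real datastore methods below run against the async
connection pool and use `check_if_exists`/`execute_and_check` so that an
idempotent saga retry can be told apart from a genuine conflict):

    use diesel::prelude::*;
    use nexus_db_model::schema::region_replacement::dsl;
    use nexus_db_model::RegionReplacementState;
    use uuid::Uuid;

    /// Move a request from Requested to Allocating, but only if no other
    /// saga already holds it. A row count of zero means another saga won
    /// the race and the caller should unwind.
    fn transition_to_allocating(
        conn: &mut PgConnection,
        request_id: Uuid,
        saga_id: Uuid,
    ) -> QueryResult<usize> {
        diesel::update(dsl::region_replacement)
            .filter(dsl::id.eq(request_id))
            .filter(dsl::replacement_state.eq(RegionReplacementState::Requested))
            .filter(dsl::operating_saga_id.is_null())
            .set((
                dsl::replacement_state.eq(RegionReplacementState::Allocating),
                dsl::operating_saga_id.eq(saga_id),
            ))
            .execute(conn)
    }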

2. region replacement step records

As region replacement takes place, Nexus will be making calls to
services in order to trigger the necessary Crucible operations meant to
actually perform the replacement. These steps are recorded in the
database so that they can be consulted by subsequent steps, and
additionally act as breadcrumbs if there is an issue.
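
For example, a drive saga step that sent a replacement request through a
running Propolis might be recorded with something like the following (a
sketch only - the field names come from the model added here, but the
surrounding saga code arrives in later PRs):

    use chrono::Utc;
    use nexus_db_model::{RegionReplacementStep, RegionReplacementStepType};
    use uuid::Uuid;

    /// Build a step record noting that the replacement was driven through
    /// the Propolis of the given instance/VMM pair.
    fn propolis_step(
        replacement_id: Uuid,
        instance_id: Uuid,
        vmm_id: Uuid,
    ) -> RegionReplacementStep {
        RegionReplacementStep {
            replacement_id,
            step_time: Utc::now(),
            step_type: RegionReplacementStepType::Propolis,
            step_associated_instance_id: Some(instance_id),
            step_associated_vmm_id: Some(vmm_id),
            step_associated_pantry_ip: None,
            step_associated_pantry_port: None,
            step_associated_pantry_job_id: None,
        }
    }

Persisting the record then goes through `add_region_replacement_request_step`,
added below.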

3. volume repair records

Nexus should take care to only replace one region (or snapshot!) for a
volume at a time. Technically, the Upstairs can support two at a time,
but codifying "only one at a time" is safer, and does not allow the
possibility for a Nexus bug to replace all three regions of a region set
at a time (aka total data loss!). This "one at a time" constraint is
enforced by each repair also creating a VolumeRepair record, a table for
which there is a UNIQUE CONSTRAINT on the volume ID.
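
A sketch of the behaviour this buys us (hypothetical setup: `datastore`,
`opctx`, and two regions belonging to the same volume are assumed to be
created elsewhere):

    use nexus_db_model::Region;
    use nexus_db_queries::context::OpContext;
    use nexus_db_queries::db::DataStore;
    use omicron_common::api::external::Error;

    async fn one_repair_per_volume(
        datastore: &DataStore,
        opctx: &OpContext,
        region_a: &Region,
        region_b_same_volume: &Region,
    ) -> Result<(), Error> {
        // The first request inserts both the VolumeRepair row and the
        // region replacement request.
        let _first = datastore
            .create_region_replacement_request_for_region(opctx, region_a)
            .await?;

        // A second request for a region in the same volume must fail: its
        // VolumeRepair insert collides on volume_id.
        let second = datastore
            .create_region_replacement_request_for_region(opctx, region_b_same_volume)
            .await;
        assert!(second.is_err());

        Ok(())
    }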

4. also, the `volume_replace_region` function

The `volume_replace_region` function is also included in this PR. In a
single transaction, this will:

- set the target region's volume id to the replacement's volume id
- set the replacement region's volume id to the target's volume id
- update the target volume's construction request to replace the target
region's SocketAddrV6 with the replacement region's

This is called from the "start" saga, after allocating the replacement
region, and is meant to transition the Volume's construction request
from "indefinitely degraded, pointing to region that is gone" to
"currently degraded, but can be repaired".
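
The construction request edit itself boils down to a target swap. A minimal,
self-contained sketch of just that idea (not the actual
`volume_replace_region` code, which rewrites the serialized construction
request and swaps the regions' volume ids inside one transaction):

    use std::net::SocketAddrV6;

    /// Replace the first occurrence of the old region's target address with
    /// the new region's address, returning whether a swap happened.
    fn swap_target(
        targets: &mut [SocketAddrV6],
        old: SocketAddrV6,
        new: SocketAddrV6,
    ) -> bool {
        for target in targets.iter_mut() {
            if *target == old {
                *target = new;
                return true;
            }
        }
        false
    }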
---
 nexus/db-model/src/lib.rs                     |   6 +
 nexus/db-model/src/region.rs                  |   3 +
 nexus/db-model/src/region_replacement.rs      | 165 ++++
 nexus/db-model/src/region_replacement_step.rs |  85 ++
 nexus/db-model/src/schema.rs                  |  39 +
 nexus/db-model/src/schema_versions.rs         |   3 +-
 nexus/db-model/src/upstairs_repair.rs         |   1 +
 nexus/db-model/src/volume_repair.rs           |  20 +
 nexus/db-queries/src/db/datastore/disk.rs     |  16 +
 nexus/db-queries/src/db/datastore/mod.rs      |   2 +
 nexus/db-queries/src/db/datastore/region.rs   |  77 +-
 .../src/db/datastore/region_replacement.rs    | 907 ++++++++++++++++++
 nexus/db-queries/src/db/datastore/snapshot.rs |  18 +
 nexus/db-queries/src/db/datastore/volume.rs   | 632 ++++++++++++
 nexus/src/app/sagas/common_storage.rs         |   1 -
 nexus/tests/integration_tests/disks.rs        |   1 -
 schema/crdb/dbinit.sql                        |  74 +-
 schema/crdb/region-replacement/up01.sql       |   9 +
 schema/crdb/region-replacement/up02.sql       |  18 +
 schema/crdb/region-replacement/up03.sql       |   1 +
 schema/crdb/region-replacement/up04.sql       |   4 +
 schema/crdb/region-replacement/up05.sql       |   3 +
 schema/crdb/region-replacement/up06.sql       |   4 +
 schema/crdb/region-replacement/up07.sql       |  16 +
 schema/crdb/region-replacement/up08.sql       |   1 +
 schema/crdb/region-replacement/up09.sql       |   1 +
 schema/crdb/region-replacement/up10.sql       |   3 +
 schema/crdb/region-replacement/up11.sql       |   1 +
 28 files changed, 2106 insertions(+), 5 deletions(-)
 create mode 100644 nexus/db-model/src/region_replacement.rs
 create mode 100644 nexus/db-model/src/region_replacement_step.rs
 create mode 100644 nexus/db-model/src/volume_repair.rs
 create mode 100644 nexus/db-queries/src/db/datastore/region_replacement.rs
 create mode 100644 schema/crdb/region-replacement/up01.sql
 create mode 100644 schema/crdb/region-replacement/up02.sql
 create mode 100644 schema/crdb/region-replacement/up03.sql
 create mode 100644 schema/crdb/region-replacement/up04.sql
 create mode 100644 schema/crdb/region-replacement/up05.sql
 create mode 100644 schema/crdb/region-replacement/up06.sql
 create mode 100644 schema/crdb/region-replacement/up07.sql
 create mode 100644 schema/crdb/region-replacement/up08.sql
 create mode 100644 schema/crdb/region-replacement/up09.sql
 create mode 100644 schema/crdb/region-replacement/up10.sql
 create mode 100644 schema/crdb/region-replacement/up11.sql

diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 205885cfd8..bd16719633 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -66,6 +66,8 @@ pub mod queries;
 mod quota;
 mod rack;
 mod region;
+mod region_replacement;
+mod region_replacement_step;
 mod region_snapshot;
 mod role_assignment;
 mod role_builtin;
@@ -98,6 +100,7 @@ mod virtual_provisioning_resource;
 mod vmm;
 mod vni;
 mod volume;
+mod volume_repair;
 mod vpc;
 mod vpc_firewall_rule;
 mod vpc_route;
@@ -162,6 +165,8 @@ pub use project::*;
 pub use quota::*;
 pub use rack::*;
 pub use region::*;
+pub use region_replacement::*;
+pub use region_replacement_step::*;
 pub use region_snapshot::*;
 pub use role_assignment::*;
 pub use role_builtin::*;
@@ -195,6 +200,7 @@ pub use virtual_provisioning_resource::*;
 pub use vmm::*;
 pub use vni::*;
 pub use volume::*;
+pub use volume_repair::*;
 pub use vpc::*;
 pub use vpc_firewall_rule::*;
 pub use vpc_route::*;
diff --git a/nexus/db-model/src/region.rs b/nexus/db-model/src/region.rs
index fefc4f4fce..441f928405 100644
--- a/nexus/db-model/src/region.rs
+++ b/nexus/db-model/src/region.rs
@@ -58,6 +58,9 @@ impl Region {
         }
     }
 
+    pub fn id(&self) -> Uuid {
+        self.identity.id
+    }
     pub fn volume_id(&self) -> Uuid {
         self.volume_id
     }
diff --git a/nexus/db-model/src/region_replacement.rs b/nexus/db-model/src/region_replacement.rs
new file mode 100644
index 0000000000..a04710f53d
--- /dev/null
+++ b/nexus/db-model/src/region_replacement.rs
@@ -0,0 +1,165 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use super::impl_enum_type;
+use crate::schema::region_replacement;
+use crate::Region;
+use chrono::DateTime;
+use chrono::Utc;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+impl_enum_type!(
+    #[derive(SqlType, Debug, QueryId)]
+    #[diesel(postgres_type(name = "region_replacement_state", schema = "public"))]
+    pub struct RegionReplacementStateEnum;
+
+    #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
+    #[diesel(sql_type = RegionReplacementStateEnum)]
+    pub enum RegionReplacementState;
+
+    // Enum values
+    Requested => b"requested"
+    Allocating => b"allocating"
+    Running => b"running"
+    Driving => b"driving"
+    ReplacementDone => b"replacement_done"
+    Completing => b"completing"
+    Complete => b"complete"
+);
+
+impl std::str::FromStr for RegionReplacementState {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "requested" => Ok(RegionReplacementState::Requested),
+            "allocating" => Ok(RegionReplacementState::Allocating),
+            "running" => Ok(RegionReplacementState::Running),
+            "driving" => Ok(RegionReplacementState::Driving),
+            "replacement_done" => Ok(RegionReplacementState::ReplacementDone),
+            "complete" => Ok(RegionReplacementState::Complete),
+            "completing" => Ok(RegionReplacementState::Completing),
+            _ => Err(format!("unrecognized value {} for enum", s)),
+        }
+    }
+}
+
+/// Database representation of a Region replacement request.
+///
+/// This record stores the data related to the operations required for Nexus to
+/// orchestrate replacing a region in a volume. It transitions through the
+/// following states:
+///
+/// ```text
+///     Requested   <--              ---
+///                   |              |
+///         |         |              |
+///         v         |              |  responsibility of region
+///                   |              |  replacement start saga
+///     Allocating  --               |
+///                                  |
+///         |                        |
+///         v                        ---
+///                                  ---
+///      Running    <--              |
+///                   |              |
+///         |         |              |
+///         v         |              | responsibility of region
+///                   |              | replacement drive saga
+///      Driving    --               |
+///                                  |
+///         |                        |
+///         v                        ---
+///                                  ---
+///  ReplacementDone  <--            |
+///                     |            |
+///         |           |            |
+///         v           |            |
+///                     |            | responsibility of region
+///     Completing    --             | replacement finish saga
+///                                  |
+///         |                        |
+///         v                        |
+///                                  |
+///     Completed                    ---
+/// ```
+///
+/// which are captured in the RegionReplacementState enum. Annotated on the
+/// right are which sagas are responsible for which state transitions. The state
+/// transitions themselves are performed by these sagas and all involve a query
+/// that:
+///
+///  - checks that the starting state (and other values as required) make sense
+///  - updates the state while setting a unique operating_saga_id (and any
+///    other fields as appropriate)
+///
+/// As multiple background tasks will be waking up, checking to see what sagas
+/// need to be triggered, and requesting that these region replacement sagas
+/// run, this is meant to block multiple sagas from running at the same time in
+/// an effort to cut down on interference - most will unwind at the first step
+/// of performing this state transition instead of somewhere in the middle.
+///
+/// The correctness of a region replacement relies on certain operations
+/// happening only when the record is in a certain state. For example: Nexus
+/// should not undo a volume modification _after_ an upstairs has been sent a
+/// replacement request, so volume modification happens at the Allocating state
+/// (in the start saga), and replacement requests are only sent in the Driving
+/// state (in the drive saga) - this ensures that replacement requests are only
+/// sent if the start saga completed successfully, meaning the volume
+/// modification was committed to the database and will not change or be
+/// unwound.
+///
+/// See also: RegionReplacementStep records
+#[derive(
+    Queryable,
+    Insertable,
+    Debug,
+    Clone,
+    Selectable,
+    Serialize,
+    Deserialize,
+    PartialEq,
+)]
+#[diesel(table_name = region_replacement)]
+pub struct RegionReplacement {
+    pub id: Uuid,
+
+    pub request_time: DateTime<Utc>,
+
+    /// The region being replaced
+    pub old_region_id: Uuid,
+
+    /// The volume whose region is being replaced
+    pub volume_id: Uuid,
+
+    /// A synthetic volume that is only used to later delete the old region
+    pub old_region_volume_id: Option<Uuid>,
+
+    /// The new region that will be used to replace the old one
+    pub new_region_id: Option<Uuid>,
+
+    pub replacement_state: RegionReplacementState,
+
+    pub operating_saga_id: Option<Uuid>,
+}
+
+impl RegionReplacement {
+    pub fn for_region(region: &Region) -> Self {
+        Self::new(region.id(), region.volume_id())
+    }
+
+    pub fn new(old_region_id: Uuid, volume_id: Uuid) -> Self {
+        Self {
+            id: Uuid::new_v4(),
+            request_time: Utc::now(),
+            old_region_id,
+            volume_id,
+            old_region_volume_id: None,
+            new_region_id: None,
+            replacement_state: RegionReplacementState::Requested,
+            operating_saga_id: None,
+        }
+    }
+}
diff --git a/nexus/db-model/src/region_replacement_step.rs b/nexus/db-model/src/region_replacement_step.rs
new file mode 100644
index 0000000000..c0a61b958c
--- /dev/null
+++ b/nexus/db-model/src/region_replacement_step.rs
@@ -0,0 +1,85 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use super::impl_enum_type;
+use crate::ipv6;
+use crate::schema::region_replacement_step;
+use crate::SqlU16;
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::net::SocketAddrV6;
+use uuid::Uuid;
+
+impl_enum_type!(
+    #[derive(SqlType, Debug, QueryId)]
+    #[diesel(postgres_type(name = "region_replacement_step_type", schema = "public"))]
+    pub struct RegionReplacementStepTypeEnum;
+
+    #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
+    #[diesel(sql_type = RegionReplacementStepTypeEnum)]
+    pub enum RegionReplacementStepType;
+
+    // What is driving the repair forward?
+    Propolis => b"propolis"
+    Pantry => b"pantry"
+);
+
+/// Database representation of a Region replacement repair step
+///
+/// As region replacement takes place, Nexus will be making calls to services in
+/// order to trigger the necessary Crucible operations meant to actually perform
+/// the replacement. These steps are recorded in the database so that they can
+/// be consulted by subsequent steps, and additionally act as breadcrumbs if
+/// there is an issue.
+///
+/// See also: RegionReplacement records
+#[derive(
+    Queryable,
+    Insertable,
+    Debug,
+    Clone,
+    Selectable,
+    Serialize,
+    Deserialize,
+    PartialEq,
+)]
+#[diesel(table_name = region_replacement_step)]
+pub struct RegionReplacementStep {
+    pub replacement_id: Uuid,
+
+    pub step_time: DateTime<Utc>,
+
+    pub step_type: RegionReplacementStepType,
+
+    pub step_associated_instance_id: Option<Uuid>,
+    pub step_associated_vmm_id: Option<Uuid>,
+
+    pub step_associated_pantry_ip: Option<ipv6::Ipv6Addr>,
+    pub step_associated_pantry_port: Option<SqlU16>,
+    pub step_associated_pantry_job_id: Option<Uuid>,
+}
+
+impl RegionReplacementStep {
+    pub fn instance_and_vmm_ids(&self) -> Option<(Uuid, Uuid)> {
+        if self.step_type != RegionReplacementStepType::Propolis {
+            return None;
+        }
+
+        let instance_id = self.step_associated_instance_id?;
+        let vmm_id = self.step_associated_vmm_id?;
+
+        Some((instance_id, vmm_id))
+    }
+
+    pub fn pantry_address(&self) -> Option<SocketAddrV6> {
+        if self.step_type != RegionReplacementStepType::Pantry {
+            return None;
+        }
+
+        let ip = self.step_associated_pantry_ip?;
+        let port = self.step_associated_pantry_port?;
+
+        Some(SocketAddrV6::new(*ip, *port, 0, 0))
+    }
+}
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 423388de30..deeca970c7 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -1036,6 +1036,8 @@ table! {
     }
 }
 
+allow_tables_to_appear_in_same_query!(zpool, dataset);
+
 table! {
     region (id) {
         id -> Uuid,
@@ -1051,6 +1053,8 @@ table! {
     }
 }
 
+allow_tables_to_appear_in_same_query!(zpool, region);
+
 table! {
     region_snapshot (dataset_id, region_id, snapshot_id) {
         dataset_id -> Uuid,
@@ -1697,6 +1701,41 @@ table! {
     }
 }
 
+table! {
+    region_replacement (id) {
+        id -> Uuid,
+        request_time -> Timestamptz,
+        old_region_id -> Uuid,
+        volume_id -> Uuid,
+        old_region_volume_id -> Nullable<Uuid>,
+        new_region_id -> Nullable<Uuid>,
+        replacement_state -> crate::RegionReplacementStateEnum,
+        operating_saga_id -> Nullable<Uuid>,
+    }
+}
+
+table! {
+    volume_repair (volume_id) {
+        volume_id -> Uuid,
+        repair_id -> Uuid,
+    }
+}
+
+table! {
+    region_replacement_step (replacement_id, step_time, step_type) {
+        replacement_id -> Uuid,
+        step_time -> Timestamptz,
+        step_type -> crate::RegionReplacementStepTypeEnum,
+
+        step_associated_instance_id -> Nullable<Uuid>,
+        step_associated_vmm_id -> Nullable<Uuid>,
+
+        step_associated_pantry_ip -> Nullable<Inet>,
+        step_associated_pantry_port -> Nullable<Int4>,
+        step_associated_pantry_job_id -> Nullable<Uuid>,
+    }
+}
+
 table! {
     db_metadata (singleton) {
         singleton -> Bool,
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index ed4b762e68..5ceaf3167a 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema.  Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(64, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(65, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(65, "region-replacement"),
         KnownVersion::new(64, "add-view-for-v2p-mappings"),
         KnownVersion::new(63, "remove-producer-base-route-column"),
         KnownVersion::new(62, "allocate-subnet-decommissioned-sleds"),
diff --git a/nexus/db-model/src/upstairs_repair.rs b/nexus/db-model/src/upstairs_repair.rs
index 311592f8e4..ed281b6c64 100644
--- a/nexus/db-model/src/upstairs_repair.rs
+++ b/nexus/db-model/src/upstairs_repair.rs
@@ -106,6 +106,7 @@ pub struct UpstairsRepairNotification {
     pub upstairs_id: DbTypedUuid<UpstairsKind>,
     pub session_id: DbTypedUuid<UpstairsSessionKind>,
 
+    // The Downstairs being repaired
     pub region_id: DbTypedUuid<DownstairsRegionKind>,
     pub target_ip: ipv6::Ipv6Addr,
     pub target_port: SqlU16,
diff --git a/nexus/db-model/src/volume_repair.rs b/nexus/db-model/src/volume_repair.rs
new file mode 100644
index 0000000000..a92fcd3425
--- /dev/null
+++ b/nexus/db-model/src/volume_repair.rs
@@ -0,0 +1,20 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::schema::volume_repair;
+use uuid::Uuid;
+
+/// When modifying a Volume by replacing its parts, Nexus should take care to
+/// only replace one region or snapshot for a volume at a time. Technically, the
+/// Upstairs can support two at a time, but codifying "only one at a time" is
+/// safer, and does not allow the possibility for a Nexus bug to replace all
+/// three regions of a region set at a time (aka total data loss!). This "one at
+/// a time" constraint is enforced by each repair also creating a VolumeRepair
+/// record, a table for which there is a UNIQUE CONSTRAINT on the volume ID.
+#[derive(Queryable, Insertable, Debug, Selectable, Clone)]
+#[diesel(table_name = volume_repair)]
+pub struct VolumeRepair {
+    pub volume_id: Uuid,
+    pub repair_id: Uuid,
+}
diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs
index 2788558a0b..e1d504761c 100644
--- a/nexus/db-queries/src/db/datastore/disk.rs
+++ b/nexus/db-queries/src/db/datastore/disk.rs
@@ -811,6 +811,22 @@ impl DataStore {
             .map(|(disk, _, _)| disk)
             .collect())
     }
+
+    pub async fn disk_for_volume_id(
+        &self,
+        volume_id: Uuid,
+    ) -> LookupResult<Option<Disk>> {
+        let conn = self.pool_connection_unauthorized().await?;
+
+        use db::schema::disk::dsl;
+        dsl::disk
+            .filter(dsl::volume_id.eq(volume_id))
+            .select(Disk::as_select())
+            .first_async(&*conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
 
 #[cfg(test)]
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 1618395800..7c47489477 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -77,6 +77,7 @@ pub mod pub_test_utils;
 mod quota;
 mod rack;
 mod region;
+mod region_replacement;
 mod region_snapshot;
 mod role;
 mod saga;
@@ -119,6 +120,7 @@ pub use volume::read_only_resources_associated_with_volume;
 pub use volume::CrucibleResources;
 pub use volume::CrucibleTargets;
 pub use volume::VolumeCheckoutReason;
+pub use volume::VolumeReplacementParams;
 
 // Number of unique datasets required to back a region.
 // TODO: This should likely turn into a configuration option.
diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs
index 6e152cb9f2..d7da24cce3 100644
--- a/nexus/db-queries/src/db/datastore/region.rs
+++ b/nexus/db-queries/src/db/datastore/region.rs
@@ -13,6 +13,7 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::lookup::LookupPath;
 use crate::db::model::Dataset;
+use crate::db::model::PhysicalDiskPolicy;
 use crate::db::model::Region;
 use crate::transaction_retry::OptionalError;
 use async_bb8_diesel::AsyncRunQueryDsl;
@@ -22,6 +23,7 @@ use nexus_types::external_api::params;
 use omicron_common::api::external;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
+use omicron_common::api::external::LookupResult;
 use slog::Logger;
 use uuid::Uuid;
 
@@ -69,6 +71,22 @@ impl DataStore {
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
+    pub async fn get_region_optional(
+        &self,
+        region_id: Uuid,
+    ) -> Result<Option<Region>, Error> {
+        use db::schema::region::dsl;
+        dsl::region
+            .filter(dsl::id.eq(region_id))
+            .select(Region::as_select())
+            .get_result_async::<Region>(
+                &*self.pool_connection_unauthorized().await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
     async fn get_block_size_from_disk_source(
         &self,
         opctx: &OpContext,
@@ -173,13 +191,36 @@ impl DataStore {
         let (blocks_per_extent, extent_count) =
             Self::get_crucible_allocation(&block_size, size);
 
-        let query = crate::db::queries::region_allocation::allocation_query(
+        self.arbitrary_region_allocate_direct(
+            opctx,
             volume_id,
             u64::from(block_size.to_bytes()),
             blocks_per_extent,
             extent_count,
             allocation_strategy,
             num_regions_required,
+        )
+        .await
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub async fn arbitrary_region_allocate_direct(
+        &self,
+        opctx: &OpContext,
+        volume_id: Uuid,
+        block_size: u64,
+        blocks_per_extent: u64,
+        extent_count: u64,
+        allocation_strategy: &RegionAllocationStrategy,
+        num_regions_required: usize,
+    ) -> Result<Vec<(Dataset, Region)>, Error> {
+        let query = crate::db::queries::region_allocation::allocation_query(
+            volume_id,
+            block_size,
+            blocks_per_extent,
+            extent_count,
+            allocation_strategy,
+            num_regions_required,
         );
 
         let conn = self.pool_connection_authorized(&opctx).await?;
@@ -324,6 +365,40 @@ impl DataStore {
             Ok(0)
         }
     }
+
+    /// Find regions on expunged disks
+    pub async fn find_regions_on_expunged_physical_disks(
+        &self,
+        opctx: &OpContext,
+    ) -> LookupResult<Vec<Region>> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        use db::schema::dataset::dsl as dataset_dsl;
+        use db::schema::physical_disk::dsl as physical_disk_dsl;
+        use db::schema::region::dsl as region_dsl;
+        use db::schema::zpool::dsl as zpool_dsl;
+
+        region_dsl::region
+            .filter(region_dsl::dataset_id.eq_any(
+                dataset_dsl::dataset
+                    .filter(dataset_dsl::time_deleted.is_null())
+                    .filter(dataset_dsl::pool_id.eq_any(
+                        zpool_dsl::zpool
+                            .filter(zpool_dsl::time_deleted.is_null())
+                            .filter(zpool_dsl::physical_disk_id.eq_any(
+                                physical_disk_dsl::physical_disk
+                                    .filter(physical_disk_dsl::disk_policy.eq(PhysicalDiskPolicy::Expunged))
+                                    .select(physical_disk_dsl::id)
+                            ))
+                            .select(zpool_dsl::id)
+                    ))
+                    .select(dataset_dsl::id)
+            ))
+            .select(Region::as_select())
+            .load_async(&*conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
 
 #[cfg(test)]
diff --git a/nexus/db-queries/src/db/datastore/region_replacement.rs b/nexus/db-queries/src/db/datastore/region_replacement.rs
new file mode 100644
index 0000000000..d12d123e7e
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/region_replacement.rs
@@ -0,0 +1,907 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! [`DataStore`] methods on [`RegionReplacement`]s.
+
+use super::DataStore;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::datastore::SQL_BATCH_SIZE;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::model::Region;
+use crate::db::model::RegionReplacement;
+use crate::db::model::RegionReplacementState;
+use crate::db::model::RegionReplacementStep;
+use crate::db::model::UpstairsRepairNotification;
+use crate::db::model::UpstairsRepairNotificationType;
+use crate::db::model::VolumeRepair;
+use crate::db::pagination::paginated;
+use crate::db::pagination::Paginator;
+use crate::db::update_and_check::UpdateAndCheck;
+use crate::db::update_and_check::UpdateStatus;
+use crate::db::TransactionError;
+use async_bb8_diesel::AsyncConnection;
+use async_bb8_diesel::AsyncRunQueryDsl;
+use diesel::prelude::*;
+use omicron_common::api::external::Error;
+use omicron_uuid_kinds::DownstairsRegionKind;
+use omicron_uuid_kinds::TypedUuid;
+use uuid::Uuid;
+
+impl DataStore {
+    /// Create and insert a region replacement request for a Region, returning the ID of the
+    /// request.
+    pub async fn create_region_replacement_request_for_region(
+        &self,
+        opctx: &OpContext,
+        region: &Region,
+    ) -> Result<Uuid, Error> {
+        let request = RegionReplacement::for_region(region);
+        let request_id = request.id;
+
+        self.insert_region_replacement_request(opctx, request).await?;
+
+        Ok(request_id)
+    }
+
+    /// Insert a region replacement request into the DB, also creating the
+    /// VolumeRepair record.
+    pub async fn insert_region_replacement_request(
+        &self,
+        opctx: &OpContext,
+        request: RegionReplacement,
+    ) -> Result<(), Error> {
+        self.pool_connection_authorized(opctx)
+            .await?
+            .transaction_async(|conn| async move {
+                use db::schema::region_replacement::dsl;
+                use db::schema::volume_repair::dsl as volume_repair_dsl;
+
+                diesel::insert_into(volume_repair_dsl::volume_repair)
+                    .values(VolumeRepair {
+                        volume_id: request.volume_id,
+                        repair_id: request.id,
+                    })
+                    .execute_async(&conn)
+                    .await?;
+
+                diesel::insert_into(dsl::region_replacement)
+                    .values(request)
+                    .execute_async(&conn)
+                    .await?;
+
+                Ok(())
+            })
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    pub async fn get_region_replacement_request_by_id(
+        &self,
+        opctx: &OpContext,
+        id: Uuid,
+    ) -> Result<RegionReplacement, Error> {
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(dsl::id.eq(id))
+            .get_result_async::<RegionReplacement>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    pub async fn get_requested_region_replacements(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Vec<RegionReplacement>, Error> {
+        opctx.check_complex_operations_allowed()?;
+
+        let mut replacements = Vec::new();
+        let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        while let Some(p) = paginator.next() {
+            use db::schema::region_replacement::dsl;
+
+            let batch = paginated(
+                dsl::region_replacement,
+                dsl::id,
+                &p.current_pagparams(),
+            )
+            .filter(
+                dsl::replacement_state.eq(RegionReplacementState::Requested),
+            )
+            .get_results_async::<RegionReplacement>(&*conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+            paginator = p.found_batch(&batch, &|r| r.id);
+            replacements.extend(batch);
+        }
+
+        Ok(replacements)
+    }
+
+    /// Return region replacement requests that are in state `Running` with no
+    /// currently operating saga. These need to be checked on or driven forward.
+    pub async fn get_running_region_replacements(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Vec<RegionReplacement>, Error> {
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(dsl::replacement_state.eq(RegionReplacementState::Running))
+            .filter(dsl::operating_saga_id.is_null())
+            .get_results_async::<RegionReplacement>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Return region replacement requests that are in state `ReplacementDone`
+    /// with no currently operating saga. These need to be completed.
+    pub async fn get_done_region_replacements(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Vec<RegionReplacement>, Error> {
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(
+                dsl::replacement_state
+                    .eq(RegionReplacementState::ReplacementDone),
+            )
+            .filter(dsl::operating_saga_id.is_null())
+            .get_results_async::<RegionReplacement>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Transition a RegionReplacement record from Requested to Allocating,
+    /// setting a unique id at the same time.
+    pub async fn set_region_replacement_allocating(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(
+                dsl::replacement_state.eq(RegionReplacementState::Requested),
+            )
+            .filter(dsl::operating_saga_id.is_null())
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Allocating),
+                dsl::operating_saga_id.eq(operating_saga_id),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == Some(operating_saga_id)
+                        && record.replacement_state
+                            == RegionReplacementState::Allocating
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition a RegionReplacement record from Allocating to Requested,
+    /// clearing the operating saga id.
+    pub async fn undo_set_region_replacement_allocating(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(
+                dsl::replacement_state.eq(RegionReplacementState::Allocating),
+            )
+            .filter(dsl::operating_saga_id.eq(operating_saga_id))
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Requested),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::Requested
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition from Allocating to Running, and clear the operating saga id.
+    pub async fn set_region_replacement_running(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+        new_region_id: Uuid,
+        old_region_volume_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(dsl::operating_saga_id.eq(operating_saga_id))
+            .filter(
+                dsl::replacement_state.eq(RegionReplacementState::Allocating),
+            )
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Running),
+                dsl::old_region_volume_id.eq(Some(old_region_volume_id)),
+                dsl::new_region_id.eq(Some(new_region_id)),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::Running
+                        && record.new_region_id == Some(new_region_id)
+                        && record.old_region_volume_id
+                            == Some(old_region_volume_id)
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Find an in-progress region replacement request by new region id
+    pub async fn lookup_in_progress_region_replacement_request_by_new_region_id(
+        &self,
+        opctx: &OpContext,
+        new_region_id: TypedUuid<DownstairsRegionKind>,
+    ) -> Result<Option<RegionReplacement>, Error> {
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(
+                dsl::new_region_id
+                    .eq(nexus_db_model::to_db_typed_uuid(new_region_id)),
+            )
+            .filter(dsl::replacement_state.ne(RegionReplacementState::Complete))
+            .get_result_async::<RegionReplacement>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Find a region replacement request by old region id
+    pub async fn lookup_region_replacement_request_by_old_region_id(
+        &self,
+        opctx: &OpContext,
+        old_region_id: TypedUuid<DownstairsRegionKind>,
+    ) -> Result<Option<RegionReplacement>, Error> {
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(
+                dsl::old_region_id
+                    .eq(nexus_db_model::to_db_typed_uuid(old_region_id)),
+            )
+            .get_result_async::<RegionReplacement>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Transition a RegionReplacement record from Running to Driving,
+    /// setting a unique id at the same time.
+    pub async fn set_region_replacement_driving(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(dsl::replacement_state.eq(RegionReplacementState::Running))
+            .filter(dsl::operating_saga_id.is_null())
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Driving),
+                dsl::operating_saga_id.eq(operating_saga_id),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == Some(operating_saga_id)
+                        && record.replacement_state
+                            == RegionReplacementState::Driving
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition a RegionReplacement record from Driving to Running,
+    /// clearing the operating saga id.
+    pub async fn undo_set_region_replacement_driving(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(dsl::replacement_state.eq(RegionReplacementState::Driving))
+            .filter(dsl::operating_saga_id.eq(operating_saga_id))
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Running),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::Running
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                                region_replacement_id,
+                                record.replacement_state,
+                                record.operating_saga_id,
+                            )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition a RegionReplacement record from Driving to ReplacementDone,
+    /// clearing the operating saga id.
+    pub async fn set_region_replacement_from_driving_to_done(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(dsl::replacement_state.eq(RegionReplacementState::Driving))
+            .filter(dsl::operating_saga_id.eq(operating_saga_id))
+            .set((
+                dsl::replacement_state
+                    .eq(RegionReplacementState::ReplacementDone),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::ReplacementDone
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                                region_replacement_id,
+                                record.replacement_state,
+                                record.operating_saga_id,
+                            )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Return the most current step for a region replacement request
+    pub async fn current_region_replacement_request_step(
+        &self,
+        opctx: &OpContext,
+        id: Uuid,
+    ) -> Result<Option<RegionReplacementStep>, Error> {
+        use db::schema::region_replacement_step::dsl;
+
+        dsl::region_replacement_step
+            .filter(dsl::replacement_id.eq(id))
+            .order_by(dsl::step_time.desc())
+            .first_async::<RegionReplacementStep>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// Record a step taken to drive a region replacement forward
+    pub async fn add_region_replacement_request_step(
+        &self,
+        opctx: &OpContext,
+        step: RegionReplacementStep,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement_step::dsl;
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        diesel::insert_into(dsl::region_replacement_step)
+            .values(step)
+            .on_conflict((dsl::replacement_id, dsl::step_time, dsl::step_type))
+            .do_nothing()
+            .execute_async(&*conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    /// Transition a RegionReplacement record from ReplacementDone to Completing,
+    /// setting a unique id at the same time.
+    pub async fn set_region_replacement_completing(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(
+                dsl::replacement_state
+                    .eq(RegionReplacementState::ReplacementDone),
+            )
+            .filter(dsl::operating_saga_id.is_null())
+            .set((
+                dsl::replacement_state.eq(RegionReplacementState::Completing),
+                dsl::operating_saga_id.eq(operating_saga_id),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == Some(operating_saga_id)
+                        && record.replacement_state
+                            == RegionReplacementState::Completing
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition a RegionReplacement record from Completing to ReplacementDone,
+    /// clearing the operating saga id.
+    pub async fn undo_set_region_replacement_completing(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .filter(
+                dsl::replacement_state.eq(RegionReplacementState::Completing),
+            )
+            .filter(dsl::operating_saga_id.eq(operating_saga_id))
+            .set((
+                dsl::replacement_state
+                    .eq(RegionReplacementState::ReplacementDone),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::ReplacementDone
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Transition a RegionReplacement record from Completing to Complete,
+    /// clearing the operating saga id. Also removes the `volume_repair` record
+    /// that is taking a "lock" on the Volume.
+    pub async fn set_region_replacement_complete(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+        operating_saga_id: Uuid,
+    ) -> Result<(), Error> {
+        type TxnError = TransactionError<Error>;
+
+        self.pool_connection_authorized(opctx)
+            .await?
+            .transaction_async(|conn| async move {
+                use db::schema::volume_repair::dsl as volume_repair_dsl;
+
+                diesel::delete(
+                    volume_repair_dsl::volume_repair
+                        .filter(volume_repair_dsl::repair_id.eq(region_replacement_id))
+                    )
+                    .execute_async(&conn)
+                    .await?;
+
+                use db::schema::region_replacement::dsl;
+
+                let result = diesel::update(dsl::region_replacement)
+                    .filter(dsl::id.eq(region_replacement_id))
+                    .filter(
+                        dsl::replacement_state.eq(RegionReplacementState::Completing),
+                    )
+                    .filter(dsl::operating_saga_id.eq(operating_saga_id))
+                    .set((
+                        dsl::replacement_state.eq(RegionReplacementState::Complete),
+                        dsl::operating_saga_id.eq(Option::<Uuid>::None),
+                    ))
+                    .check_if_exists::<RegionReplacement>(region_replacement_id)
+                    .execute_and_check(&conn)
+                    .await?;
+
+                match result.status {
+                    UpdateStatus::Updated => Ok(()),
+                    UpdateStatus::NotUpdatedButExists => {
+                        let record = result.found;
+
+                        if record.operating_saga_id == None
+                            && record.replacement_state
+                                == RegionReplacementState::Complete
+                        {
+                            Ok(())
+                        } else {
+                            Err(TxnError::CustomError(Error::conflict(format!(
+                                "region replacement {} set to {:?} (operating saga id {:?})",
+                                region_replacement_id,
+                                record.replacement_state,
+                                record.operating_saga_id,
+                            ))))
+                        }
+                    }
+                }
+            })
+            .await
+            .map_err(|e| match e {
+                TxnError::CustomError(error) => error,
+
+                TxnError::Database(error) => {
+                    public_error_from_diesel(error, ErrorHandler::Server)
+                }
+            })
+    }
+
+    /// Nexus has been notified by an Upstairs (or has otherwise determined)
+    /// that a region replacement is done, so update the record. This may arrive
+    /// in the middle of a drive saga invocation, so do not filter on state or
+    /// operating saga id!
+    pub async fn mark_region_replacement_as_done(
+        &self,
+        opctx: &OpContext,
+        region_replacement_id: Uuid,
+    ) -> Result<(), Error> {
+        use db::schema::region_replacement::dsl;
+        let updated = diesel::update(dsl::region_replacement)
+            .filter(dsl::id.eq(region_replacement_id))
+            .set((
+                dsl::replacement_state
+                    .eq(RegionReplacementState::ReplacementDone),
+                dsl::operating_saga_id.eq(Option::<Uuid>::None),
+            ))
+            .check_if_exists::<RegionReplacement>(region_replacement_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await;
+
+        match updated {
+            Ok(result) => match result.status {
+                UpdateStatus::Updated => Ok(()),
+
+                UpdateStatus::NotUpdatedButExists => {
+                    let record = result.found;
+
+                    if record.operating_saga_id == None
+                        && record.replacement_state
+                            == RegionReplacementState::ReplacementDone
+                    {
+                        Ok(())
+                    } else {
+                        Err(Error::conflict(format!(
+                            "region replacement {} set to {:?} (operating saga id {:?})",
+                            region_replacement_id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        )))
+                    }
+                }
+            },
+
+            Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)),
+        }
+    }
+
+    /// Check if a region replacement request has at least one matching
+    /// successful "repair finished" notification.
+    //
+    // For the purposes of changing the state of a region replacement request to
+    // `ReplacementDone`, check if Nexus has seen at least one related
+    // successful "repair finished" notification.
+    //
+    // Note: after a region replacement request has transitioned to `Complete`,
+    // there may be many future "repair finished" notifications for the "new"
+    // region that are unrelated to the replacement request.
+    pub async fn request_has_matching_successful_finish_notification(
+        &self,
+        opctx: &OpContext,
+        region_replacement: &RegionReplacement,
+    ) -> Result<bool, Error> {
+        let Some(new_region_id) = region_replacement.new_region_id else {
+            return Err(Error::invalid_request(format!(
+                "region replacement {} has no new region id!",
+                region_replacement.id,
+            )));
+        };
+
+        use db::schema::upstairs_repair_notification::dsl;
+
+        let maybe_notification = dsl::upstairs_repair_notification
+            .filter(dsl::region_id.eq(new_region_id))
+            .filter(
+                dsl::notification_type
+                    .eq(UpstairsRepairNotificationType::Succeeded),
+            )
+            .first_async::<UpstairsRepairNotification>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(maybe_notification.is_some())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    use crate::db::datastore::test_utils::datastore_test;
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_test_utils::dev;
+
+    #[tokio::test]
+    async fn test_one_replacement_per_volume() {
+        let logctx = dev::test_setup_log("test_one_replacement_per_volume");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let region_1_id = Uuid::new_v4();
+        let region_2_id = Uuid::new_v4();
+        let volume_id = Uuid::new_v4();
+
+        let request_1 = RegionReplacement::new(region_1_id, volume_id);
+        let request_2 = RegionReplacement::new(region_2_id, volume_id);
+
+        datastore
+            .insert_region_replacement_request(&opctx, request_1)
+            .await
+            .unwrap();
+        datastore
+            .insert_region_replacement_request(&opctx, request_2)
+            .await
+            .unwrap_err();
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_replacement_done_in_middle_of_drive_saga() {
+        // If Nexus receives a notification that a repair has finished in the
+        // middle of a drive saga, then make sure the replacement request state
+        // ends up as `ReplacementDone`.
+
+        let logctx = dev::test_setup_log(
+            "test_replacement_done_in_middle_of_drive_saga",
+        );
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let region_id = Uuid::new_v4();
+        let volume_id = Uuid::new_v4();
+
+        let request = {
+            let mut request = RegionReplacement::new(region_id, volume_id);
+            request.replacement_state = RegionReplacementState::Running;
+            request
+        };
+
+        datastore
+            .insert_region_replacement_request(&opctx, request.clone())
+            .await
+            .unwrap();
+
+        // Transition to Driving
+
+        let saga_id = Uuid::new_v4();
+
+        datastore
+            .set_region_replacement_driving(&opctx, request.id, saga_id)
+            .await
+            .unwrap();
+
+        // Now, Nexus receives a notification that the repair has finished
+        // successfully
+
+        datastore
+            .mark_region_replacement_as_done(&opctx, request.id)
+            .await
+            .unwrap();
+
+        // Ensure that the state is ReplacementDone, and the operating saga id
+        // is cleared.
+
+        let actual_request = datastore
+            .get_region_replacement_request_by_id(&opctx, request.id)
+            .await
+            .unwrap();
+
+        assert_eq!(
+            actual_request.replacement_state,
+            RegionReplacementState::ReplacementDone
+        );
+        assert_eq!(actual_request.operating_saga_id, None);
+
+        // The Drive saga will unwind when it tries to set the state back to
+        // Running.
+
+        datastore
+            .undo_set_region_replacement_driving(&opctx, request.id, saga_id)
+            .await
+            .unwrap_err();
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs
index 7a3f84bbb2..9d4900e2a4 100644
--- a/nexus/db-queries/src/db/datastore/snapshot.rs
+++ b/nexus/db-queries/src/db/datastore/snapshot.rs
@@ -31,6 +31,7 @@ use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::LookupType;
 use omicron_common::api::external::ResourceType;
 use omicron_common::api::external::UpdateResult;
@@ -304,4 +305,21 @@ impl DataStore {
             }
         }
     }
+
+    pub async fn find_snapshot_by_destination_volume_id(
+        &self,
+        opctx: &OpContext,
+        volume_id: Uuid,
+    ) -> LookupResult<Option<Snapshot>> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        use db::schema::snapshot::dsl;
+        dsl::snapshot
+            .filter(dsl::destination_volume_id.eq(volume_id))
+            .select(Snapshot::as_select())
+            .first_async(&*conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index 0e80ee3e3c..a7b9273aa8 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -45,6 +45,7 @@ use serde::Deserialize;
 use serde::Deserializer;
 use serde::Serialize;
 use sled_agent_client::types::VolumeConstructionRequest;
+use std::net::SocketAddrV6;
 use uuid::Uuid;
 
 #[derive(Debug, Clone, Copy)]
@@ -1150,6 +1151,48 @@ impl DataStore {
             })
     }
 
+    /// Return all the read-write regions in a volume whose target address
+    /// matches the argument dataset's.
+    pub async fn get_dataset_rw_regions_in_volume(
+        &self,
+        opctx: &OpContext,
+        dataset_id: Uuid,
+        volume_id: Uuid,
+    ) -> LookupResult<Vec<SocketAddrV6>> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        let dataset = {
+            use db::schema::dataset::dsl;
+
+            dsl::dataset
+                .filter(dsl::id.eq(dataset_id))
+                .select(Dataset::as_select())
+                .first_async(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+        };
+
+        let Some(volume) = self.volume_get(volume_id).await? else {
+            return Err(Error::internal_error("volume is gone!?"));
+        };
+
+        let vcr: VolumeConstructionRequest =
+            serde_json::from_str(&volume.data())?;
+
+        let mut targets: Vec<SocketAddrV6> = vec![];
+
+        find_matching_rw_regions_in_volume(
+            &vcr,
+            dataset.address().ip(),
+            &mut targets,
+        )
+        .map_err(|e| Error::internal_error(&e.to_string()))?;
+
+        Ok(targets)
+    }
+
     // An Upstairs is created as part of a Volume hierarchy if the Volume
     // Construction Request includes a "Region" variant. This may be at any
     // layer of the Volume, and some notifications will come from an Upstairs
@@ -1583,6 +1626,274 @@ impl DataStore {
     }
 }
 
+pub struct VolumeReplacementParams {
+    pub volume_id: Uuid,
+    pub region_id: Uuid,
+    pub region_addr: SocketAddrV6,
+}
+
+impl DataStore {
+    /// Replace a read-write region in a Volume with a new region.
+    pub async fn volume_replace_region(
+        &self,
+        existing: VolumeReplacementParams,
+        replacement: VolumeReplacementParams,
+    ) -> Result<(), Error> {
+        // In a single transaction:
+        //
+        // - set the existing region's volume id to the replacement's volume id
+        // - set the replacement region's volume id to the existing's volume id
+        // - update the existing volume's construction request to replace the
+        // existing region's SocketAddrV6 with the replacement region's
+        //
+        // This function's effects can be undone by calling it with swapped
+        // parameters.
+        //
+        // # Example #
+        //
+        // Imagine `volume_replace_region` is called with the following,
+        // pretending that UUIDs are just eight uppercase letters:
+        //
+        //   let existing = VolumeReplacementParams {
+        //     volume_id: TARGET_VOL,
+        //     region_id: TARGET_REG,
+        //     region_addr: "[fd00:1122:3344:145::10]:40001",
+        //   }
+        //
+        //   let replacement = VolumeReplacementParams {
+        //     volume_id: NEW_VOL,
+        //     region_id: NEW_REG,
+        //     region_addr: "[fd00:1122:3344:322::4]:3956",
+        //   }
+        //
+        // In the database, the relevant records (and columns) of the region
+        // table look like this prior to the transaction:
+        //
+        //            id | volume_id
+        //  -------------| ---------
+        //    TARGET_REG | TARGET_VOL
+        //       NEW_REG | NEW_VOL
+        //
+        // TARGET_VOL has a volume construction request where one of the targets
+        // list will contain TARGET_REG's address:
+        //
+        //   {
+        //     "type": "volume",
+        //     "block_size": 512,
+        //     "id": "TARGET_VOL",
+        //     "read_only_parent": {
+        //       ...
+        //     },
+        //     "sub_volumes": [
+        //       {
+        //         ...
+        //         "opts": {
+        //           ...
+        //           "target": [
+        //             "[fd00:1122:3344:103::3]:19004",
+        //             "[fd00:1122:3344:79::12]:27015",
+        //             "[fd00:1122:3344:145::10]:40001"  <-----
+        //           ]
+        //         }
+        //       }
+        //     ]
+        //   }
+        //
+        // Note it is not required for the replacement volume to exist as a
+        // database record for this transaction.
+        //
+        // The first part of the transaction will swap the volume IDs of the
+        // existing and replacement region records:
+        //
+        //           id | volume_id
+        //  ------------| ---------
+        //   TARGET_REG | NEW_VOL
+        //      NEW_REG | TARGET_VOL
+        //
+        // The second part of the transaction will update the volume
+        // construction request of TARGET_VOL by finding and replacing
+        // TARGET_REG's address (in the appropriate targets array) with
+        // NEW_REG's address:
+        //
+        //   {
+        //           ...
+        //           "target": [
+        //             "[fd00:1122:3344:103::3]:19004",
+        //             "[fd00:1122:3344:79::12]:27015",
+        //             "[fd00:1122:3344:322::4]:3956"  <-----
+        //           ]
+        //           ...
+        //   }
+        //
+        // After the transaction, the caller should ensure that TARGET_REG is
+        // referenced (via its socket address) in NEW_VOL. For example, this
+        // is done as part of the region replacement start saga.
+
+        #[derive(Debug, thiserror::Error)]
+        enum VolumeReplaceRegionError {
+            #[error("Error from Volume region replacement: {0}")]
+            Public(Error),
+
+            #[error("Serde error during Volume region replacement: {0}")]
+            SerdeError(#[from] serde_json::Error),
+
+            #[error("Target Volume deleted")]
+            TargetVolumeDeleted,
+
+            #[error("Region replacement error: {0}")]
+            RegionReplacementError(#[from] anyhow::Error),
+        }
+        let err = OptionalError::new();
+
+        let conn = self.pool_connection_unauthorized().await?;
+        self.transaction_retry_wrapper("volume_replace_region")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                async move {
+                    use db::schema::region::dsl as region_dsl;
+                    use db::schema::volume::dsl as volume_dsl;
+
+                    // Set the existing region's volume id to the replacement's
+                    // volume id
+                    diesel::update(region_dsl::region)
+                        .filter(region_dsl::id.eq(existing.region_id))
+                        .set(region_dsl::volume_id.eq(replacement.volume_id))
+                        .execute_async(&conn)
+                        .await
+                        .map_err(|e| {
+                            err.bail_retryable_or_else(e, |e| {
+                                VolumeReplaceRegionError::Public(
+                                    public_error_from_diesel(
+                                        e,
+                                        ErrorHandler::Server,
+                                    )
+                                )
+                            })
+                        })?;
+
+                    // Set the replacement region's volume id to the existing's
+                    // volume id
+                    diesel::update(region_dsl::region)
+                        .filter(region_dsl::id.eq(replacement.region_id))
+                        .set(region_dsl::volume_id.eq(existing.volume_id))
+                        .execute_async(&conn)
+                        .await
+                        .map_err(|e| {
+                            err.bail_retryable_or_else(e, |e| {
+                                VolumeReplaceRegionError::Public(
+                                    public_error_from_diesel(
+                                        e,
+                                        ErrorHandler::Server,
+                                    )
+                                )
+                            })
+                        })?;
+
+                    // Update the existing volume's construction request to
+                    // replace the existing region's SocketAddrV6 with the
+                    // replacement region's
+                    let maybe_old_volume = {
+                        volume_dsl::volume
+                            .filter(volume_dsl::id.eq(existing.volume_id))
+                            .select(Volume::as_select())
+                            .first_async::<Volume>(&conn)
+                            .await
+                            .optional()
+                            .map_err(|e| {
+                                err.bail_retryable_or_else(e, |e| {
+                                    VolumeReplaceRegionError::Public(
+                                        public_error_from_diesel(
+                                            e,
+                                            ErrorHandler::Server,
+                                        )
+                                    )
+                                })
+                            })?
+                    };
+
+                    let old_volume = if let Some(old_volume) = maybe_old_volume {
+                        old_volume
+                    } else {
+                        // The existing volume was deleted, so return an
+                        // error: we can't perform the region replacement now!
+                        return Err(err.bail(VolumeReplaceRegionError::TargetVolumeDeleted));
+                    };
+
+                    let old_vcr: VolumeConstructionRequest =
+                        match serde_json::from_str(&old_volume.data()) {
+                            Ok(vcr) => vcr,
+                            Err(e) => {
+                                return Err(err.bail(VolumeReplaceRegionError::SerdeError(e)));
+                            },
+                        };
+
+                    // Copy the old volume's VCR, changing out the old region
+                    // for the new.
+                    let new_vcr = match replace_region_in_vcr(
+                        &old_vcr,
+                        existing.region_addr,
+                        replacement.region_addr,
+                    ) {
+                        Ok(new_vcr) => new_vcr,
+                        Err(e) => {
+                            return Err(err.bail(
+                                VolumeReplaceRegionError::RegionReplacementError(e)
+                            ));
+                        }
+                    };
+
+                    let new_volume_data = serde_json::to_string(
+                        &new_vcr,
+                    )
+                    .map_err(|e| {
+                        err.bail(VolumeReplaceRegionError::SerdeError(e))
+                    })?;
+
+                    // Update the existing volume's data
+                    diesel::update(volume_dsl::volume)
+                        .filter(volume_dsl::id.eq(existing.volume_id))
+                        .set(volume_dsl::data.eq(new_volume_data))
+                        .execute_async(&conn)
+                        .await
+                        .map_err(|e| {
+                            err.bail_retryable_or_else(e, |e| {
+                                VolumeReplaceRegionError::Public(
+                                    public_error_from_diesel(
+                                        e,
+                                        ErrorHandler::Server,
+                                    )
+                                )
+                            })
+                        })?;
+
+                    Ok(())
+                }
+            })
+            .await
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    match err {
+                        VolumeReplaceRegionError::Public(e) => e,
+
+                        VolumeReplaceRegionError::SerdeError(_) => {
+                            Error::internal_error(&err.to_string())
+                        }
+
+                        VolumeReplaceRegionError::TargetVolumeDeleted => {
+                            Error::internal_error(&err.to_string())
+                        }
+
+                        VolumeReplaceRegionError::RegionReplacementError(_) => {
+                            Error::internal_error(&err.to_string())
+                        }
+                    }
+                } else {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                }
+            })
+    }
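+
+    // Illustrative sketch only (`existing_params` and `replacement_params` are
+    // hypothetical bindings): because this function's effects can be undone by
+    // calling it with swapped parameters, a saga undo node could simply do:
+    //
+    //   datastore
+    //       .volume_replace_region(
+    //           /* existing */ replacement_params,
+    //           /* replacement */ existing_params,
+    //       )
+    //       .await?;
+    //
+    // `test_volume_replace_region` below exercises this round trip.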
+}
+
 /// Return the targets from a VolumeConstructionRequest.
 ///
 /// The targets of a volume construction request map to resources.
@@ -1681,6 +1992,119 @@ pub fn volume_is_read_only(
     }
 }
 
+/// Replace a Region in a VolumeConstructionRequest
+///
+/// Note that UUIDs are not randomized by this step: Crucible will reject a
+/// `target_replace` call if the replacement VolumeConstructionRequest does not
+/// exactly match the original, except for a single Region difference.
+///
+/// Note that the generation number _is_ bumped in this step, otherwise
+/// `compare_vcr_for_update` will reject the update.
+fn replace_region_in_vcr(
+    vcr: &VolumeConstructionRequest,
+    old_region: SocketAddrV6,
+    new_region: SocketAddrV6,
+) -> anyhow::Result<VolumeConstructionRequest> {
+    match vcr {
+        VolumeConstructionRequest::Volume {
+            id,
+            block_size,
+            sub_volumes,
+            read_only_parent,
+        } => Ok(VolumeConstructionRequest::Volume {
+            id: *id,
+            block_size: *block_size,
+            sub_volumes: sub_volumes
+                .iter()
+                .map(|subvol| -> anyhow::Result<VolumeConstructionRequest> {
+                    replace_region_in_vcr(&subvol, old_region, new_region)
+                })
+                .collect::<anyhow::Result<Vec<VolumeConstructionRequest>>>()?,
+
+            // Only replacing R/W regions
+            read_only_parent: read_only_parent.clone(),
+        }),
+
+        VolumeConstructionRequest::Url { id, block_size, url } => {
+            Ok(VolumeConstructionRequest::Url {
+                id: *id,
+                block_size: *block_size,
+                url: url.clone(),
+            })
+        }
+
+        VolumeConstructionRequest::Region {
+            block_size,
+            blocks_per_extent,
+            extent_count,
+            opts,
+            gen,
+        } => {
+            let mut opts = opts.clone();
+
+            for target in &mut opts.target {
+                let parsed_target: SocketAddrV6 = target.parse()?;
+                if parsed_target == old_region {
+                    *target = new_region.to_string();
+                }
+            }
+
+            Ok(VolumeConstructionRequest::Region {
+                block_size: *block_size,
+                blocks_per_extent: *blocks_per_extent,
+                extent_count: *extent_count,
+                opts,
+                gen: *gen + 1,
+            })
+        }
+
+        VolumeConstructionRequest::File { id, block_size, path } => {
+            Ok(VolumeConstructionRequest::File {
+                id: *id,
+                block_size: *block_size,
+                path: path.clone(),
+            })
+        }
+    }
+}
+
+/// Find Regions in a Volume's subvolumes list whose targets match the argument
+/// IP, and add them to the supplied Vec.
+fn find_matching_rw_regions_in_volume(
+    vcr: &VolumeConstructionRequest,
+    ip: &std::net::Ipv6Addr,
+    matched_targets: &mut Vec<SocketAddrV6>,
+) -> anyhow::Result<()> {
+    match vcr {
+        VolumeConstructionRequest::Volume { sub_volumes, .. } => {
+            for sub_volume in sub_volumes {
+                find_matching_rw_regions_in_volume(
+                    sub_volume,
+                    ip,
+                    matched_targets,
+                )?;
+            }
+        }
+
+        VolumeConstructionRequest::Url { .. } => {}
+
+        VolumeConstructionRequest::Region { opts, .. } => {
+            if !opts.read_only {
+                for target in &opts.target {
+                    let parsed_target: SocketAddrV6 = target.parse()?;
+                    if parsed_target.ip() == ip {
+                        matched_targets.push(parsed_target);
+                    }
+                }
+            }
+        }
+
+        VolumeConstructionRequest::File { .. } => {}
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1688,6 +2112,7 @@ mod tests {
     use crate::db::datastore::test_utils::datastore_test;
     use nexus_test_utils::db::test_setup_database;
     use omicron_test_utils::dev;
+    use sled_agent_client::types::CrucibleOpts;
 
     // Assert that Nexus will not fail to deserialize an old version of
     // CrucibleResources that was serialized before schema update 6.0.0.
@@ -1794,4 +2219,211 @@ mod tests {
         db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
+
+    #[tokio::test]
+    async fn test_volume_replace_region() {
+        let logctx = dev::test_setup_log("test_volume_replace_region");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let (_opctx, db_datastore) = datastore_test(&logctx, &db).await;
+
+        // Insert four Region records (three, plus one additionally allocated)
+
+        let volume_id = Uuid::new_v4();
+        let new_volume_id = Uuid::new_v4();
+
+        let mut region_and_volume_ids = [
+            (Uuid::new_v4(), volume_id),
+            (Uuid::new_v4(), volume_id),
+            (Uuid::new_v4(), volume_id),
+            (Uuid::new_v4(), new_volume_id),
+        ];
+
+        {
+            let conn = db_datastore.pool_connection_for_tests().await.unwrap();
+
+            for i in 0..4 {
+                let (_, volume_id) = region_and_volume_ids[i];
+
+                let region = Region::new(
+                    Uuid::new_v4(), // dataset id
+                    volume_id,
+                    512_i64.try_into().unwrap(),
+                    10,
+                    10,
+                );
+
+                region_and_volume_ids[i].0 = region.id();
+
+                use nexus_db_model::schema::region::dsl;
+                diesel::insert_into(dsl::region)
+                    .values(region.clone())
+                    .execute_async(&*conn)
+                    .await
+                    .unwrap();
+            }
+        }
+
+        let _volume = db_datastore
+            .volume_create(nexus_db_model::Volume::new(
+                volume_id,
+                serde_json::to_string(&VolumeConstructionRequest::Volume {
+                    id: volume_id,
+                    block_size: 512,
+                    sub_volumes: vec![VolumeConstructionRequest::Region {
+                        block_size: 512,
+                        blocks_per_extent: 10,
+                        extent_count: 10,
+                        gen: 1,
+                        opts: CrucibleOpts {
+                            id: volume_id,
+                            target: vec![
+                                String::from("[fd00:1122:3344:101::1]:11111"), // target to replace
+                                String::from("[fd00:1122:3344:102::1]:22222"),
+                                String::from("[fd00:1122:3344:103::1]:33333"),
+                            ],
+                            lossy: false,
+                            flush_timeout: None,
+                            key: None,
+                            cert_pem: None,
+                            key_pem: None,
+                            root_cert_pem: None,
+                            control: None,
+                            read_only: false,
+                        },
+                    }],
+                    read_only_parent: None,
+                })
+                .unwrap(),
+            ))
+            .await
+            .unwrap();
+
+        // Replace one
+
+        let target = region_and_volume_ids[0];
+        let replacement = region_and_volume_ids[3];
+
+        db_datastore
+            .volume_replace_region(
+                /* target */
+                db::datastore::VolumeReplacementParams {
+                    volume_id: target.1,
+                    region_id: target.0,
+                    region_addr: "[fd00:1122:3344:101::1]:11111"
+                        .parse()
+                        .unwrap(),
+                },
+                /* replacement */
+                db::datastore::VolumeReplacementParams {
+                    volume_id: replacement.1,
+                    region_id: replacement.0,
+                    region_addr: "[fd55:1122:3344:101::1]:11111"
+                        .parse()
+                        .unwrap(),
+                },
+            )
+            .await
+            .unwrap();
+
+        let vcr: VolumeConstructionRequest = serde_json::from_str(
+            db_datastore.volume_get(volume_id).await.unwrap().unwrap().data(),
+        )
+        .unwrap();
+
+        // Ensure the shape of the resulting VCR
+        assert_eq!(
+            &vcr,
+            &VolumeConstructionRequest::Volume {
+                id: volume_id,
+                block_size: 512,
+                sub_volumes: vec![VolumeConstructionRequest::Region {
+                    block_size: 512,
+                    blocks_per_extent: 10,
+                    extent_count: 10,
+                    gen: 2, // generation number bumped
+                    opts: CrucibleOpts {
+                        id: volume_id,
+                        target: vec![
+                            String::from("[fd55:1122:3344:101::1]:11111"), // replaced
+                            String::from("[fd00:1122:3344:102::1]:22222"),
+                            String::from("[fd00:1122:3344:103::1]:33333"),
+                        ],
+                        lossy: false,
+                        flush_timeout: None,
+                        key: None,
+                        cert_pem: None,
+                        key_pem: None,
+                        root_cert_pem: None,
+                        control: None,
+                        read_only: false,
+                    },
+                }],
+                read_only_parent: None,
+            },
+        );
+
+        // Now undo the replacement. Note that the volume IDs are not swapped.
+        db_datastore
+            .volume_replace_region(
+                /* target */
+                db::datastore::VolumeReplacementParams {
+                    volume_id: target.1,
+                    region_id: replacement.0,
+                    region_addr: "[fd55:1122:3344:101::1]:11111"
+                        .parse()
+                        .unwrap(),
+                },
+                /* replacement */
+                db::datastore::VolumeReplacementParams {
+                    volume_id: replacement.1,
+                    region_id: target.0,
+                    region_addr: "[fd00:1122:3344:101::1]:11111"
+                        .parse()
+                        .unwrap(),
+                },
+            )
+            .await
+            .unwrap();
+
+        let vcr: VolumeConstructionRequest = serde_json::from_str(
+            db_datastore.volume_get(volume_id).await.unwrap().unwrap().data(),
+        )
+        .unwrap();
+
+        // Ensure the shape of the resulting VCR
+        assert_eq!(
+            &vcr,
+            &VolumeConstructionRequest::Volume {
+                id: volume_id,
+                block_size: 512,
+                sub_volumes: vec![VolumeConstructionRequest::Region {
+                    block_size: 512,
+                    blocks_per_extent: 10,
+                    extent_count: 10,
+                    gen: 3, // generation number bumped
+                    opts: CrucibleOpts {
+                        id: volume_id,
+                        target: vec![
+                            String::from("[fd00:1122:3344:101::1]:11111"), // back to what it was
+                            String::from("[fd00:1122:3344:102::1]:22222"),
+                            String::from("[fd00:1122:3344:103::1]:33333"),
+                        ],
+                        lossy: false,
+                        flush_timeout: None,
+                        key: None,
+                        cert_pem: None,
+                        key_pem: None,
+                        root_cert_pem: None,
+                        control: None,
+                        read_only: false,
+                    },
+                }],
+                read_only_parent: None,
+            },
+        );
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs
index 611fcc3258..51e9648592 100644
--- a/nexus/src/app/sagas/common_storage.rs
+++ b/nexus/src/app/sagas/common_storage.rs
@@ -17,7 +17,6 @@ use internal_dns::ServiceName;
 use nexus_db_queries::authz;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db;
-use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
 use omicron_common::api::external::Error;
 use omicron_common::backoff::{self, BackoffError};
diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs
index 886504a83b..ed4fd59277 100644
--- a/nexus/tests/integration_tests/disks.rs
+++ b/nexus/tests/integration_tests/disks.rs
@@ -29,7 +29,6 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz;
 use nexus_test_utils::resource_helpers::DiskTest;
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params;
-use nexus_types::identity::Asset;
 use omicron_common::api::external::ByteCount;
 use omicron_common::api::external::Disk;
 use omicron_common::api::external::DiskState;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 2cf9e1100f..17ea6d5510 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -3866,6 +3866,78 @@ ON omicron.public.sled (sled_policy) STORING (ip, sled_state);
 CREATE INDEX IF NOT EXISTS vmm_by_instance_id
 ON omicron.public.vmm (instance_id) STORING (sled_id);
 
+CREATE TYPE IF NOT EXISTS omicron.public.region_replacement_state AS ENUM (
+  'requested',
+  'allocating',
+  'running',
+  'driving',
+  'replacement_done',
+  'completing',
+  'complete'
+);
+
+CREATE TABLE IF NOT EXISTS omicron.public.region_replacement (
+    /* unique ID for this region replacement */
+    id UUID PRIMARY KEY,
+
+    request_time TIMESTAMPTZ NOT NULL,
+
+    old_region_id UUID NOT NULL,
+
+    volume_id UUID NOT NULL,
+
+    old_region_volume_id UUID,
+
+    new_region_id UUID,
+
+    replacement_state omicron.public.region_replacement_state NOT NULL,
+
+    operating_saga_id UUID
+);
+
+CREATE INDEX IF NOT EXISTS lookup_region_replacement_by_state on omicron.public.region_replacement (replacement_state);
+
+CREATE TABLE IF NOT EXISTS omicron.public.volume_repair (
+    volume_id UUID PRIMARY KEY,
+    repair_id UUID NOT NULL
+);
+
+CREATE INDEX IF NOT EXISTS lookup_volume_repair_by_repair_id on omicron.public.volume_repair (
+    repair_id
+);
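+
+/*
+ * Illustrative only, not part of the schema: the PRIMARY KEY on volume_id is
+ * what limits a volume to a single outstanding repair. A second record for
+ * the same volume is rejected with a duplicate key error:
+ *
+ *   INSERT INTO omicron.public.volume_repair (volume_id, repair_id)
+ *       VALUES ('<volume id>', '<first repair id>');    -- ok
+ *   INSERT INTO omicron.public.volume_repair (volume_id, repair_id)
+ *       VALUES ('<volume id>', '<second repair id>');   -- duplicate key error
+ */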
+
+CREATE TYPE IF NOT EXISTS omicron.public.region_replacement_step_type AS ENUM (
+  'propolis',
+  'pantry'
+);
+
+CREATE TABLE IF NOT EXISTS omicron.public.region_replacement_step (
+    replacement_id UUID NOT NULL,
+
+    step_time TIMESTAMPTZ NOT NULL,
+
+    step_type omicron.public.region_replacement_step_type NOT NULL,
+
+    step_associated_instance_id UUID,
+    step_associated_vmm_id UUID,
+
+    step_associated_pantry_ip INET,
+    step_associated_pantry_port INT4 CHECK (step_associated_pantry_port BETWEEN 0 AND 65535),
+    step_associated_pantry_job_id UUID,
+
+    PRIMARY KEY (replacement_id, step_time, step_type)
+);
+
+CREATE INDEX IF NOT EXISTS step_time_order on omicron.public.region_replacement_step (step_time);
+
+CREATE INDEX IF NOT EXISTS search_for_repair_notifications ON omicron.public.upstairs_repair_notification (region_id, notification_type);
+
+CREATE INDEX IF NOT EXISTS lookup_any_disk_by_volume_id ON omicron.public.disk (
+    volume_id
+);
+
+CREATE INDEX IF NOT EXISTS lookup_snapshot_by_destination_volume_id ON omicron.public.snapshot ( destination_volume_id );
+
 /*
  * Metadata for the schema itself. This version number isn't great, as there's
  * nothing to ensure it gets bumped when it should be, but it's a start.
@@ -3926,7 +3998,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '64.0.0', NULL)
+    (TRUE, NOW(), NOW(), '65.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/schema/crdb/region-replacement/up01.sql b/schema/crdb/region-replacement/up01.sql
new file mode 100644
index 0000000000..e13ec3c983
--- /dev/null
+++ b/schema/crdb/region-replacement/up01.sql
@@ -0,0 +1,9 @@
+CREATE TYPE IF NOT EXISTS omicron.public.region_replacement_state AS ENUM (
+  'requested',
+  'allocating',
+  'running',
+  'driving',
+  'replacement_done',
+  'completing',
+  'complete'
+);
diff --git a/schema/crdb/region-replacement/up02.sql b/schema/crdb/region-replacement/up02.sql
new file mode 100644
index 0000000000..46e5de96ba
--- /dev/null
+++ b/schema/crdb/region-replacement/up02.sql
@@ -0,0 +1,18 @@
+CREATE TABLE IF NOT EXISTS omicron.public.region_replacement (
+    /* unique ID for this region replacement */
+    id UUID PRIMARY KEY,
+
+    request_time TIMESTAMPTZ NOT NULL,
+
+    old_region_id UUID NOT NULL,
+
+    volume_id UUID NOT NULL,
+
+    old_region_volume_id UUID,
+
+    new_region_id UUID,
+
+    replacement_state omicron.public.region_replacement_state NOT NULL,
+
+    operating_saga_id UUID
+);
diff --git a/schema/crdb/region-replacement/up03.sql b/schema/crdb/region-replacement/up03.sql
new file mode 100644
index 0000000000..51a9db9379
--- /dev/null
+++ b/schema/crdb/region-replacement/up03.sql
@@ -0,0 +1 @@
+CREATE INDEX IF NOT EXISTS lookup_region_replacement_by_state on omicron.public.region_replacement (replacement_state);
diff --git a/schema/crdb/region-replacement/up04.sql b/schema/crdb/region-replacement/up04.sql
new file mode 100644
index 0000000000..7a95f48983
--- /dev/null
+++ b/schema/crdb/region-replacement/up04.sql
@@ -0,0 +1,4 @@
+CREATE TABLE IF NOT EXISTS omicron.public.volume_repair (
+    volume_id UUID PRIMARY KEY,
+    repair_id UUID NOT NULL
+);
diff --git a/schema/crdb/region-replacement/up05.sql b/schema/crdb/region-replacement/up05.sql
new file mode 100644
index 0000000000..b436dd865d
--- /dev/null
+++ b/schema/crdb/region-replacement/up05.sql
@@ -0,0 +1,3 @@
+CREATE INDEX IF NOT EXISTS lookup_volume_repair_by_repair_id on omicron.public.volume_repair (
+    repair_id
+);
diff --git a/schema/crdb/region-replacement/up06.sql b/schema/crdb/region-replacement/up06.sql
new file mode 100644
index 0000000000..b02377cc59
--- /dev/null
+++ b/schema/crdb/region-replacement/up06.sql
@@ -0,0 +1,4 @@
+CREATE TYPE IF NOT EXISTS omicron.public.region_replacement_step_type AS ENUM (
+  'propolis',
+  'pantry'
+);
diff --git a/schema/crdb/region-replacement/up07.sql b/schema/crdb/region-replacement/up07.sql
new file mode 100644
index 0000000000..675b637bf3
--- /dev/null
+++ b/schema/crdb/region-replacement/up07.sql
@@ -0,0 +1,16 @@
+CREATE TABLE IF NOT EXISTS omicron.public.region_replacement_step (
+    replacement_id UUID NOT NULL,
+
+    step_time TIMESTAMPTZ NOT NULL,
+
+    step_type omicron.public.region_replacement_step_type NOT NULL,
+
+    step_associated_instance_id UUID,
+    step_associated_vmm_id UUID,
+
+    step_associated_pantry_ip INET,
+    step_associated_pantry_port INT4 CHECK (step_associated_pantry_port BETWEEN 0 AND 65535),
+    step_associated_pantry_job_id UUID,
+
+    PRIMARY KEY (replacement_id, step_time, step_type)
+);
diff --git a/schema/crdb/region-replacement/up08.sql b/schema/crdb/region-replacement/up08.sql
new file mode 100644
index 0000000000..a5ecac8216
--- /dev/null
+++ b/schema/crdb/region-replacement/up08.sql
@@ -0,0 +1 @@
+CREATE INDEX IF NOT EXISTS step_time_order on omicron.public.region_replacement_step (step_time);
diff --git a/schema/crdb/region-replacement/up09.sql b/schema/crdb/region-replacement/up09.sql
new file mode 100644
index 0000000000..f5cc7bb682
--- /dev/null
+++ b/schema/crdb/region-replacement/up09.sql
@@ -0,0 +1 @@
+CREATE INDEX IF NOT EXISTS search_for_repair_notifications ON omicron.public.upstairs_repair_notification (region_id, notification_type);
diff --git a/schema/crdb/region-replacement/up10.sql b/schema/crdb/region-replacement/up10.sql
new file mode 100644
index 0000000000..eccfad8a25
--- /dev/null
+++ b/schema/crdb/region-replacement/up10.sql
@@ -0,0 +1,3 @@
+CREATE INDEX IF NOT EXISTS lookup_any_disk_by_volume_id ON omicron.public.disk (
+    volume_id
+);
diff --git a/schema/crdb/region-replacement/up11.sql b/schema/crdb/region-replacement/up11.sql
new file mode 100644
index 0000000000..5984bba752
--- /dev/null
+++ b/schema/crdb/region-replacement/up11.sql
@@ -0,0 +1 @@
+CREATE INDEX IF NOT EXISTS lookup_snapshot_by_destination_volume_id ON omicron.public.snapshot ( destination_volume_id );

From 8975897872cb63a072c60eea4a081c3d164ff588 Mon Sep 17 00:00:00 2001
From: Adam Leventhal <ahl@oxide.computer>
Date: Thu, 23 May 2024 17:40:02 -0700
Subject: [PATCH 02/28] use `oxnet::{ IpNet, Ipv4Net, Ipv6Net }` (#5810)

---
 Cargo.lock                                    |  39 +-
 Cargo.toml                                    |   1 +
 clients/ddm-admin-client/src/lib.rs           |   2 +-
 clients/nexus-client/Cargo.toml               |   1 +
 clients/nexus-client/src/lib.rs               |  20 +-
 clients/sled-agent-client/Cargo.toml          |   1 +
 clients/sled-agent-client/src/lib.rs          |  35 +-
 common/Cargo.toml                             |   1 +
 common/src/address.rs                         | 114 ++--
 common/src/api/external/mod.rs                | 548 ++----------------
 common/src/api/internal/nexus.rs              |   4 +-
 common/src/api/internal/shared.rs             |  39 +-
 illumos-utils/Cargo.toml                      |   1 +
 illumos-utils/src/opte/firewall_rules.rs      |  22 +-
 internal-dns/src/resolver.rs                  |   2 +-
 nexus/Cargo.toml                              |   1 +
 nexus/db-model/Cargo.toml                     |   1 +
 nexus/db-model/src/ipv4_nat_entry.rs          |   4 +-
 nexus/db-model/src/ipv4net.rs                 |  17 +-
 nexus/db-model/src/ipv6net.rs                 |  27 +-
 nexus/db-model/src/lib.rs                     |  29 +-
 nexus/db-model/src/network_interface.rs       |   2 +-
 nexus/db-model/src/vpc.rs                     |  27 +-
 nexus/db-model/src/vpc_subnet.rs              |   4 +-
 nexus/db-queries/Cargo.toml                   |   1 +
 .../src/db/datastore/ipv4_nat_entry.rs        |  47 +-
 nexus/db-queries/src/db/datastore/mod.rs      |   4 +-
 .../src/db/datastore/network_interface.rs     |   7 +-
 nexus/db-queries/src/db/datastore/rack.rs     |  72 +--
 .../src/db/datastore/switch_port.rs           |   2 +-
 nexus/db-queries/src/db/datastore/vpc.rs      |   4 +-
 .../src/db/queries/network_interface.rs       |  56 +-
 nexus/db-queries/src/db/queries/vpc_subnet.rs |  28 +-
 nexus/defaults/Cargo.toml                     |   1 +
 nexus/defaults/src/lib.rs                     |  28 +-
 nexus/networking/Cargo.toml                   |   1 +
 nexus/networking/src/firewall_rules.rs        |  11 +-
 nexus/reconfigurator/execution/Cargo.toml     |   1 +
 .../execution/src/external_networking.rs      |   5 +-
 nexus/reconfigurator/planning/Cargo.toml      |   1 +
 .../planning/src/blueprint_builder/builder.rs |   4 +-
 .../blueprint_builder/external_networking.rs  |   6 +-
 .../output/planner_nonprovisionable_2_2a.txt  |  10 +-
 nexus/src/app/allow_list.rs                   |   4 +-
 .../app/background/sync_service_zone_nat.rs   |  17 +-
 nexus/src/app/bgp.rs                          |   8 +-
 nexus/src/app/instance_network.rs             |  12 +-
 nexus/src/app/sagas/vpc_create.rs             |  16 +-
 nexus/src/app/switch_interface.rs             |   3 +-
 nexus/src/app/vpc_subnet.rs                   |  19 +-
 nexus/src/context.rs                          |   3 +-
 nexus/test-utils/src/lib.rs                   |   4 +-
 nexus/tests/integration_tests/allow_list.rs   |  13 +-
 nexus/tests/integration_tests/endpoints.rs    |   3 +-
 nexus/tests/integration_tests/instances.rs    |   7 +-
 .../integration_tests/subnet_allocation.rs    |  23 +-
 nexus/tests/integration_tests/vpc_subnets.rs  |  24 +-
 nexus/tests/integration_tests/vpcs.rs         |   5 +-
 nexus/types/Cargo.toml                        |   1 +
 nexus/types/src/external_api/params.rs        |   5 +-
 nexus/types/src/external_api/views.rs         |   5 +-
 openapi/bootstrap-agent.json                  |  17 +-
 openapi/nexus-internal.json                   |  17 +-
 openapi/nexus.json                            |  17 +-
 openapi/sled-agent.json                       |  19 +-
 openapi/wicketd.json                          |  17 +-
 schema/all-zone-requests.json                 |  23 +-
 schema/all-zones-requests.json                |  23 +-
 schema/deployment-config.json                 |   9 +-
 schema/rss-service-plan-v3.json               |  23 +-
 schema/rss-sled-plan.json                     |  25 +-
 schema/start-sled-agent-request.json          |   9 +-
 sled-agent/Cargo.toml                         |   1 +
 sled-agent/src/bootstrap/early_networking.rs  |  19 +-
 sled-agent/src/bootstrap/server.rs            |   2 +-
 sled-agent/src/rack_setup/plan/service.rs     |  26 +-
 sled-agent/src/services.rs                    |  14 +-
 sled-agent/src/sim/server.rs                  |   4 +-
 tools/dendrite_openapi_version                |   0
 wicket-common/Cargo.toml                      |   1 +
 wicket-common/src/rack_setup.rs               |   2 +-
 wicket/src/ui/panes/rack_setup.rs             |   4 +-
 wicketd/src/rss_config.rs                     |   2 +-
 workspace-hack/Cargo.toml                     |   4 +-
 84 files changed, 666 insertions(+), 1015 deletions(-)
 mode change 100644 => 100755 tools/dendrite_openapi_version

diff --git a/Cargo.lock b/Cargo.lock
index 1dfaff0d77..eba31ceca4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2026,7 +2026,7 @@ dependencies = [
 [[package]]
 name = "dropshot"
 version = "0.10.2-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#2fdf37183d2fac385e0f66f48903bc567f2e8e26"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#0cd0e828d096578392b6a5524334d44fd10ef6da"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -2072,7 +2072,7 @@ dependencies = [
 [[package]]
 name = "dropshot_endpoint"
 version = "0.10.2-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#2fdf37183d2fac385e0f66f48903bc567f2e8e26"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#0cd0e828d096578392b6a5524334d44fd10ef6da"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3506,6 +3506,7 @@ dependencies = [
  "opte-ioctl",
  "oxide-vpc",
  "oxlog",
+ "oxnet",
  "regress",
  "schemars",
  "serde",
@@ -4023,7 +4024,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
 dependencies = [
  "cfg-if",
- "windows-targets 0.52.5",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -4495,6 +4496,7 @@ dependencies = [
  "omicron-passwords",
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
+ "oxnet",
  "progenitor",
  "regress",
  "reqwest",
@@ -4551,6 +4553,7 @@ dependencies = [
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
  "once_cell",
+ "oxnet",
  "parse-display",
  "pq-sys",
  "rand 0.8.5",
@@ -4620,6 +4623,7 @@ dependencies = [
  "openssl",
  "oso",
  "oximeter",
+ "oxnet",
  "paste",
  "pem",
  "petgraph",
@@ -4661,6 +4665,7 @@ dependencies = [
  "omicron-common",
  "omicron-workspace-hack",
  "once_cell",
+ "oxnet",
  "rand 0.8.5",
  "serde_json",
 ]
@@ -4740,6 +4745,7 @@ dependencies = [
  "nexus-db-queries",
  "omicron-common",
  "omicron-workspace-hack",
+ "oxnet",
  "reqwest",
  "sled-agent-client",
  "slog",
@@ -4775,6 +4781,7 @@ dependencies = [
  "omicron-test-utils",
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
+ "oxnet",
  "pq-sys",
  "reqwest",
  "sled-agent-client",
@@ -4805,6 +4812,7 @@ dependencies = [
  "omicron-test-utils",
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
+ "oxnet",
  "proptest",
  "rand 0.8.5",
  "sled-agent-client",
@@ -4920,6 +4928,7 @@ dependencies = [
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
  "openssl",
+ "oxnet",
  "parse-display",
  "proptest",
  "schemars",
@@ -5240,6 +5249,7 @@ dependencies = [
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
  "once_cell",
+ "oxnet",
  "parse-display",
  "progenitor",
  "progenitor-client",
@@ -5444,6 +5454,7 @@ dependencies = [
  "oximeter-db",
  "oximeter-instruments",
  "oximeter-producer",
+ "oxnet",
  "parse-display",
  "paste",
  "pem",
@@ -5696,6 +5707,7 @@ dependencies = [
  "oximeter",
  "oximeter-instruments",
  "oximeter-producer",
+ "oxnet",
  "pretty_assertions",
  "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6d7ed9a033babc054db9eff5b59dee978d2b0d76)",
  "propolis-mock-server",
@@ -6328,6 +6340,17 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "oxnet"
+version = "0.1.0"
+source = "git+https://github.com/oxidecomputer/oxnet?branch=main#42b4d3c77c7f5f2636cd6c4bbf37ac3eada047e0"
+dependencies = [
+ "ipnetwork",
+ "schemars",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "p256"
 version = "0.13.2"
@@ -8196,9 +8219,9 @@ dependencies = [
 
 [[package]]
 name = "schemars"
-version = "0.8.19"
+version = "0.8.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc6e7ed6919cb46507fb01ff1654309219f62b4d603822501b0b80d42f6f21ef"
+checksum = "b0218ceea14babe24a4a5836f86ade86c1effbc198164e619194cb5069187e29"
 dependencies = [
  "bytes",
  "chrono",
@@ -8211,9 +8234,9 @@ dependencies = [
 
 [[package]]
 name = "schemars_derive"
-version = "0.8.19"
+version = "0.8.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "185f2b7aa7e02d418e453790dde16890256bbd2bcd04b7dc5348811052b53f49"
+checksum = "3ed5a1ccce8ff962e31a165d41f6e2a2dd1245099dc4d594f5574a86cd90f4d3"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -8697,6 +8720,7 @@ dependencies = [
  "omicron-common",
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
+ "oxnet",
  "progenitor",
  "regress",
  "reqwest",
@@ -10972,6 +10996,7 @@ dependencies = [
  "omicron-common",
  "omicron-workspace-hack",
  "owo-colors",
+ "oxnet",
  "schemars",
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index ed2b7cdcfe..a350f59f0a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -331,6 +331,7 @@ omicron-certificates = { path = "certificates" }
 omicron-passwords = { path = "passwords" }
 omicron-workspace-hack = "0.1.0"
 oxlog = { path = "dev-tools/oxlog" }
+oxnet = { git = "https://github.com/oxidecomputer/oxnet", branch = "main" }
 nexus-test-interface = { path = "nexus/test-interface" }
 nexus-test-utils-macros = { path = "nexus/test-utils-macros" }
 nexus-test-utils = { path = "nexus/test-utils" }
diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs
index 5be2dd53bd..b926ee2971 100644
--- a/clients/ddm-admin-client/src/lib.rs
+++ b/clients/ddm-admin-client/src/lib.rs
@@ -82,7 +82,7 @@ impl Client {
         let me = self.clone();
         tokio::spawn(async move {
             let prefix =
-                Ipv6Prefix { addr: address.net().network(), len: SLED_PREFIX };
+                Ipv6Prefix { addr: address.net().prefix(), len: SLED_PREFIX };
             retry_notify(retry_policy_internal_service_aggressive(), || async {
                 info!(
                     me.log, "Sending prefix to ddmd for advertisement";
diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml
index 1d5cced21c..b4e299da67 100644
--- a/clients/nexus-client/Cargo.toml
+++ b/clients/nexus-client/Cargo.toml
@@ -14,6 +14,7 @@ ipnetwork.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true
 omicron-passwords.workspace = true
+oxnet.workspace = true
 progenitor.workspace = true
 regress.workspace = true
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs
index ae8f0c93db..bcdd3971c0 100644
--- a/clients/nexus-client/src/lib.rs
+++ b/clients/nexus-client/src/lib.rs
@@ -419,33 +419,27 @@ impl TryFrom<types::ProducerEndpoint>
     }
 }
 
-impl TryFrom<&omicron_common::api::external::Ipv4Net> for types::Ipv4Net {
+impl TryFrom<&oxnet::Ipv4Net> for types::Ipv4Net {
     type Error = String;
 
-    fn try_from(
-        net: &omicron_common::api::external::Ipv4Net,
-    ) -> Result<Self, Self::Error> {
+    fn try_from(net: &oxnet::Ipv4Net) -> Result<Self, Self::Error> {
         types::Ipv4Net::try_from(net.to_string()).map_err(|e| e.to_string())
     }
 }
 
-impl TryFrom<&omicron_common::api::external::Ipv6Net> for types::Ipv6Net {
+impl TryFrom<&oxnet::Ipv6Net> for types::Ipv6Net {
     type Error = String;
 
-    fn try_from(
-        net: &omicron_common::api::external::Ipv6Net,
-    ) -> Result<Self, Self::Error> {
+    fn try_from(net: &oxnet::Ipv6Net) -> Result<Self, Self::Error> {
         types::Ipv6Net::try_from(net.to_string()).map_err(|e| e.to_string())
     }
 }
 
-impl TryFrom<&omicron_common::api::external::IpNet> for types::IpNet {
+impl TryFrom<&oxnet::IpNet> for types::IpNet {
     type Error = String;
 
-    fn try_from(
-        net: &omicron_common::api::external::IpNet,
-    ) -> Result<Self, Self::Error> {
-        use omicron_common::api::external::IpNet;
+    fn try_from(net: &oxnet::IpNet) -> Result<Self, Self::Error> {
+        use oxnet::IpNet;
         match net {
             IpNet::V4(v4) => types::Ipv4Net::try_from(v4).map(types::IpNet::V4),
             IpNet::V6(v6) => types::Ipv6Net::try_from(v6).map(types::IpNet::V6),
diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml
index 3f3d82bf80..caca3c8c73 100644
--- a/clients/sled-agent-client/Cargo.toml
+++ b/clients/sled-agent-client/Cargo.toml
@@ -22,4 +22,5 @@ slog.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
 omicron-uuid-kinds.workspace = true
+oxnet.workspace = true
 serde_json.workspace = true
diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs
index 4ac7eed27c..24bb2a6df8 100644
--- a/clients/sled-agent-client/src/lib.rs
+++ b/clients/sled-agent-client/src/lib.rs
@@ -413,24 +413,23 @@ impl From<types::DiskState> for omicron_common::api::external::DiskState {
     }
 }
 
-impl From<omicron_common::api::external::Ipv4Net> for types::Ipv4Net {
-    fn from(n: omicron_common::api::external::Ipv4Net) -> Self {
+impl From<oxnet::Ipv4Net> for types::Ipv4Net {
+    fn from(n: oxnet::Ipv4Net) -> Self {
         Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
     }
 }
 
-impl From<omicron_common::api::external::Ipv6Net> for types::Ipv6Net {
-    fn from(n: omicron_common::api::external::Ipv6Net) -> Self {
+impl From<oxnet::Ipv6Net> for types::Ipv6Net {
+    fn from(n: oxnet::Ipv6Net) -> Self {
         Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
     }
 }
 
-impl From<omicron_common::api::external::IpNet> for types::IpNet {
-    fn from(s: omicron_common::api::external::IpNet) -> Self {
-        use omicron_common::api::external::IpNet;
+impl From<oxnet::IpNet> for types::IpNet {
+    fn from(s: oxnet::IpNet) -> Self {
         match s {
-            IpNet::V4(v4) => Self::V4(v4.into()),
-            IpNet::V6(v6) => Self::V6(v6.into()),
+            oxnet::IpNet::V4(v4) => Self::V4(v4.into()),
+            oxnet::IpNet::V6(v6) => Self::V6(v6.into()),
         }
     }
 }
@@ -441,14 +440,20 @@ impl From<ipnetwork::Ipv4Network> for types::Ipv4Net {
     }
 }
 
-impl From<types::Ipv4Net> for ipnetwork::Ipv4Network {
+impl From<ipnetwork::Ipv4Network> for types::Ipv4Network {
+    fn from(n: ipnetwork::Ipv4Network) -> Self {
+        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
+    }
+}
+
+impl From<types::Ipv4Net> for oxnet::Ipv4Net {
     fn from(n: types::Ipv4Net) -> Self {
         n.parse().unwrap()
     }
 }
 
-impl From<ipnetwork::Ipv4Network> for types::Ipv4Network {
-    fn from(n: ipnetwork::Ipv4Network) -> Self {
+impl From<oxnet::Ipv4Net> for types::Ipv4Network {
+    fn from(n: oxnet::Ipv4Net) -> Self {
         Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
     }
 }
@@ -484,6 +489,12 @@ impl From<types::IpNet> for ipnetwork::IpNetwork {
     }
 }
 
+impl From<types::Ipv4Net> for ipnetwork::Ipv4Network {
+    fn from(n: types::Ipv4Net) -> Self {
+        n.parse().unwrap()
+    }
+}
+
 impl From<std::net::Ipv4Addr> for types::Ipv4Net {
     fn from(n: std::net::Ipv4Addr) -> Self {
         Self::try_from(format!("{n}/32"))
diff --git a/common/Cargo.toml b/common/Cargo.toml
index 04010af4be..b51e1bb070 100644
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -28,6 +28,7 @@ ipnetwork.workspace = true
 macaddr.workspace = true
 mg-admin-client.workspace = true
 omicron-uuid-kinds.workspace = true
+oxnet.workspace = true
 proptest = { workspace = true, optional = true }
 rand.workspace = true
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
diff --git a/common/src/address.rs b/common/src/address.rs
index 817070d399..b7476d6ff4 100644
--- a/common/src/address.rs
+++ b/common/src/address.rs
@@ -7,9 +7,10 @@
 //! This addressing functionality is shared by both initialization services
 //! and Nexus, who need to agree upon addressing schemes.
 
-use crate::api::external::{self, Error, Ipv4Net, Ipv6Net};
-use ipnetwork::{Ipv4Network, Ipv6Network};
+use crate::api::external::{self, Error};
+use ipnetwork::Ipv6Network;
 use once_cell::sync::Lazy;
+use oxnet::{Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6};
@@ -72,6 +73,12 @@ pub const WICKETD_NEXUS_PROXY_PORT: u16 = 12229;
 
 pub const NTP_PORT: u16 = 123;
 
+/// The length for all VPC IPv6 prefixes
+pub const VPC_IPV6_PREFIX_LENGTH: u8 = 48;
+
+/// The prefix length for all VPC subnets
+pub const VPC_SUBNET_IPV6_PREFIX_LENGTH: u8 = 64;
+
 // The number of ports available to an SNAT IP.
 // Note that for static NAT, this value isn't used, and all ports are available.
 //
@@ -104,61 +111,50 @@ pub const NUM_SOURCE_NAT_PORTS: u16 = 1 << 14;
 // Furthermore, all the below *_OPTE_IPV6_SUBNET constants are
 // /64's within this prefix.
 pub static SERVICE_VPC_IPV6_PREFIX: Lazy<Ipv6Net> = Lazy::new(|| {
-    Ipv6Net(
-        Ipv6Network::new(
-            Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 0, 0, 0, 0, 0),
-            Ipv6Net::VPC_IPV6_PREFIX_LENGTH,
-        )
-        .unwrap(),
+    Ipv6Net::new(
+        Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 0, 0, 0, 0, 0),
+        VPC_IPV6_PREFIX_LENGTH,
     )
+    .unwrap()
 });
 
 /// The IPv4 subnet for External DNS OPTE ports.
-pub static DNS_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> = Lazy::new(|| {
-    Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 1, 0), 24).unwrap())
-});
+pub static DNS_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> =
+    Lazy::new(|| Ipv4Net::new(Ipv4Addr::new(172, 30, 1, 0), 24).unwrap());
 
 /// The IPv6 subnet for External DNS OPTE ports.
 pub static DNS_OPTE_IPV6_SUBNET: Lazy<Ipv6Net> = Lazy::new(|| {
-    Ipv6Net(
-        Ipv6Network::new(
-            Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 1, 0, 0, 0, 0),
-            Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH,
-        )
-        .unwrap(),
+    Ipv6Net::new(
+        Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 1, 0, 0, 0, 0),
+        VPC_SUBNET_IPV6_PREFIX_LENGTH,
     )
+    .unwrap()
 });
 
 /// The IPv4 subnet for Nexus OPTE ports.
-pub static NEXUS_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> = Lazy::new(|| {
-    Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 2, 0), 24).unwrap())
-});
+pub static NEXUS_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> =
+    Lazy::new(|| Ipv4Net::new(Ipv4Addr::new(172, 30, 2, 0), 24).unwrap());
 
 /// The IPv6 subnet for Nexus OPTE ports.
 pub static NEXUS_OPTE_IPV6_SUBNET: Lazy<Ipv6Net> = Lazy::new(|| {
-    Ipv6Net(
-        Ipv6Network::new(
-            Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 2, 0, 0, 0, 0),
-            Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH,
-        )
-        .unwrap(),
+    Ipv6Net::new(
+        Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 2, 0, 0, 0, 0),
+        VPC_SUBNET_IPV6_PREFIX_LENGTH,
     )
+    .unwrap()
 });
 
 /// The IPv4 subnet for Boundary NTP OPTE ports.
-pub static NTP_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> = Lazy::new(|| {
-    Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 3, 0), 24).unwrap())
-});
+pub static NTP_OPTE_IPV4_SUBNET: Lazy<Ipv4Net> =
+    Lazy::new(|| Ipv4Net::new(Ipv4Addr::new(172, 30, 3, 0), 24).unwrap());
 
 /// The IPv6 subnet for Boundary NTP OPTE ports.
 pub static NTP_OPTE_IPV6_SUBNET: Lazy<Ipv6Net> = Lazy::new(|| {
-    Ipv6Net(
-        Ipv6Network::new(
-            Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 3, 0, 0, 0, 0),
-            Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH,
-        )
-        .unwrap(),
+    Ipv6Net::new(
+        Ipv6Addr::new(0xfd77, 0xe9d2, 0x9cd9, 3, 0, 0, 0, 0),
+        VPC_SUBNET_IPV6_PREFIX_LENGTH,
     )
+    .unwrap()
 });
 
 // Anycast is a mechanism in which a single IP address is shared by multiple
@@ -188,7 +184,7 @@ pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF;
 // to assume that addresses in this subnet are available.
 pub const SLED_RESERVED_ADDRESSES: u16 = 32;
 
-/// Wraps an [`Ipv6Network`] with a compile-time prefix length.
+/// Wraps an [`Ipv6Net`] with a compile-time prefix length.
 #[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)]
 #[schemars(rename = "Ipv6Subnet")]
 pub struct Ipv6Subnet<const N: u8> {
@@ -198,23 +194,23 @@ pub struct Ipv6Subnet<const N: u8> {
 impl<const N: u8> Ipv6Subnet<N> {
     pub fn new(addr: Ipv6Addr) -> Self {
         // Create a network with the compile-time prefix length.
-        let net = Ipv6Network::new(addr, N).unwrap();
+        let net = Ipv6Net::new(addr, N).unwrap();
         // Ensure the address is set to within-prefix only components.
-        let net = Ipv6Network::new(net.network(), N).unwrap();
-        Self { net: Ipv6Net(net) }
+        let net = Ipv6Net::new(net.prefix(), N).unwrap();
+        Self { net }
     }
 
     /// Returns the underlying network.
-    pub fn net(&self) -> Ipv6Network {
-        self.net.0
+    pub fn net(&self) -> Ipv6Net {
+        self.net
     }
 }
 
 impl<const N: u8> From<Ipv6Network> for Ipv6Subnet<N> {
     fn from(net: Ipv6Network) -> Self {
         // Ensure the address is set to within-prefix only components.
-        let net = Ipv6Network::new(net.network(), N).unwrap();
-        Self { net: Ipv6Net(net) }
+        let net = Ipv6Net::new(net.network(), N).unwrap();
+        Self { net }
     }
 }
 
@@ -230,13 +226,13 @@ impl<'de, const N: u8> Deserialize<'de> for Ipv6Subnet<N> {
         }
 
         let Inner { net } = Inner::deserialize(deserializer)?;
-        if net.prefix() == N {
+        if net.width() == N {
             Ok(Self { net })
         } else {
             Err(<D::Error as serde::de::Error>::custom(format!(
                 "expected prefix {} but found {}",
                 N,
-                net.prefix(),
+                net.width(),
             )))
         }
     }
@@ -252,9 +248,9 @@ impl DnsSubnet {
     /// Returns the DNS server address within the subnet.
     ///
     /// This is the first address within the subnet.
-    pub fn dns_address(&self) -> Ipv6Network {
-        Ipv6Network::new(
-            self.subnet.net().iter().nth(DNS_ADDRESS_INDEX).unwrap(),
+    pub fn dns_address(&self) -> Ipv6Net {
+        Ipv6Net::new(
+            self.subnet.net().nth(DNS_ADDRESS_INDEX as u128).unwrap(),
             SLED_PREFIX,
         )
         .unwrap()
@@ -264,9 +260,9 @@ impl DnsSubnet {
     /// to be able to contact the DNS server.
     ///
     /// This is the second address within the subnet.
-    pub fn gz_address(&self) -> Ipv6Network {
-        Ipv6Network::new(
-            self.subnet.net().iter().nth(GZ_ADDRESS_INDEX).unwrap(),
+    pub fn gz_address(&self) -> Ipv6Net {
+        Ipv6Net::new(
+            self.subnet.net().nth(GZ_ADDRESS_INDEX as u128).unwrap(),
             SLED_PREFIX,
         )
         .unwrap()
@@ -281,7 +277,7 @@ pub struct ReservedRackSubnet(pub Ipv6Subnet<RACK_PREFIX>);
 impl ReservedRackSubnet {
     /// Returns the subnet for the reserved rack subnet.
     pub fn new(subnet: Ipv6Subnet<AZ_PREFIX>) -> Self {
-        ReservedRackSubnet(Ipv6Subnet::<RACK_PREFIX>::new(subnet.net().ip()))
+        ReservedRackSubnet(Ipv6Subnet::<RACK_PREFIX>::new(subnet.net().addr()))
     }
 
     /// Returns the DNS addresses from this reserved rack subnet.
@@ -308,7 +304,7 @@ pub fn get_internal_dns_server_addresses(addr: Ipv6Addr) -> Vec<IpAddr> {
         &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
     dns_subnets
         .iter()
-        .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address().ip()))
+        .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address().addr()))
         .collect()
 }
 
@@ -320,7 +316,7 @@ const SWITCH_ZONE_ADDRESS_INDEX: usize = 2;
 /// This address will come from the first address of the [`SLED_PREFIX`] subnet.
 pub fn get_sled_address(sled_subnet: Ipv6Subnet<SLED_PREFIX>) -> SocketAddrV6 {
     let sled_agent_ip =
-        sled_subnet.net().iter().nth(SLED_AGENT_ADDRESS_INDEX).unwrap();
+        sled_subnet.net().nth(SLED_AGENT_ADDRESS_INDEX as u128).unwrap();
     SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0)
 }
 
@@ -330,7 +326,7 @@ pub fn get_sled_address(sled_subnet: Ipv6Subnet<SLED_PREFIX>) -> SocketAddrV6 {
 pub fn get_switch_zone_address(
     sled_subnet: Ipv6Subnet<SLED_PREFIX>,
 ) -> Ipv6Addr {
-    sled_subnet.net().iter().nth(SWITCH_ZONE_ADDRESS_INDEX).unwrap()
+    sled_subnet.net().nth(SWITCH_ZONE_ADDRESS_INDEX as u128).unwrap()
 }
 
 /// Returns a sled subnet within a rack subnet.
@@ -340,7 +336,7 @@ pub fn get_64_subnet(
     rack_subnet: Ipv6Subnet<RACK_PREFIX>,
     index: u8,
 ) -> Ipv6Subnet<SLED_PREFIX> {
-    let mut rack_network = rack_subnet.net().network().octets();
+    let mut rack_network = rack_subnet.net().addr().octets();
 
     // To set bits distinguishing the /64 from the /56, we modify the 7th octet.
     rack_network[7] = index;
@@ -680,7 +676,7 @@ mod test {
         assert_eq!(
             //              Note that these bits (indicating the rack) are zero.
             //              vv
-            "fd00:1122:3344:0000::/56".parse::<Ipv6Network>().unwrap(),
+            "fd00:1122:3344:0000::/56".parse::<Ipv6Net>().unwrap(),
             rack_subnet.0.net(),
         );
 
@@ -690,11 +686,11 @@ mod test {
 
         // The DNS address and GZ address should be only differing by one.
         assert_eq!(
-            "fd00:1122:3344:0001::1/64".parse::<Ipv6Network>().unwrap(),
+            "fd00:1122:3344:0001::1/64".parse::<Ipv6Net>().unwrap(),
             dns_subnets[0].dns_address(),
         );
         assert_eq!(
-            "fd00:1122:3344:0001::2/64".parse::<Ipv6Network>().unwrap(),
+            "fd00:1122:3344:0001::2/64".parse::<Ipv6Net>().unwrap(),
             dns_subnets[0].gz_address(),
         );
     }
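
Illustrative sketch (not part of the patch): with the switch to oxnet in common/src/address.rs, per-sled addresses are derived with Ipv6Net::new and nth() rather than ipnetwork's iterator, and addr() replaces ip(). The subnet value and indices below are made up for the example and assume the oxnet workspace dependency added above.

    use std::net::Ipv6Addr;
    use oxnet::Ipv6Net;

    fn main() {
        // A hypothetical /64 sled subnet.
        let sled = Ipv6Net::new(
            Ipv6Addr::new(0xfd00, 0x1122, 0x3344, 0x0101, 0, 0, 0, 0),
            64,
        )
        .unwrap();

        // nth() replaces the old iter().nth() walk over ipnetwork's iterator.
        let sled_agent_ip = sled.nth(1).unwrap();
        let switch_zone_ip = sled.nth(2).unwrap();
        println!("sled agent {sled_agent_ip}, switch zone {switch_zone_ip}");
        println!("network address: {}", sled.addr());
    }
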
diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
index 1c01782cc6..07a7776f1e 100644
--- a/common/src/api/external/mod.rs
+++ b/common/src/api/external/mod.rs
@@ -22,6 +22,7 @@ use dropshot::HttpError;
 pub use dropshot::PaginationOrder;
 pub use error::*;
 use futures::stream::BoxStream;
+use oxnet::IpNet;
 use parse_display::Display;
 use parse_display::FromStr;
 use rand::thread_rng;
@@ -39,7 +40,6 @@ use std::fmt::Formatter;
 use std::fmt::Result as FormatResult;
 use std::net::IpAddr;
 use std::net::Ipv4Addr;
-use std::net::Ipv6Addr;
 use std::num::{NonZeroU16, NonZeroU32};
 use std::str::FromStr;
 use uuid::Uuid;
@@ -1229,398 +1229,33 @@ impl DiskState {
     }
 }
 
-/// An `Ipv4Net` represents a IPv4 subnetwork, including the address and network mask.
-#[derive(Clone, Copy, Debug, Deserialize, Hash, PartialEq, Eq, Serialize)]
-pub struct Ipv4Net(pub ipnetwork::Ipv4Network);
-
-impl Ipv4Net {
-    /// Constructs a new `Ipv4Net` representing a single IP.
-    pub fn single(ip: Ipv4Addr) -> Self {
-        Ipv4Net(
-            ipnetwork::Ipv4Network::new(ip, 32).expect("32 is within range"),
-        )
-    }
-
-    /// Return `true` if this IPv4 subnetwork is from an RFC 1918 private
-    /// address space.
-    pub fn is_private(&self) -> bool {
-        self.0.network().is_private()
-    }
-}
-
-impl std::ops::Deref for Ipv4Net {
-    type Target = ipnetwork::Ipv4Network;
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Ipv4Net {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl JsonSchema for Ipv4Net {
-    fn schema_name() -> String {
-        "Ipv4Net".to_string()
-    }
-
-    fn json_schema(
-        _: &mut schemars::gen::SchemaGenerator,
-    ) -> schemars::schema::Schema {
-        schemars::schema::SchemaObject {
-            metadata: Some(Box::new(schemars::schema::Metadata {
-                title: Some("An IPv4 subnet".to_string()),
-                description: Some(
-                    "An IPv4 subnet, including prefix and subnet mask"
-                        .to_string(),
-                ),
-                examples: vec!["192.168.1.0/24".into()],
-                ..Default::default()
-            })),
-            instance_type: Some(schemars::schema::InstanceType::String.into()),
-            string: Some(Box::new(schemars::schema::StringValidation {
-                pattern: Some(
-                    concat!(
-                        r#"^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}"#,
-                        r#"([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])"#,
-                        r#"/([0-9]|1[0-9]|2[0-9]|3[0-2])$"#,
-                    )
-                    .to_string(),
-                ),
-                ..Default::default()
-            })),
-            ..Default::default()
-        }
-        .into()
-    }
-}
-
-/// An `Ipv6Net` represents a IPv6 subnetwork, including the address and network mask.
-#[derive(Clone, Copy, Debug, Deserialize, Hash, PartialEq, Eq, Serialize)]
-pub struct Ipv6Net(pub ipnetwork::Ipv6Network);
-
-impl Ipv6Net {
+pub trait Ipv6NetExt {
     /// The length for all VPC IPv6 prefixes
-    pub const VPC_IPV6_PREFIX_LENGTH: u8 = 48;
+    const VPC_IPV6_PREFIX_LENGTH: u8 = 48;
 
-    /// The prefix length for all VPC Sunets
-    pub const VPC_SUBNET_IPV6_PREFIX_LENGTH: u8 = 64;
-
-    /// Constructs a new `Ipv6Net` representing a single IPv6 address.
-    pub fn single(ip: Ipv6Addr) -> Self {
-        Ipv6Net(
-            ipnetwork::Ipv6Network::new(ip, 128).expect("128 is within range"),
-        )
-    }
-
-    /// Return `true` if this subnetwork is in the IPv6 Unique Local Address
-    /// range defined in RFC 4193, e.g., `fd00:/8`
-    pub fn is_unique_local(&self) -> bool {
-        // TODO: Delegate to `Ipv6Addr::is_unique_local()` when stabilized.
-        self.0.network().octets()[0] == 0xfd
-    }
+    /// The prefix length for all VPC Subnets
+    const VPC_SUBNET_IPV6_PREFIX_LENGTH: u8 = 64;
 
     /// Return `true` if this subnetwork is a valid VPC prefix.
     ///
     /// This checks that the subnet is a unique local address, and has the VPC
     /// prefix length required.
-    pub fn is_vpc_prefix(&self) -> bool {
-        self.is_unique_local()
-            && self.0.prefix() == Self::VPC_IPV6_PREFIX_LENGTH
-    }
+    fn is_vpc_prefix(&self) -> bool;
 
     /// Return `true` if this subnetwork is a valid VPC Subnet, given the VPC's
     /// prefix.
-    pub fn is_vpc_subnet(&self, vpc_prefix: &Ipv6Net) -> bool {
-        self.is_unique_local()
-            && self.is_subnet_of(vpc_prefix.0)
-            && self.prefix() == Self::VPC_SUBNET_IPV6_PREFIX_LENGTH
-    }
-}
-
-impl std::ops::Deref for Ipv6Net {
-    type Target = ipnetwork::Ipv6Network;
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl std::fmt::Display for Ipv6Net {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-impl From<ipnetwork::Ipv6Network> for Ipv6Net {
-    fn from(n: ipnetwork::Ipv6Network) -> Ipv6Net {
-        Self(n)
-    }
-}
-
-const IPV6_NET_REGEX: &str = concat!(
-    r#"^("#,
-    r#"([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,7}:|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|"#,
-    r#"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|"#,
-    r#":((:[0-9a-fA-F]{1,4}){1,7}|:)|"#,
-    r#"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|"#,
-    r#"::(ffff(:0{1,4}){0,1}:){0,1}"#,
-    r#"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"#,
-    r#"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|"#,
-    r#"([0-9a-fA-F]{1,4}:){1,4}:"#,
-    r#"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}"#,
-    r#"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"#,
-    r#")\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"#,
-);
-
-#[cfg(test)]
-#[test]
-fn test_ipv6_regex() {
-    let re = regress::Regex::new(IPV6_NET_REGEX).unwrap();
-    for case in [
-        "1:2:3:4:5:6:7:8",
-        "1:a:2:b:3:c:4:d",
-        "1::",
-        "::1",
-        "::",
-        "1::3:4:5:6:7:8",
-        "1:2::4:5:6:7:8",
-        "1:2:3::5:6:7:8",
-        "1:2:3:4::6:7:8",
-        "1:2:3:4:5::7:8",
-        "1:2:3:4:5:6::8",
-        "1:2:3:4:5:6:7::",
-        "2001::",
-        "fd00::",
-        "::100:1",
-        "fd12:3456::",
-    ] {
-        for prefix in 0..=128 {
-            let net = format!("{case}/{prefix}");
-            assert!(
-                re.find(&net).is_some(),
-                "Expected to match IPv6 case: {}",
-                prefix,
-            );
-        }
-    }
-}
-
-impl JsonSchema for Ipv6Net {
-    fn schema_name() -> String {
-        "Ipv6Net".to_string()
-    }
-
-    fn json_schema(
-        _: &mut schemars::gen::SchemaGenerator,
-    ) -> schemars::schema::Schema {
-        schemars::schema::SchemaObject {
-            metadata: Some(Box::new(schemars::schema::Metadata {
-                title: Some("An IPv6 subnet".to_string()),
-                description: Some(
-                    "An IPv6 subnet, including prefix and subnet mask"
-                        .to_string(),
-                ),
-                examples: vec!["fd12:3456::/64".into()],
-                ..Default::default()
-            })),
-            instance_type: Some(schemars::schema::InstanceType::String.into()),
-            string: Some(Box::new(schemars::schema::StringValidation {
-                pattern: Some(IPV6_NET_REGEX.to_string()),
-                ..Default::default()
-            })),
-            ..Default::default()
-        }
-        .into()
-    }
-}
-
-/// An `IpNet` represents an IP network, either IPv4 or IPv6.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum IpNet {
-    V4(Ipv4Net),
-    V6(Ipv6Net),
-}
-
-impl IpNet {
-    /// Constructs a new `IpNet` representing a single IP.
-    pub fn single(ip: IpAddr) -> Self {
-        match ip {
-            IpAddr::V4(ip) => IpNet::V4(Ipv4Net::single(ip)),
-            IpAddr::V6(ip) => IpNet::V6(Ipv6Net::single(ip)),
-        }
-    }
-
-    /// Return the underlying address.
-    pub fn ip(&self) -> IpAddr {
-        match self {
-            IpNet::V4(inner) => inner.ip().into(),
-            IpNet::V6(inner) => inner.ip().into(),
-        }
-    }
-
-    /// Return the underlying prefix length.
-    pub fn prefix(&self) -> u8 {
-        match self {
-            IpNet::V4(inner) => inner.prefix(),
-            IpNet::V6(inner) => inner.prefix(),
-        }
-    }
-
-    /// Return the first address in this subnet
-    pub fn first_address(&self) -> IpAddr {
-        match self {
-            IpNet::V4(inner) => IpAddr::from(inner.iter().next().unwrap()),
-            IpNet::V6(inner) => IpAddr::from(inner.iter().next().unwrap()),
-        }
-    }
-
-    /// Return the last address in this subnet.
-    ///
-    /// For a subnet of size 1, e.g., a /32, this is the same as the first
-    /// address.
-    // NOTE: This is a workaround for the fact that the `ipnetwork` crate's
-    // iterator provides only the `Iterator::next()` method. That means that
-    // finding the last address is linear in the size of the subnet, which is
-    // completely untenable and totally avoidable with some addition. In the
-    // long term, we should either put up a patch to the `ipnetwork` crate or
-    // move the `ipnet` crate, which does provide an efficient iterator
-    // implementation.
-    pub fn last_address(&self) -> IpAddr {
-        match self {
-            IpNet::V4(inner) => {
-                let base: u32 = inner.network().into();
-                let size = inner.size() - 1;
-                std::net::IpAddr::V4(std::net::Ipv4Addr::from(base + size))
-            }
-            IpNet::V6(inner) => {
-                let base: u128 = inner.network().into();
-                let size = inner.size() - 1;
-                std::net::IpAddr::V6(std::net::Ipv6Addr::from(base + size))
-            }
-        }
-    }
-
-    /// Return true if the provided address is contained in self.
-    ///
-    /// This returns false if the address and the network are of different IP
-    /// families.
-    pub fn contains(&self, addr: IpAddr) -> bool {
-        match (self, addr) {
-            (IpNet::V4(net), IpAddr::V4(ip)) => net.contains(ip),
-            (IpNet::V6(net), IpAddr::V6(ip)) => net.contains(ip),
-            (_, _) => false,
-        }
-    }
-}
-
-impl From<ipnetwork::IpNetwork> for IpNet {
-    fn from(n: ipnetwork::IpNetwork) -> Self {
-        match n {
-            ipnetwork::IpNetwork::V4(v4) => IpNet::V4(Ipv4Net(v4)),
-            ipnetwork::IpNetwork::V6(v6) => IpNet::V6(Ipv6Net(v6)),
-        }
-    }
-}
-
-// NOTE: We deliberately do *NOT* implement `From<Ip{v4,v6,}Addr> for IpNet`.
-// This is because there are many ways to convert an address into a network.
-// See https://github.com/oxidecomputer/omicron/issues/5687.
-
-impl From<Ipv4Net> for IpNet {
-    fn from(n: Ipv4Net) -> IpNet {
-        IpNet::V4(n)
-    }
-}
-
-impl From<Ipv6Net> for IpNet {
-    fn from(n: Ipv6Net) -> IpNet {
-        IpNet::V6(n)
-    }
-}
-
-impl std::fmt::Display for IpNet {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            IpNet::V4(inner) => write!(f, "{}", inner),
-            IpNet::V6(inner) => write!(f, "{}", inner),
-        }
-    }
+    fn is_vpc_subnet(&self, vpc_prefix: &Self) -> bool;
 }
 
-impl FromStr for IpNet {
-    type Err = String;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let net =
-            s.parse::<ipnetwork::IpNetwork>().map_err(|e| e.to_string())?;
-        match net {
-            ipnetwork::IpNetwork::V4(net) => Ok(IpNet::from(Ipv4Net(net))),
-            ipnetwork::IpNetwork::V6(net) => Ok(IpNet::from(Ipv6Net(net))),
-        }
+impl Ipv6NetExt for oxnet::Ipv6Net {
+    fn is_vpc_prefix(&self) -> bool {
+        self.is_unique_local() && self.width() == Self::VPC_IPV6_PREFIX_LENGTH
     }
-}
 
-impl From<IpNet> for ipnetwork::IpNetwork {
-    fn from(net: IpNet) -> ipnetwork::IpNetwork {
-        match net {
-            IpNet::V4(net) => ipnetwork::IpNetwork::from(net.0),
-            IpNet::V6(net) => ipnetwork::IpNetwork::from(net.0),
-        }
-    }
-}
-
-impl Serialize for IpNet {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        match self {
-            IpNet::V4(v4) => v4.serialize(serializer),
-            IpNet::V6(v6) => v6.serialize(serializer),
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for IpNet {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        let net = ipnetwork::IpNetwork::deserialize(deserializer)?;
-        match net {
-            ipnetwork::IpNetwork::V4(net) => Ok(IpNet::from(Ipv4Net(net))),
-            ipnetwork::IpNetwork::V6(net) => Ok(IpNet::from(Ipv6Net(net))),
-        }
-    }
-}
-
-impl JsonSchema for IpNet {
-    fn schema_name() -> String {
-        "IpNet".to_string()
-    }
-
-    fn json_schema(
-        gen: &mut schemars::gen::SchemaGenerator,
-    ) -> schemars::schema::Schema {
-        schemars::schema::SchemaObject {
-            subschemas: Some(Box::new(schemars::schema::SubschemaValidation {
-                one_of: Some(vec![
-                    label_schema("v4", gen.subschema_for::<Ipv4Net>()),
-                    label_schema("v6", gen.subschema_for::<Ipv6Net>()),
-                ]),
-                ..Default::default()
-            })),
-            ..Default::default()
-        }
-        .into()
+    fn is_vpc_subnet(&self, vpc_prefix: &Self) -> bool {
+        self.is_unique_local()
+            && self.is_subnet_of(vpc_prefix)
+            && self.width() == Self::VPC_SUBNET_IPV6_PREFIX_LENGTH
     }
 }
 
@@ -1907,7 +1542,7 @@ pub enum VpcFirewallRuleTarget {
     /// The rule applies to a specific IP address
     Ip(IpAddr),
     /// The rule applies to a specific IP subnet
-    IpNet(IpNet),
+    IpNet(oxnet::IpNet),
     // Tags not yet implemented
     // Tag(Name),
 }
@@ -1938,7 +1573,7 @@ pub enum VpcFirewallRuleHostFilter {
     /// The rule applies to traffic from/to a specific IP address
     Ip(IpAddr),
     /// The rule applies to traffic from/to a specific IP subnet
-    IpNet(IpNet),
+    IpNet(oxnet::IpNet),
     // TODO: Internet gateways not yet implemented
     // #[display("inetgw:{0}")]
     // InternetGateway(Name),
@@ -2446,7 +2081,7 @@ pub struct LoopbackAddress {
     pub switch_location: String,
 
     /// The loopback IP address and prefix length.
-    pub address: IpNet,
+    pub address: oxnet::IpNet,
 }
 
 /// A switch port represents a physical external port on a rack switch.
@@ -2688,7 +2323,7 @@ pub struct LldpConfig {
     pub system_description: String,
 
     /// THE LLDP management IP TLV.
-    pub management_ip: IpNet,
+    pub management_ip: oxnet::IpNet,
 }
 
 /// Describes the kind of an switch interface.
@@ -2755,10 +2390,10 @@ pub struct SwitchPortRouteConfig {
     pub interface_name: String,
 
     /// The route's destination network.
-    pub dst: IpNet,
+    pub dst: oxnet::IpNet,
 
     /// The route's gateway address.
-    pub gw: IpNet,
+    pub gw: oxnet::IpNet,
 
     /// The VLAN identifier for the route. Use this if the gateway is reachable
     /// over an 802.1Q tagged L2 segment.
@@ -2887,7 +2522,7 @@ pub struct BgpAnnouncement {
     pub address_lot_block_id: Uuid,
 
     /// The IP network being announced.
-    pub network: IpNet,
+    pub network: oxnet::IpNet,
 }
 
 /// An IP address configuration for a port settings object.
@@ -2900,7 +2535,7 @@ pub struct SwitchPortAddressConfig {
     pub address_lot_block_id: Uuid,
 
     /// The IP address and prefix.
-    pub address: IpNet,
+    pub address: oxnet::IpNet,
 
     /// The interface name this address belongs to.
     // TODO: https://github.com/oxidecomputer/omicron/issues/3050
@@ -3027,7 +2662,7 @@ impl AggregateBgpMessageHistory {
 #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)]
 pub struct BgpImportedRouteIpv4 {
     /// The destination network prefix.
-    pub prefix: Ipv4Net,
+    pub prefix: oxnet::Ipv4Net,
 
     /// The nexthop the prefix is reachable through.
     pub nexthop: Ipv4Addr,
@@ -3180,7 +2815,7 @@ pub enum ImportExportPolicy {
     /// Do not perform any filtering.
     #[default]
     NoFiltering,
-    Allow(Vec<IpNet>),
+    Allow(Vec<oxnet::IpNet>),
 }
 
 #[cfg(test)]
@@ -3188,7 +2823,6 @@ mod test {
     use serde::Deserialize;
     use serde::Serialize;
 
-    use super::IpNet;
     use super::RouteDestination;
     use super::RouteTarget;
     use super::SemverVersion;
@@ -3644,31 +3278,29 @@ mod test {
 
     #[test]
     fn test_ipv6_net_operations() {
-        use super::Ipv6Net;
-        assert!(Ipv6Net("fd00::/8".parse().unwrap()).is_unique_local());
-        assert!(!Ipv6Net("fe00::/8".parse().unwrap()).is_unique_local());
-
-        assert!(Ipv6Net("fd00::/48".parse().unwrap()).is_vpc_prefix());
-        assert!(!Ipv6Net("fe00::/48".parse().unwrap()).is_vpc_prefix());
-        assert!(!Ipv6Net("fd00::/40".parse().unwrap()).is_vpc_prefix());
-
-        let vpc_prefix = Ipv6Net("fd00::/48".parse().unwrap());
-        assert!(
-            Ipv6Net("fd00::/64".parse().unwrap()).is_vpc_subnet(&vpc_prefix)
-        );
-        assert!(
-            !Ipv6Net("fd10::/64".parse().unwrap()).is_vpc_subnet(&vpc_prefix)
-        );
-        assert!(
-            !Ipv6Net("fd00::/63".parse().unwrap()).is_vpc_subnet(&vpc_prefix)
-        );
-    }
-
-    #[test]
-    fn test_ipv4_net_operations() {
-        use super::{IpNet, Ipv4Net};
-        let x: IpNet = "0.0.0.0/0".parse().unwrap();
-        assert_eq!(x, IpNet::V4(Ipv4Net("0.0.0.0/0".parse().unwrap())))
+        use super::Ipv6NetExt;
+        use oxnet::Ipv6Net;
+
+        assert!("fd00::/8".parse::<Ipv6Net>().unwrap().is_unique_local());
+        assert!(!"fe00::/8".parse::<Ipv6Net>().unwrap().is_unique_local());
+
+        assert!("fd00::/48".parse::<Ipv6Net>().unwrap().is_vpc_prefix());
+        assert!(!"fe00::/48".parse::<Ipv6Net>().unwrap().is_vpc_prefix());
+        assert!(!"fd00::/40".parse::<Ipv6Net>().unwrap().is_vpc_prefix());
+
+        let vpc_prefix = "fd00::/48".parse::<Ipv6Net>().unwrap();
+        assert!("fd00::/64"
+            .parse::<Ipv6Net>()
+            .unwrap()
+            .is_vpc_subnet(&vpc_prefix));
+        assert!(!"fd10::/64"
+            .parse::<Ipv6Net>()
+            .unwrap()
+            .is_vpc_subnet(&vpc_prefix));
+        assert!(!"fd00::/63"
+            .parse::<Ipv6Net>()
+            .unwrap()
+            .is_vpc_subnet(&vpc_prefix));
     }
 
     #[test]
@@ -3799,92 +3431,6 @@ mod test {
         assert!("hash:super_random".parse::<Digest>().is_err());
     }
 
-    #[test]
-    fn test_ipnet_serde() {
-        //TODO: none of this actually exercises
-        // schemars::schema::StringValidation bits and the schemars
-        // documentation is not forthcoming on how this might be accomplished.
-        let net_str = "fd00:2::/32";
-        let net = IpNet::from_str(net_str).unwrap();
-        let ser = serde_json::to_string(&net).unwrap();
-
-        assert_eq!(format!(r#""{}""#, net_str), ser);
-        let net_des = serde_json::from_str::<IpNet>(&ser).unwrap();
-        assert_eq!(net, net_des);
-
-        let net_str = "fd00:47::1/64";
-        let net = IpNet::from_str(net_str).unwrap();
-        let ser = serde_json::to_string(&net).unwrap();
-
-        assert_eq!(format!(r#""{}""#, net_str), ser);
-        let net_des = serde_json::from_str::<IpNet>(&ser).unwrap();
-        assert_eq!(net, net_des);
-
-        let net_str = "192.168.1.1/16";
-        let net = IpNet::from_str(net_str).unwrap();
-        let ser = serde_json::to_string(&net).unwrap();
-
-        assert_eq!(format!(r#""{}""#, net_str), ser);
-        let net_des = serde_json::from_str::<IpNet>(&ser).unwrap();
-        assert_eq!(net, net_des);
-
-        let net_str = "0.0.0.0/0";
-        let net = IpNet::from_str(net_str).unwrap();
-        let ser = serde_json::to_string(&net).unwrap();
-
-        assert_eq!(format!(r#""{}""#, net_str), ser);
-        let net_des = serde_json::from_str::<IpNet>(&ser).unwrap();
-        assert_eq!(net, net_des);
-    }
-
-    #[test]
-    fn test_ipnet_first_last_address() {
-        use std::net::IpAddr;
-        use std::net::Ipv4Addr;
-        use std::net::Ipv6Addr;
-        let net: IpNet = "fd00::/128".parse().unwrap();
-        assert_eq!(
-            net.first_address(),
-            IpAddr::from(Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 0)),
-        );
-        assert_eq!(
-            net.last_address(),
-            IpAddr::from(Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 0)),
-        );
-
-        let net: IpNet = "fd00::/64".parse().unwrap();
-        assert_eq!(
-            net.first_address(),
-            IpAddr::from(Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 0)),
-        );
-        assert_eq!(
-            net.last_address(),
-            IpAddr::from(Ipv6Addr::new(
-                0xfd00, 0, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff
-            )),
-        );
-
-        let net: IpNet = "10.0.0.0/16".parse().unwrap();
-        assert_eq!(
-            net.first_address(),
-            IpAddr::from(Ipv4Addr::new(10, 0, 0, 0)),
-        );
-        assert_eq!(
-            net.last_address(),
-            IpAddr::from(Ipv4Addr::new(10, 0, 255, 255)),
-        );
-
-        let net: IpNet = "10.0.0.0/32".parse().unwrap();
-        assert_eq!(
-            net.first_address(),
-            IpAddr::from(Ipv4Addr::new(10, 0, 0, 0)),
-        );
-        assert_eq!(
-            net.last_address(),
-            IpAddr::from(Ipv4Addr::new(10, 0, 0, 0)),
-        );
-    }
-
     #[test]
     fn test_macaddr() {
         use super::MacAddr;
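
Illustrative sketch (not part of the patch): the VPC prefix helpers that used to live on the external Ipv6Net newtype are now an extension trait, so callers bring Ipv6NetExt into scope and invoke the methods on a plain oxnet::Ipv6Net, much as the updated unit test above does. The prefixes here are made up for the example.

    use omicron_common::api::external::Ipv6NetExt;
    use oxnet::Ipv6Net;

    fn main() {
        // A hypothetical ULA VPC prefix and one of its subnets.
        let vpc_prefix = "fd12:3456::/48".parse::<Ipv6Net>().unwrap();
        assert!(vpc_prefix.is_vpc_prefix());

        let subnet = "fd12:3456::/64".parse::<Ipv6Net>().unwrap();
        assert!(subnet.is_vpc_subnet(&vpc_prefix));
        assert!(!subnet.is_vpc_prefix()); // wrong width for a VPC prefix
    }
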
diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs
index 20516e702b..de611262bf 100644
--- a/common/src/api/internal/nexus.rs
+++ b/common/src/api/internal/nexus.rs
@@ -6,7 +6,7 @@
 
 use crate::api::external::{
     ByteCount, DiskState, Generation, Hostname, InstanceCpuCount,
-    InstanceState, IpNet, SemverVersion, Vni,
+    InstanceState, SemverVersion, Vni,
 };
 use chrono::{DateTime, Utc};
 use omicron_uuid_kinds::DownstairsRegionKind;
@@ -251,7 +251,7 @@ mod tests {
 #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
 #[serde(tag = "type", content = "value", rename_all = "snake_case")]
 pub enum HostIdentifier {
-    Ip(IpNet),
+    Ip(oxnet::IpNet),
     Vpc(Vni),
 }
 
diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs
index 9d9ff083e4..b0d3232eed 100644
--- a/common/src/api/internal/shared.rs
+++ b/common/src/api/internal/shared.rs
@@ -6,9 +6,10 @@
 
 use crate::{
     address::NUM_SOURCE_NAT_PORTS,
-    api::external::{self, BfdMode, ImportExportPolicy, IpNet, Name},
+    api::external::{self, BfdMode, ImportExportPolicy, Name},
 };
 use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network};
+use oxnet::IpNet;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -53,7 +54,7 @@ pub struct NetworkInterface {
     pub name: external::Name,
     pub ip: IpAddr,
     pub mac: external::MacAddr,
-    pub subnet: external::IpNet,
+    pub subnet: IpNet,
     pub vni: external::Vni,
     pub primary: bool,
     pub slot: u8,
@@ -527,9 +528,9 @@ impl TryFrom<Vec<IpNet>> for AllowedSourceIps {
     }
 }
 
-impl TryFrom<&[IpNetwork]> for AllowedSourceIps {
+impl TryFrom<&[ipnetwork::IpNetwork]> for AllowedSourceIps {
     type Error = &'static str;
-    fn try_from(list: &[IpNetwork]) -> Result<Self, Self::Error> {
+    fn try_from(list: &[ipnetwork::IpNetwork]) -> Result<Self, Self::Error> {
         IpAllowList::try_from(list).map(Self::List)
     }
 }
@@ -580,45 +581,43 @@ impl TryFrom<Vec<IpNet>> for IpAllowList {
     }
 }
 
-impl TryFrom<&[IpNetwork]> for IpAllowList {
+impl TryFrom<&[ipnetwork::IpNetwork]> for IpAllowList {
     type Error = &'static str;
-    fn try_from(list: &[IpNetwork]) -> Result<Self, Self::Error> {
+
+    fn try_from(list: &[ipnetwork::IpNetwork]) -> Result<Self, Self::Error> {
         if list.is_empty() {
             return Err("IP allowlist must not be empty");
         }
-        Ok(Self(list.iter().copied().map(Into::into).collect()))
+        Ok(Self(list.into_iter().map(|net| (*net).into()).collect()))
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::api::{
-        external::{IpNet, Ipv4Net, Ipv6Net},
-        internal::shared::AllowedSourceIps,
-    };
-    use ipnetwork::{Ipv4Network, Ipv6Network};
+    use crate::api::internal::shared::AllowedSourceIps;
+    use oxnet::{IpNet, Ipv4Net, Ipv6Net};
     use std::net::{Ipv4Addr, Ipv6Addr};
 
     #[test]
     fn test_deserialize_allowed_source_ips() {
         let parsed: AllowedSourceIps = serde_json::from_str(
-            r#"{"allow":"list","ips":["127.0.0.1","10.0.0.0/24","fd00::1/64"]}"#,
+            r#"{"allow":"list","ips":["127.0.0.1/32","10.0.0.0/24","fd00::1/64"]}"#,
         )
         .unwrap();
         assert_eq!(
             parsed,
             AllowedSourceIps::try_from(vec![
-                IpNet::V4(Ipv4Net::single(Ipv4Addr::LOCALHOST)),
-                IpNet::V4(Ipv4Net(
-                    Ipv4Network::new(Ipv4Addr::new(10, 0, 0, 0), 24).unwrap()
-                )),
-                IpNet::V6(Ipv6Net(
-                    Ipv6Network::new(
+                Ipv4Net::host_net(Ipv4Addr::LOCALHOST).into(),
+                IpNet::V4(
+                    Ipv4Net::new(Ipv4Addr::new(10, 0, 0, 0), 24).unwrap()
+                ),
+                IpNet::V6(
+                    Ipv6Net::new(
                         Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1),
                         64
                     )
                     .unwrap()
-                )),
+                ),
             ])
             .unwrap()
         );
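
Illustrative sketch (not part of the patch): the removed Ipv4Net::single / IpNet::single constructors map onto oxnet's host_net, consistent with the explicit /32 now present in the JSON test vector above. The address here is just an example value.

    use std::net::Ipv4Addr;
    use oxnet::{IpNet, Ipv4Net};

    fn main() {
        let host = Ipv4Net::host_net(Ipv4Addr::LOCALHOST);
        assert!(host.is_host_net());
        println!("{host}"); // expected: 127.0.0.1/32

        // From<Ipv4Net> for IpNet, as used when building the expected test value.
        let _as_ip_net: IpNet = host.into();
    }
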
diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml
index fa50dd2822..3d17745b7e 100644
--- a/illumos-utils/Cargo.toml
+++ b/illumos-utils/Cargo.toml
@@ -25,6 +25,7 @@ omicron-common.workspace = true
 omicron-uuid-kinds.workspace = true
 oxide-vpc.workspace = true
 oxlog.workspace = true
+oxnet.workspace = true
 schemars.workspace = true
 serde.workspace = true
 slog.workspace = true
diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs
index 02882a226b..1df0e7421a 100644
--- a/illumos-utils/src/opte/firewall_rules.rs
+++ b/illumos-utils/src/opte/firewall_rules.rs
@@ -7,7 +7,6 @@
 use crate::opte::params::VpcFirewallRule;
 use crate::opte::Vni;
 use macaddr::MacAddr6;
-use omicron_common::api::external::IpNet;
 use omicron_common::api::external::VpcFirewallRuleAction;
 use omicron_common::api::external::VpcFirewallRuleDirection;
 use omicron_common::api::external::VpcFirewallRuleProtocol;
@@ -27,6 +26,7 @@ use oxide_vpc::api::Ipv6PrefixLen;
 use oxide_vpc::api::Ports;
 use oxide_vpc::api::ProtoFilter;
 use oxide_vpc::api::Protocol;
+use oxnet::IpNet;
 
 trait FromVpcFirewallRule {
     fn action(&self) -> FirewallAction;
@@ -65,26 +65,22 @@ impl FromVpcFirewallRule for VpcFirewallRule {
             Some(ref hosts) if !hosts.is_empty() => hosts
                 .iter()
                 .map(|host| match host {
-                    HostIdentifier::Ip(IpNet::V4(net))
-                        if net.prefix() == 32 =>
-                    {
-                        Address::Ip(IpAddr::Ip4(net.ip().into()))
+                    HostIdentifier::Ip(IpNet::V4(net)) if net.is_host_net() => {
+                        Address::Ip(IpAddr::Ip4(net.addr().into()))
                     }
                     HostIdentifier::Ip(IpNet::V4(net)) => {
                         Address::Subnet(IpCidr::Ip4(Ipv4Cidr::new(
-                            net.ip().into(),
-                            Ipv4PrefixLen::new(net.prefix()).unwrap(),
+                            net.addr().into(),
+                            Ipv4PrefixLen::new(net.width()).unwrap(),
                         )))
                     }
-                    HostIdentifier::Ip(IpNet::V6(net))
-                        if net.prefix() == 128 =>
-                    {
-                        Address::Ip(IpAddr::Ip6(net.ip().into()))
+                    HostIdentifier::Ip(IpNet::V6(net)) if net.is_host_net() => {
+                        Address::Ip(IpAddr::Ip6(net.addr().into()))
                     }
                     HostIdentifier::Ip(IpNet::V6(net)) => {
                         Address::Subnet(IpCidr::Ip6(Ipv6Cidr::new(
-                            net.ip().into(),
-                            Ipv6PrefixLen::new(net.prefix()).unwrap(),
+                            net.addr().into(),
+                            Ipv6PrefixLen::new(net.width()).unwrap(),
                         )))
                     }
                     HostIdentifier::Vpc(vni) => {
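
Illustrative sketch (not part of the patch): the host-versus-subnet split above now hinges on oxnet's is_host_net() instead of comparing prefix() against 32 or 128. A stripped-down version of that match, with the OPTE types omitted and made-up input values:

    use oxnet::IpNet;

    fn describe(net: IpNet) -> String {
        match net {
            IpNet::V4(v4) if v4.is_host_net() => format!("host {}", v4.addr()),
            IpNet::V4(v4) => format!("IPv4 subnet {}/{}", v4.addr(), v4.width()),
            IpNet::V6(v6) if v6.is_host_net() => format!("host {}", v6.addr()),
            IpNet::V6(v6) => format!("IPv6 subnet {}/{}", v6.addr(), v6.width()),
        }
    }

    fn main() {
        let host = IpNet::V4("10.0.0.5/32".parse().unwrap());
        let subnet = IpNet::V6("fd00::/64".parse().unwrap());
        println!("{}", describe(host));   // host 10.0.0.5
        println!("{}", describe(subnet)); // IPv6 subnet fd00::/64
    }
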
diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs
index a7796f559a..670b4b420c 100644
--- a/internal-dns/src/resolver.rs
+++ b/internal-dns/src/resolver.rs
@@ -118,7 +118,7 @@ impl Resolver {
             .get_dns_subnets()
             .into_iter()
             .map(|dns_subnet| {
-                let ip_addr = IpAddr::V6(dns_subnet.dns_address().ip());
+                let ip_addr = IpAddr::V6(dns_subnet.dns_address().addr());
                 SocketAddr::new(ip_addr, DNS_PORT)
             })
             .collect()
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index 9793c32bf8..0b0bd097bc 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -51,6 +51,7 @@ once_cell.workspace = true
 openssl.workspace = true
 oximeter-client.workspace = true
 oximeter-db.workspace = true
+oxnet.workspace = true
 parse-display.workspace = true
 paste.workspace = true
 # See omicron-rpaths for more about the "pq-sys" dependency.
diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml
index 1118b7c9bd..a7b6cd9de1 100644
--- a/nexus/db-model/Cargo.toml
+++ b/nexus/db-model/Cargo.toml
@@ -22,6 +22,7 @@ macaddr.workspace = true
 newtype_derive.workspace = true
 omicron-uuid-kinds.workspace = true
 once_cell.workspace = true
+oxnet.workspace = true
 parse-display.workspace = true
 # See omicron-rpaths for more about the "pq-sys" dependency.
 pq-sys = "*"
diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs
index c3763346c6..4ff1ee9171 100644
--- a/nexus/db-model/src/ipv4_nat_entry.rs
+++ b/nexus/db-model/src/ipv4_nat_entry.rs
@@ -81,10 +81,10 @@ pub struct Ipv4NatEntryView {
 impl From<Ipv4NatChange> for Ipv4NatEntryView {
     fn from(value: Ipv4NatChange) -> Self {
         Self {
-            external_address: value.external_address.ip(),
+            external_address: value.external_address.addr(),
             first_port: value.first_port.into(),
             last_port: value.last_port.into(),
-            sled_address: value.sled_address.ip(),
+            sled_address: value.sled_address.addr(),
             vni: value.vni.0,
             mac: *value.mac,
             gen: value.version,
diff --git a/nexus/db-model/src/ipv4net.rs b/nexus/db-model/src/ipv4net.rs
index eaf8a6eed8..b2cf6ffefa 100644
--- a/nexus/db-model/src/ipv4net.rs
+++ b/nexus/db-model/src/ipv4net.rs
@@ -10,7 +10,6 @@ use diesel::serialize::{self, ToSql};
 use diesel::sql_types;
 use ipnetwork::IpNetwork;
 use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
-use omicron_common::api::external;
 use serde::Deserialize;
 use serde::Serialize;
 use std::net::Ipv4Addr;
@@ -27,10 +26,10 @@ use std::net::Ipv4Addr;
     Deserialize,
 )]
 #[diesel(sql_type = sql_types::Inet)]
-pub struct Ipv4Net(pub external::Ipv4Net);
+pub struct Ipv4Net(pub oxnet::Ipv4Net);
 
-NewtypeFrom! { () pub struct Ipv4Net(external::Ipv4Net); }
-NewtypeDeref! { () pub struct Ipv4Net(external::Ipv4Net); }
+NewtypeFrom! { () pub struct Ipv4Net(oxnet::Ipv4Net); }
+NewtypeDeref! { () pub struct Ipv4Net(oxnet::Ipv4Net); }
 
 impl Ipv4Net {
     /// Check if an address is a valid user-requestable address for this subnet
@@ -41,19 +40,19 @@ impl Ipv4Net {
         if !self.contains(addr) {
             return Err(RequestAddressError::OutsideSubnet(
                 addr.into(),
-                self.0 .0.into(),
+                oxnet::IpNet::from(self.0).into(),
             ));
         }
         // Only the first N addresses are reserved
         if self
-            .iter()
+            .addr_iter()
             .take(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .any(|this| this == addr)
         {
             return Err(RequestAddressError::Reserved);
         }
         // Last address in the subnet is reserved
-        if addr == self.broadcast() {
+        if addr == self.broadcast().expect("narrower subnet than expected") {
             return Err(RequestAddressError::Broadcast);
         }
 
@@ -67,7 +66,7 @@ impl ToSql<sql_types::Inet, Pg> for Ipv4Net {
         out: &mut serialize::Output<'a, '_, Pg>,
     ) -> serialize::Result {
         <IpNetwork as ToSql<sql_types::Inet, Pg>>::to_sql(
-            &IpNetwork::V4(*self.0),
+            &IpNetwork::V4(self.0.into()),
             &mut out.reborrow(),
         )
     }
@@ -81,7 +80,7 @@ where
     fn from_sql(bytes: DB::RawValue<'_>) -> deserialize::Result<Self> {
         let inet = IpNetwork::from_sql(bytes)?;
         match inet {
-            IpNetwork::V4(net) => Ok(Ipv4Net(external::Ipv4Net(net))),
+            IpNetwork::V4(net) => Ok(Ipv4Net(net.into())),
             _ => Err("Expected IPV4".into()),
         }
     }
diff --git a/nexus/db-model/src/ipv6net.rs b/nexus/db-model/src/ipv6net.rs
index d516b67ed9..adcf732f42 100644
--- a/nexus/db-model/src/ipv6net.rs
+++ b/nexus/db-model/src/ipv6net.rs
@@ -9,7 +9,6 @@ use diesel::serialize::{self, ToSql};
 use diesel::sql_types;
 use ipnetwork::IpNetwork;
 use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
-use omicron_common::api::external;
 use rand::{rngs::StdRng, SeedableRng};
 use serde::Deserialize;
 use serde::Serialize;
@@ -29,10 +28,10 @@ use crate::RequestAddressError;
     Deserialize,
 )]
 #[diesel(sql_type = sql_types::Inet)]
-pub struct Ipv6Net(pub external::Ipv6Net);
+pub struct Ipv6Net(pub oxnet::Ipv6Net);
 
-NewtypeFrom! { () pub struct Ipv6Net(external::Ipv6Net); }
-NewtypeDeref! { () pub struct Ipv6Net(external::Ipv6Net); }
+NewtypeFrom! { () pub struct Ipv6Net(oxnet::Ipv6Net); }
+NewtypeDeref! { () pub struct Ipv6Net(oxnet::Ipv6Net); }
 
 impl Ipv6Net {
     /// Generate a random subnetwork from this one, of the given prefix length.
@@ -48,10 +47,10 @@ impl Ipv6Net {
         use rand::RngCore;
 
         const MAX_IPV6_SUBNET_PREFIX: u8 = 128;
-        if prefix < self.prefix() || prefix > MAX_IPV6_SUBNET_PREFIX {
+        if prefix < self.width() || prefix > MAX_IPV6_SUBNET_PREFIX {
             return None;
         }
-        if prefix == self.prefix() {
+        if prefix == self.width() {
             return Some(*self);
         }
 
@@ -72,17 +71,17 @@ impl Ipv6Net {
         let full_mask = !(u128::MAX >> prefix);
 
         // Get the existing network address and mask.
-        let network = u128::from_be_bytes(self.network().octets());
-        let network_mask = u128::from_be_bytes(self.mask().octets());
+        let network = u128::from(self.prefix());
+        let network_mask = u128::from(self.mask_addr());
 
         // Take random bits _only_ where the new mask is set.
         let random_mask = full_mask ^ network_mask;
 
         let out = (network & network_mask) | (random & random_mask);
-        let addr = std::net::Ipv6Addr::from(out.to_be_bytes());
-        let net = ipnetwork::Ipv6Network::new(addr, prefix)
+        let addr = std::net::Ipv6Addr::from(out);
+        let net = oxnet::Ipv6Net::new(addr, prefix)
             .expect("Failed to create random subnet");
-        Some(Self(external::Ipv6Net(net)))
+        Some(Self(net))
     }
 
     /// Check if an address is a valid user-requestable address for this subnet
@@ -93,7 +92,7 @@ impl Ipv6Net {
         if !self.contains(addr) {
             return Err(RequestAddressError::OutsideSubnet(
                 addr.into(),
-                self.0 .0.into(),
+                oxnet::IpNet::from(self.0).into(),
             ));
         }
         // Only the first N addresses are reserved
@@ -114,7 +113,7 @@ impl ToSql<sql_types::Inet, Pg> for Ipv6Net {
         out: &mut serialize::Output<'a, '_, Pg>,
     ) -> serialize::Result {
         <IpNetwork as ToSql<sql_types::Inet, Pg>>::to_sql(
-            &IpNetwork::V6(self.0 .0),
+            &IpNetwork::V6(self.0.into()),
             &mut out.reborrow(),
         )
     }
@@ -128,7 +127,7 @@ where
     fn from_sql(bytes: DB::RawValue<'_>) -> deserialize::Result<Self> {
         let inet = IpNetwork::from_sql(bytes)?;
         match inet {
-            IpNetwork::V6(net) => Ok(Ipv6Net(external::Ipv6Net(net))),
+            IpNetwork::V6(net) => Ok(Ipv6Net(net.into())),
             _ => Err("Expected IPV6".into()),
         }
     }
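
Illustrative sketch (not part of the patch): the mask arithmetic behind the reworked random_subnet above, shown with a fixed stand-in for the RNG output. Only the bits between the base width (/48 here) and the requested width (/64) are allowed to vary; the base prefix is a made-up example.

    use std::net::Ipv6Addr;
    use oxnet::Ipv6Net;

    fn main() {
        let base = "fd00:1122:3344::/48".parse::<Ipv6Net>().unwrap();
        let width: u8 = 64;

        // Bits at or above the requested prefix length.
        let full_mask = !(u128::MAX >> width);
        // prefix() is the network address; mask_addr() is the base netmask.
        let network = u128::from(base.prefix());
        let network_mask = u128::from(base.mask_addr());
        // Randomness may only land where the new mask is set and the old one is not.
        let random_mask = full_mask ^ network_mask;

        let pretend_random: u128 = 0xabcd << 64; // stand-in for StdRng output
        let out = (network & network_mask) | (pretend_random & random_mask);
        let subnet = Ipv6Net::new(Ipv6Addr::from(out), width).unwrap();
        println!("{subnet}"); // expected: fd00:1122:3344:abcd::/64
    }
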
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index bd16719633..c57836a567 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -437,12 +437,10 @@ mod tests {
     use crate::RequestAddressError;
 
     use super::VpcSubnet;
-    use ipnetwork::Ipv4Network;
-    use ipnetwork::Ipv6Network;
     use omicron_common::api::external::IdentityMetadataCreateParams;
-    use omicron_common::api::external::IpNet;
-    use omicron_common::api::external::Ipv4Net;
-    use omicron_common::api::external::Ipv6Net;
+    use oxnet::IpNet;
+    use oxnet::Ipv4Net;
+    use oxnet::Ipv6Net;
     use std::net::IpAddr;
     use std::net::Ipv4Addr;
     use std::net::Ipv6Addr;
@@ -450,9 +448,8 @@ mod tests {
 
     #[test]
     fn test_vpc_subnet_check_requestable_addr() {
-        let ipv4_block =
-            Ipv4Net("192.168.0.0/16".parse::<Ipv4Network>().unwrap());
-        let ipv6_block = Ipv6Net("fd00::/48".parse::<Ipv6Network>().unwrap());
+        let ipv4_block = "192.168.0.0/16".parse::<Ipv4Net>().unwrap();
+        let ipv6_block = "fd00::/48".parse::<Ipv6Net>().unwrap();
         let identity = IdentityMetadataCreateParams {
             name: "net-test-vpc".parse().unwrap(),
             description: "A test VPC".parse().unwrap(),
@@ -511,9 +508,7 @@ mod tests {
 
     #[test]
     fn test_ipv6_net_random_subnet() {
-        let base = super::Ipv6Net(Ipv6Net(
-            "fd00::/48".parse::<Ipv6Network>().unwrap(),
-        ));
+        let base = super::Ipv6Net("fd00::/48".parse::<Ipv6Net>().unwrap());
         assert!(
             base.random_subnet(8).is_none(),
             "random_subnet() should fail when prefix is less than the base prefix"
@@ -524,11 +519,11 @@ mod tests {
         );
         let subnet = base.random_subnet(64).unwrap();
         assert_eq!(
-            subnet.prefix(),
+            subnet.width(),
             64,
             "random_subnet() returned an incorrect prefix"
         );
-        let octets = subnet.network().octets();
+        let octets = subnet.prefix().octets();
         const EXPECTED_RANDOM_BYTES: [u8; 8] = [253, 0, 0, 0, 0, 0, 111, 127];
         assert_eq!(octets[..8], EXPECTED_RANDOM_BYTES);
         assert!(
@@ -536,15 +531,15 @@ mod tests {
             "Host address portion should be 0"
         );
         assert!(
-            base.is_supernet_of(subnet.0 .0),
+            base.is_supernet_of(&subnet.0),
             "random_subnet should generate an actual subnet"
         );
-        assert_eq!(base.random_subnet(base.prefix()), Some(base));
+        assert_eq!(base.random_subnet(base.width()), Some(base));
     }
 
     #[test]
     fn test_ip_subnet_check_requestable_address() {
-        let subnet = super::Ipv4Net(Ipv4Net("192.168.0.0/16".parse().unwrap()));
+        let subnet = super::Ipv4Net("192.168.0.0/16".parse().unwrap());
         subnet.check_requestable_addr("192.168.0.10".parse().unwrap()).unwrap();
         subnet.check_requestable_addr("192.168.1.0".parse().unwrap()).unwrap();
         let addr = "192.178.0.10".parse().unwrap();
@@ -569,7 +564,7 @@ mod tests {
             Err(RequestAddressError::Broadcast)
         );
 
-        let subnet = super::Ipv6Net(Ipv6Net("fd00::/64".parse().unwrap()));
+        let subnet = super::Ipv6Net("fd00::/64".parse().unwrap());
         subnet.check_requestable_addr("fd00::a".parse().unwrap()).unwrap();
         assert_eq!(
             subnet.check_requestable_addr("fd00::1".parse().unwrap()),
diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs
index 8520afdb76..95bb8bb7f2 100644
--- a/nexus/db-model/src/network_interface.rs
+++ b/nexus/db-model/src/network_interface.rs
@@ -73,7 +73,7 @@ pub struct NetworkInterface {
 impl NetworkInterface {
     pub fn into_internal(
         self,
-        subnet: external::IpNet,
+        subnet: oxnet::IpNet,
     ) -> internal::shared::NetworkInterface {
         internal::shared::NetworkInterface {
             id: self.id(),
diff --git a/nexus/db-model/src/vpc.rs b/nexus/db-model/src/vpc.rs
index 8a4dc0e349..88879a0436 100644
--- a/nexus/db-model/src/vpc.rs
+++ b/nexus/db-model/src/vpc.rs
@@ -14,6 +14,7 @@ use nexus_types::external_api::params;
 use nexus_types::external_api::views;
 use nexus_types::identity::Resource;
 use omicron_common::api::external;
+use omicron_common::api::external::Ipv6NetExt;
 use serde::Deserialize;
 use serde::Serialize;
 use uuid::Uuid;
@@ -83,22 +84,20 @@ impl IncompleteVpc {
         params: params::VpcCreate,
     ) -> Result<Self, external::Error> {
         let identity = VpcIdentity::new(vpc_id, params.identity);
-        let ipv6_prefix = IpNetwork::from(
-            match params.ipv6_prefix {
-                None => defaults::random_vpc_ipv6_prefix(),
-                Some(prefix) => {
-                    if prefix.is_vpc_prefix() {
-                        Ok(prefix)
-                    } else {
-                        Err(external::Error::invalid_request(
-                            "VPC IPv6 address prefixes must be in the \
+        let ipv6_prefix = oxnet::IpNet::from(match params.ipv6_prefix {
+            None => defaults::random_vpc_ipv6_prefix(),
+            Some(prefix) => {
+                if prefix.is_vpc_prefix() {
+                    Ok(prefix)
+                } else {
+                    Err(external::Error::invalid_request(
+                        "VPC IPv6 address prefixes must be in the \
                             Unique Local Address range `fd00::/48` (RFD 4193)",
-                        ))
-                    }
+                    ))
                 }
-            }?
-            .0,
-        );
+            }
+        }?)
+        .into();
         Ok(Self {
             identity,
             project_id,
diff --git a/nexus/db-model/src/vpc_subnet.rs b/nexus/db-model/src/vpc_subnet.rs
index 407c933ef2..f3c90a908e 100644
--- a/nexus/db-model/src/vpc_subnet.rs
+++ b/nexus/db-model/src/vpc_subnet.rs
@@ -50,8 +50,8 @@ impl VpcSubnet {
         subnet_id: Uuid,
         vpc_id: Uuid,
         identity: external::IdentityMetadataCreateParams,
-        ipv4_block: external::Ipv4Net,
-        ipv6_block: external::Ipv6Net,
+        ipv4_block: oxnet::Ipv4Net,
+        ipv6_block: oxnet::Ipv6Net,
     ) -> Self {
         let identity = VpcSubnetIdentity::new(subnet_id, identity);
         Self {
diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml
index 0754f7389f..135f2fcdf7 100644
--- a/nexus/db-queries/Cargo.toml
+++ b/nexus/db-queries/Cargo.toml
@@ -33,6 +33,7 @@ newtype_derive.workspace = true
 once_cell.workspace = true
 openssl.workspace = true
 oso.workspace = true
+oxnet.workspace = true
 paste.workspace = true
 # See omicron-rpaths for more about the "pq-sys" dependency.
 pq-sys = "*"
diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
index fa3939b8ac..5b370f27a9 100644
--- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
+++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
@@ -406,13 +406,11 @@ mod test {
 
         // Each change (creation / deletion) to the NAT table should increment the
         // version number of the row in the NAT table
-        let external_address = external::Ipv4Net(
-            ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(),
-        );
+        let external_address =
+            oxnet::Ipv4Net::host_net("10.0.0.100".parse().unwrap());
 
-        let sled_address = external::Ipv6Net(
-            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
-        );
+        let sled_address =
+            oxnet::Ipv6Net::host_net("fd00:1122:3344:104::1".parse().unwrap());
 
         // Add a nat entry.
         let nat1 = Ipv4NatValues {
@@ -565,13 +563,11 @@ mod test {
 
         // Each change (creation / deletion) to the NAT table should increment the
         // version number of the row in the NAT table
-        let external_address = external::Ipv4Net(
-            ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(),
-        );
+        let external_address =
+            oxnet::Ipv4Net::host_net("10.0.0.100".parse().unwrap());
 
-        let sled_address = external::Ipv6Net(
-            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
-        );
+        let sled_address =
+            oxnet::Ipv6Net::host_net("fd00:1122:3344:104::1".parse().unwrap());
 
         // Add a nat entry.
         let nat1 = Ipv4NatValues {
@@ -711,13 +707,11 @@ mod test {
         // 1. an entry should be deleted during the next sync
         // 2. an entry that should be kept during the next sync
 
-        let external_address = external::Ipv4Net(
-            ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(),
-        );
+        let external_address =
+            oxnet::Ipv4Net::host_net("10.0.0.100".parse().unwrap());
 
-        let sled_address = external::Ipv6Net(
-            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
-        );
+        let sled_address =
+            oxnet::Ipv6Net::host_net("fd00:1122:3344:104::1".parse().unwrap());
 
         // Add a nat entry.
         let nat1 = Ipv4NatValues {
@@ -833,13 +827,12 @@ mod test {
 
         let addresses = (0..=255).map(|i| {
             let addr = Ipv4Addr::new(10, 0, 0, i);
-            let net = ipnetwork::Ipv4Network::new(addr, 32).unwrap();
-            external::Ipv4Net(net)
+            let net = oxnet::Ipv4Net::new(addr, 32).unwrap();
+            net
         });
 
-        let sled_address = external::Ipv6Net(
-            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
-        );
+        let sled_address =
+            oxnet::Ipv6Net::host_net("fd00:1122:3344:104::1".parse().unwrap());
 
         let nat_entries = addresses.map(|external_address| {
             // build a bunch of nat entries
@@ -908,7 +901,7 @@ mod test {
                         .expect("did not find a deleted nat entry with a matching version number");
 
                     assert_eq!(
-                        deleted_nat.external_address.ip(),
+                        deleted_nat.external_address.addr(),
                         change.external_address
                     );
                     assert_eq!(
@@ -917,7 +910,7 @@ mod test {
                     );
                     assert_eq!(deleted_nat.last_port, change.last_port.into());
                     assert_eq!(
-                        deleted_nat.sled_address.ip(),
+                        deleted_nat.sled_address.addr(),
                         change.sled_address
                     );
                     assert_eq!(*deleted_nat.mac, change.mac);
@@ -933,13 +926,13 @@ mod test {
                     assert!(added_nat.version_removed.is_none());
 
                     assert_eq!(
-                        added_nat.external_address.ip(),
+                        added_nat.external_address.addr(),
                         change.external_address
                     );
                     assert_eq!(added_nat.first_port, change.first_port.into());
                     assert_eq!(added_nat.last_port, change.last_port.into());
                     assert_eq!(
-                        added_nat.sled_address.ip(),
+                        added_nat.sled_address.addr(),
                         change.sled_address
                     );
                     assert_eq!(*added_nat.mac, change.mac);
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 7c47489477..a69e91dff4 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -1592,8 +1592,8 @@ mod test {
                 name: external::Name::try_from(String::from("name")).unwrap(),
                 description: String::from("description"),
             },
-            external::Ipv4Net("172.30.0.0/22".parse().unwrap()),
-            external::Ipv6Net("fd00::/64".parse().unwrap()),
+            "172.30.0.0/22".parse().unwrap(),
+            "fd00::/64".parse().unwrap(),
         );
         let values = FilterConflictingVpcSubnetRangesQuery::new(subnet);
         let query =
diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs
index f552e845c6..af3f832e35 100644
--- a/nexus/db-queries/src/db/datastore/network_interface.rs
+++ b/nexus/db-queries/src/db/datastore/network_interface.rs
@@ -68,9 +68,9 @@ impl From<NicInfo> for omicron_common::api::internal::shared::NetworkInterface {
         nic: NicInfo,
     ) -> omicron_common::api::internal::shared::NetworkInterface {
         let ip_subnet = if nic.ip.is_ipv4() {
-            external::IpNet::V4(nic.ipv4_block.0)
+            oxnet::IpNet::V4(nic.ipv4_block.0)
         } else {
-            external::IpNet::V6(nic.ipv6_block.0)
+            oxnet::IpNet::V6(nic.ipv6_block.0)
         };
         let kind = match nic.kind {
             NetworkInterfaceKind::Instance => {
@@ -894,8 +894,7 @@ mod tests {
 
         // Insert 10 Nexus NICs
         let ip_range = NEXUS_OPTE_IPV4_SUBNET
-            .0
-            .iter()
+            .addr_iter()
             .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .take(10);
         let mut macs = external::MacAddr::iter_system();
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index 04901c7785..b8275b56d4 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -1038,6 +1038,7 @@ mod test {
     use omicron_uuid_kinds::{ExternalIpUuid, OmicronZoneUuid};
     use omicron_uuid_kinds::{GenericUuid, ZpoolUuid};
     use omicron_uuid_kinds::{SledUuid, TypedUuid};
+    use oxnet::IpNet;
     use sled_agent_client::types::OmicronZoneDataset;
     use std::collections::{BTreeMap, HashMap};
     use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6};
@@ -1334,22 +1335,22 @@ mod test {
 
         let external_dns_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4));
         let external_dns_pip = DNS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let external_dns_id = OmicronZoneUuid::new_v4();
         let nexus_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 6));
         let nexus_pip = NEXUS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let nexus_id = OmicronZoneUuid::new_v4();
         let ntp1_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 5));
         let ntp1_pip = NTP_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let ntp1_id = OmicronZoneUuid::new_v4();
         let ntp2_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 5));
         let ntp2_pip = NTP_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 2)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 2)
             .unwrap();
         let ntp2_id = OmicronZoneUuid::new_v4();
         let ntp3_id = OmicronZoneUuid::new_v4();
@@ -1381,10 +1382,7 @@ mod test {
                                     name: "external-dns".parse().unwrap(),
                                     ip: external_dns_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **DNS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(*DNS_OPTE_IPV4_SUBNET),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1410,10 +1408,7 @@ mod test {
                                     name: "ntp1".parse().unwrap(),
                                     ip: ntp1_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NTP_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(*NTP_OPTE_IPV4_SUBNET),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1457,10 +1452,9 @@ mod test {
                                     name: "nexus".parse().unwrap(),
                                     ip: nexus_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NEXUS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(
+                                        *NEXUS_OPTE_IPV4_SUBNET,
+                                    ),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1486,10 +1480,7 @@ mod test {
                                     name: "ntp2".parse().unwrap(),
                                     ip: ntp2_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NTP_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(*NTP_OPTE_IPV4_SUBNET),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1677,10 +1668,10 @@ mod test {
         let nexus_id1 = OmicronZoneUuid::new_v4();
         let nexus_id2 = OmicronZoneUuid::new_v4();
         let nexus_pip1 = NEXUS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let nexus_pip2 = NEXUS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 2)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 2)
             .unwrap();
         let mut macs = MacAddr::iter_system();
 
@@ -1711,10 +1702,9 @@ mod test {
                                     name: "nexus1".parse().unwrap(),
                                     ip: nexus_pip1.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NEXUS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(
+                                        *NEXUS_OPTE_IPV4_SUBNET,
+                                    ),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1743,10 +1733,9 @@ mod test {
                                     name: "nexus2".parse().unwrap(),
                                     ip: nexus_pip2.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NEXUS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: oxnet::IpNet::from(
+                                        *NEXUS_OPTE_IPV4_SUBNET,
+                                    ),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -1951,7 +1940,7 @@ mod test {
 
         let nexus_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4));
         let nexus_pip = NEXUS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let nexus_id = OmicronZoneUuid::new_v4();
         let mut macs = MacAddr::iter_system();
@@ -1981,10 +1970,7 @@ mod test {
                                 name: "nexus".parse().unwrap(),
                                 ip: nexus_pip.into(),
                                 mac: macs.next().unwrap(),
-                                subnet: IpNetwork::from(
-                                    **NEXUS_OPTE_IPV4_SUBNET,
-                                )
-                                .into(),
+                                subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET),
                                 vni: Vni::SERVICES_VNI,
                                 primary: true,
                                 slot: 0,
@@ -2052,11 +2038,11 @@ mod test {
         // Request two services which happen to be using the same IP address.
         let external_dns_id = OmicronZoneUuid::new_v4();
         let external_dns_pip = DNS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let nexus_id = OmicronZoneUuid::new_v4();
         let nexus_pip = NEXUS_OPTE_IPV4_SUBNET
-            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
             .unwrap();
         let mut macs = MacAddr::iter_system();
 
@@ -2086,10 +2072,7 @@ mod test {
                                     name: "external-dns".parse().unwrap(),
                                     ip: external_dns_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **DNS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(*DNS_OPTE_IPV4_SUBNET),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
@@ -2118,10 +2101,9 @@ mod test {
                                     name: "nexus".parse().unwrap(),
                                     ip: nexus_pip.into(),
                                     mac: macs.next().unwrap(),
-                                    subnet: IpNetwork::from(
-                                        **NEXUS_OPTE_IPV4_SUBNET,
-                                    )
-                                    .into(),
+                                    subnet: IpNet::from(
+                                        *NEXUS_OPTE_IPV4_SUBNET,
+                                    ),
                                     vni: Vni::SERVICES_VNI,
                                     primary: true,
                                     slot: 0,
diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs
index 3f3717a9c4..edb16e95ac 100644
--- a/nexus/db-queries/src/db/datastore/switch_port.rs
+++ b/nexus/db-queries/src/db/datastore/switch_port.rs
@@ -529,7 +529,7 @@ impl DataStore {
                             let (block, rsvd_block) =
                                 crate::db::datastore::address_lot::try_reserve_block(
                                     address_lot_id,
-                                    address.address.ip().into(),
+                                    address.address.addr().into(),
                                     // TODO: Should we allow anycast addresses for switch_ports?
                                     // anycast
                                     false,
diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs
index 91843abf2e..98af47f0e2 100644
--- a/nexus/db-queries/src/db/datastore/vpc.rs
+++ b/nexus/db-queries/src/db/datastore/vpc.rs
@@ -1168,8 +1168,8 @@ impl DataStore {
         let mut result = BTreeMap::new();
         for subnet in subnets {
             let entry = result.entry(subnet.name).or_insert_with(Vec::new);
-            entry.push(IpNetwork::V4(subnet.ipv4_block.0 .0));
-            entry.push(IpNetwork::V6(subnet.ipv6_block.0 .0));
+            entry.push(IpNetwork::V4(subnet.ipv4_block.0.into()));
+            entry.push(IpNetwork::V6(subnet.ipv6_block.0.into()));
         }
         Ok(result)
     }
diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs
index a9cea9826a..69c1827b6d 100644
--- a/nexus/db-queries/src/db/queries/network_interface.rs
+++ b/nexus/db-queries/src/db/queries/network_interface.rs
@@ -1061,7 +1061,7 @@ impl InsertQuery {
         let next_mac_subquery =
             NextMacAddress::new(interface.subnet.vpc_id, interface.kind);
         let next_ipv4_address_subquery = NextIpv4Address::new(
-            interface.subnet.ipv4_block.0 .0,
+            interface.subnet.ipv4_block.0.into(),
             interface.subnet.identity.id,
         );
         let next_slot_subquery = NextNicSlot::new(interface.parent_id);
@@ -1859,8 +1859,6 @@ mod tests {
     use crate::db::queries::network_interface::NextMacShifts;
     use async_bb8_diesel::AsyncRunQueryDsl;
     use dropshot::test_util::LogContext;
-    use ipnetwork::Ipv4Network;
-    use ipnetwork::Ipv6Network;
     use model::NetworkInterfaceKind;
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::external_api::params;
@@ -1871,11 +1869,11 @@ mod tests {
     use omicron_common::api::external::Error;
     use omicron_common::api::external::IdentityMetadataCreateParams;
     use omicron_common::api::external::InstanceCpuCount;
-    use omicron_common::api::external::Ipv4Net;
-    use omicron_common::api::external::Ipv6Net;
     use omicron_common::api::external::MacAddr;
     use omicron_test_utils::dev;
     use omicron_test_utils::dev::db::CockroachInstance;
+    use oxnet::Ipv4Net;
+    use oxnet::Ipv6Net;
     use std::collections::HashSet;
     use std::convert::TryInto;
     use std::net::IpAddr;
@@ -1995,25 +1993,13 @@ mod tests {
             let vpc_id = Uuid::new_v4();
             let mut subnets = Vec::with_capacity(n_subnets as _);
             for i in 0..n_subnets {
-                let ipv4net = Ipv4Net(
-                    Ipv4Network::new(Ipv4Addr::new(172, 30, 0, i), 28).unwrap(),
-                );
-                let ipv6net = Ipv6Net(
-                    Ipv6Network::new(
-                        Ipv6Addr::new(
-                            0xfd12,
-                            0x3456,
-                            0x7890,
-                            i.into(),
-                            0,
-                            0,
-                            0,
-                            0,
-                        ),
-                        64,
-                    )
-                    .unwrap(),
-                );
+                let ipv4net =
+                    Ipv4Net::new(Ipv4Addr::new(172, 30, 0, i), 28).unwrap();
+                let ipv6net = Ipv6Net::new(
+                    Ipv6Addr::new(0xfd12, 0x3456, 0x7890, i.into(), 0, 0, 0, 0),
+                    64,
+                )
+                .unwrap();
                 let subnet = VpcSubnet::new(
                     Uuid::new_v4(),
                     vpc_id,
@@ -2033,9 +2019,11 @@ mod tests {
             self.subnets
                 .iter()
                 .map(|subnet| {
-                    subnet.ipv4_block.size() as usize
-                        - NUM_INITIAL_RESERVED_IP_ADDRESSES
-                        - 1
+                    let size_minus_1 = match subnet.ipv4_block.size() {
+                        Some(n) => n - 1,
+                        None => u32::MAX,
+                    } as usize;
+                    size_minus_1 - NUM_INITIAL_RESERVED_IP_ADDRESSES
                 })
                 .collect()
         }
@@ -2148,7 +2136,7 @@ mod tests {
         let service_id = Uuid::new_v4();
         let ip = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .unwrap();
         let interface = IncompleteNetworkInterface::new_service(
@@ -2316,7 +2304,7 @@ mod tests {
             TestContext::new("test_insert_sequential_ip_allocation", 2).await;
         let addresses = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES);
 
         for (i, expected_address) in addresses.take(2).enumerate() {
@@ -2412,7 +2400,7 @@ mod tests {
         let service_id = Uuid::new_v4();
         let ip = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .unwrap();
         let mac = MacAddr::random_system();
@@ -2447,7 +2435,7 @@ mod tests {
         let mut used_macs = HashSet::new();
         let mut ips = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES);
         for slot in 0..u8::try_from(MAX_NICS_PER_INSTANCE).unwrap() {
             let service_id = Uuid::new_v4();
@@ -2487,7 +2475,7 @@ mod tests {
 
         let mut ips = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES);
 
         // Insert a service NIC
@@ -2547,12 +2535,12 @@ mod tests {
 
         let ip0 = context.net1.subnets[0]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .unwrap();
         let ip1 = context.net1.subnets[1]
             .ipv4_block
-            .iter()
+            .addr_iter()
             .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
             .unwrap();
 
diff --git a/nexus/db-queries/src/db/queries/vpc_subnet.rs b/nexus/db-queries/src/db/queries/vpc_subnet.rs
index 9ddec32080..72f2771a1e 100644
--- a/nexus/db-queries/src/db/queries/vpc_subnet.rs
+++ b/nexus/db-queries/src/db/queries/vpc_subnet.rs
@@ -43,7 +43,9 @@ impl SubnetError {
                 DatabaseErrorKind::NotNullViolation,
                 ref info,
             ) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => {
-                SubnetError::OverlappingIpRange(subnet.ipv4_block.0 .0.into())
+                SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::V4(
+                    subnet.ipv4_block.0.into(),
+                ))
             }
 
             // Attempt to insert overlapping IPv6 subnet
@@ -51,7 +53,9 @@ impl SubnetError {
                 DatabaseErrorKind::NotNullViolation,
                 ref info,
             ) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => {
-                SubnetError::OverlappingIpRange(subnet.ipv6_block.0 .0.into())
+                SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::V6(
+                    subnet.ipv6_block.0.into(),
+                ))
             }
 
             // Conflicting name for the subnet within a VPC
@@ -233,8 +237,10 @@ pub struct FilterConflictingVpcSubnetRangesQuery {
 
 impl FilterConflictingVpcSubnetRangesQuery {
     pub fn new(subnet: VpcSubnet) -> Self {
-        let ipv4_block = ipnetwork::IpNetwork::from(subnet.ipv4_block.0 .0);
-        let ipv6_block = ipnetwork::IpNetwork::from(subnet.ipv6_block.0 .0);
+        let ipv4_block =
+            ipnetwork::Ipv4Network::from(subnet.ipv4_block.0).into();
+        let ipv6_block =
+            ipnetwork::Ipv6Network::from(subnet.ipv6_block.0).into();
         Self { subnet, ipv4_block, ipv6_block }
     }
 }
@@ -394,8 +400,6 @@ mod test {
     use ipnetwork::IpNetwork;
     use nexus_test_utils::db::test_setup_database;
     use omicron_common::api::external::IdentityMetadataCreateParams;
-    use omicron_common::api::external::Ipv4Net;
-    use omicron_common::api::external::Ipv6Net;
     use omicron_common::api::external::Name;
     use omicron_test_utils::dev;
     use std::convert::TryInto;
@@ -409,10 +413,10 @@ mod test {
                 name: name.clone(),
                 description: description.to_string(),
             };
-        let ipv4_block = Ipv4Net("172.30.0.0/22".parse().unwrap());
-        let other_ipv4_block = Ipv4Net("172.31.0.0/22".parse().unwrap());
-        let ipv6_block = Ipv6Net("fd12:3456:7890::/64".parse().unwrap());
-        let other_ipv6_block = Ipv6Net("fd00::/64".parse().unwrap());
+        let ipv4_block = "172.30.0.0/22".parse().unwrap();
+        let other_ipv4_block = "172.31.0.0/22".parse().unwrap();
+        let ipv6_block = "fd12:3456:7890::/64".parse().unwrap();
+        let other_ipv6_block = "fd00::/64".parse().unwrap();
         let name = "a-name".to_string().try_into().unwrap();
         let other_name = "b-name".to_string().try_into().unwrap();
         let description = "some description".to_string();
@@ -491,7 +495,7 @@ mod test {
             .expect_err("Should not be able to insert VPC Subnet with overlapping IPv6 range");
         assert_eq!(
             err,
-            SubnetError::OverlappingIpRange(IpNetwork::from(ipv6_block.0)),
+            SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::from(oxnet::IpNet::from(ipv6_block))),
             "SubnetError variant should include the exact IP range that overlaps"
         );
         let new_row = VpcSubnet::new(
@@ -507,7 +511,7 @@ mod test {
             .expect_err("Should not be able to insert VPC Subnet with overlapping IPv4 range");
         assert_eq!(
             err,
-            SubnetError::OverlappingIpRange(IpNetwork::from(ipv4_block.0)),
+            SubnetError::OverlappingIpRange(ipnetwork::IpNetwork::from(oxnet::IpNet::from(ipv4_block))),
             "SubnetError variant should include the exact IP range that overlaps"
         );
 
diff --git a/nexus/defaults/Cargo.toml b/nexus/defaults/Cargo.toml
index d6f8e54220..1d941deb8e 100644
--- a/nexus/defaults/Cargo.toml
+++ b/nexus/defaults/Cargo.toml
@@ -10,6 +10,7 @@ workspace = true
 [dependencies]
 ipnetwork.workspace = true
 once_cell.workspace = true
+oxnet.workspace = true
 rand.workspace = true
 serde_json.workspace = true
 
diff --git a/nexus/defaults/src/lib.rs b/nexus/defaults/src/lib.rs
index dd08b4e4ab..32def47b9e 100644
--- a/nexus/defaults/src/lib.rs
+++ b/nexus/defaults/src/lib.rs
@@ -4,12 +4,10 @@
 
 //! Default values for data in the Nexus API, when not provided explicitly in a request.
 
-use ipnetwork::Ipv4Network;
-use ipnetwork::Ipv6Network;
 use omicron_common::api::external;
-use omicron_common::api::external::Ipv4Net;
-use omicron_common::api::external::Ipv6Net;
 use once_cell::sync::Lazy;
+use oxnet::Ipv4Net;
+use oxnet::Ipv6Net;
 use std::net::Ipv4Addr;
 use std::net::Ipv6Addr;
 
@@ -20,10 +18,8 @@ pub const DEFAULT_PRIMARY_NIC_NAME: &str = "net0";
 /// The default IPv4 subnet range assigned to the default VPC Subnet, when
 /// the VPC is created, if one is not provided in the request. See
 /// <https://rfd.shared.oxide.computer/rfd/0021> for details.
-pub static DEFAULT_VPC_SUBNET_IPV4_BLOCK: Lazy<external::Ipv4Net> =
-    Lazy::new(|| {
-        Ipv4Net(Ipv4Network::new(Ipv4Addr::new(172, 30, 0, 0), 22).unwrap())
-    });
+pub static DEFAULT_VPC_SUBNET_IPV4_BLOCK: Lazy<Ipv4Net> =
+    Lazy::new(|| Ipv4Net::new(Ipv4Addr::new(172, 30, 0, 0), 22).unwrap());
 
 pub static DEFAULT_FIREWALL_RULES: Lazy<external::VpcFirewallRuleUpdateParams> =
     Lazy::new(|| {
@@ -73,24 +69,24 @@ pub fn random_vpc_ipv6_prefix() -> Result<Ipv6Net, external::Error> {
             "Unable to allocate random IPv6 address range",
         )
     })?;
-    Ok(Ipv6Net(
-        Ipv6Network::new(
-            Ipv6Addr::from(bytes),
-            Ipv6Net::VPC_IPV6_PREFIX_LENGTH,
-        )
-        .unwrap(),
-    ))
+    Ok(Ipv6Net::new(
+        Ipv6Addr::from(bytes),
+        omicron_common::address::VPC_IPV6_PREFIX_LENGTH,
+    )
+    .unwrap())
 }
 
 #[cfg(test)]
 mod tests {
+    use omicron_common::api::external::Ipv6NetExt;
+
     use super::*;
 
     #[test]
     fn test_random_vpc_ipv6_prefix() {
         let network = random_vpc_ipv6_prefix().unwrap();
         assert!(network.is_vpc_prefix());
-        let octets = network.network().octets();
+        let octets = network.prefix().octets();
         assert!(octets[6..].iter().all(|x| *x == 0));
     }
 }
diff --git a/nexus/networking/Cargo.toml b/nexus/networking/Cargo.toml
index db163d5aa6..510fd6ca27 100644
--- a/nexus/networking/Cargo.toml
+++ b/nexus/networking/Cargo.toml
@@ -12,6 +12,7 @@ futures.workspace = true
 ipnetwork.workspace = true
 nexus-db-queries.workspace = true
 omicron-common.workspace = true
+oxnet.workspace = true
 reqwest.workspace = true
 sled-agent-client.workspace = true
 slog.workspace = true
diff --git a/nexus/networking/src/firewall_rules.rs b/nexus/networking/src/firewall_rules.rs
index 623c545702..a656c673ca 100644
--- a/nexus/networking/src/firewall_rules.rs
+++ b/nexus/networking/src/firewall_rules.rs
@@ -19,10 +19,10 @@ use nexus_db_queries::db::DataStore;
 use omicron_common::api::external;
 use omicron_common::api::external::AllowedSourceIps;
 use omicron_common::api::external::Error;
-use omicron_common::api::external::IpNet;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::internal::nexus::HostIdentifier;
 use omicron_common::api::internal::shared::NetworkInterface;
+use oxnet::IpNet;
 use slog::debug;
 use slog::error;
 use slog::info;
@@ -353,7 +353,7 @@ pub async fn resolve_firewall_rules_for_sled_agent(
                                 .unwrap_or(&no_interfaces)
                             {
                                 host_addrs.push(
-                                    HostIdentifier::Ip(IpNet::single(
+                                    HostIdentifier::Ip(IpNet::host_net(
                                         interface.ip,
                                     ))
                                     .into(),
@@ -362,7 +362,7 @@ pub async fn resolve_firewall_rules_for_sled_agent(
                         }
                         external::VpcFirewallRuleHostFilter::Subnet(name) => {
                             for subnet in subnet_networks
-                                .get(&name)
+                                .get(name)
                                 .unwrap_or(&no_networks)
                             {
                                 host_addrs.push(
@@ -373,7 +373,8 @@ pub async fn resolve_firewall_rules_for_sled_agent(
                         }
                         external::VpcFirewallRuleHostFilter::Ip(addr) => {
                             host_addrs.push(
-                                HostIdentifier::Ip(IpNet::single(*addr)).into(),
+                                HostIdentifier::Ip(IpNet::host_net(*addr))
+                                    .into(),
                             )
                         }
                         external::VpcFirewallRuleHostFilter::IpNet(net) => {
@@ -381,7 +382,7 @@ pub async fn resolve_firewall_rules_for_sled_agent(
                         }
                         external::VpcFirewallRuleHostFilter::Vpc(name) => {
                             for interface in vpc_interfaces
-                                .get(&name)
+                                .get(name)
                                 .unwrap_or(&no_interfaces)
                             {
                                 host_addrs.push(
diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml
index 137cde9255..34056b45a1 100644
--- a/nexus/reconfigurator/execution/Cargo.toml
+++ b/nexus/reconfigurator/execution/Cargo.toml
@@ -22,6 +22,7 @@ nexus-networking.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true
 omicron-uuid-kinds.workspace = true
+oxnet.workspace = true
 reqwest.workspace = true
 sled-agent-client.workspace = true
 slog.workspace = true
diff --git a/nexus/reconfigurator/execution/src/external_networking.rs b/nexus/reconfigurator/execution/src/external_networking.rs
index cff912c137..13cf601135 100644
--- a/nexus/reconfigurator/execution/src/external_networking.rs
+++ b/nexus/reconfigurator/execution/src/external_networking.rs
@@ -443,10 +443,10 @@ mod tests {
     use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET;
     use omicron_common::address::NTP_OPTE_IPV4_SUBNET;
     use omicron_common::address::NUM_SOURCE_NAT_PORTS;
-    use omicron_common::api::external::IpNet;
     use omicron_common::api::external::MacAddr;
     use omicron_common::api::external::Vni;
     use omicron_uuid_kinds::ExternalIpUuid;
+    use oxnet::IpNet;
     use std::net::IpAddr;
     use std::net::Ipv6Addr;
     use std::net::SocketAddr;
@@ -491,7 +491,6 @@ mod tests {
                 },
                 name: "test-nexus".parse().expect("bad name"),
                 ip: NEXUS_OPTE_IPV4_SUBNET
-                    .iter()
                     .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
                     .unwrap()
                     .into(),
@@ -517,7 +516,6 @@ mod tests {
                 },
                 name: "test-external-dns".parse().expect("bad name"),
                 ip: DNS_OPTE_IPV4_SUBNET
-                    .iter()
                     .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
                     .unwrap()
                     .into(),
@@ -546,7 +544,6 @@ mod tests {
                 },
                 name: "test-external-ntp".parse().expect("bad name"),
                 ip: NTP_OPTE_IPV4_SUBNET
-                    .iter()
                     .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES)
                     .unwrap()
                     .into(),
diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml
index ba935bdba0..7bbc9aa36b 100644
--- a/nexus/reconfigurator/planning/Cargo.toml
+++ b/nexus/reconfigurator/planning/Cargo.toml
@@ -20,6 +20,7 @@ nexus-inventory.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true
 omicron-uuid-kinds.workspace = true
+oxnet.workspace = true
 rand.workspace = true
 sled-agent-client.workspace = true
 slog.workspace = true
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index 45aea75473..1efefb9817 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -526,7 +526,7 @@ impl<'a> BlueprintBuilder<'a> {
         // these are at known, fixed addresses relative to the AZ subnet
         // (which itself is a known-prefix parent subnet of the sled subnet).
         let dns_servers =
-            get_internal_dns_server_addresses(sled_subnet.net().network());
+            get_internal_dns_server_addresses(sled_subnet.net().prefix());
 
         // The list of boundary NTP servers is not necessarily stored
         // anywhere (unless there happens to be another internal NTP zone
@@ -758,7 +758,7 @@ impl<'a> BlueprintBuilder<'a> {
         let sled_subnet = self.sled_resources(sled_id)?.subnet;
         let allocator =
             self.sled_ip_allocators.entry(sled_id).or_insert_with(|| {
-                let sled_subnet_addr = sled_subnet.net().network();
+                let sled_subnet_addr = sled_subnet.net().prefix();
                 let minimum = sled_subnet_addr
                     .saturating_add(u128::from(SLED_RESERVED_ADDRESSES));
                 let maximum = sled_subnet_addr
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs
index b9100f518d..950ce89c43 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs
@@ -16,8 +16,8 @@ use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET;
 use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET;
 use omicron_common::address::NTP_OPTE_IPV4_SUBNET;
 use omicron_common::address::NTP_OPTE_IPV6_SUBNET;
-use omicron_common::api::external::IpNet;
 use omicron_common::api::external::MacAddr;
+use oxnet::IpNet;
 use std::collections::HashSet;
 use std::hash::Hash;
 use std::net::IpAddr;
@@ -191,14 +191,12 @@ impl<'a> BuilderExternalNetworking<'a> {
         // of used resources we built above if needed.
         let nexus_v4_ips = AvailableIterator::new(
             NEXUS_OPTE_IPV4_SUBNET
-                .0
-                .iter()
+                .addr_iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
             existing_nexus_v4_ips,
         );
         let nexus_v6_ips = AvailableIterator::new(
             NEXUS_OPTE_IPV6_SUBNET
-                .0
                 .iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
             existing_nexus_v6_ips,
diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
index 648c082c0f..fa61fa2758 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
@@ -196,12 +196,10 @@ ERRORS:
                 ),
             ),
             subnet: V4(
-                Ipv4Net(
-                    Ipv4Network {
-                        addr: 172.30.2.0,
-                        prefix: 24,
-                    },
-                ),
+                Ipv4Net {
+                    addr: 172.30.2.0,
+                    width: 24,
+                },
             ),
             vni: Vni(
                 100,
diff --git a/nexus/src/app/allow_list.rs b/nexus/src/app/allow_list.rs
index 6b32f0c6f3..d25400a512 100644
--- a/nexus/src/app/allow_list.rs
+++ b/nexus/src/app/allow_list.rs
@@ -64,14 +64,14 @@ impl super::Nexus {
             let mut contains_remote = false;
             for entry in list.iter() {
                 contains_remote |= entry.contains(remote_addr);
-                if entry.ip().is_unspecified() {
+                if entry.addr().is_unspecified() {
                     return Err(Error::invalid_request(
                         "Source IP allowlist may not contain the \
                         unspecified address. Use \"any\" to allow \
                         any source to connect to user-facing services.",
                     ));
                 }
-                if entry.prefix() == 0 {
+                if entry.width() == 0 {
                     return Err(Error::invalid_request(
                         "Source IP allowlist entries may not have \
                         a netmask of /0.",
diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs
index d1bb9955d7..b0a4c8cef2 100644
--- a/nexus/src/app/background/sync_service_zone_nat.rs
+++ b/nexus/src/app/background/sync_service_zone_nat.rs
@@ -19,7 +19,6 @@ use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_db_queries::db::DataStore;
 use omicron_common::address::{MAX_PORT, MIN_PORT};
-use omicron_common::api::external;
 use omicron_uuid_kinds::GenericUuid;
 use serde_json::json;
 use sled_agent_client::types::OmicronZoneType;
@@ -125,9 +124,7 @@ impl BackgroundTask for ServiceZoneNatTracker {
                     }
                 };
 
-                let sled_address = external::Ipv6Net(
-                    ipnetwork::Ipv6Network::new(*sled.ip, 128).unwrap(),
-                );
+                let sled_address = oxnet::Ipv6Net::host_net(*sled.ip);
 
                 let zones_config: sled_agent_client::types::OmicronZonesConfig =
                     zones_found.zones;
@@ -152,16 +149,14 @@ impl BackgroundTask for ServiceZoneNatTracker {
                             };
 
                             let external_address =
-                                ipnetwork::Ipv4Network::new(external_ip, 32)
+                                oxnet::Ipv4Net::new(external_ip, 32)
                                     .unwrap();
 
                             let (snat_first_port, snat_last_port) =
                                 snat_cfg.port_range_raw();
                             let nat_value = Ipv4NatValues {
                                 external_address: nexus_db_model::Ipv4Net(
-                                    omicron_common::api::external::Ipv4Net(
                                         external_address,
-                                    ),
                                 ),
                                 first_port: snat_first_port.into(),
                                 last_port: snat_last_port.into(),
@@ -187,14 +182,12 @@ impl BackgroundTask for ServiceZoneNatTracker {
                             };
 
                             let external_address =
-                                ipnetwork::Ipv4Network::new(external_ip, 32)
+                                oxnet::Ipv4Net::new(external_ip, 32)
                                     .unwrap();
 
                             let nat_value = Ipv4NatValues {
                                 external_address: nexus_db_model::Ipv4Net(
-                                    omicron_common::api::external::Ipv4Net(
                                         external_address,
-                                    ),
                                 ),
                                 first_port: MIN_PORT.into(),
                                 last_port: MAX_PORT.into(),
@@ -234,14 +227,12 @@ impl BackgroundTask for ServiceZoneNatTracker {
                             };
 
                             let external_address =
-                                ipnetwork::Ipv4Network::new(external_ip, 32)
+                                oxnet::Ipv4Net::new(external_ip, 32)
                                     .unwrap();
 
                             let nat_value = Ipv4NatValues {
                                 external_address: nexus_db_model::Ipv4Net(
-                                    omicron_common::api::external::Ipv4Net(
                                         external_address,
-                                    ),
                                 ),
                                 first_port: MIN_PORT.into(),
                                 last_port: MAX_PORT.into(),
diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs
index d41eaf2e6a..b6e3f25263 100644
--- a/nexus/src/app/bgp.rs
+++ b/nexus/src/app/bgp.rs
@@ -10,8 +10,7 @@ use nexus_db_queries::context::OpContext;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::{
     self, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, CreateResult,
-    DeleteResult, Ipv4Net, ListResultVec, LookupResult, NameOrId,
-    SwitchBgpHistory,
+    DeleteResult, ListResultVec, LookupResult, NameOrId, SwitchBgpHistory,
 };
 use std::net::IpAddr;
 
@@ -202,8 +201,7 @@ impl super::Nexus {
             {
                 Ok(result) => {
                     for (prefix, paths) in result.into_inner().iter() {
-                        let ipnet: ipnetwork::Ipv4Network = match prefix.parse()
-                        {
+                        let ipnet = match prefix.parse() {
                             Ok(p) => p,
                             Err(e) => {
                                 error!(
@@ -220,7 +218,7 @@ impl super::Nexus {
                             };
                             let x = BgpImportedRouteIpv4 {
                                 switch: *switch,
-                                prefix: Ipv4Net(ipnet),
+                                prefix: ipnet,
                                 id: p
                                     .bgp
                                     .as_ref()
diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs
index de4de492e0..3c607bae78 100644
--- a/nexus/src/app/instance_network.rs
+++ b/nexus/src/app/instance_network.rs
@@ -6,7 +6,6 @@
 
 use crate::app::switch_port;
 use ipnetwork::IpNetwork;
-use ipnetwork::Ipv6Network;
 use nexus_db_model::ExternalIp;
 use nexus_db_model::IpAttachState;
 use nexus_db_model::Ipv4NatEntry;
@@ -18,11 +17,11 @@ use nexus_db_queries::db;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_db_queries::db::DataStore;
 use omicron_common::api::external::Error;
-use omicron_common::api::external::Ipv4Net;
-use omicron_common::api::external::Ipv6Net;
 use omicron_common::api::internal::nexus;
 use omicron_common::api::internal::shared::NetworkInterface;
 use omicron_common::api::internal::shared::SwitchLocation;
+use oxnet::Ipv4Net;
+use oxnet::Ipv6Net;
 use std::collections::HashSet;
 use std::str::FromStr;
 use uuid::Uuid;
@@ -511,8 +510,7 @@ pub(crate) async fn instance_ensure_dpd_config(
         ));
     }
 
-    let sled_address =
-        Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap());
+    let sled_address = Ipv6Net::host_net(*sled_ip_address.ip());
 
     // If all of our IPs are attached or are guaranteed to be owned
     // by the saga calling this fn, then we need to disregard and
@@ -653,7 +651,7 @@ pub(crate) async fn probe_ensure_dpd_config(
         }
     }
 
-    let sled_address = Ipv6Net(Ipv6Network::new(sled_ip_address, 128).unwrap());
+    let sled_address = Ipv6Net::host_net(sled_ip_address);
 
     for target_ip in ips
         .iter()
@@ -1011,7 +1009,7 @@ async fn ensure_nat_entry(
     match target_ip.ip {
         IpNetwork::V4(v4net) => {
             let nat_entry = Ipv4NatValues {
-                external_address: Ipv4Net(v4net).into(),
+                external_address: Ipv4Net::from(v4net).into(),
                 first_port: target_ip.first_port,
                 last_port: target_ip.last_port,
                 sled_address: sled_address.into(),
diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs
index fdd117b850..cc40a8d43a 100644
--- a/nexus/src/app/sagas/vpc_create.rs
+++ b/nexus/src/app/sagas/vpc_create.rs
@@ -291,15 +291,13 @@ async fn svc_create_subnet(
 
     // Allocate the first /64 sub-range from the requested or created
     // prefix.
-    let ipv6_block = external::Ipv6Net(
-        ipnetwork::Ipv6Network::new(db_vpc.ipv6_prefix.network(), 64)
-            .map_err(|_| {
-                external::Error::internal_error(
-                    "Failed to allocate default IPv6 subnet",
-                )
-            })
-            .map_err(ActionError::action_failed)?,
-    );
+    let ipv6_block = oxnet::Ipv6Net::new(db_vpc.ipv6_prefix.prefix(), 64)
+        .map_err(|_| {
+            external::Error::internal_error(
+                "Failed to allocate default IPv6 subnet",
+            )
+        })
+        .map_err(ActionError::action_failed)?;
 
     let subnet = db::model::VpcSubnet::new(
         default_subnet_id,
diff --git a/nexus/src/app/switch_interface.rs b/nexus/src/app/switch_interface.rs
index c3ce0f553c..bb4cba4c7b 100644
--- a/nexus/src/app/switch_interface.rs
+++ b/nexus/src/app/switch_interface.rs
@@ -11,8 +11,9 @@ use nexus_db_queries::db::lookup;
 use nexus_db_queries::db::lookup::LookupPath;
 use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::{
-    CreateResult, DataPageParams, DeleteResult, Error, IpNet, ListResultVec,
+    CreateResult, DataPageParams, DeleteResult, Error, ListResultVec,
 };
+use oxnet::IpNet;
 use std::sync::Arc;
 use uuid::Uuid;
 
diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs
index 4c5a569201..f081f351db 100644
--- a/nexus/src/app/vpc_subnet.rs
+++ b/nexus/src/app/vpc_subnet.rs
@@ -19,6 +19,7 @@ use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
+use omicron_common::api::external::Ipv6NetExt;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::NameOrId;
@@ -74,13 +75,13 @@ impl super::Nexus {
         let (.., authz_vpc, db_vpc) = vpc_lookup.fetch().await?;
 
         // Validate IPv4 range
-        if !params.ipv4_block.network().is_private() {
+        if !params.ipv4_block.prefix().is_private() {
             return Err(external::Error::invalid_request(
                 "VPC Subnet IPv4 address ranges must be from a private range",
             ));
         }
-        if params.ipv4_block.prefix() < MIN_VPC_IPV4_SUBNET_PREFIX
-            || params.ipv4_block.prefix()
+        if params.ipv4_block.width() < MIN_VPC_IPV4_SUBNET_PREFIX
+            || params.ipv4_block.width()
                 > self.tunables.max_vpc_ipv4_subnet_prefix
         {
             return Err(external::Error::invalid_request(&format!(
@@ -116,7 +117,7 @@ impl super::Nexus {
                     let ipv6_block = db_vpc
                         .ipv6_prefix
                         .random_subnet(
-                            external::Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH,
+                            oxnet::Ipv6Net::VPC_SUBNET_IPV6_PREFIX_LENGTH,
                         )
                         .map(|block| block.0)
                         .ok_or_else(|| {
@@ -148,7 +149,7 @@ impl super::Nexus {
                                 self.log,
                                 "autogenerated random IPv6 range overlap";
                                 "subnet_id" => ?subnet_id,
-                                "ipv6_block" => %ipv6_block.0
+                                "ipv6_block" => %ipv6_block
                             );
                             retry += 1;
                             continue;
@@ -193,10 +194,10 @@ impl super::Nexus {
                 if !ipv6_block.is_vpc_subnet(&db_vpc.ipv6_prefix) {
                     return Err(external::Error::invalid_request(&format!(
                         concat!(
-                        "VPC Subnet IPv6 address range '{}' is not valid for ",
-                        "VPC with IPv6 prefix '{}'",
-                    ),
-                        ipv6_block, db_vpc.ipv6_prefix.0 .0,
+                            "VPC Subnet IPv6 address range '{}' is not valid for ",
+                            "VPC with IPv6 prefix '{}'",
+                        ),
+                        ipv6_block, db_vpc.ipv6_prefix.0,
                     )));
                 }
                 let subnet = db::model::VpcSubnet::new(
diff --git a/nexus/src/context.rs b/nexus/src/context.rs
index 72ecd6b8ac..1512671056 100644
--- a/nexus/src/context.rs
+++ b/nexus/src/context.rs
@@ -212,7 +212,8 @@ impl ServerContext {
         // Set up DNS Client
         let resolver = match config.deployment.internal_dns {
             nexus_config::InternalDns::FromSubnet { subnet } => {
-                let az_subnet = Ipv6Subnet::<AZ_PREFIX>::new(subnet.net().ip());
+                let az_subnet =
+                    Ipv6Subnet::<AZ_PREFIX>::new(subnet.net().addr());
                 info!(
                     log,
                     "Setting up resolver using DNS servers for subnet: {:?}",
diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index a078ce2a61..d4af109849 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -675,7 +675,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
                 nic: NetworkInterface {
                     id: Uuid::new_v4(),
                     ip: NEXUS_OPTE_IPV4_SUBNET
-                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
                         .unwrap()
                         .into(),
                     kind: NetworkInterfaceKind::Service {
@@ -1029,7 +1029,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
                     nic: NetworkInterface {
                         id: Uuid::new_v4(),
                         ip: DNS_OPTE_IPV4_SUBNET
-                            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+                            .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
                             .unwrap()
                             .into(),
                         kind: NetworkInterfaceKind::Service {
diff --git a/nexus/tests/integration_tests/allow_list.rs b/nexus/tests/integration_tests/allow_list.rs
index dc206843f7..336a33273d 100644
--- a/nexus/tests/integration_tests/allow_list.rs
+++ b/nexus/tests/integration_tests/allow_list.rs
@@ -9,7 +9,7 @@ use nexus_test_utils::http_testing::{AuthnMode, NexusRequest};
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::{params, views};
 use omicron_common::api::external::AllowedSourceIps;
-use omicron_common::api::external::IpNet;
+use oxnet::IpNet;
 use std::net::IpAddr;
 use std::net::Ipv4Addr;
 
@@ -75,8 +75,9 @@ async fn test_allow_list(cptestctx: &ControlPlaneTestContext) {
     }
 
     // Set the list with exactly one IP, make sure it's the same.
-    let allowed_ips = AllowedSourceIps::try_from(vec![IpNet::single(our_addr)])
-        .expect("Expected a valid IP list");
+    let allowed_ips =
+        AllowedSourceIps::try_from(vec![IpNet::host_net(our_addr)])
+            .expect("Expected a valid IP list");
     update_list_and_compare(client, allowed_ips).await;
 
     // Add our IP in the front and end, and still make sure that works.
@@ -84,8 +85,8 @@ async fn test_allow_list(cptestctx: &ControlPlaneTestContext) {
     // This is a regression for
     // https://github.com/oxidecomputer/omicron/issues/5727.
     let addrs = vec![
-        IpNet::single(our_addr),
-        IpNet::single(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))),
+        IpNet::host_net(our_addr),
+        IpNet::host_net(IpAddr::from(Ipv4Addr::new(10, 0, 0, 1))),
     ];
     let allowed_ips = AllowedSourceIps::try_from(addrs.clone())
         .expect("Expected a valid IP list");
@@ -101,7 +102,7 @@ async fn test_allow_list(cptestctx: &ControlPlaneTestContext) {
 
     // Check that we cannot make the request with a list that doesn't include
     // us.
-    let addrs = vec![IpNet::single(IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)))];
+    let addrs = vec![IpNet::host_net(IpAddr::from(Ipv4Addr::new(1, 1, 1, 1)))];
     let allowed_ips = AllowedSourceIps::try_from(addrs.clone())
         .expect("Expected a valid IP list");
     let new_list = params::AllowListUpdate { allowed_ips: allowed_ips.clone() };
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index cc73ab088c..7672bbc034 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -30,7 +30,6 @@ use omicron_common::api::external::ByteCount;
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use omicron_common::api::external::IdentityMetadataUpdateParams;
 use omicron_common::api::external::InstanceCpuCount;
-use omicron_common::api::external::Ipv4Net;
 use omicron_common::api::external::Name;
 use omicron_common::api::external::NameOrId;
 use omicron_common::api::external::RouteDestination;
@@ -201,7 +200,7 @@ pub static DEMO_VPC_SUBNET_CREATE: Lazy<params::VpcSubnetCreate> =
             name: DEMO_VPC_SUBNET_NAME.clone(),
             description: String::from(""),
         },
-        ipv4_block: Ipv4Net("10.1.2.3/8".parse().unwrap()),
+        ipv4_block: "10.1.2.3/8".parse().unwrap(),
         ipv6_block: None,
     });
 
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 51e2552e85..565e2fbafb 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -56,7 +56,6 @@ use omicron_common::api::external::Instance;
 use omicron_common::api::external::InstanceCpuCount;
 use omicron_common::api::external::InstanceNetworkInterface;
 use omicron_common::api::external::InstanceState;
-use omicron_common::api::external::Ipv4Net;
 use omicron_common::api::external::Name;
 use omicron_common::api::external::NameOrId;
 use omicron_common::api::external::Vni;
@@ -1684,7 +1683,7 @@ async fn test_instance_with_new_custom_network_interfaces(
             name: non_default_subnet_name.clone(),
             description: String::from("A non-default subnet"),
         },
-        ipv4_block: Ipv4Net("172.31.0.0/24".parse().unwrap()),
+        ipv4_block: "172.31.0.0/24".parse().unwrap(),
         ipv6_block: None,
     };
     let _response = NexusRequest::objects_post(
@@ -1830,7 +1829,7 @@ async fn test_instance_create_delete_network_interface(
             name: Name::try_from(String::from("secondary")).unwrap(),
             description: String::from("A secondary VPC subnet"),
         },
-        ipv4_block: Ipv4Net("172.31.0.0/24".parse().unwrap()),
+        ipv4_block: "172.31.0.0/24".parse().unwrap(),
         ipv6_block: None,
     };
     let _response = NexusRequest::objects_post(
@@ -2071,7 +2070,7 @@ async fn test_instance_update_network_interfaces(
             name: Name::try_from(String::from("secondary")).unwrap(),
             description: String::from("A secondary VPC subnet"),
         },
-        ipv4_block: Ipv4Net("172.31.0.0/24".parse().unwrap()),
+        ipv4_block: "172.31.0.0/24".parse().unwrap(),
         ipv6_block: None,
     };
     let _response = NexusRequest::objects_post(
diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs
index 0efc659890..794c769da4 100644
--- a/nexus/tests/integration_tests/subnet_allocation.rs
+++ b/nexus/tests/integration_tests/subnet_allocation.rs
@@ -9,7 +9,6 @@ use dropshot::test_util::ClientTestContext;
 use dropshot::HttpErrorResponseBody;
 use http::method::Method;
 use http::StatusCode;
-use ipnetwork::Ipv4Network;
 use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
 use nexus_test_utils::http_testing::AuthnMode;
 use nexus_test_utils::http_testing::NexusRequest;
@@ -22,8 +21,9 @@ use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params;
 use omicron_common::api::external::{
     ByteCount, IdentityMetadataCreateParams, InstanceCpuCount,
-    InstanceNetworkInterface, Ipv4Net,
+    InstanceNetworkInterface,
 };
+use oxnet::Ipv4Net;
 use std::net::Ipv4Addr;
 
 type ControlPlaneTestContext =
@@ -101,7 +101,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) {
     let subnets_url = format!("/v1/vpc-subnets?{}", vpc_selector);
     let subnet_name = "small";
     let network_address = Ipv4Addr::new(192, 168, 42, 0);
-    let subnet = Ipv4Network::new(network_address, subnet_size)
+    let subnet = Ipv4Net::new(network_address, subnet_size)
         .expect("Invalid IPv4 network");
     let subnet_create = params::VpcSubnetCreate {
         identity: IdentityMetadataCreateParams {
@@ -109,7 +109,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) {
             description: String::from("a small subnet"),
         },
         // Use the minimum subnet size
-        ipv4_block: Ipv4Net(subnet),
+        ipv4_block: subnet,
         ipv6_block: None,
     };
     NexusRequest::objects_post(client, &subnets_url, &Some(&subnet_create))
@@ -132,12 +132,13 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) {
         },
     ]);
 
-    // Create enough instances to fill the subnet. There are subnet.size() total
-    // addresses, 6 of which are reserved.
-    let n_final_reserved_addresses = 1;
-    let n_reserved_addresses =
-        NUM_INITIAL_RESERVED_IP_ADDRESSES + n_final_reserved_addresses;
-    let subnet_size = subnet.size() as usize - n_reserved_addresses;
+    // Create enough instances to fill the subnet. There are subnet.size()
+    // total addresses, 6 of which are reserved.
+    let subnet_size_minus_1 = match subnet.size() {
+        Some(n) => n - 1,
+        None => u32::MAX,
+    } as usize;
+    let subnet_size = subnet_size_minus_1 - NUM_INITIAL_RESERVED_IP_ADDRESSES;
     for i in 0..subnet_size {
         create_instance_with(
             client,
@@ -178,7 +179,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) {
     network_interfaces.sort_by(|a, b| a.ip.cmp(&b.ip));
     for (iface, addr) in network_interfaces
         .iter()
-        .zip(subnet.iter().skip(NUM_INITIAL_RESERVED_IP_ADDRESSES))
+        .zip(subnet.addr_iter().skip(NUM_INITIAL_RESERVED_IP_ADDRESSES))
     {
         assert_eq!(
             iface.ip,
diff --git a/nexus/tests/integration_tests/vpc_subnets.rs b/nexus/tests/integration_tests/vpc_subnets.rs
index 0814512cf2..dcc96d08bf 100644
--- a/nexus/tests/integration_tests/vpc_subnets.rs
+++ b/nexus/tests/integration_tests/vpc_subnets.rs
@@ -20,8 +20,8 @@ use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::{params, views::VpcSubnet};
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use omicron_common::api::external::IdentityMetadataUpdateParams;
-use omicron_common::api::external::Ipv4Net;
-use omicron_common::api::external::Ipv6Net;
+use omicron_common::api::external::Ipv6NetExt;
+use oxnet::Ipv6Net;
 
 type ControlPlaneTestContext =
     nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
@@ -160,16 +160,15 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(error.message, "not found: vpc-subnet with name \"subnet1\"");
 
     // Create a VPC Subnet.
-    let ipv4_block = Ipv4Net("10.0.0.0/24".parse().unwrap());
-    let other_ipv4_block = Ipv4Net("172.31.0.0/16".parse().unwrap());
-    // Create the first two available IPv6 address ranges. */
-    let prefix = vpc.ipv6_prefix.network();
-    let ipv6_block = Ipv6Net(ipnetwork::Ipv6Network::new(prefix, 64).unwrap());
+    let ipv4_block = "10.0.0.0/24".parse().unwrap();
+    let other_ipv4_block = "172.31.0.0/16".parse().unwrap();
+    // Create the first two available IPv6 address ranges.
+    let prefix = vpc.ipv6_prefix.prefix();
+    let ipv6_block = Ipv6Net::new(prefix, 64).unwrap();
     let mut segments = prefix.segments();
     segments[3] = 1;
     let addr = std::net::Ipv6Addr::from(segments);
-    let other_ipv6_block =
-        Some(Ipv6Net(ipnetwork::Ipv6Network::new(addr, 64).unwrap()));
+    let other_ipv6_block = Some(Ipv6Net::new(addr, 64).unwrap());
     let new_subnet = params::VpcSubnetCreate {
         identity: IdentityMetadataCreateParams {
             name: subnet_name.parse().unwrap(),
@@ -291,7 +290,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(error.message, "not found: vpc-subnet with name \"subnet2\"");
 
     // create second subnet, this time with an autogenerated IPv6 range.
-    let ipv4_block = Ipv4Net("192.168.0.0/16".parse().unwrap());
+    let ipv4_block = "192.168.0.0/16".parse().unwrap();
     let new_subnet = params::VpcSubnetCreate {
         identity: IdentityMetadataCreateParams {
             name: subnet2_name.parse().unwrap(),
@@ -435,10 +434,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) {
         "it's also below the net"
     );
     assert_eq!(subnet_same_name.vpc_id, vpc2.identity.id);
-    assert_eq!(
-        subnet_same_name.ipv4_block,
-        Ipv4Net("192.168.0.0/16".parse().unwrap())
-    );
+    assert_eq!(subnet_same_name.ipv4_block, "192.168.0.0/16".parse().unwrap());
     assert!(subnet_same_name.ipv6_block.is_unique_local());
 }
 
diff --git a/nexus/tests/integration_tests/vpcs.rs b/nexus/tests/integration_tests/vpcs.rs
index cc9aea4d11..1ceebd8cff 100644
--- a/nexus/tests/integration_tests/vpcs.rs
+++ b/nexus/tests/integration_tests/vpcs.rs
@@ -18,7 +18,6 @@ use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::{params, views::Vpc};
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use omicron_common::api::external::IdentityMetadataUpdateParams;
-use omicron_common::api::external::Ipv6Net;
 
 type ControlPlaneTestContext =
     nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
@@ -76,7 +75,7 @@ async fn test_vpcs(cptestctx: &ControlPlaneTestContext) {
 
     // Make sure creating a VPC fails if we specify an IPv6 prefix that is
     // not a valid ULA range.
-    let bad_prefix = Ipv6Net("2000:1000::/48".parse().unwrap());
+    let bad_prefix = "2000:1000::/48".parse().unwrap();
     NexusRequest::new(
         RequestBuilder::new(client, Method::POST, &vpcs_url)
             .expect_status(Some(StatusCode::BAD_REQUEST))
@@ -101,7 +100,7 @@ async fn test_vpcs(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(vpc.identity.description, "vpc description");
     assert_eq!(vpc.dns_name, "abc");
     assert_eq!(
-        vpc.ipv6_prefix.prefix(),
+        vpc.ipv6_prefix.width(),
         48,
         "Expected a 48-bit ULA IPv6 address prefix"
     );
diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml
index 802727b1ab..df976e2444 100644
--- a/nexus/types/Cargo.toml
+++ b/nexus/types/Cargo.toml
@@ -19,6 +19,7 @@ humantime.workspace = true
 ipnetwork.workspace = true
 omicron-uuid-kinds.workspace = true
 openssl.workspace = true
+oxnet.workspace = true
 parse-display.workspace = true
 schemars = { workspace = true, features = ["chrono", "uuid1"] }
 serde.workspace = true
diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs
index 1b252c77cb..3f53503cc2 100644
--- a/nexus/types/src/external_api/params.rs
+++ b/nexus/types/src/external_api/params.rs
@@ -11,9 +11,10 @@ use chrono::{DateTime, Utc};
 use omicron_common::api::external::{
     AddressLotKind, AllowedSourceIps, BfdMode, BgpPeer, ByteCount, Hostname,
     IdentityMetadataCreateParams, IdentityMetadataUpdateParams,
-    InstanceCpuCount, IpNet, Ipv4Net, Ipv6Net, LinkFec, LinkSpeed, Name,
-    NameOrId, PaginationOrder, RouteDestination, RouteTarget, SemverVersion,
+    InstanceCpuCount, LinkFec, LinkSpeed, Name, NameOrId, PaginationOrder,
+    RouteDestination, RouteTarget, SemverVersion,
 };
+use oxnet::{IpNet, Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
 use serde::{
     de::{self, Visitor},
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs
index 1e90d04b55..2fa94b0e80 100644
--- a/nexus/types/src/external_api/views.rs
+++ b/nexus/types/src/external_api/views.rs
@@ -13,9 +13,10 @@ use chrono::DateTime;
 use chrono::Utc;
 use omicron_common::api::external::{
     AllowedSourceIps as ExternalAllowedSourceIps, ByteCount, Digest, Error,
-    IdentityMetadata, InstanceState, Ipv4Net, Ipv6Net, Name, ObjectIdentity,
-    RoleName, SimpleIdentity,
+    IdentityMetadata, InstanceState, Name, ObjectIdentity, RoleName,
+    SimpleIdentity,
 };
+use oxnet::{Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json
index f177c27f55..ddfc1e91f8 100644
--- a/openapi/bootstrap-agent.json
+++ b/openapi/bootstrap-agent.json
@@ -620,6 +620,11 @@
         ]
       },
       "IpNet": {
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::IpNet",
+          "version": "0.1.0"
+        },
         "oneOf": [
           {
             "title": "v4",
@@ -683,7 +688,12 @@
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
-        "description": "An IPv4 subnet, including prefix and subnet mask",
+        "description": "An IPv4 subnet, including prefix and prefix length",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv4Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
@@ -714,6 +724,11 @@
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
         "description": "An IPv6 subnet, including prefix and subnet mask",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv6Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index c7d476994d..ad109a18fa 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -3174,6 +3174,11 @@
         ]
       },
       "IpNet": {
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::IpNet",
+          "version": "0.1.0"
+        },
         "oneOf": [
           {
             "title": "v4",
@@ -3284,7 +3289,12 @@
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
-        "description": "An IPv4 subnet, including prefix and subnet mask",
+        "description": "An IPv4 subnet, including prefix and prefix length",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv4Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
@@ -3315,6 +3325,11 @@
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
         "description": "An IPv6 subnet, including prefix and subnet mask",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv6Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
diff --git a/openapi/nexus.json b/openapi/nexus.json
index 2bf6f0a6ff..a0789aecde 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -14314,6 +14314,11 @@
         ]
       },
       "IpNet": {
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::IpNet",
+          "version": "0.1.0"
+        },
         "oneOf": [
           {
             "title": "v4",
@@ -14595,7 +14600,12 @@
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
-        "description": "An IPv4 subnet, including prefix and subnet mask",
+        "description": "An IPv4 subnet, including prefix and prefix length",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv4Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
@@ -14642,6 +14652,11 @@
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
         "description": "An IPv6 subnet, including prefix and subnet mask",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv6Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index 7a951a6d15..763a67910f 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -3388,6 +3388,11 @@
         ]
       },
       "IpNet": {
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::IpNet",
+          "version": "0.1.0"
+        },
         "oneOf": [
           {
             "title": "v4",
@@ -3431,7 +3436,12 @@
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
-        "description": "An IPv4 subnet, including prefix and subnet mask",
+        "description": "An IPv4 subnet, including prefix and prefix length",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv4Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
@@ -3444,6 +3454,11 @@
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
         "description": "An IPv6 subnet, including prefix and subnet mask",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv6Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
@@ -3453,7 +3468,7 @@
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$"
       },
       "Ipv6Subnet": {
-        "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.",
+        "description": "Wraps an [`Ipv6Net`] with a compile-time prefix length.",
         "type": "object",
         "properties": {
           "net": {
diff --git a/openapi/wicketd.json b/openapi/wicketd.json
index cb06c0cadf..762fbfade0 100644
--- a/openapi/wicketd.json
+++ b/openapi/wicketd.json
@@ -1666,6 +1666,11 @@
         ]
       },
       "IpNet": {
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::IpNet",
+          "version": "0.1.0"
+        },
         "oneOf": [
           {
             "title": "v4",
@@ -1729,7 +1734,12 @@
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
-        "description": "An IPv4 subnet, including prefix and subnet mask",
+        "description": "An IPv4 subnet, including prefix and prefix length",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv4Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
@@ -1760,6 +1770,11 @@
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
         "description": "An IPv6 subnet, including prefix and subnet mask",
+        "x-rust-type": {
+          "crate": "oxnet",
+          "path": "oxnet::Ipv6Net",
+          "version": "0.1.0"
+        },
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
diff --git a/schema/all-zone-requests.json b/schema/all-zone-requests.json
index 7fe9b139eb..fde6ee18a4 100644
--- a/schema/all-zone-requests.json
+++ b/schema/all-zone-requests.json
@@ -173,16 +173,26 @@
             }
           ]
         }
-      ]
+      ],
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::IpNet",
+        "version": "0.1.0"
+      }
     },
     "Ipv4Net": {
       "title": "An IPv4 subnet",
-      "description": "An IPv4 subnet, including prefix and subnet mask",
+      "description": "An IPv4 subnet, including prefix and prefix length",
       "examples": [
         "192.168.1.0/24"
       ],
       "type": "string",
-      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
+      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv4Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Net": {
       "title": "An IPv6 subnet",
@@ -191,7 +201,12 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "MacAddr": {
       "title": "A MAC address",
diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json
index bb4dba2520..526e41376f 100644
--- a/schema/all-zones-requests.json
+++ b/schema/all-zones-requests.json
@@ -57,16 +57,26 @@
             }
           ]
         }
-      ]
+      ],
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::IpNet",
+        "version": "0.1.0"
+      }
     },
     "Ipv4Net": {
       "title": "An IPv4 subnet",
-      "description": "An IPv4 subnet, including prefix and subnet mask",
+      "description": "An IPv4 subnet, including prefix and prefix length",
       "examples": [
         "192.168.1.0/24"
       ],
       "type": "string",
-      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
+      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv4Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Net": {
       "title": "An IPv6 subnet",
@@ -75,7 +85,12 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "MacAddr": {
       "title": "A MAC address",
diff --git a/schema/deployment-config.json b/schema/deployment-config.json
index 9fa4ba2159..7b737c52b2 100644
--- a/schema/deployment-config.json
+++ b/schema/deployment-config.json
@@ -132,10 +132,15 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Subnet": {
-      "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.",
+      "description": "Wraps an [`Ipv6Net`] with a compile-time prefix length.",
       "type": "object",
       "required": [
         "net"
diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json
index bab3e916ba..d1540ca351 100644
--- a/schema/rss-service-plan-v3.json
+++ b/schema/rss-service-plan-v3.json
@@ -171,16 +171,26 @@
             }
           ]
         }
-      ]
+      ],
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::IpNet",
+        "version": "0.1.0"
+      }
     },
     "Ipv4Net": {
       "title": "An IPv4 subnet",
-      "description": "An IPv4 subnet, including prefix and subnet mask",
+      "description": "An IPv4 subnet, including prefix and prefix length",
       "examples": [
         "192.168.1.0/24"
       ],
       "type": "string",
-      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
+      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv4Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Net": {
       "title": "An IPv6 subnet",
@@ -189,7 +199,12 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "MacAddr": {
       "title": "A MAC address",
diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json
index 95ca5b90ba..204dddff99 100644
--- a/schema/rss-sled-plan.json
+++ b/schema/rss-sled-plan.json
@@ -493,7 +493,12 @@
             }
           ]
         }
-      ]
+      ],
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::IpNet",
+        "version": "0.1.0"
+      }
     },
     "IpNetwork": {
       "oneOf": [
@@ -538,12 +543,17 @@
     },
     "Ipv4Net": {
       "title": "An IPv4 subnet",
-      "description": "An IPv4 subnet, including prefix and subnet mask",
+      "description": "An IPv4 subnet, including prefix and prefix length",
       "examples": [
         "192.168.1.0/24"
       ],
       "type": "string",
-      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
+      "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv4Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv4Network": {
       "type": "string",
@@ -575,7 +585,12 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Network": {
       "type": "string",
@@ -601,7 +616,7 @@
       }
     },
     "Ipv6Subnet": {
-      "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.",
+      "description": "Wraps an [`Ipv6Net`] with a compile-time prefix length.",
       "type": "object",
       "required": [
         "net"
diff --git a/schema/start-sled-agent-request.json b/schema/start-sled-agent-request.json
index 7a7745617c..98dfcea61c 100644
--- a/schema/start-sled-agent-request.json
+++ b/schema/start-sled-agent-request.json
@@ -32,10 +32,15 @@
         "fd12:3456::/64"
       ],
       "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
+      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$",
+      "x-rust-type": {
+        "crate": "oxnet",
+        "path": "oxnet::Ipv6Net",
+        "version": "0.1.0"
+      }
     },
     "Ipv6Subnet": {
-      "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.",
+      "description": "Wraps an [`Ipv6Net`] with a compile-time prefix length.",
       "type": "object",
       "required": [
         "net"
diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml
index 52533016dc..167ac987ca 100644
--- a/sled-agent/Cargo.toml
+++ b/sled-agent/Cargo.toml
@@ -56,6 +56,7 @@ once_cell.workspace = true
 oximeter.workspace = true
 oximeter-instruments.workspace = true
 oximeter-producer.workspace = true
+oxnet.workspace = true
 propolis-client.workspace = true
 propolis-mock-server.workspace = true # Only used by the simulated sled agent
 rand = { workspace = true, features = ["getrandom"] }
diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs
index 2714c220c7..8727a01eae 100644
--- a/sled-agent/src/bootstrap/early_networking.rs
+++ b/sled-agent/src/bootstrap/early_networking.rs
@@ -23,7 +23,7 @@ use mg_admin_client::types::{
 use mg_admin_client::Client as MgdClient;
 use omicron_common::address::DENDRITE_PORT;
 use omicron_common::address::{MGD_PORT, MGS_PORT};
-use omicron_common::api::external::{BfdMode, ImportExportPolicy, IpNet};
+use omicron_common::api::external::{BfdMode, ImportExportPolicy};
 use omicron_common::api::internal::shared::{
     BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig,
     RackNetworkConfigV1, SwitchLocation, UplinkConfig,
@@ -34,6 +34,7 @@ use omicron_common::backoff::{
 };
 use omicron_common::OMICRON_DPD_TAG;
 use omicron_ddm_admin_client::DdmError;
+use oxnet::IpNet;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use slog::Logger;
@@ -515,12 +516,12 @@ impl<'a> EarlyNetworkSetup<'a> {
                                     .iter()
                                     .map(|x| match x {
                                         IpNet::V4(p) => Prefix::V4(Prefix4 {
-                                            length: p.prefix(),
-                                            value: p.ip(),
+                                            length: p.width(),
+                                            value: p.addr(),
                                         }),
                                         IpNet::V6(p) => Prefix::V6(Prefix6 {
-                                            length: p.prefix(),
-                                            value: p.ip(),
+                                            length: p.width(),
+                                            value: p.addr(),
                                         }),
                                     })
                                     .collect(),
@@ -537,12 +538,12 @@ impl<'a> EarlyNetworkSetup<'a> {
                                     .iter()
                                     .map(|x| match x {
                                         IpNet::V4(p) => Prefix::V4(Prefix4 {
-                                            length: p.prefix(),
-                                            value: p.ip(),
+                                            length: p.width(),
+                                            value: p.addr(),
                                         }),
                                         IpNet::V6(p) => Prefix::V6(Prefix6 {
-                                            length: p.prefix(),
-                                            value: p.ip(),
+                                            length: p.width(),
+                                            value: p.addr(),
                                         }),
                                     })
                                     .collect(),
diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs
index d4c17e20a6..369437d3aa 100644
--- a/sled-agent/src/bootstrap/server.rs
+++ b/sled-agent/src/bootstrap/server.rs
@@ -406,7 +406,7 @@ async fn start_sled_agent(
     ddmd_client.advertise_prefix(request.body.subnet);
 
     let az_prefix =
-        Ipv6Subnet::<AZ_PREFIX>::new(request.body.subnet.net().network());
+        Ipv6Subnet::<AZ_PREFIX>::new(request.body.subnet.net().addr());
     let addr = request.body.subnet.net().iter().nth(1).unwrap();
     let dns_servers = Resolver::servers_from_subnet(az_prefix);
     ddmd_client.enable_stats(
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index b4a6fe76f6..a763d61923 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -384,11 +384,11 @@ impl Plan {
             &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
         let rack_dns_servers = dns_subnets
             .into_iter()
-            .map(|dns_subnet| dns_subnet.dns_address().ip().into())
+            .map(|dns_subnet| dns_subnet.dns_address().addr().into())
             .collect::<Vec<IpAddr>>();
         for i in 0..dns_subnets.len() {
             let dns_subnet = &dns_subnets[i];
-            let ip = dns_subnet.dns_address().ip();
+            let ip = dns_subnet.dns_address().addr();
             let sled = {
                 let which_sled =
                     sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?;
@@ -419,7 +419,7 @@ impl Plan {
                     },
                     http_address,
                     dns_address,
-                    gz_address: dns_subnet.gz_address().ip(),
+                    gz_address: dns_subnet.gz_address().addr(),
                     gz_address_index: i.try_into().expect("Giant indices?"),
                 },
             });
@@ -961,39 +961,29 @@ impl ServicePortBuilder {
 
         let dns_v4_ips = Box::new(
             DNS_OPTE_IPV4_SUBNET
-                .0
-                .iter()
+                .addr_iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         let dns_v6_ips = Box::new(
-            DNS_OPTE_IPV6_SUBNET
-                .0
-                .iter()
-                .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
+            DNS_OPTE_IPV6_SUBNET.iter().skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         let nexus_v4_ips = Box::new(
             NEXUS_OPTE_IPV4_SUBNET
-                .0
-                .iter()
+                .addr_iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         let nexus_v6_ips = Box::new(
             NEXUS_OPTE_IPV6_SUBNET
-                .0
                 .iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         let ntp_v4_ips = Box::new(
             NTP_OPTE_IPV4_SUBNET
-                .0
-                .iter()
+                .addr_iter()
                 .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         let ntp_v6_ips = Box::new(
-            NTP_OPTE_IPV6_SUBNET
-                .0
-                .iter()
-                .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
+            NTP_OPTE_IPV6_SUBNET.iter().skip(NUM_INITIAL_RESERVED_IP_ADDRESSES),
         );
         Self {
             internal_services_ip_pool,
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index c9a5014402..ff10d4aed7 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -2543,7 +2543,7 @@ impl ServiceManager {
                                 );
                                 smfh.setprop(
                                     "config/rack-subnet",
-                                    &rack_subnet.net().ip().to_string(),
+                                    &rack_subnet.net().addr().to_string(),
                                 )?;
                             }
 
@@ -2711,7 +2711,7 @@ impl ServiceManager {
                                     // network address, without the mask.
                                     smfh.setprop(
                                         format!("config/techport{i}_prefix"),
-                                        prefix.net().network().to_string(),
+                                        prefix.net().addr(),
                                     )?;
                                 }
                                 smfh.setprop("config/pkt_source", pkt_source)?;
@@ -3995,12 +3995,12 @@ impl ServiceManager {
 
                                 info!(
                                     self.inner.log, "configuring wicketd";
-                                    "rack_subnet" => %rack_subnet.net().ip(),
+                                    "rack_subnet" => %rack_subnet.net().addr(),
                                 );
 
                                 smfh.setprop(
                                     "config/rack-subnet",
-                                    &rack_subnet.net().ip().to_string(),
+                                    &rack_subnet.net().addr().to_string(),
                                 )?;
 
                                 smfh.refresh()?;
@@ -5066,9 +5066,9 @@ mod test {
     fn test_bootstrap_addr_to_techport_prefixes() {
         let ba: Ipv6Addr = "fdb0:1122:3344:5566::".parse().unwrap();
         let prefixes = ServiceManager::bootstrap_addr_to_techport_prefixes(&ba);
-        assert!(prefixes.iter().all(|p| p.net().prefix() == 64));
-        let prefix0 = prefixes[0].net().network();
-        let prefix1 = prefixes[1].net().network();
+        assert!(prefixes.iter().all(|p| p.net().width() == 64));
+        let prefix0 = prefixes[0].net().prefix();
+        let prefix1 = prefixes[1].net().prefix();
         assert_eq!(prefix0.segments()[1..], ba.segments()[1..]);
         assert_eq!(prefix1.segments()[1..], ba.segments()[1..]);
         assert_eq!(prefix0.segments()[0], 0xfdb1);
diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs
index ebee0adc1f..e3ce4ad4e4 100644
--- a/sled-agent/src/sim/server.rs
+++ b/sled-agent/src/sim/server.rs
@@ -401,7 +401,7 @@ pub async fn run_standalone_server(
                     kind: NetworkInterfaceKind::Service { id },
                     name: "nexus".parse().unwrap(),
                     ip: NEXUS_OPTE_IPV4_SUBNET
-                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
                         .unwrap()
                         .into(),
                     mac: macs.next().unwrap(),
@@ -444,7 +444,7 @@ pub async fn run_standalone_server(
                     kind: NetworkInterfaceKind::Service { id },
                     name: "external-dns".parse().unwrap(),
                     ip: DNS_OPTE_IPV4_SUBNET
-                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1)
+                        .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1)
                         .unwrap()
                         .into(),
                     mac: macs.next().unwrap(),
diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version
old mode 100644
new mode 100755
diff --git a/wicket-common/Cargo.toml b/wicket-common/Cargo.toml
index 39efc2ce20..685514f399 100644
--- a/wicket-common/Cargo.toml
+++ b/wicket-common/Cargo.toml
@@ -11,6 +11,7 @@ workspace = true
 anyhow.workspace = true
 omicron-common.workspace = true
 owo-colors.workspace = true
+oxnet.workspace = true
 gateway-client.workspace = true
 ipnetwork.workspace = true
 maplit.workspace = true
diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs
index 5e89bfdde2..9221153398 100644
--- a/wicket-common/src/rack_setup.rs
+++ b/wicket-common/src/rack_setup.rs
@@ -9,7 +9,6 @@ pub use gateway_client::types::SpType as GatewaySpType;
 use ipnetwork::IpNetwork;
 use omicron_common::address;
 use omicron_common::api::external::ImportExportPolicy;
-use omicron_common::api::external::IpNet;
 use omicron_common::api::external::Name;
 use omicron_common::api::external::SwitchLocation;
 use omicron_common::api::internal::shared::AllowedSourceIps;
@@ -21,6 +20,7 @@ use omicron_common::api::internal::shared::RouteConfig;
 use omicron_common::update::ArtifactHash;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
+use oxnet::IpNet;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs
index f74baa3f2c..941f5f7dc1 100644
--- a/wicket/src/ui/panes/rack_setup.rs
+++ b/wicket/src/ui/panes/rack_setup.rs
@@ -1093,8 +1093,8 @@ fn rss_config_text<'a>(
         Some(AllowedSourceIps::List(list)) => list
             .iter()
             .map(|net| {
-                let as_str = if net.first_address() == net.last_address() {
-                    net.ip().to_string()
+                let as_str = if net.is_host_net() {
+                    net.addr().to_string()
                 } else {
                     net.to_string()
                 };
diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs
index 9f2910bcc2..c90f672500 100644
--- a/wicketd/src/rss_config.rs
+++ b/wicketd/src/rss_config.rs
@@ -652,7 +652,7 @@ fn validate_rack_network_config(
     // TODO Add more client side checks on `rack_network_config` contents?
 
     Ok(bootstrap_agent_client::types::RackNetworkConfigV1 {
-        rack_subnet: RACK_SUBNET.net(),
+        rack_subnet: RACK_SUBNET.net().into(),
         infra_ip_first: config.infra_ip_first,
         infra_ip_last: config.infra_ip_last,
         ports: config
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index d8c9e7c634..3b5e1917d0 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -89,7 +89,7 @@ regex-automata = { version = "0.4.6", default-features = false, features = ["dfa
 regex-syntax = { version = "0.8.3" }
 reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] }
 ring = { version = "0.17.8", features = ["std"] }
-schemars = { version = "0.8.19", features = ["bytes", "chrono", "uuid1"] }
+schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] }
 scopeguard = { version = "1.2.0" }
 semver = { version = "1.0.23", features = ["serde"] }
 serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] }
@@ -194,7 +194,7 @@ regex-automata = { version = "0.4.6", default-features = false, features = ["dfa
 regex-syntax = { version = "0.8.3" }
 reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] }
 ring = { version = "0.17.8", features = ["std"] }
-schemars = { version = "0.8.19", features = ["bytes", "chrono", "uuid1"] }
+schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] }
 scopeguard = { version = "1.2.0" }
 semver = { version = "1.0.23", features = ["serde"] }
 serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] }

From ed4dbf22fb2105965ba6a3b0a42f8d025b370ae8 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Fri, 24 May 2024 09:51:20 +0000
Subject: [PATCH 03/28] Update taiki-e/install-action digest to 7491b90 (#5818)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`0fc5600` -> `7491b90`](https://togithub.com/taiki-e/install-action/compare/0fc5600...7491b90) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone
America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone
America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Renovate
Bot](https://togithub.com/renovatebot/renovate).

<!--renovate-debug:eyJjcmVhdGVkSW5WZXIiOiIzNy4zNzUuMiIsInVwZGF0ZWRJblZlciI6IjM3LjM3NS4yIiwidGFyZ2V0QnJhbmNoIjoibWFpbiIsImxhYmVscyI6WyJkZXBlbmRlbmNpZXMiXX0=-->

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 5c861c56bf..236b9b5023 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@0fc560009ad92371154ca652dcf2620d19331eee # v2
+        uses: taiki-e/install-action@7491b900536dd0dae2e47ce7c17f140e46328dc4 # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date

From 27e6b34042e0fa9105fa1c23de96f9f23640e9ce Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Fri, 24 May 2024 10:36:13 -0700
Subject: [PATCH 04/28] [nexus] add background task for cleaning up abandoned
 VMMs (#5812)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**Note**: This change is part of the ongoing work on instance lifecycle
management that I'm working on in PR #5749. It's not actually necessary
on its own; it's just a component of the upcoming instance updater saga.
However, I thought it would be easier to review if I factored out this
change into a separate PR that can be reviewed and merged on its own.

The instance update saga (see PR #5749) will only clean up after VMMs
whose IDs appear in an `instance` record. When a live migration finishes
(successfully or not), we want to allow a new migration to begin as soon
as possible, which means we have to unlink the “unused” side of the
migration --- the source if migration succeeded, or the target if it
failed --- from the instance, even though that VMM may not be fully
destroyed yet. Once this happens, the instance update saga will no
longer be able to clean up these VMMs, so we’ll need a separate task
that cleans up these "abandoned" VMMs in the background.

This branch introduces an `abandoned_vmm_reaper` background task that's
responsible for doing this. It queries the database to list VMMs which
are (see the sketch after this list):

- in the `Destroyed` state
- not deleted yet (i.e. `time_deleted` IS NULL)
- not pointed to by their corresponding instances (neither the
  `active_propolis_id` nor the `target_propolis_id` equals the VMM's ID)
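
As an illustration, here is a minimal Rust sketch of that selection
predicate. The real task expresses this as a database query; the types and
field names below are hypothetical stand-ins, not the actual omicron models.

```rust
use std::time::SystemTime;

/// Hypothetical, simplified stand-ins for the VMM and instance records.
struct Vmm {
    id: u64,
    destroyed: bool,
    time_deleted: Option<SystemTime>,
}

struct Instance {
    active_propolis_id: Option<u64>,
    target_propolis_id: Option<u64>,
}

/// A VMM is "abandoned" when it is destroyed, not yet soft-deleted, and no
/// longer referenced by its instance as either the active or target VMM.
fn is_abandoned(vmm: &Vmm, instance: &Instance) -> bool {
    vmm.destroyed
        && vmm.time_deleted.is_none()
        && instance.active_propolis_id != Some(vmm.id)
        && instance.target_propolis_id != Some(vmm.id)
}

fn main() {
    let vmm = Vmm { id: 7, destroyed: true, time_deleted: None };
    let instance =
        Instance { active_propolis_id: Some(3), target_propolis_id: None };
    assert!(is_abandoned(&vmm, &instance));
}
```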

For any VMMs returned by this query, the `abandoned_vmm_reaper` task
will:
- remove the `sled_resource` reservation for that VMM
- set the `time_deleted` on the VMM record if it was not already set.

This cleanup process will be executed periodically in the background.
Eventually, the background task will also be explicitly triggered by the
instance update saga when it knows it has abandoned a VMM.

As an aside, I noticed that the current implementation of
`DataStore::vmm_mark_deleted` will always unconditionally set the
`time_deleted` field on a VMM record, even if it's already set. This is
"probably fine" for overall correctness: the VMM remains deleted, so the
operation is still idempotent-ish. But, it's not *great*, as it means
that any queries for VMMs deleted before a certain timestamp may not be
strictly correct, and we're updating the database more frequently than
we really need to. So, I've gone ahead and changed it to only set
`time_deleted` if the record's `time_deleted` is null, using
`check_if_exists` so that the method still returns `Ok` if the record
was already deleted --- the caller can inspect the returned `bool` to
determine whether or not they were the actual deleter, but the query
still doesn't fail.
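
In SQL terms, the new behavior is roughly the following (a sketch, not the
literal statement Diesel generates; the real query also restricts the update
to VMMs in a valid state and uses `check_if_exists` to report whether the
row was actually updated):

```sql
-- Hypothetical sketch: only stamp time_deleted if it is still unset.
UPDATE vmm
SET time_deleted = now()
WHERE id = $1
  AND time_deleted IS NULL;
```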
---
 dev-tools/omdb/src/bin/omdb/nexus.rs          |  53 ++
 dev-tools/omdb/tests/env.out                  |  15 +
 dev-tools/omdb/tests/successes.out            |  15 +
 nexus-config/src/nexus_config.rs              |  15 +
 nexus/db-queries/src/db/datastore/vmm.rs      |  77 ++-
 nexus/examples/config.toml                    |   1 +
 .../app/background/abandoned_vmm_reaper.rs    | 467 ++++++++++++++++++
 nexus/src/app/background/init.rs              |  22 +-
 nexus/src/app/background/mod.rs               |   1 +
 nexus/tests/config.test.toml                  |   1 +
 smf/nexus/multi-sled/config-partial.toml      |   1 +
 smf/nexus/single-sled/config-partial.toml     |   1 +
 12 files changed, 663 insertions(+), 6 deletions(-)
 create mode 100644 nexus/src/app/background/abandoned_vmm_reaper.rs

diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index 22fe1894cf..09ae82b5d9 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -996,6 +996,59 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
                 eprintln!("    unexpected return value from task: {:?}", val)
             }
         };
+    } else if name == "abandoned_vmm_reaper" {
+        #[derive(Deserialize)]
+        struct TaskSuccess {
+            /// total number of abandoned VMMs found
+            found: usize,
+
+            /// number of abandoned VMM records that were deleted
+            vmms_deleted: usize,
+
+            /// number of abandoned VMM records that were already deleted when
+            /// we tried to delete them.
+            vmms_already_deleted: usize,
+
+            /// sled resource reservations that were released
+            sled_reservations_deleted: usize,
+
+            /// number of errors that occurred during the activation
+            error_count: usize,
+
+            /// the last error that occurred during execution.
+            error: Option<String>,
+        }
+        match serde_json::from_value::<TaskSuccess>(details.clone()) {
+            Err(error) => eprintln!(
+                "warning: failed to interpret task details: {:?}: {:?}",
+                error, details
+            ),
+            Ok(TaskSuccess {
+                found,
+                vmms_deleted,
+                vmms_already_deleted,
+                sled_reservations_deleted,
+                error_count,
+                error,
+            }) => {
+                if let Some(error) = error {
+                    println!("    task did not complete successfully!");
+                    println!("      total errors: {error_count}");
+                    println!("      most recent error: {error}");
+                }
+
+                println!("    total abandoned VMMs found: {found}");
+                println!("      VMM records deleted: {vmms_deleted}");
+                println!(
+                    "      VMM records already deleted by another Nexus: {}",
+                    vmms_already_deleted,
+                );
+                println!(
+                    "    sled resource reservations deleted: {}",
+                    sled_reservations_deleted,
+                );
+            }
+        };
     } else {
         println!(
             "warning: unknown background task: {:?} \
diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out
index d187c47d18..ccb824cda4 100644
--- a/dev-tools/omdb/tests/env.out
+++ b/dev-tools/omdb/tests/env.out
@@ -25,6 +25,11 @@ EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDA
 termination: Exited(0)
 ---------------------------------------------
 stdout:
+task: "abandoned_vmm_reaper"
+    deletes sled reservations for VMMs that have been abandoned by their
+    instances
+
+
 task: "bfd_manager"
     Manages bidirectional fowarding detection (BFD) configuration on rack
     switches
@@ -140,6 +145,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"]
 termination: Exited(0)
 ---------------------------------------------
 stdout:
+task: "abandoned_vmm_reaper"
+    deletes sled reservations for VMMs that have been abandoned by their
+    instances
+
+
 task: "bfd_manager"
     Manages bidirectional fowarding detection (BFD) configuration on rack
     switches
@@ -242,6 +252,11 @@ EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "backgr
 termination: Exited(0)
 ---------------------------------------------
 stdout:
+task: "abandoned_vmm_reaper"
+    deletes sled reservations for VMMs that have been abandoned by their
+    instances
+
+
 task: "bfd_manager"
     Manages bidirectional fowarding detection (BFD) configuration on rack
     switches
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out
index db6e5fde87..0f588069e4 100644
--- a/dev-tools/omdb/tests/successes.out
+++ b/dev-tools/omdb/tests/successes.out
@@ -202,6 +202,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"]
 termination: Exited(0)
 ---------------------------------------------
 stdout:
+task: "abandoned_vmm_reaper"
+    deletes sled reservations for VMMs that have been abandoned by their
+    instances
+
+
 task: "bfd_manager"
     Manages bidirectional fowarding detection (BFD) configuration on rack
     switches
@@ -380,6 +385,16 @@ task: "blueprint_executor"
     started at <REDACTED     TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
     last completion reported error: no blueprint
 
+task: "abandoned_vmm_reaper"
+  configured period: every 1m
+  currently executing: no
+  last completed activation: <REDACTED ITERATIONS>, triggered by an explicit signal
+    started at <REDACTED     TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
+    total abandoned VMMs found: 0
+      VMM records deleted: 0
+      VMM records already deleted by another Nexus: 0
+    sled resource reservations deleted: 0
+
 task: "bfd_manager"
   configured period: every 30s
   currently executing: no
diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs
index 08517026ef..321064df49 100644
--- a/nexus-config/src/nexus_config.rs
+++ b/nexus-config/src/nexus_config.rs
@@ -381,6 +381,8 @@ pub struct BackgroundTaskConfig {
     pub service_firewall_propagation: ServiceFirewallPropagationConfig,
     /// configuration for v2p mapping propagation task
     pub v2p_mapping_propagation: V2PMappingPropagationConfig,
+    /// configuration for abandoned VMM reaper task
+    pub abandoned_vmm_reaper: AbandonedVmmReaperConfig,
 }
 
 #[serde_as]
@@ -549,6 +551,14 @@ pub struct V2PMappingPropagationConfig {
     pub period_secs: Duration,
 }
 
+#[serde_as]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct AbandonedVmmReaperConfig {
+    /// period (in seconds) for periodic activations of this background task
+    #[serde_as(as = "DurationSeconds<u64>")]
+    pub period_secs: Duration,
+}
+
 /// Configuration for a nexus server
 #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct PackageConfig {
@@ -788,6 +798,7 @@ mod test {
             instance_watcher.period_secs = 30
             service_firewall_propagation.period_secs = 300
             v2p_mapping_propagation.period_secs = 30
+            abandoned_vmm_reaper.period_secs = 60
             [default_region_allocation_strategy]
             type = "random"
             seed = 0
@@ -926,6 +937,9 @@ mod test {
                         v2p_mapping_propagation: V2PMappingPropagationConfig {
                             period_secs: Duration::from_secs(30)
                         },
+                        abandoned_vmm_reaper: AbandonedVmmReaperConfig {
+                            period_secs: Duration::from_secs(60),
+                        }
                     },
                     default_region_allocation_strategy:
                         crate::nexus_config::RegionAllocationStrategy::Random {
@@ -995,6 +1009,7 @@ mod test {
             instance_watcher.period_secs = 30
             service_firewall_propagation.period_secs = 300
             v2p_mapping_propagation.period_secs = 30
+            abandoned_vmm_reaper.period_secs = 60
             [default_region_allocation_strategy]
             type = "random"
             "##,
diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs
index a837d1289b..b8fb47de26 100644
--- a/nexus/db-queries/src/db/datastore/vmm.rs
+++ b/nexus/db-queries/src/db/datastore/vmm.rs
@@ -9,8 +9,10 @@ use crate::authz;
 use crate::context::OpContext;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
+use crate::db::model::InstanceState as DbInstanceState;
 use crate::db::model::Vmm;
 use crate::db::model::VmmRuntimeState;
+use crate::db::pagination::paginated;
 use crate::db::schema::vmm::dsl;
 use crate::db::update_and_check::UpdateAndCheck;
 use crate::db::update_and_check::UpdateStatus;
@@ -18,7 +20,10 @@ use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
 use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
+use omicron_common::api::external::InstanceState as ApiInstanceState;
+use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::LookupType;
 use omicron_common::api::external::ResourceType;
@@ -50,9 +55,6 @@ impl DataStore {
         opctx: &OpContext,
         vmm_id: &Uuid,
     ) -> UpdateResult<bool> {
-        use crate::db::model::InstanceState as DbInstanceState;
-        use omicron_common::api::external::InstanceState as ApiInstanceState;
-
         let valid_states = vec![
             DbInstanceState::new(ApiInstanceState::Destroyed),
             DbInstanceState::new(ApiInstanceState::Failed),
@@ -61,9 +63,15 @@ impl DataStore {
         let updated = diesel::update(dsl::vmm)
             .filter(dsl::id.eq(*vmm_id))
             .filter(dsl::state.eq_any(valid_states))
+            .filter(dsl::time_deleted.is_null())
             .set(dsl::time_deleted.eq(Utc::now()))
-            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .check_if_exists::<Vmm>(*vmm_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
             .await
+            .map(|r| match r.status {
+                UpdateStatus::Updated => true,
+                UpdateStatus::NotUpdatedButExists => false,
+            })
             .map_err(|e| {
                 public_error_from_diesel(
                     e,
@@ -74,7 +82,7 @@ impl DataStore {
                 )
             })?;
 
-        Ok(updated != 0)
+        Ok(updated)
     }
 
     pub async fn vmm_fetch(
@@ -164,4 +172,63 @@ impl DataStore {
 
         Ok(vmm)
     }
+
+    /// Lists VMMs which have been abandoned by their instances after a
+    /// migration and are in need of cleanup.
+    ///
+    /// A VMM is considered "abandoned" if (and only if):
+    ///
+    /// - It is in the `Destroyed` state.
+    /// - It is not currently running an instance, and it is also not the
+    ///   migration target of any instance (i.e. it is not pointed to by
+    ///   any instance record's `active_propolis_id` and `target_propolis_id`
+    ///   fields).
+    /// - It has not been deleted yet.
+    pub async fn vmm_list_abandoned(
+        &self,
+        opctx: &OpContext,
+        pagparams: &DataPageParams<'_, Uuid>,
+    ) -> ListResultVec<Vmm> {
+        use crate::db::schema::instance::dsl as instance_dsl;
+        let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed);
+        paginated(dsl::vmm, dsl::id, pagparams)
+            // In order to be considered "abandoned", a VMM must be:
+            // - in the `Destroyed` state
+            .filter(dsl::state.eq(destroyed))
+            // - not deleted yet
+            .filter(dsl::time_deleted.is_null())
+            // - not pointed to by any instance's `active_propolis_id` or
+            //   `target_propolis_id`.
+            //
+            .left_join(
+                // Left join with the `instance` table on the VMM's instance ID, so
+                // that we can check if the instance pointed to by this VMM (if
+                // any exists) has this VMM pointed to by its
+                // `active_propolis_id` or `target_propolis_id` fields.
+                instance_dsl::instance
+                    .on(instance_dsl::id.eq(dsl::instance_id)),
+            )
+            .filter(
+                dsl::id
+                    .nullable()
+                    .ne(instance_dsl::active_propolis_id)
+                    // In SQL, *all* comparisons with NULL are `false`, even `!=
+                    // NULL`, so we have to explicitly check for nulls here, or
+                    // else VMMs whose instances have no `active_propolis_id`
+                    // will not be considered abandoned (incorrectly).
+                    .or(instance_dsl::active_propolis_id.is_null()),
+            )
+            .filter(
+                dsl::id
+                    .nullable()
+                    .ne(instance_dsl::target_propolis_id)
+                    // As above, we must add this clause because SQL nulls have
+                    // the most irritating behavior possible.
+                    .or(instance_dsl::target_propolis_id.is_null()),
+            )
+            .select(Vmm::as_select())
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index cba2edb7e6..d90c240e8e 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -117,6 +117,7 @@ region_replacement.period_secs = 30
 instance_watcher.period_secs = 30
 service_firewall_propagation.period_secs = 300
 v2p_mapping_propagation.period_secs = 30
+abandoned_vmm_reaper.period_secs = 60
 
 [default_region_allocation_strategy]
 # allocate region on 3 random distinct zpools, on 3 random distinct sleds.
diff --git a/nexus/src/app/background/abandoned_vmm_reaper.rs b/nexus/src/app/background/abandoned_vmm_reaper.rs
new file mode 100644
index 0000000000..b24c543575
--- /dev/null
+++ b/nexus/src/app/background/abandoned_vmm_reaper.rs
@@ -0,0 +1,467 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Ensures abandoned VMMs are fully destroyed.
+//!
+//! A VMM is considered "abandoned" if (and only if):
+//!
+//! - It is in the `Destroyed` state.
+//! - It is not currently running an instance, and it is also not the
+//!   migration target of any instance (i.e. it is not pointed to by
+//!   any instance record's `active_propolis_id` and `target_propolis_id`
+//!   fields).
+//! - It has not been deleted yet.
+//!
+//! VMMs are abandoned when the instance they are responsible for migrates.
+//! Should the migration succeed, the previously occupied VMM process is now
+//! abandoned. If a migration is attempted but fails, the *target* VMM is now
+//! abandoned, as the instance remains on the source VMM.
+//!
+//! Such VMMs may be deleted fairly simply: any sled resources reserved for the
+//! VMM process can be deallocated, and the VMM record in the database is then
+//! marked as deleted. Note that reaping abandoned VMMs does not require
+//! deallocating virtual provisioning resources, NAT entries, and other such
+//! resources which are owned by the *instance*, rather than the VMM process;
+//! this task is only responsible for cleaning up VMMs left behind by an
+//! instance that has moved to *another* VMM process. The instance itself
+//! remains alive and continues to own its virtual provisioning resources.
+//!
+//! Cleanup of instance resources when an instance's *active* VMM is destroyed
+//! is handled elsewhere, by `notify_instance_updated` and (eventually) the
+//! `instance-update` saga.
+
+use super::common::BackgroundTask;
+use anyhow::Context;
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use nexus_db_model::Vmm;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::pagination::Paginator;
+use nexus_db_queries::db::DataStore;
+use std::num::NonZeroU32;
+use std::sync::Arc;
+
+/// Background task that searches for abandoned VMM records and deletes them.
+pub struct AbandonedVmmReaper {
+    datastore: Arc<DataStore>,
+}
+
+#[derive(Debug, Default)]
+struct ActivationResults {
+    found: usize,
+    sled_reservations_deleted: usize,
+    vmms_deleted: usize,
+    vmms_already_deleted: usize,
+    error_count: usize,
+}
+
+const MAX_BATCH: NonZeroU32 = unsafe {
+    // Safety: last time I checked, 100 was greater than zero.
+    NonZeroU32::new_unchecked(100)
+};
+
+impl AbandonedVmmReaper {
+    pub fn new(datastore: Arc<DataStore>) -> Self {
+        Self { datastore }
+    }
+
+    /// List abandoned VMMs and clean up all of their database records.
+    async fn reap_all(
+        &mut self,
+        results: &mut ActivationResults,
+        opctx: &OpContext,
+    ) -> Result<(), anyhow::Error> {
+        slog::info!(opctx.log, "Abandoned VMM reaper running");
+
+        let mut paginator = Paginator::new(MAX_BATCH);
+        let mut last_err = Ok(());
+        while let Some(p) = paginator.next() {
+            let vmms = self
+                .datastore
+                .vmm_list_abandoned(opctx, &p.current_pagparams())
+                .await
+                .context("failed to list abandoned VMMs")?;
+            paginator = p.found_batch(&vmms, &|vmm| vmm.id);
+            self.reap_batch(results, &mut last_err, opctx, &vmms).await;
+        }
+
+        last_err
+    }
+
+    /// Clean up a batch of abandoned VMMs.
+    ///
+    /// This is separated out from `reap_all` to facilitate testing situations
+    /// where we race with another Nexus instance to delete an abandoned VMM. In
+    /// order to deterministically simulate such cases, we have to perform the
+    /// query to list abandoned VMMs, ensure that the VMM record is deleted, and
+    /// *then* perform the cleanup with the stale list of abandoned VMMs, rather
+    /// than doing it all in one go. Thus, this is factored out.
+    async fn reap_batch(
+        &mut self,
+        results: &mut ActivationResults,
+        last_err: &mut Result<(), anyhow::Error>,
+        opctx: &OpContext,
+        vmms: &[Vmm],
+    ) {
+        results.found += vmms.len();
+        slog::debug!(opctx.log, "Found abandoned VMMs"; "count" => vmms.len());
+
+        for vmm in vmms {
+            let vmm_id = vmm.id;
+            slog::trace!(opctx.log, "Deleting abandoned VMM"; "vmm" => %vmm_id);
+            // Attempt to remove the abandoned VMM's sled resource reservation.
+            match self.datastore.sled_reservation_delete(opctx, vmm_id).await {
+                Ok(_) => {
+                    slog::trace!(
+                        opctx.log,
+                        "Deleted abandoned VMM's sled reservation";
+                        "vmm" => %vmm_id,
+                    );
+                    results.sled_reservations_deleted += 1;
+                }
+                Err(e) => {
+                    slog::warn!(
+                        opctx.log,
+                        "Failed to delete sled reservation for abandoned VMM";
+                        "vmm" => %vmm_id,
+                        "error" => %e,
+                    );
+                    results.error_count += 1;
+                    *last_err = Err(e).with_context(|| {
+                        format!(
+                            "failed to delete sled reservation for VMM {vmm_id}"
+                        )
+                    });
+                }
+            }
+
+            // Now, attempt to mark the VMM record as deleted.
+            match self.datastore.vmm_mark_deleted(opctx, &vmm_id).await {
+                Ok(true) => {
+                    slog::trace!(
+                        opctx.log,
+                        "Deleted abandoned VMM";
+                        "vmm" => %vmm_id,
+                    );
+                    results.vmms_deleted += 1;
+                }
+                Ok(false) => {
+                    slog::trace!(
+                        opctx.log,
+                        "Abandoned VMM was already deleted";
+                        "vmm" => %vmm_id,
+                    );
+                    results.vmms_already_deleted += 1;
+                }
+                Err(e) => {
+                    slog::warn!(
+                        opctx.log,
+                        "Failed to mark abandoned VMM as deleted";
+                        "vmm" => %vmm_id,
+                        "error" => %e,
+                    );
+                    results.error_count += 1;
+                    *last_err = Err(e).with_context(|| {
+                        format!("failed to mark VMM {vmm_id} as deleted")
+                    });
+                }
+            }
+        }
+    }
+}
+
+impl BackgroundTask for AbandonedVmmReaper {
+    fn activate<'a>(
+        &'a mut self,
+        opctx: &'a OpContext,
+    ) -> BoxFuture<'a, serde_json::Value> {
+        async move {
+            let mut results = ActivationResults::default();
+            let error = match self.reap_all(&mut results, opctx).await {
+                Ok(_) => {
+                    slog::info!(opctx.log, "Abandoned VMMs reaped";
+                        "found" => results.found,
+                        "sled_reservations_deleted" => results.sled_reservations_deleted,
+                        "vmms_deleted" => results.vmms_deleted,
+                        "vmms_already_deleted" => results.vmms_already_deleted,
+                    );
+                    None
+                }
+                Err(err) => {
+                    slog::error!(opctx.log, "Abandoned VMM reaper activation failed";
+                        "error" => %err,
+                        "found" => results.found,
+                        "sled_reservations_deleted" => results.sled_reservations_deleted,
+                        "vmms_deleted" => results.vmms_deleted,
+                        "vmms_already_deleted" => results.vmms_already_deleted,
+                    );
+                    Some(err.to_string())
+                }
+            };
+            serde_json::json!({
+                "found": results.found,
+                "vmms_deleted": results.vmms_deleted,
+                "vmms_already_deleted": results.vmms_already_deleted,
+                "sled_reservations_deleted": results.sled_reservations_deleted,
+                "error_count": results.error_count,
+                "error": error,
+            })
+        }
+        .boxed()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use chrono::Utc;
+    use nexus_db_model::ByteCount;
+    use nexus_db_model::Generation;
+    use nexus_db_model::InstanceState;
+    use nexus_db_model::Resources;
+    use nexus_db_model::SledResource;
+    use nexus_db_model::SledResourceKind;
+    use nexus_db_model::Vmm;
+    use nexus_db_model::VmmRuntimeState;
+    use nexus_test_utils::resource_helpers;
+    use nexus_test_utils_macros::nexus_test;
+    use omicron_common::api::external::InstanceState as ApiInstanceState;
+    use uuid::Uuid;
+
+    type ControlPlaneTestContext =
+        nexus_test_utils::ControlPlaneTestContext<crate::Server>;
+
+    const PROJECT_NAME: &str = "carcosa";
+
+    struct TestFixture {
+        destroyed_vmm_id: Uuid,
+    }
+
+    impl TestFixture {
+        async fn setup(
+            client: &dropshot::test_util::ClientTestContext,
+            datastore: &Arc<DataStore>,
+            opctx: &OpContext,
+        ) -> Self {
+            resource_helpers::create_default_ip_pool(&client).await;
+
+            let _project =
+                resource_helpers::create_project(client, PROJECT_NAME).await;
+            let instance = resource_helpers::create_instance(
+                client,
+                PROJECT_NAME,
+                "cassilda",
+            )
+            .await;
+
+            let destroyed_vmm_id = Uuid::new_v4();
+            datastore
+                .vmm_insert(
+                    &opctx,
+                    dbg!(Vmm {
+                        id: destroyed_vmm_id,
+                        time_created: Utc::now(),
+                        time_deleted: None,
+                        instance_id: instance.identity.id,
+                        sled_id: Uuid::new_v4(),
+                        propolis_ip: "::1".parse().unwrap(),
+                        propolis_port: 12345.into(),
+                        runtime: VmmRuntimeState {
+                            state: InstanceState::new(
+                                ApiInstanceState::Destroyed
+                            ),
+                            time_state_updated: Utc::now(),
+                            gen: Generation::new(),
+                        }
+                    }),
+                )
+                .await
+                .expect("destroyed vmm record should be created successfully");
+            let resources = Resources::new(
+                1,
+                // Just require the bare non-zero amount of RAM.
+                ByteCount::try_from(1024).unwrap(),
+                ByteCount::try_from(1024).unwrap(),
+            );
+            let constraints =
+                nexus_db_model::SledReservationConstraints::none();
+            dbg!(datastore
+                .sled_reservation_create(
+                    &opctx,
+                    destroyed_vmm_id,
+                    SledResourceKind::Instance,
+                    resources.clone(),
+                    constraints,
+                )
+                .await
+                .expect("sled reservation should be created successfully"));
+            Self { destroyed_vmm_id }
+        }
+
+        async fn assert_reaped(&self, datastore: &DataStore) {
+            use async_bb8_diesel::AsyncRunQueryDsl;
+            use diesel::{
+                ExpressionMethods, OptionalExtension, QueryDsl,
+                SelectableHelper,
+            };
+            use nexus_db_queries::db::schema::sled_resource::dsl as sled_resource_dsl;
+            use nexus_db_queries::db::schema::vmm::dsl as vmm_dsl;
+
+            let conn = datastore.pool_connection_for_tests().await.unwrap();
+            let fetched_vmm = vmm_dsl::vmm
+                .filter(vmm_dsl::id.eq(self.destroyed_vmm_id))
+                .filter(vmm_dsl::time_deleted.is_null())
+                .select(Vmm::as_select())
+                .first_async::<Vmm>(&*conn)
+                .await
+                .optional()
+                .expect("VMM query should succeed");
+            assert!(
+                dbg!(fetched_vmm).is_none(),
+                "VMM record should have been deleted"
+            );
+
+            let fetched_sled_resource = sled_resource_dsl::sled_resource
+                .filter(sled_resource_dsl::id.eq(self.destroyed_vmm_id))
+                .select(SledResource::as_select())
+                .first_async::<SledResource>(&*conn)
+                .await
+                .optional()
+                .expect("sled resource query should succeed");
+            assert!(
+                dbg!(fetched_sled_resource).is_none(),
+                "sled resource record should have been deleted"
+            );
+        }
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_abandoned_vmms_are_reaped(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = OpContext::for_tests(
+            cptestctx.logctx.log.clone(),
+            datastore.clone(),
+        );
+        let fixture =
+            TestFixture::setup(&cptestctx.external_client, datastore, &opctx)
+                .await;
+
+        let mut task = AbandonedVmmReaper::new(datastore.clone());
+
+        let mut results = ActivationResults::default();
+        dbg!(task.reap_all(&mut results, &opctx,).await)
+            .expect("activation completes successfully");
+        dbg!(&results);
+
+        assert_eq!(results.vmms_deleted, 1);
+        assert_eq!(results.sled_reservations_deleted, 1);
+        assert_eq!(results.vmms_already_deleted, 0);
+        assert_eq!(results.error_count, 0);
+        fixture.assert_reaped(datastore).await;
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn vmm_already_deleted(cptestctx: &ControlPlaneTestContext) {
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = OpContext::for_tests(
+            cptestctx.logctx.log.clone(),
+            datastore.clone(),
+        );
+        let fixture =
+            TestFixture::setup(&cptestctx.external_client, datastore, &opctx)
+                .await;
+
+        // For this test, we separate the database query run by the background
+        // task to list abandoned VMMs from the actual cleanup of those VMMs, in
+        // order to simulate a condition where the VMM record was deleted
+        // between when the listing query was run and when the bg task attempted
+        // to delete the VMM record.
+        let paginator = Paginator::new(MAX_BATCH);
+        let p = paginator.next().unwrap();
+        let abandoned_vmms = datastore
+            .vmm_list_abandoned(&opctx, &p.current_pagparams())
+            .await
+            .expect("must list abandoned vmms");
+
+        assert!(!abandoned_vmms.is_empty());
+
+        datastore
+            .vmm_mark_deleted(&opctx, &fixture.destroyed_vmm_id)
+            .await
+            .expect("simulate another nexus marking the VMM deleted");
+
+        let mut results = ActivationResults::default();
+        let mut last_err = Ok(());
+        let mut task = AbandonedVmmReaper::new(datastore.clone());
+        task.reap_batch(&mut results, &mut last_err, &opctx, &abandoned_vmms)
+            .await;
+        dbg!(last_err).expect("should not have errored");
+        dbg!(&results);
+
+        assert_eq!(results.found, 1);
+        assert_eq!(results.vmms_deleted, 0);
+        assert_eq!(results.sled_reservations_deleted, 1);
+        assert_eq!(results.vmms_already_deleted, 1);
+        assert_eq!(results.error_count, 0);
+
+        fixture.assert_reaped(datastore).await
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn sled_resource_already_deleted(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = OpContext::for_tests(
+            cptestctx.logctx.log.clone(),
+            datastore.clone(),
+        );
+        let fixture =
+            TestFixture::setup(&cptestctx.external_client, datastore, &opctx)
+                .await;
+
+        // For this test, we separate the database query run by the background
+        // task to list abandoned VMMs from the actual cleanup of those VMMs, in
+        // order to simulate a condition where the sled reservation record was
+        // deleted between when the listing query was run and when the bg task
+        // attempted to delete the sled reservation.
+        let paginator = Paginator::new(MAX_BATCH);
+        let p = paginator.next().unwrap();
+        let abandoned_vmms = datastore
+            .vmm_list_abandoned(&opctx, &p.current_pagparams())
+            .await
+            .expect("must list abandoned vmms");
+
+        assert!(!abandoned_vmms.is_empty());
+
+        datastore
+            .sled_reservation_delete(&opctx, fixture.destroyed_vmm_id)
+            .await
+            .expect(
+                "simulate another nexus marking the sled reservation deleted",
+            );
+
+        let mut results = ActivationResults::default();
+        let mut last_err = Ok(());
+        let mut task = AbandonedVmmReaper::new(datastore.clone());
+        task.reap_batch(&mut results, &mut last_err, &opctx, &abandoned_vmms)
+            .await;
+        dbg!(last_err).expect("should not have errored");
+        dbg!(&results);
+
+        assert_eq!(results.found, 1);
+        assert_eq!(results.vmms_deleted, 1);
+        assert_eq!(results.sled_reservations_deleted, 1);
+        assert_eq!(results.vmms_already_deleted, 0);
+        assert_eq!(results.error_count, 0);
+
+        fixture.assert_reaped(datastore).await
+    }
+}
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs
index f7b7291c59..a87c53860d 100644
--- a/nexus/src/app/background/init.rs
+++ b/nexus/src/app/background/init.rs
@@ -4,6 +4,7 @@
 
 //! Background task initialization
 
+use super::abandoned_vmm_reaper;
 use super::bfd;
 use super::blueprint_execution;
 use super::blueprint_load;
@@ -104,6 +105,10 @@ pub struct BackgroundTasks {
     /// task handle for propagation of VPC firewall rules for Omicron services
     /// with external network connectivity,
     pub task_service_firewall_propagation: common::TaskHandle,
+
+    /// task handle for deletion of database records for VMMs abandoned by their
+    /// instances.
+    pub task_abandoned_vmm_reaper: common::TaskHandle,
 }
 
 impl BackgroundTasks {
@@ -397,12 +402,26 @@ impl BackgroundTasks {
             ),
             config.service_firewall_propagation.period_secs,
             Box::new(service_firewall_rules::ServiceRulePropagator::new(
-                datastore,
+                datastore.clone(),
             )),
             opctx.child(BTreeMap::new()),
             vec![],
         );
 
+        // Background task: abandoned VMM reaping
+        let task_abandoned_vmm_reaper = driver.register(
+        String::from("abandoned_vmm_reaper"),
+        String::from(
+            "deletes sled reservations for VMMs that have been abandoned by their instances",
+        ),
+        config.abandoned_vmm_reaper.period_secs,
+        Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new(
+            datastore,
+        )),
+        opctx.child(BTreeMap::new()),
+        vec![],
+    );
+
         BackgroundTasks {
             driver,
             task_internal_dns_config,
@@ -425,6 +444,7 @@ impl BackgroundTasks {
             task_region_replacement,
             task_instance_watcher,
             task_service_firewall_propagation,
+            task_abandoned_vmm_reaper,
         }
     }
 
diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs
index 38bde3c048..6de9e6f4d3 100644
--- a/nexus/src/app/background/mod.rs
+++ b/nexus/src/app/background/mod.rs
@@ -4,6 +4,7 @@
 
 //! Background tasks
 
+mod abandoned_vmm_reaper;
 mod bfd;
 mod blueprint_execution;
 mod blueprint_load;
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 49a61cfa36..861d78e20c 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -112,6 +112,7 @@ region_replacement.period_secs = 30
 instance_watcher.period_secs = 30
 service_firewall_propagation.period_secs = 300
 v2p_mapping_propagation.period_secs = 30
+abandoned_vmm_reaper.period_secs = 60
 
 [default_region_allocation_strategy]
 # we only have one sled in the test environment, so we need to use the
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
index 0ed7a0562b..3827cbb38c 100644
--- a/smf/nexus/multi-sled/config-partial.toml
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -58,6 +58,7 @@ region_replacement.period_secs = 30
 service_firewall_propagation.period_secs = 300
 v2p_mapping_propagation.period_secs = 30
 instance_watcher.period_secs = 30
+abandoned_vmm_reaper.period_secs = 60
 
 [default_region_allocation_strategy]
 # by default, allocate across 3 distinct sleds
diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
index c57d2d3ba2..ee04f88e59 100644
--- a/smf/nexus/single-sled/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -58,6 +58,7 @@ region_replacement.period_secs = 30
 service_firewall_propagation.period_secs = 300
 v2p_mapping_propagation.period_secs = 30
 instance_watcher.period_secs = 30
+abandoned_vmm_reaper.period_secs = 60
 
 [default_region_allocation_strategy]
 # by default, allocate without requirement for distinct sleds.

From 4436dc12e0d8f8968761434d351b59f0019611e1 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@users.noreply.github.com>
Date: Fri, 24 May 2024 16:07:56 -0400
Subject: [PATCH 05/28] Automatic bump of permslip manifest to psc-v1.0.18
 (#5821)

Automated bump
---
 tools/permslip_staging | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/permslip_staging b/tools/permslip_staging
index 20a362ade0..d6ca94b280 100644
--- a/tools/permslip_staging
+++ b/tools/permslip_staging
@@ -1,4 +1,4 @@
 03df89d44ad8b653abbeb7fbb83821869f008733e9da946457e72a13cb11d6cc manifest-gimlet-v1.0.19.toml
 b973cc9feb20f7bba447e7f5291c4070387fa9992deab81301f67f0a3844cd0c manifest-oxide-rot-1-v1.0.11.toml
-aae829e02d79ec0fe19019c783b6426c6fcc1fe4427aea70b65afc2884f53db8 manifest-psc-v1.0.17.toml
+f5118f97d92cd0d56566114ca50ed149b7d5e71c452cabb33367651d86975471 manifest-psc-v1.0.18.toml
 ae00003c288ec4f520167c68de4999e1dfa15b63afe2f89e5ed1cfb8d707ebb9 manifest-sidecar-v1.0.19.toml

From ec69e001ef833c61060d63d80bea25256c91dea8 Mon Sep 17 00:00:00 2001
From: iliana etaoin <iliana@oxide.computer>
Date: Fri, 24 May 2024 18:10:44 -0700
Subject: [PATCH 06/28] manage cockroachdb cluster version with blueprints
 (#5603)

To upgrade CockroachDB, we'll need to manage the
`cluster.preserve_downgrade_option` cluster setting to give ourselves
the opportunity to roll back an upgrade. I was initially planning to
manage this with database migrations, but `SET CLUSTER SETTING` cannot
be run as part of a multi-statement transaction.

In the limit, the Reconfigurator will need to do this anyway as it
performs rolling upgrades of CockroachDB nodes, so we may as well teach
it to manage cluster settings today.
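
For reference, the statements involved look roughly like this (plain SQL
shown for illustration; the version string is a placeholder, and in practice
the Reconfigurator issues these with the state-fingerprint guard implemented
in `cockroachdb_settings.rs` below):

```sql
-- Inspect the current cluster version and the downgrade-preservation setting.
SHOW CLUSTER SETTING version;
SHOW CLUSTER SETTING cluster.preserve_downgrade_option;

-- Pin the cluster to the current major version (prevents auto-finalization).
SET CLUSTER SETTING cluster.preserve_downgrade_option = '22.1';

-- Clear the pin (back to the default, an empty string) so CockroachDB may
-- finalize the upgrade to the next version.
SET CLUSTER SETTING cluster.preserve_downgrade_option = '';
```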
---
 dev-tools/omdb/tests/successes.out            |  12 +
 docs/crdb-upgrades.adoc                       | 115 +++++++++
 nexus/db-model/src/deployment.rs              |  13 +
 nexus/db-model/src/schema.rs                  |   3 +
 nexus/db-model/src/schema_versions.rs         |   3 +-
 .../src/db/datastore/cockroachdb_settings.rs  | 229 +++++++++++++++++
 .../db-queries/src/db/datastore/deployment.rs |  17 ++
 nexus/db-queries/src/db/datastore/mod.rs      |   1 +
 nexus/db-queries/src/db/datastore/rack.rs     |  16 ++
 .../execution/src/cockroachdb.rs              | 113 +++++++++
 nexus/reconfigurator/execution/src/dns.rs     |  14 +-
 nexus/reconfigurator/execution/src/lib.rs     |   8 +
 .../execution/src/omicron_physical_disks.rs   |   5 +-
 .../execution/src/omicron_zones.rs            |   6 +-
 .../planning/src/blueprint_builder/builder.rs |  21 ++
 nexus/reconfigurator/planning/src/planner.rs  | 224 +++++++++++++++++
 nexus/reconfigurator/planning/src/system.rs   |   9 +
 .../output/blueprint_builder_initial_diff.txt |   4 +
 .../output/planner_basic_add_sled_2_3.txt     |   4 +
 .../output/planner_basic_add_sled_3_5.txt     |   4 +
 .../planner_decommissions_sleds_1_2.txt       |   4 +
 .../planner_decommissions_sleds_bp2.txt       |   4 +
 .../output/planner_nonprovisionable_1_2.txt   |   4 +
 .../output/planner_nonprovisionable_2_2a.txt  |   4 +
 .../output/planner_nonprovisionable_bp2.txt   |   4 +
 nexus/reconfigurator/preparation/src/lib.rs   |  13 +
 .../src/app/background/blueprint_execution.rs |   5 +-
 nexus/src/app/background/blueprint_load.rs    |   7 +-
 nexus/src/app/deployment.rs                   |   8 +
 nexus/src/app/rack.rs                         |  29 +++
 nexus/test-utils/src/lib.rs                   |   4 +
 nexus/types/src/deployment.rs                 |  45 +++-
 nexus/types/src/deployment/blueprint_diff.rs  | 153 +++++++-----
 .../types/src/deployment/blueprint_display.rs |   5 +
 nexus/types/src/deployment/planning_input.rs  | 235 ++++++++++++++++++
 openapi/nexus-internal.json                   |  89 +++++++
 .../blueprint-crdb-preserve-downgrade/up1.sql |   3 +
 .../blueprint-crdb-preserve-downgrade/up2.sql |   2 +
 schema/crdb/dbinit.sql                        |  17 +-
 sled-agent/src/rack_setup/service.rs          |   7 +-
 40 files changed, 1388 insertions(+), 75 deletions(-)
 create mode 100644 docs/crdb-upgrades.adoc
 create mode 100644 nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
 create mode 100644 nexus/reconfigurator/execution/src/cockroachdb.rs
 create mode 100644 schema/crdb/blueprint-crdb-preserve-downgrade/up1.sql
 create mode 100644 schema/crdb/blueprint-crdb-preserve-downgrade/up2.sql

diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out
index 0f588069e4..07ebeb10bf 100644
--- a/dev-tools/omdb/tests/successes.out
+++ b/dev-tools/omdb/tests/successes.out
@@ -542,6 +542,10 @@ WARNING: Zones exist without physical disks!
 
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   d4d87aa2ad877a4cc2fddd0573952362739110de
+    cluster.preserve_downgrade_option:   "22.1"
+
  METADATA:
     created by:::::::::::   nexus-test-utils
     created at:::::::::::   <REDACTED     TIMESTAMP>
@@ -576,6 +580,10 @@ WARNING: Zones exist without physical disks!
 
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   d4d87aa2ad877a4cc2fddd0573952362739110de
+    cluster.preserve_downgrade_option:   "22.1"
+
  METADATA:
     created by:::::::::::   nexus-test-utils
     created at:::::::::::   <REDACTED     TIMESTAMP>
@@ -613,6 +621,10 @@ to:   blueprint ......<REDACTED_BLUEPRINT_ID>.......
     nexus             ..........<REDACTED_UUID>...........   in service    ::ffff:127.0.0.1
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged)
+    cluster.preserve_downgrade_option:   "22.1" (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
     external DNS version:   2 (unchanged)
diff --git a/docs/crdb-upgrades.adoc b/docs/crdb-upgrades.adoc
new file mode 100644
index 0000000000..eecfa9194e
--- /dev/null
+++ b/docs/crdb-upgrades.adoc
@@ -0,0 +1,115 @@
+:showtitle:
+:numbered:
+:toc: left
+
+= So You Want To Upgrade CockroachDB
+
+CockroachDB has a number of overlapping things called "versions":
+
+1. The `cockroachdb` executable is built from a particular version, such
+   as v22.2.19. We'll call this the *executable version*.
+2. The executable version is made up of three components: a number
+   representing the release year, a number representing which release
+   it was within that year, and a patch release number. The first two
+   components constitute the *major version* (such as v22.2).
+3. There is also a version for the on-disk data format that CockroachDB
+   writes and manages. This is called the *cluster version*. When
+   you create a new cluster while running major version v22.2, it
+   is initialized at cluster version `22.2`. Each major version of
+   CockroachDB can operate on both its own associated cluster version,
+   and the previous major version's cluster version, to facilitate
+   rolling upgrades.
+
+By default the cluster version is upgraded and _finalized_ once
+all nodes in the cluster have upgraded to a new major version
+(the CockroachDB docs refer to this as "auto-finalization").
+<<crdb-tn-upgrades>> However, it is not possible to downgrade the
+cluster version. To mitigate the risk of one-way upgrades, we use a
+CockroachDB cluster setting named `cluster.preserve_downgrade_option`
+to prevent auto-finalization and... preserve our option to downgrade in
+a future release, as the option name would suggest. We then perform an
+upgrade to the next major version across at least two releases, which we
+refer to as a tick-tock cycle:
+
+- In a *tick release*, we upgrade the executable versions across the
+  cluster.
+- In a *tock release*, we release our downgrade option and allow
+  CockroachDB to perform the cluster upgrade automatically. When the
+  upgrade is complete, we configure the "preserve downgrade option"
+  setting again to prepare for the next tick release.
+
+(This is not strictly speaking a "tick-tock" cycle, because any number
+of releases may occur between a tick and a tock, and between a tock and
+a tick, but they must occur in that order.)
+
+== Process for a tick release
+
+. Determine whether to move to the next major release of CockroachDB.
+  We have generally avoided being early adopters of new major releases
+  and prefer to see the rate of https://www.cockroachlabs.com/docs/advisories/[technical
+  advisories] that solely affect the new major version drop off. (This
+  generally won't stop you from working on building and testing the
+  next major version, however, as the build process sometimes changes
+  significantly from release to release.)
+. Build a new version of CockroachDB for illumos. You will want to
+  update the https://github.com/oxidecomputer/garbage-compactor/tree/master/cockroach[build
+  scripts in garbage-compactor].
+. In your local Omicron checkout on a Helios machine, unpack the
+  resulting tarball to `out/cockroachdb`, and update `tools/cockroachdb_version`
+  to the version you've built.
+. Add an enum variant for the new version to `CockroachDbClusterVersion`
+  in `nexus/types/src/deployment/planning_input.rs`, and change the
+  associated constant `NEWLY_INITIALIZED` to that value.
+. Run the test suite, which should catch any unexpected SQL
+  compatibility issues between releases and help validate that your
+  build works.
+  * You will need to run the `test_omdb_success_cases` test from
+    omicron-omdb with `EXPECTORATE=overwrite`; this file contains the
+    expected output of various omdb commands, including a fingerprint of
+    CockroachDB's cluster state.
+. Submit a PR for your changes to garbage-compactor; when merged,
+  publish the final build to the `oxide-cockroachdb-build` S3 bucket.
+. Update `tools/cockroachdb_checksums`. For non-illumos checksums, use
+  the https://www.cockroachlabs.com/docs/releases/[official releases]
+  matching the version you built.
+. Submit a PR with your changes (including `tools/cockroachdb_version`
+  and `tools/cockroachdb_checksums`) to Omicron.
+
+== Process for a tock release
+
+. Change the associated constant `CockroachDbClusterVersion::POLICY` in
+  `nexus/types/src/deployment/planning_input.rs` from the previous major
+  version to the current major version.
+
+== What Nexus does
+
+The Reconfigurator collects the current cluster version, and compares
+this to the desired cluster version set by policy (which we update in
+tock releases).
+
+If they do not match, Nexus ensures the
+`cluster.preserve_downgrade_option` setting is the default value (an
+empty string), which allows CockroachDB to perform the upgrade to the
+desired version. The end result of this upgrade is that the current and
+desired cluster versions will match.
+
+When they match, Nexus ensures that the
+`cluster.preserve_downgrade_option` setting is set to the current
+cluster version, to prevent automatic cluster upgrades when CockroachDB
+is next upgraded to a new major version.
+
+Because blueprints are serialized and continue to run even if the
+underlying state has changed, Nexus needs to ensure its view of the
+world is not out-of-date. Nexus saves a fingerprint of the current
+cluster state in the blueprint (intended to be opaque, but ultimately
+a hash of the cluster version and executable version of the node we're
+currently connected to). When setting CockroachDB options, it verifies
+this fingerprint in a way that causes an error, rather than setting the
+option, if the fingerprint no longer matches.
+
+[bibliography]
+== External References
+
+- [[[crdb-tn-upgrades]]] Cockroach Labs. Cluster versions and upgrades.
+  November 2023.
+  https://github.com/cockroachdb/cockroach/blob/53262957399e6e0fccd63c91add57a510b86dc9a/docs/tech-notes/version_upgrades.md
diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs
index 0b766f9e9b..e6a66543c7 100644
--- a/nexus/db-model/src/deployment.rs
+++ b/nexus/db-model/src/deployment.rs
@@ -25,6 +25,7 @@ use nexus_types::deployment::BlueprintTarget;
 use nexus_types::deployment::BlueprintZoneConfig;
 use nexus_types::deployment::BlueprintZoneDisposition;
 use nexus_types::deployment::BlueprintZonesConfig;
+use nexus_types::deployment::CockroachDbPreserveDowngrade;
 use omicron_common::api::internal::shared::NetworkInterface;
 use omicron_common::disk::DiskIdentity;
 use omicron_uuid_kinds::GenericUuid;
@@ -41,6 +42,8 @@ pub struct Blueprint {
     pub parent_blueprint_id: Option<Uuid>,
     pub internal_dns_version: Generation,
     pub external_dns_version: Generation,
+    pub cockroachdb_fingerprint: String,
+    pub cockroachdb_setting_preserve_downgrade: Option<String>,
     pub time_created: DateTime<Utc>,
     pub creator: String,
     pub comment: String,
@@ -53,6 +56,10 @@ impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint {
             parent_blueprint_id: bp.parent_blueprint_id,
             internal_dns_version: Generation(bp.internal_dns_version),
             external_dns_version: Generation(bp.external_dns_version),
+            cockroachdb_fingerprint: bp.cockroachdb_fingerprint.clone(),
+            cockroachdb_setting_preserve_downgrade: bp
+                .cockroachdb_setting_preserve_downgrade
+                .to_optional_string(),
             time_created: bp.time_created,
             creator: bp.creator.clone(),
             comment: bp.comment.clone(),
@@ -67,6 +74,12 @@ impl From<Blueprint> for nexus_types::deployment::BlueprintMetadata {
             parent_blueprint_id: value.parent_blueprint_id,
             internal_dns_version: *value.internal_dns_version,
             external_dns_version: *value.external_dns_version,
+            cockroachdb_fingerprint: value.cockroachdb_fingerprint,
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::from_optional_string(
+                    &value.cockroachdb_setting_preserve_downgrade,
+                )
+                .ok(),
             time_created: value.time_created,
             creator: value.creator,
             comment: value.comment,
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index deeca970c7..94e699443c 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -1504,6 +1504,9 @@ table! {
 
         internal_dns_version -> Int8,
         external_dns_version -> Int8,
+        cockroachdb_fingerprint -> Text,
+
+        cockroachdb_setting_preserve_downgrade -> Nullable<Text>,
     }
 }
 
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index 5ceaf3167a..75e1d7e440 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema.  Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(65, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(66, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(66, "blueprint-crdb-preserve-downgrade"),
         KnownVersion::new(65, "region-replacement"),
         KnownVersion::new(64, "add-view-for-v2p-mappings"),
         KnownVersion::new(63, "remove-producer-base-route-column"),
diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
new file mode 100644
index 0000000000..177cf673e7
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
@@ -0,0 +1,229 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Datastore methods involving CockroachDB settings, which are managed by the
+//! Reconfigurator.
+
+use super::DataStore;
+use crate::context::OpContext;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::raw_query_builder::QueryBuilder;
+use async_bb8_diesel::AsyncRunQueryDsl;
+use diesel::deserialize::Queryable;
+use diesel::sql_types;
+use nexus_types::deployment::CockroachDbSettings;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::LookupResult;
+
+/// This bit of SQL calculates a "state fingerprint" for the CockroachDB
+/// cluster. `DataStore::cockroachdb_settings` calculates the fingerprint and
+/// returns it to the caller. `DataStore::cockroach_setting_set_*` requires the
+/// caller send the fingerprint, and it verifies it against the current state of
+/// the cluster.
+///
+/// This is done to help prevent TOCTOU-class bugs that arise from blueprint
+/// planning taking place before blueprint execution. Here are the ones we're
+/// aware of, which guide the contents of this fingerprint:
+///
+/// - If the cluster version has changed, we are probably in the middle of
+///   an upgrade. We should not be setting any settings and should re-plan.
+///   (`crdb_internal.active_version()`)
+/// - If the major version of CockroachDB has changed, we should not trust
+///   the blueprint's value for the `cluster.preserve_downgrade_option`
+///   setting; if set to an empty string and we've just upgraded the software
+///   to the next major version, this will result in unwanted finalization.
+///   (`crdb_internal.node_executable_version()`)
+///
+/// Because these are run as part of a gadget that allows CockroachDB to verify
+/// the fingerprint during a `SET CLUSTER SETTING` statement, which cannot
+/// be run as part of a multi-transaction statement or CTE, we are limited to
+/// values that can be returned from built-in functions and operators.
+///
+/// This fingerprint should return a STRING value. It is safe to modify how this
+/// fingerprint is calculated between Nexus releases; the stale fingerprint in
+/// the previous blueprint will be rejected.
+const STATE_FINGERPRINT_SQL: &str = r#"
+    encode(digest(
+        crdb_internal.active_version()
+        || crdb_internal.node_executable_version()
+    , 'sha1'), 'hex')
+"#;
+
+impl DataStore {
+    /// Get the current CockroachDB settings.
+    pub async fn cockroachdb_settings(
+        &self,
+        opctx: &OpContext,
+    ) -> LookupResult<CockroachDbSettings> {
+        #[derive(Debug, Queryable)]
+        struct QueryOutput {
+            state_fingerprint: String,
+            version: String,
+            preserve_downgrade: String,
+        }
+        type QueryRow = (sql_types::Text, sql_types::Text, sql_types::Text);
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let output: QueryOutput = QueryBuilder::new()
+            .sql("SELECT ")
+            .sql(STATE_FINGERPRINT_SQL)
+            .sql(", * FROM ")
+            .sql("[SHOW CLUSTER SETTING version], ")
+            .sql("[SHOW CLUSTER SETTING cluster.preserve_downgrade_option]")
+            .query::<QueryRow>()
+            .get_result_async(&*conn)
+            .await
+            .map_err(|err| {
+                public_error_from_diesel(err, ErrorHandler::Server)
+            })?;
+        Ok(CockroachDbSettings {
+            state_fingerprint: output.state_fingerprint,
+            version: output.version,
+            preserve_downgrade: output.preserve_downgrade,
+        })
+    }
+
+    /// Set a CockroachDB setting with a `String` value.
+    ///
+    /// This cannot be run in a multi-statement transaction.
+    pub async fn cockroachdb_setting_set_string(
+        &self,
+        opctx: &OpContext,
+        state_fingerprint: String,
+        setting: &'static str,
+        value: String,
+    ) -> Result<(), Error> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+        QueryBuilder::new()
+            .sql("SET CLUSTER SETTING ")
+            .sql(setting)
+            // `CASE` is the one conditional statement we get out of the
+            // CockroachDB grammar for `SET CLUSTER SETTING`.
+            .sql(" = CASE ")
+            .sql(STATE_FINGERPRINT_SQL)
+            .sql(" = ")
+            .param()
+            .sql(" WHEN TRUE THEN ")
+            .param()
+            // This is the gadget that allows us to reject changing a setting
+            // if the fingerprint doesn't match. CockroachDB settings are typed,
+            // but none of them are nullable, and NULL cannot be coerced into
+            // any of them, so this branch returns an error if it's hit (tested
+            // below in `test_preserve_downgrade`).
+            .sql(" ELSE NULL END")
+            .bind::<sql_types::Text, _>(state_fingerprint)
+            .bind::<sql_types::Text, _>(value.clone())
+            .query::<()>()
+            .execute_async(&*conn)
+            .await
+            .map_err(|err| {
+                public_error_from_diesel(err, ErrorHandler::Server)
+            })?;
+        info!(
+            opctx.log,
+            "set cockroachdb setting";
+            "setting" => setting,
+            "value" => &value,
+        );
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{CockroachDbSettings, OpContext};
+    use nexus_test_utils::db::test_setup_database;
+    use nexus_types::deployment::CockroachDbClusterVersion;
+    use omicron_common::api::external::Error;
+    use omicron_test_utils::dev;
+    use std::sync::Arc;
+
+    #[tokio::test]
+    async fn test_preserve_downgrade() {
+        let logctx = dev::test_setup_log("test_preserve_downgrade");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (_, datastore) =
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
+        let opctx =
+            OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
+
+        let settings = datastore.cockroachdb_settings(&opctx).await.unwrap();
+        // With a fresh cluster, this is the expected state
+        let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string();
+        assert_eq!(settings.version, version);
+        assert_eq!(settings.preserve_downgrade, "");
+
+        // Verify that if a fingerprint is wrong, we get the expected SQL error
+        // back.
+        let Err(Error::InternalError { internal_message }) = datastore
+            .cockroachdb_setting_set_string(
+                &opctx,
+                String::new(),
+                "cluster.preserve_downgrade_option",
+                version.clone(),
+            )
+            .await
+        else {
+            panic!("should have returned an internal error");
+        };
+        assert_eq!(
+            internal_message,
+            "unexpected database error: \
+            cannot use unknown tree.dNull value for string setting"
+        );
+        // And ensure that the state didn't change.
+        assert_eq!(
+            settings,
+            datastore.cockroachdb_settings(&opctx).await.unwrap()
+        );
+
+        // Test setting it (twice, to verify doing it again doesn't trigger
+        // an error)
+        for _ in 0..2 {
+            datastore
+                .cockroachdb_setting_set_string(
+                    &opctx,
+                    settings.state_fingerprint.clone(),
+                    "cluster.preserve_downgrade_option",
+                    version.clone(),
+                )
+                .await
+                .unwrap();
+            assert_eq!(
+                datastore.cockroachdb_settings(&opctx).await.unwrap(),
+                CockroachDbSettings {
+                    state_fingerprint: settings.state_fingerprint.clone(),
+                    version: version.clone(),
+                    preserve_downgrade: version.clone(),
+                }
+            );
+        }
+
+        // Test resetting it (twice, same reason)
+        for _ in 0..2 {
+            datastore
+                .cockroachdb_setting_set_string(
+                    &opctx,
+                    settings.state_fingerprint.clone(),
+                    "cluster.preserve_downgrade_option",
+                    String::new(),
+                )
+                .await
+                .unwrap();
+            assert_eq!(
+                datastore.cockroachdb_settings(&opctx).await.unwrap(),
+                CockroachDbSettings {
+                    state_fingerprint: settings.state_fingerprint.clone(),
+                    version: version.clone(),
+                    preserve_downgrade: String::new(),
+                }
+            );
+        }
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs
index 003b64fd78..790dc0d72c 100644
--- a/nexus/db-queries/src/db/datastore/deployment.rs
+++ b/nexus/db-queries/src/db/datastore/deployment.rs
@@ -47,6 +47,7 @@ use nexus_types::deployment::BlueprintMetadata;
 use nexus_types::deployment::BlueprintPhysicalDisksConfig;
 use nexus_types::deployment::BlueprintTarget;
 use nexus_types::deployment::BlueprintZonesConfig;
+use nexus_types::deployment::CockroachDbPreserveDowngrade;
 use nexus_types::external_api::views::SledState;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
@@ -283,6 +284,8 @@ impl DataStore {
             parent_blueprint_id,
             internal_dns_version,
             external_dns_version,
+            cockroachdb_fingerprint,
+            cockroachdb_setting_preserve_downgrade,
             time_created,
             creator,
             comment,
@@ -306,11 +309,23 @@ impl DataStore {
                 blueprint.parent_blueprint_id,
                 *blueprint.internal_dns_version,
                 *blueprint.external_dns_version,
+                blueprint.cockroachdb_fingerprint,
+                blueprint.cockroachdb_setting_preserve_downgrade,
                 blueprint.time_created,
                 blueprint.creator,
                 blueprint.comment,
             )
         };
+        let cockroachdb_setting_preserve_downgrade =
+            CockroachDbPreserveDowngrade::from_optional_string(
+                &cockroachdb_setting_preserve_downgrade,
+            )
+            .map_err(|_| {
+                Error::internal_error(&format!(
+                    "unrecognized cluster version {:?}",
+                    cockroachdb_setting_preserve_downgrade
+                ))
+            })?;
 
         // Load the sled states for this blueprint.
         let sled_state: BTreeMap<SledUuid, SledState> = {
@@ -611,6 +626,8 @@ impl DataStore {
             parent_blueprint_id,
             internal_dns_version,
             external_dns_version,
+            cockroachdb_fingerprint,
+            cockroachdb_setting_preserve_downgrade,
             time_created,
             creator,
             comment,
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index a69e91dff4..b5cb749162 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -53,6 +53,7 @@ mod bfd;
 mod bgp;
 mod bootstore;
 mod certificate;
+mod cockroachdb_settings;
 mod console_session;
 mod dataset;
 mod db_metadata;
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index b8275b56d4..d836185d87 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -1013,6 +1013,7 @@ mod test {
     };
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::deployment::BlueprintZonesConfig;
+    use nexus_types::deployment::CockroachDbPreserveDowngrade;
     use nexus_types::deployment::{
         BlueprintZoneConfig, OmicronZoneExternalFloatingAddr,
         OmicronZoneExternalFloatingIp,
@@ -1056,9 +1057,12 @@ mod test {
                     blueprint_zones: BTreeMap::new(),
                     blueprint_disks: BTreeMap::new(),
                     sled_state: BTreeMap::new(),
+                    cockroachdb_setting_preserve_downgrade:
+                        CockroachDbPreserveDowngrade::DoNotModify,
                     parent_blueprint_id: None,
                     internal_dns_version: *Generation::new(),
                     external_dns_version: *Generation::new(),
+                    cockroachdb_fingerprint: String::new(),
                     time_created: Utc::now(),
                     creator: "test suite".to_string(),
                     comment: "test suite".to_string(),
@@ -1525,9 +1529,12 @@ mod test {
             sled_state: sled_states_active(blueprint_zones.keys().copied()),
             blueprint_zones,
             blueprint_disks: BTreeMap::new(),
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             parent_blueprint_id: None,
             internal_dns_version: *Generation::new(),
             external_dns_version: *Generation::new(),
+            cockroachdb_fingerprint: String::new(),
             time_created: now_db_precision(),
             creator: "test suite".to_string(),
             comment: "test blueprint".to_string(),
@@ -1779,9 +1786,12 @@ mod test {
             sled_state: sled_states_active(blueprint_zones.keys().copied()),
             blueprint_zones,
             blueprint_disks: BTreeMap::new(),
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             parent_blueprint_id: None,
             internal_dns_version: *Generation::new(),
             external_dns_version: *Generation::new(),
+            cockroachdb_fingerprint: String::new(),
             time_created: now_db_precision(),
             creator: "test suite".to_string(),
             comment: "test blueprint".to_string(),
@@ -1988,9 +1998,12 @@ mod test {
             sled_state: sled_states_active(blueprint_zones.keys().copied()),
             blueprint_zones,
             blueprint_disks: BTreeMap::new(),
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             parent_blueprint_id: None,
             internal_dns_version: *Generation::new(),
             external_dns_version: *Generation::new(),
+            cockroachdb_fingerprint: String::new(),
             time_created: now_db_precision(),
             creator: "test suite".to_string(),
             comment: "test blueprint".to_string(),
@@ -2123,9 +2136,12 @@ mod test {
             sled_state: sled_states_active(blueprint_zones.keys().copied()),
             blueprint_zones,
             blueprint_disks: BTreeMap::new(),
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             parent_blueprint_id: None,
             internal_dns_version: *Generation::new(),
             external_dns_version: *Generation::new(),
+            cockroachdb_fingerprint: String::new(),
             time_created: now_db_precision(),
             creator: "test suite".to_string(),
             comment: "test blueprint".to_string(),
diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs
new file mode 100644
index 0000000000..101a7372c5
--- /dev/null
+++ b/nexus/reconfigurator/execution/src/cockroachdb.rs
@@ -0,0 +1,113 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Ensures CockroachDB settings are set
+
+use anyhow::Context;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::Blueprint;
+
+pub(crate) async fn ensure_settings(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    blueprint: &Blueprint,
+) -> anyhow::Result<()> {
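+    // A note on the flow (descriptive only): `to_optional_string` is expected
+    // to yield `None` for `CockroachDbPreserveDowngrade::DoNotModify`, in
+    // which case we skip the `SET CLUSTER SETTING` call entirely and leave
+    // `cluster.preserve_downgrade_option` untouched.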
+    if let Some(value) =
+        blueprint.cockroachdb_setting_preserve_downgrade.to_optional_string()
+    {
+        datastore
+            .cockroachdb_setting_set_string(
+                opctx,
+                blueprint.cockroachdb_fingerprint.clone(),
+                "cluster.preserve_downgrade_option",
+                value,
+            )
+            .await
+            .context("failed to set cluster.preserve_downgrade_option")?;
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::overridables::Overridables;
+    use nexus_db_queries::authn;
+    use nexus_db_queries::authz;
+    use nexus_test_utils_macros::nexus_test;
+    use nexus_types::deployment::CockroachDbClusterVersion;
+    use std::sync::Arc;
+
+    type ControlPlaneTestContext =
+        nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
+
+    #[nexus_test]
+    async fn test_ensure_preserve_downgrade_option(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let log = &cptestctx.logctx.log;
+        let opctx = OpContext::for_background(
+            log.clone(),
+            Arc::new(authz::Authz::new(log)),
+            authn::Context::internal_api(),
+            datastore.clone(),
+        );
+
+        // Fetch the initial CockroachDB settings.
+        let settings = datastore
+            .cockroachdb_settings(&opctx)
+            .await
+            .expect("failed to get cockroachdb settings");
+        // Fetch the initial blueprint installed during rack initialization.
+        let (_blueprint_target, blueprint) = datastore
+            .blueprint_target_get_current_full(&opctx)
+            .await
+            .expect("failed to get blueprint from datastore");
+        eprintln!("blueprint: {}", blueprint.display());
+        // The initial blueprint should already have these filled in.
+        assert_eq!(
+            blueprint.cockroachdb_fingerprint,
+            settings.state_fingerprint
+        );
+        assert_eq!(
+            blueprint.cockroachdb_setting_preserve_downgrade,
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.into()
+        );
+        // The cluster version, preserve downgrade setting, and
+        // `NEWLY_INITIALIZED` should all match.
+        assert_eq!(
+            settings.version,
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string()
+        );
+        assert_eq!(
+            settings.preserve_downgrade,
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string()
+        );
+        // Execute the initial blueprint.
+        let overrides = Overridables::for_test(cptestctx);
+        crate::realize_blueprint_with_overrides(
+            &opctx,
+            datastore,
+            &blueprint,
+            "test-suite",
+            &overrides,
+        )
+        .await
+        .expect("failed to execute initial blueprint");
+        // The CockroachDB settings should not have changed.
+        assert_eq!(
+            settings,
+            datastore
+                .cockroachdb_settings(&opctx)
+                .await
+                .expect("failed to get cockroachdb settings")
+        );
+
+        // TODO(iliana): when we upgrade to v22.2, test an actual cluster
+        // upgrade when crdb-seed is run with the old CockroachDB
+    }
+}
diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs
index 4223652b00..ec48a35cbe 100644
--- a/nexus/reconfigurator/execution/src/dns.rs
+++ b/nexus/reconfigurator/execution/src/dns.rs
@@ -477,6 +477,9 @@ mod test {
     use nexus_types::deployment::BlueprintZoneConfig;
     use nexus_types::deployment::BlueprintZoneDisposition;
     use nexus_types::deployment::BlueprintZonesConfig;
+    use nexus_types::deployment::CockroachDbClusterVersion;
+    use nexus_types::deployment::CockroachDbPreserveDowngrade;
+    use nexus_types::deployment::CockroachDbSettings;
     use nexus_types::deployment::SledFilter;
     use nexus_types::external_api::params;
     use nexus_types::external_api::shared;
@@ -596,9 +599,12 @@ mod test {
             blueprint_zones,
             blueprint_disks: BTreeMap::new(),
             sled_state,
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             parent_blueprint_id: None,
             internal_dns_version: initial_dns_generation,
             external_dns_version: Generation::new(),
+            cockroachdb_fingerprint: String::new(),
             time_created: now_db_precision(),
             creator: "test-suite".to_string(),
             comment: "test blueprint".to_string(),
@@ -1147,11 +1153,14 @@ mod test {
             .expect("fetching initial external DNS");
 
         // Fetch the initial blueprint installed during rack initialization.
-        let (_blueprint_target, blueprint) = datastore
+        let (_blueprint_target, mut blueprint) = datastore
             .blueprint_target_get_current_full(&opctx)
             .await
             .expect("failed to read current target blueprint");
         eprintln!("blueprint: {}", blueprint.display());
+        // Override the CockroachDB settings so that we don't try to set them.
+        blueprint.cockroachdb_setting_preserve_downgrade =
+            CockroachDbPreserveDowngrade::DoNotModify;
 
         // Now, execute the initial blueprint.
         let overrides = Overridables::for_test(cptestctx);
@@ -1222,9 +1231,12 @@ mod test {
                 .into(),
                 // These are not used because we're not actually going through
                 // the planner.
+                cockroachdb_settings: &CockroachDbSettings::empty(),
                 external_ip_rows: &[],
                 service_nic_rows: &[],
                 target_nexus_zone_count: NEXUS_REDUNDANCY,
+                target_cockroachdb_cluster_version:
+                    CockroachDbClusterVersion::POLICY,
                 log,
             }
             .build()
diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs
index 8ac8bc4399..63bb4b24f0 100644
--- a/nexus/reconfigurator/execution/src/lib.rs
+++ b/nexus/reconfigurator/execution/src/lib.rs
@@ -24,6 +24,7 @@ use slog_error_chain::InlineErrorChain;
 use std::collections::BTreeMap;
 use std::net::SocketAddrV6;
 
+mod cockroachdb;
 mod datasets;
 mod dns;
 mod external_networking;
@@ -214,5 +215,12 @@ where
     )
     .await?;
 
+    // This is likely to error if any cluster upgrades are in progress (which
+    // can take some time), so it should remain at the end so that other parts
+    // of the blueprint can progress normally.
+    cockroachdb::ensure_settings(&opctx, datastore, blueprint)
+        .await
+        .map_err(|err| vec![err])?;
+
     Ok(())
 }
diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs
index ab0c5cab45..d7d8604e7e 100644
--- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs
+++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs
@@ -109,7 +109,7 @@ mod test {
     use nexus_test_utils_macros::nexus_test;
     use nexus_types::deployment::{
         Blueprint, BlueprintPhysicalDiskConfig, BlueprintPhysicalDisksConfig,
-        BlueprintTarget,
+        BlueprintTarget, CockroachDbPreserveDowngrade,
     };
     use omicron_common::api::external::Generation;
     use omicron_common::disk::DiskIdentity;
@@ -137,9 +137,12 @@ mod test {
                 blueprint_zones: BTreeMap::new(),
                 blueprint_disks,
                 sled_state: BTreeMap::new(),
+                cockroachdb_setting_preserve_downgrade:
+                    CockroachDbPreserveDowngrade::DoNotModify,
                 parent_blueprint_id: None,
                 internal_dns_version: Generation::new(),
                 external_dns_version: Generation::new(),
+                cockroachdb_fingerprint: String::new(),
                 time_created: chrono::Utc::now(),
                 creator: "test".to_string(),
                 comment: "test blueprint".to_string(),
diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs
index 68c1455ee4..a40d65411b 100644
--- a/nexus/reconfigurator/execution/src/omicron_zones.rs
+++ b/nexus/reconfigurator/execution/src/omicron_zones.rs
@@ -95,7 +95,8 @@ mod test {
     use nexus_db_queries::context::OpContext;
     use nexus_test_utils_macros::nexus_test;
     use nexus_types::deployment::{
-        blueprint_zone_type, BlueprintZoneType, OmicronZonesConfig,
+        blueprint_zone_type, BlueprintZoneType, CockroachDbPreserveDowngrade,
+        OmicronZonesConfig,
     };
     use nexus_types::deployment::{
         Blueprint, BlueprintTarget, BlueprintZoneConfig,
@@ -127,9 +128,12 @@ mod test {
                 blueprint_zones,
                 blueprint_disks: BTreeMap::new(),
                 sled_state: BTreeMap::new(),
+                cockroachdb_setting_preserve_downgrade:
+                    CockroachDbPreserveDowngrade::DoNotModify,
                 parent_blueprint_id: None,
                 internal_dns_version: Generation::new(),
                 external_dns_version: Generation::new(),
+                cockroachdb_fingerprint: String::new(),
                 time_created: chrono::Utc::now(),
                 creator: "test".to_string(),
                 comment: "test blueprint".to_string(),
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index 1efefb9817..7e98b3906d 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -20,6 +20,7 @@ use nexus_types::deployment::BlueprintZoneDisposition;
 use nexus_types::deployment::BlueprintZoneFilter;
 use nexus_types::deployment::BlueprintZoneType;
 use nexus_types::deployment::BlueprintZonesConfig;
+use nexus_types::deployment::CockroachDbPreserveDowngrade;
 use nexus_types::deployment::DiskFilter;
 use nexus_types::deployment::OmicronZoneDataset;
 use nexus_types::deployment::OmicronZoneExternalFloatingIp;
@@ -146,6 +147,7 @@ pub struct BlueprintBuilder<'a> {
     pub(super) zones: BlueprintZonesBuilder<'a>,
     disks: BlueprintDisksBuilder<'a>,
     sled_state: BTreeMap<SledUuid, SledState>,
+    cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade,
 
     creator: String,
     comments: Vec<String>,
@@ -208,6 +210,9 @@ impl<'a> BlueprintBuilder<'a> {
             parent_blueprint_id: None,
             internal_dns_version: Generation::new(),
             external_dns_version: Generation::new(),
+            cockroachdb_fingerprint: String::new(),
+            cockroachdb_setting_preserve_downgrade:
+                CockroachDbPreserveDowngrade::DoNotModify,
             time_created: now_db_precision(),
             creator: creator.to_owned(),
             comment: format!("starting blueprint with {num_sleds} empty sleds"),
@@ -264,6 +269,8 @@ impl<'a> BlueprintBuilder<'a> {
             zones: BlueprintZonesBuilder::new(parent_blueprint),
             disks: BlueprintDisksBuilder::new(parent_blueprint),
             sled_state,
+            cockroachdb_setting_preserve_downgrade: parent_blueprint
+                .cockroachdb_setting_preserve_downgrade,
             creator: creator.to_owned(),
             comments: Vec::new(),
             rng: BlueprintBuilderRng::new(),
@@ -302,6 +309,13 @@ impl<'a> BlueprintBuilder<'a> {
             parent_blueprint_id: Some(self.parent_blueprint.id),
             internal_dns_version: self.input.internal_dns_version(),
             external_dns_version: self.input.external_dns_version(),
+            cockroachdb_fingerprint: self
+                .input
+                .cockroachdb_settings()
+                .state_fingerprint
+                .clone(),
+            cockroachdb_setting_preserve_downgrade: self
+                .cockroachdb_setting_preserve_downgrade,
             time_created: now_db_precision(),
             creator: self.creator,
             comment: self.comments.join(", "),
@@ -735,6 +749,13 @@ impl<'a> BlueprintBuilder<'a> {
         Ok(EnsureMultiple::Added(num_nexus_to_add))
     }
 
+    pub fn cockroachdb_preserve_downgrade(
+        &mut self,
+        version: CockroachDbPreserveDowngrade,
+    ) {
+        self.cockroachdb_setting_preserve_downgrade = version;
+    }
+
     fn sled_add_zone(
         &mut self,
         sled_id: SledUuid,
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index 3708d212ec..6ed81cbb63 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -13,6 +13,9 @@ use crate::blueprint_builder::Error;
 use crate::planner::omicron_zone_placement::PlacementError;
 use nexus_types::deployment::Blueprint;
 use nexus_types::deployment::BlueprintZoneDisposition;
+use nexus_types::deployment::CockroachDbClusterVersion;
+use nexus_types::deployment::CockroachDbPreserveDowngrade;
+use nexus_types::deployment::CockroachDbSettings;
 use nexus_types::deployment::PlanningInput;
 use nexus_types::deployment::SledFilter;
 use nexus_types::deployment::ZpoolFilter;
@@ -25,6 +28,7 @@ use slog::{info, warn, Logger};
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::hash::Hash;
+use std::str::FromStr;
 
 use self::omicron_zone_placement::DiscretionaryOmicronZone;
 use self::omicron_zone_placement::OmicronZonePlacement;
@@ -90,6 +94,7 @@ impl<'a> Planner<'a> {
         self.do_plan_expunge()?;
         self.do_plan_add()?;
         self.do_plan_decommission()?;
+        self.do_plan_cockroachdb_settings();
 
         Ok(())
     }
@@ -455,6 +460,100 @@ impl<'a> Planner<'a> {
 
         Ok(())
     }
+
+    fn do_plan_cockroachdb_settings(&mut self) {
+        // Figure out what we should set the CockroachDB "preserve downgrade
+        // option" setting to based on the planning input.
+        //
+        // CockroachDB version numbers look like SemVer but are not. Major
+        // version numbers consist of the first *two* components, which
+        // represent the year and the Nth release that year. So the major
+        // version in "22.2.7" is "22.2".
+        //
+        // A given major version of CockroachDB is backward compatible with the
+        // storage format of the previous major version of CockroachDB. This is
+        // shown by the `version` setting, which displays the current storage
+        // format version. When `version` is '22.2', versions v22.2.x or v23.1.x
+        // can be used to run a node. This allows for rolling upgrades of nodes
+        // within the cluster and also preserves the ability to roll back until
+        // the new software version can be validated.
+        //
+        // By default, when all nodes of a cluster are upgraded to a new major
+        // version, the upgrade is "auto-finalized"; `version` is changed to the
+        // new major version, and rolling back to a previous major version of
+        // CockroachDB is no longer possible.
+        //
+        // The `cluster.preserve_downgrade_option` setting can be used to
+        // control this. This setting can only be set to the current value
+        // of the `version` setting, and when it is set, CockroachDB will not
+        // perform auto-finalization. To perform finalization and finish the
+        // upgrade, a client must reset the "preserve downgrade option" setting.
+        // Finalization occurs in the background, and the "preserve downgrade
+        // option" setting should not be changed again until finalization
+        // completes.
+        //
+        // We determine the appropriate value for `preserve_downgrade_option`
+        // based on:
+        //
+        // 1. the _target_ cluster version from the `Policy` (what we want to
+        //    be running)
+        // 2. the `version` setting reported by CockroachDB (what we're
+        //    currently running)
+        //
+        // by saying:
+        //
+        // - If we don't recognize the `version` CockroachDB reports, we will
+        //   do nothing.
+        // - If our target version is _equal to_ what CockroachDB reports,
+        //   we will ensure `preserve_downgrade_option` is set to the current
+        //   `version`. This prevents auto-finalization when we deploy the next
+        //   major version of CockroachDB as part of an update.
+        // - If our target version is _older than_ what CockroachDB reports, we
+        //   will also ensure `preserve_downgrade_option` is set to the current
+        //   `version`. (This will happen on newly-initialized clusters when
+        //   we deploy a version of CockroachDB that is newer than our current
+        //   policy.)
+        // - If our target version is _newer than_ what CockroachDB reports, we
+        //   will ensure `preserve_downgrade_option` is set to the default value
+        //   (the empty string). This will trigger finalization.
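+        //
+        // As a concrete (purely illustrative) example: with a policy target of
+        // 22.1 and CockroachDB reporting `version = '22.1'`, we ensure
+        // `preserve_downgrade_option` is "22.1"; if the policy later moves to
+        // 22.2 while CockroachDB still reports '22.1', we reset the setting to
+        // "" so that finalization can proceed once the nodes are upgraded.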
+
+        let policy = self.input.target_cockroachdb_cluster_version();
+        let CockroachDbSettings { version, .. } =
+            self.input.cockroachdb_settings();
+        let value = match CockroachDbClusterVersion::from_str(version) {
+            // The current version is known to us.
+            Ok(version) => {
+                if policy > version {
+                    // Ensure `cluster.preserve_downgrade_option` is reset so we
+                    // can upgrade.
+                    CockroachDbPreserveDowngrade::AllowUpgrade
+                } else {
+                    // The cluster version is equal to or newer than the
+                    // version we want by policy. In either case, ensure
+                    // `cluster.preserve_downgrade_option` is set.
+                    CockroachDbPreserveDowngrade::Set(version)
+                }
+            }
+            // The current version is unknown to us; we are likely in the middle
+            // of a cluster upgrade.
+            Err(_) => CockroachDbPreserveDowngrade::DoNotModify,
+        };
+        self.blueprint.cockroachdb_preserve_downgrade(value);
+        info!(
+            &self.log,
+            "will ensure cockroachdb setting";
+            "setting" => "cluster.preserve_downgrade_option",
+            "value" => ?value,
+        );
+
+        // Hey! Listen!
+        //
+        // If we need to manage more CockroachDB settings, we should ensure
+        // that no settings will be modified if we don't recognize the current
+        // cluster version -- we're likely in the middle of an upgrade!
+        //
+        // https://www.cockroachlabs.com/docs/stable/cluster-settings#change-a-cluster-setting
+    }
 }
 
 /// Returns `Some(reason)` if the sled needs its zones to be expunged,
@@ -508,6 +607,9 @@ mod test {
     use nexus_types::deployment::BlueprintZoneDisposition;
     use nexus_types::deployment::BlueprintZoneFilter;
     use nexus_types::deployment::BlueprintZoneType;
+    use nexus_types::deployment::CockroachDbClusterVersion;
+    use nexus_types::deployment::CockroachDbPreserveDowngrade;
+    use nexus_types::deployment::CockroachDbSettings;
     use nexus_types::deployment::OmicronZoneNetworkResources;
     use nexus_types::external_api::views::SledPolicy;
     use nexus_types::external_api::views::SledProvisionPolicy;
@@ -1365,4 +1467,126 @@ mod test {
 
         logctx.cleanup_successful();
     }
+
+    #[test]
+    fn test_ensure_preserve_downgrade_option() {
+        static TEST_NAME: &str = "planner_ensure_preserve_downgrade_option";
+        let logctx = test_setup_log(TEST_NAME);
+
+        let (collection, input, bp1) = example(&logctx.log, TEST_NAME, 0);
+        let mut builder = input.into_builder();
+        assert!(bp1.cockroachdb_fingerprint.is_empty());
+        assert_eq!(
+            bp1.cockroachdb_setting_preserve_downgrade,
+            CockroachDbPreserveDowngrade::DoNotModify
+        );
+
+        // If `preserve_downgrade_option` is unset and the current cluster
+        // version matches `POLICY`, we ensure it is set.
+        builder.set_cockroachdb_settings(CockroachDbSettings {
+            state_fingerprint: "bp2".to_owned(),
+            version: CockroachDbClusterVersion::POLICY.to_string(),
+            preserve_downgrade: String::new(),
+        });
+        let bp2 = Planner::new_based_on(
+            logctx.log.clone(),
+            &bp1,
+            &builder.clone().build(),
+            "initial settings",
+            &collection,
+        )
+        .expect("failed to create planner")
+        .with_rng_seed((TEST_NAME, "bp2"))
+        .plan()
+        .expect("failed to plan");
+        assert_eq!(bp2.cockroachdb_fingerprint, "bp2");
+        assert_eq!(
+            bp2.cockroachdb_setting_preserve_downgrade,
+            CockroachDbClusterVersion::POLICY.into()
+        );
+
+        // If `preserve_downgrade_option` is unset and the current cluster
+        // version is known to us and _newer_ than `POLICY`, we still ensure
+        // it is set. (During a "tock" release, `POLICY == NEWLY_INITIALIZED`
+        // and this won't be materially different than the above test, but it
+        // shouldn't need to change when moving to a "tick" release.)
+        builder.set_cockroachdb_settings(CockroachDbSettings {
+            state_fingerprint: "bp3".to_owned(),
+            version: CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(),
+            preserve_downgrade: String::new(),
+        });
+        let bp3 = Planner::new_based_on(
+            logctx.log.clone(),
+            &bp1,
+            &builder.clone().build(),
+            "initial settings",
+            &collection,
+        )
+        .expect("failed to create planner")
+        .with_rng_seed((TEST_NAME, "bp3"))
+        .plan()
+        .expect("failed to plan");
+        assert_eq!(bp3.cockroachdb_fingerprint, "bp3");
+        assert_eq!(
+            bp3.cockroachdb_setting_preserve_downgrade,
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.into()
+        );
+
+        // When we run the planner again after setting the setting, the inputs
+        // will change; we should still be ensuring the setting.
+        builder.set_cockroachdb_settings(CockroachDbSettings {
+            state_fingerprint: "bp4".to_owned(),
+            version: CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(),
+            preserve_downgrade: CockroachDbClusterVersion::NEWLY_INITIALIZED
+                .to_string(),
+        });
+        let bp4 = Planner::new_based_on(
+            logctx.log.clone(),
+            &bp1,
+            &builder.clone().build(),
+            "after ensure",
+            &collection,
+        )
+        .expect("failed to create planner")
+        .with_rng_seed((TEST_NAME, "bp4"))
+        .plan()
+        .expect("failed to plan");
+        assert_eq!(bp4.cockroachdb_fingerprint, "bp4");
+        assert_eq!(
+            bp4.cockroachdb_setting_preserve_downgrade,
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.into()
+        );
+
+        // When `version` isn't recognized, do nothing regardless of the value
+        // of `preserve_downgrade`.
+        for preserve_downgrade in [
+            String::new(),
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(),
+            "definitely not a real cluster version".to_owned(),
+        ] {
+            builder.set_cockroachdb_settings(CockroachDbSettings {
+                state_fingerprint: "bp5".to_owned(),
+                version: "definitely not a real cluster version".to_owned(),
+                preserve_downgrade: preserve_downgrade.clone(),
+            });
+            let bp5 = Planner::new_based_on(
+                logctx.log.clone(),
+                &bp1,
+                &builder.clone().build(),
+                "unknown version",
+                &collection,
+            )
+            .expect("failed to create planner")
+            .with_rng_seed((TEST_NAME, format!("bp5-{}", preserve_downgrade)))
+            .plan()
+            .expect("failed to plan");
+            assert_eq!(bp5.cockroachdb_fingerprint, "bp5");
+            assert_eq!(
+                bp5.cockroachdb_setting_preserve_downgrade,
+                CockroachDbPreserveDowngrade::DoNotModify
+            );
+        }
+
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs
index e28b96dda5..74c9313e05 100644
--- a/nexus/reconfigurator/planning/src/system.rs
+++ b/nexus/reconfigurator/planning/src/system.rs
@@ -10,6 +10,8 @@ use gateway_client::types::RotState;
 use gateway_client::types::SpState;
 use indexmap::IndexMap;
 use nexus_inventory::CollectionBuilder;
+use nexus_types::deployment::CockroachDbClusterVersion;
+use nexus_types::deployment::CockroachDbSettings;
 use nexus_types::deployment::PlanningInputBuilder;
 use nexus_types::deployment::Policy;
 use nexus_types::deployment::SledDetails;
@@ -74,6 +76,7 @@ pub struct SystemDescription {
     available_non_scrimlet_slots: BTreeSet<u16>,
     available_scrimlet_slots: BTreeSet<u16>,
     target_nexus_zone_count: usize,
+    target_cockroachdb_cluster_version: CockroachDbClusterVersion,
     service_ip_pool_ranges: Vec<IpRange>,
     internal_dns_version: Generation,
     external_dns_version: Generation,
@@ -121,6 +124,8 @@ impl SystemDescription {
 
         // Policy defaults
         let target_nexus_zone_count = NEXUS_REDUNDANCY;
+        let target_cockroachdb_cluster_version =
+            CockroachDbClusterVersion::POLICY;
         // IPs from TEST-NET-1 (RFC 5737)
         let service_ip_pool_ranges = vec![IpRange::try_from((
             "192.0.2.2".parse::<Ipv4Addr>().unwrap(),
@@ -135,6 +140,7 @@ impl SystemDescription {
             available_non_scrimlet_slots,
             available_scrimlet_slots,
             target_nexus_zone_count,
+            target_cockroachdb_cluster_version,
             service_ip_pool_ranges,
             internal_dns_version: Generation::new(),
             external_dns_version: Generation::new(),
@@ -301,11 +307,14 @@ impl SystemDescription {
         let policy = Policy {
             service_ip_pool_ranges: self.service_ip_pool_ranges.clone(),
             target_nexus_zone_count: self.target_nexus_zone_count,
+            target_cockroachdb_cluster_version: self
+                .target_cockroachdb_cluster_version,
         };
         let mut builder = PlanningInputBuilder::new(
             policy,
             self.internal_dns_version,
             self.external_dns_version,
+            CockroachDbSettings::empty(),
         );
 
         for sled in self.sleds.values() {
diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt
index 8bce7cec98..03e76422e9 100644
--- a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt
+++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt
@@ -110,6 +110,10 @@ to:   blueprint  e4aeb3b3-272f-4967-be34-2d34daa46aa1
     nexus          29278a22-1ba1-4117-bfdb-39fcb9ae7fd1   in service    fd00:1122:3344:102::22
 
 
+ COCKROACHDB SETTINGS:
++   state fingerprint:::::::::::::::::   (not present in collection) -> (none)
++   cluster.preserve_downgrade_option:   (not present in collection) -> (do not modify)
+
  METADATA:
 +   internal DNS version:   (not present in collection) -> 1
 +   external DNS version:   (not present in collection) -> 1
diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt
index 5b72615bd7..0253baa9f8 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt
@@ -138,6 +138,10 @@ to:   blueprint 4171ad05-89dd-474b-846b-b007e4346366
 +   internal_ntp   2d73d30e-ca47-46a8-9c12-917d4ab824b6   in service    fd00:1122:3344:104::21
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none) (unchanged)
+    cluster.preserve_downgrade_option:   (do not modify) (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
     external DNS version:   1 (unchanged)
diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt
index 468303a56a..5a824edf84 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt
@@ -148,6 +148,10 @@ to:   blueprint f432fcd5-1284-4058-8b4a-9286a3de6163
 +   crucible       f86e19d2-9145-41cf-be89-6aaa34a73873   in service    fd00:1122:3344:104::24
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none) (unchanged)
+    cluster.preserve_downgrade_option:   (do not modify) (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
     external DNS version:   1 (unchanged)
diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt
index b939e69ba1..7219c300b7 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt
@@ -126,6 +126,10 @@ to:   blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e
 +   nexus          c8851a11-a4f7-4b21-9281-6182fd15dc8d   in service    fd00:1122:3344:102::2d
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none) (unchanged)
+    cluster.preserve_downgrade_option:   (do not modify) (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
     external DNS version:   1 (unchanged)
diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt
index ec94d5d924..3ba829b1d2 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt
@@ -97,6 +97,10 @@ WARNING: Zones exist without physical disks!
 
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none)
+    cluster.preserve_downgrade_option:   (do not modify)
+
  METADATA:
     created by:::::::::::   test_blueprint2
     created at:::::::::::   1970-01-01T00:00:00.000Z
diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt
index c5876b0b41..be2bf3c248 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt
@@ -203,6 +203,10 @@ to:   blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6
 +   nexus          c26b3bda-5561-44a1-a69f-22103fe209a1   in service    fd00:1122:3344:101::2f
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none) (unchanged)
+    cluster.preserve_downgrade_option:   (do not modify) (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
     external DNS version:   1 (unchanged)
diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
index fa61fa2758..262bd14811 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt
@@ -215,6 +215,10 @@ ERRORS:
       zone id: 7f4e9f9f-08f8-4d14-885d-e977c05525ad
       reason: mismatched underlay address: before: fd00:1122:3344:105::21, after: fd01:1122:3344:105::21
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none) (unchanged)
+    cluster.preserve_downgrade_option:   (do not modify) (unchanged)
+
  METADATA:
     internal DNS version:   1 (unchanged)
 *   external DNS version:   1 -> 2
diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
index 454ce6779e..f7c0886dde 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
@@ -161,6 +161,10 @@ WARNING: Zones exist without physical disks!
 
 
 
+ COCKROACHDB SETTINGS:
+    state fingerprint:::::::::::::::::   (none)
+    cluster.preserve_downgrade_option:   (do not modify)
+
  METADATA:
     created by:::::::::::   test_blueprint2
     created at:::::::::::   1970-01-01T00:00:00.000Z
diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs
index 305644bc93..24e9afddf8 100644
--- a/nexus/reconfigurator/preparation/src/lib.rs
+++ b/nexus/reconfigurator/preparation/src/lib.rs
@@ -16,6 +16,8 @@ use nexus_db_queries::db::pagination::Paginator;
 use nexus_db_queries::db::DataStore;
 use nexus_types::deployment::Blueprint;
 use nexus_types::deployment::BlueprintMetadata;
+use nexus_types::deployment::CockroachDbClusterVersion;
+use nexus_types::deployment::CockroachDbSettings;
 use nexus_types::deployment::OmicronZoneExternalIp;
 use nexus_types::deployment::OmicronZoneNic;
 use nexus_types::deployment::PlanningInput;
@@ -58,8 +60,10 @@ pub struct PlanningInputFromDb<'a> {
     pub external_ip_rows: &'a [nexus_db_model::ExternalIp],
     pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface],
     pub target_nexus_zone_count: usize,
+    pub target_cockroachdb_cluster_version: CockroachDbClusterVersion,
     pub internal_dns_version: nexus_db_model::Generation,
     pub external_dns_version: nexus_db_model::Generation,
+    pub cockroachdb_settings: &'a CockroachDbSettings,
     pub log: &'a Logger,
 }
 
@@ -70,11 +74,14 @@ impl PlanningInputFromDb<'_> {
         let policy = Policy {
             service_ip_pool_ranges,
             target_nexus_zone_count: self.target_nexus_zone_count,
+            target_cockroachdb_cluster_version: self
+                .target_cockroachdb_cluster_version,
         };
         let mut builder = PlanningInputBuilder::new(
             policy,
             self.internal_dns_version.into(),
             self.external_dns_version.into(),
+            self.cockroachdb_settings.clone(),
         );
 
         let mut zpools_by_sled_id = {
@@ -217,17 +224,23 @@ pub async fn reconfigurator_state_load(
         .await
         .context("fetching external DNS version")?
         .version;
+    let cockroachdb_settings = datastore
+        .cockroachdb_settings(opctx)
+        .await
+        .context("fetching cockroachdb settings")?;
 
     let planning_input = PlanningInputFromDb {
         sled_rows: &sled_rows,
         zpool_rows: &zpool_rows,
         ip_pool_range_rows: &ip_pool_range_rows,
         target_nexus_zone_count: NEXUS_REDUNDANCY,
+        target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY,
         external_ip_rows: &external_ip_rows,
         service_nic_rows: &service_nic_rows,
         log: &opctx.log,
         internal_dns_version,
         external_dns_version,
+        cockroachdb_settings: &cockroachdb_settings,
     }
     .build()
     .context("assembling planning_input")?;
diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs
index 2ac1b3fd35..69725acf1d 100644
--- a/nexus/src/app/background/blueprint_execution.rs
+++ b/nexus/src/app/background/blueprint_execution.rs
@@ -123,7 +123,7 @@ mod test {
     use nexus_types::deployment::{
         blueprint_zone_type, Blueprint, BlueprintPhysicalDisksConfig,
         BlueprintTarget, BlueprintZoneConfig, BlueprintZoneDisposition,
-        BlueprintZoneType, BlueprintZonesConfig,
+        BlueprintZoneType, BlueprintZonesConfig, CockroachDbPreserveDowngrade,
     };
     use nexus_types::external_api::views::SledState;
     use nexus_types::inventory::OmicronZoneDataset;
@@ -165,9 +165,12 @@ mod test {
                 blueprint_zones,
                 blueprint_disks,
                 sled_state,
+                cockroachdb_setting_preserve_downgrade:
+                    CockroachDbPreserveDowngrade::DoNotModify,
                 parent_blueprint_id: None,
                 internal_dns_version: dns_version,
                 external_dns_version: dns_version,
+                cockroachdb_fingerprint: String::new(),
                 time_created: chrono::Utc::now(),
                 creator: "test".to_string(),
                 comment: "test blueprint".to_string(),
diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs
index cda1d07fcb..baf86d655f 100644
--- a/nexus/src/app/background/blueprint_load.rs
+++ b/nexus/src/app/background/blueprint_load.rs
@@ -188,7 +188,9 @@ mod test {
     use crate::app::background::common::BackgroundTask;
     use nexus_inventory::now_db_precision;
     use nexus_test_utils_macros::nexus_test;
-    use nexus_types::deployment::{Blueprint, BlueprintTarget};
+    use nexus_types::deployment::{
+        Blueprint, BlueprintTarget, CockroachDbPreserveDowngrade,
+    };
     use omicron_common::api::external::Generation;
     use serde::Deserialize;
     use std::collections::BTreeMap;
@@ -212,9 +214,12 @@ mod test {
                 blueprint_zones: BTreeMap::new(),
                 blueprint_disks: BTreeMap::new(),
                 sled_state: BTreeMap::new(),
+                cockroachdb_setting_preserve_downgrade:
+                    CockroachDbPreserveDowngrade::DoNotModify,
                 parent_blueprint_id: Some(parent_blueprint_id),
                 internal_dns_version: Generation::new(),
                 external_dns_version: Generation::new(),
+                cockroachdb_fingerprint: String::new(),
                 time_created: now_db_precision(),
                 creator: "test".to_string(),
                 comment: "test blueprint".to_string(),
diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs
index 98f1f84744..280f4306c7 100644
--- a/nexus/src/app/deployment.rs
+++ b/nexus/src/app/deployment.rs
@@ -13,6 +13,7 @@ use nexus_types::deployment::Blueprint;
 use nexus_types::deployment::BlueprintMetadata;
 use nexus_types::deployment::BlueprintTarget;
 use nexus_types::deployment::BlueprintTargetSet;
+use nexus_types::deployment::CockroachDbClusterVersion;
 use nexus_types::deployment::PlanningInput;
 use nexus_types::deployment::SledFilter;
 use nexus_types::inventory::Collection;
@@ -162,6 +163,10 @@ impl super::Nexus {
                 "fetching external DNS version for blueprint planning",
             )?
             .version;
+        let cockroachdb_settings =
+            datastore.cockroachdb_settings(opctx).await.internal_context(
+                "fetching cockroachdb settings for blueprint planning",
+            )?;
 
         let planning_input = PlanningInputFromDb {
             sled_rows: &sled_rows,
@@ -170,9 +175,12 @@ impl super::Nexus {
             external_ip_rows: &external_ip_rows,
             service_nic_rows: &service_nic_rows,
             target_nexus_zone_count: NEXUS_REDUNDANCY,
+            target_cockroachdb_cluster_version:
+                CockroachDbClusterVersion::POLICY,
             log: &opctx.log,
             internal_dns_version,
             external_dns_version,
+            cockroachdb_settings: &cockroachdb_settings,
         }
         .build()?;
 
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs
index c766446f38..1327558dd4 100644
--- a/nexus/src/app/rack.rs
+++ b/nexus/src/app/rack.rs
@@ -24,6 +24,7 @@ use nexus_reconfigurator_execution::silo_dns_name;
 use nexus_types::deployment::blueprint_zone_type;
 use nexus_types::deployment::BlueprintZoneFilter;
 use nexus_types::deployment::BlueprintZoneType;
+use nexus_types::deployment::CockroachDbClusterVersion;
 use nexus_types::deployment::SledFilter;
 use nexus_types::external_api::params::Address;
 use nexus_types::external_api::params::AddressConfig;
@@ -53,6 +54,7 @@ use omicron_common::api::external::BgpPeer;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::IdentityMetadataCreateParams;
+use omicron_common::api::external::InternalContext;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::Name;
@@ -228,6 +230,33 @@ impl super::Nexus {
         let mut blueprint = request.blueprint;
         blueprint.external_dns_version = blueprint.external_dns_version.next();
 
+        // Fill in the CockroachDB metadata for the initial blueprint, and set
+        // the `cluster.preserve_downgrade_option` setting ahead of blueprint
+        // execution.
+        let cockroachdb_settings = self
+            .datastore()
+            .cockroachdb_settings(opctx)
+            .await
+            .internal_context(
+                "fetching cockroachdb settings for rack initialization",
+            )?;
+        self.datastore()
+            .cockroachdb_setting_set_string(
+                opctx,
+                cockroachdb_settings.state_fingerprint.clone(),
+                "cluster.preserve_downgrade_option",
+                CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(),
+            )
+            .await
+            .internal_context(
+                "setting `cluster.preserve_downgrade_option` \
+                for rack initialization",
+            )?;
+        blueprint.cockroachdb_fingerprint =
+            cockroachdb_settings.state_fingerprint;
+        blueprint.cockroachdb_setting_preserve_downgrade =
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.into();
+
         // Administrators of the Recovery Silo are automatically made
         // administrators of the Fleet.
         let mapped_fleet_roles = BTreeMap::from([(
diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index d4af109849..deb43c42b6 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -30,6 +30,7 @@ use nexus_types::deployment::BlueprintZoneConfig;
 use nexus_types::deployment::BlueprintZoneDisposition;
 use nexus_types::deployment::BlueprintZoneType;
 use nexus_types::deployment::BlueprintZonesConfig;
+use nexus_types::deployment::CockroachDbPreserveDowngrade;
 use nexus_types::deployment::OmicronZoneExternalFloatingAddr;
 use nexus_types::deployment::OmicronZoneExternalFloatingIp;
 use nexus_types::external_api::params::UserId;
@@ -790,6 +791,9 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
                     .try_into()
                     .expect("bad internal DNS generation"),
                 external_dns_version: Generation::new(),
+                cockroachdb_fingerprint: String::new(),
+                cockroachdb_setting_preserve_downgrade:
+                    CockroachDbPreserveDowngrade::DoNotModify,
                 time_created: Utc::now(),
                 creator: "nexus-test-utils".to_string(),
                 comment: "initial test blueprint".to_string(),
diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs
index f1be32f258..4fcd49a254 100644
--- a/nexus/types/src/deployment.rs
+++ b/nexus/types/src/deployment.rs
@@ -62,6 +62,9 @@ pub use network_resources::OmicronZoneExternalSnatIp;
 pub use network_resources::OmicronZoneNetworkResources;
 pub use network_resources::OmicronZoneNic;
 pub use network_resources::OmicronZoneNicEntry;
+pub use planning_input::CockroachDbClusterVersion;
+pub use planning_input::CockroachDbPreserveDowngrade;
+pub use planning_input::CockroachDbSettings;
 pub use planning_input::DiskFilter;
 pub use planning_input::PlanningInput;
 pub use planning_input::PlanningInputBuildError;
@@ -155,6 +158,14 @@ pub struct Blueprint {
     // See blueprint execution for more on this.
     pub external_dns_version: Generation,
 
+    /// CockroachDB state fingerprint when this blueprint was created
+    // See `nexus/db-queries/src/db/datastore/cockroachdb_settings.rs` for more
+    // on this.
+    pub cockroachdb_fingerprint: String,
+
+    /// Whether to set `cluster.preserve_downgrade_option` and what to set it to
+    pub cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade,
+
     /// when this blueprint was generated (for debugging)
     pub time_created: chrono::DateTime<chrono::Utc>,
     /// identity of the component that generated the blueprint (for debugging)
@@ -173,6 +184,10 @@ impl Blueprint {
             parent_blueprint_id: self.parent_blueprint_id,
             internal_dns_version: self.internal_dns_version,
             external_dns_version: self.external_dns_version,
+            cockroachdb_fingerprint: self.cockroachdb_fingerprint.clone(),
+            cockroachdb_setting_preserve_downgrade: Some(
+                self.cockroachdb_setting_preserve_downgrade,
+            ),
             time_created: self.time_created,
             creator: self.creator.clone(),
             comment: self.comment.clone(),
@@ -346,7 +361,28 @@ pub struct BlueprintDisplay<'a> {
 }
 
 impl<'a> BlueprintDisplay<'a> {
-    pub(super) fn make_metadata_table(&self) -> KvListWithHeading {
+    fn make_cockroachdb_table(&self) -> KvListWithHeading {
+        let fingerprint = if self.blueprint.cockroachdb_fingerprint.is_empty() {
+            NONE_PARENS.to_string()
+        } else {
+            self.blueprint.cockroachdb_fingerprint.clone()
+        };
+
+        KvListWithHeading::new_unchanged(
+            COCKROACHDB_HEADING,
+            vec![
+                (COCKROACHDB_FINGERPRINT, fingerprint),
+                (
+                    COCKROACHDB_PRESERVE_DOWNGRADE,
+                    self.blueprint
+                        .cockroachdb_setting_preserve_downgrade
+                        .to_string(),
+                ),
+            ],
+        )
+    }
+
+    fn make_metadata_table(&self) -> KvListWithHeading {
         let comment = if self.blueprint.comment.is_empty() {
             NONE_PARENS.to_string()
         } else {
@@ -446,6 +482,7 @@ impl<'a> fmt::Display for BlueprintDisplay<'a> {
             }
         }
 
+        writeln!(f, "{}", self.make_cockroachdb_table())?;
         writeln!(f, "{}", self.make_metadata_table())?;
 
         Ok(())
@@ -998,6 +1035,12 @@ pub struct BlueprintMetadata {
     pub internal_dns_version: Generation,
     /// external DNS version when this blueprint was created
     pub external_dns_version: Generation,
+    /// CockroachDB state fingerprint when this blueprint was created
+    pub cockroachdb_fingerprint: String,
+    /// Whether to set `cluster.preserve_downgrade_option` and what to set it to
+    /// (`None` if this value was retrieved from the database and was invalid)
+    pub cockroachdb_setting_preserve_downgrade:
+        Option<CockroachDbPreserveDowngrade>,
 
     /// when this blueprint was generated (for debugging)
     pub time_created: chrono::DateTime<chrono::Utc>,
diff --git a/nexus/types/src/deployment/blueprint_diff.rs b/nexus/types/src/deployment/blueprint_diff.rs
index 905dc3dd3d..0ee039b50f 100644
--- a/nexus/types/src/deployment/blueprint_diff.rs
+++ b/nexus/types/src/deployment/blueprint_diff.rs
@@ -10,7 +10,7 @@ use super::blueprint_display::{
     BpSledSubtable, BpSledSubtableColumn, BpSledSubtableData,
     BpSledSubtableRow, KvListWithHeading, KvPair,
 };
-use super::zone_sort_key;
+use super::{zone_sort_key, CockroachDbPreserveDowngrade};
 use omicron_common::api::external::Generation;
 use omicron_common::disk::DiskIdentity;
 use omicron_uuid_kinds::OmicronZoneUuid;
@@ -662,71 +662,75 @@ impl<'diff> BlueprintDiffDisplay<'diff> {
         Self { diff }
     }
 
-    pub fn make_metadata_diff_table(&self) -> KvListWithHeading {
-        let diff = self.diff;
-        let mut kv = vec![];
-        match &diff.before_meta {
-            DiffBeforeMetadata::Collection { .. } => {
-                // Collections don't have DNS versions, so this is new.
-                kv.push(KvPair::new(
-                    BpDiffState::Added,
-                    INTERNAL_DNS_VERSION,
-                    linear_table_modified(
-                        &NOT_PRESENT_IN_COLLECTION_PARENS,
-                        &diff.after_meta.internal_dns_version,
-                    ),
-                ));
-                kv.push(KvPair::new(
-                    BpDiffState::Added,
-                    EXTERNAL_DNS_VERSION,
-                    linear_table_modified(
-                        &NOT_PRESENT_IN_COLLECTION_PARENS,
-                        &diff.after_meta.external_dns_version,
-                    ),
-                ));
-            }
-            DiffBeforeMetadata::Blueprint(before) => {
-                if before.internal_dns_version
-                    != diff.after_meta.internal_dns_version
-                {
-                    kv.push(KvPair::new(
-                        BpDiffState::Modified,
-                        INTERNAL_DNS_VERSION,
-                        linear_table_modified(
-                            &before.internal_dns_version,
-                            &diff.after_meta.internal_dns_version,
-                        ),
-                    ));
-                } else {
-                    kv.push(KvPair::new(
-                        BpDiffState::Unchanged,
-                        INTERNAL_DNS_VERSION,
-                        linear_table_unchanged(&before.internal_dns_version),
-                    ));
-                };
-
-                if before.external_dns_version
-                    != diff.after_meta.external_dns_version
-                {
-                    kv.push(KvPair::new(
-                        BpDiffState::Modified,
-                        EXTERNAL_DNS_VERSION,
-                        linear_table_modified(
-                            &before.external_dns_version,
-                            &diff.after_meta.external_dns_version,
-                        ),
-                    ));
-                } else {
-                    kv.push(KvPair::new(
-                        BpDiffState::Unchanged,
-                        EXTERNAL_DNS_VERSION,
-                        linear_table_unchanged(&before.external_dns_version),
-                    ));
-                };
-            }
+    pub fn make_metadata_diff_tables(
+        &self,
+    ) -> impl IntoIterator<Item = KvListWithHeading> {
+        macro_rules! diff_row {
+            ($member:ident, $label:expr) => {
+                diff_row!($member, $label, |value| value)
+            };
+
+            ($member:ident, $label:expr, $display:expr) => {
+                match &self.diff.before_meta {
+                    DiffBeforeMetadata::Collection { .. } => {
+                        // Collections have no metadata, so this is new
+                        KvPair::new(
+                            BpDiffState::Added,
+                            $label,
+                            linear_table_modified(
+                                &NOT_PRESENT_IN_COLLECTION_PARENS,
+                                &$display(&self.diff.after_meta.$member),
+                            ),
+                        )
+                    }
+                    DiffBeforeMetadata::Blueprint(before) => {
+                        if before.$member == self.diff.after_meta.$member {
+                            KvPair::new(
+                                BpDiffState::Unchanged,
+                                $label,
+                                linear_table_unchanged(&$display(
+                                    &self.diff.after_meta.$member,
+                                )),
+                            )
+                        } else {
+                            KvPair::new(
+                                BpDiffState::Modified,
+                                $label,
+                                linear_table_modified(
+                                    &$display(&before.$member),
+                                    &$display(&self.diff.after_meta.$member),
+                                ),
+                            )
+                        }
+                    }
+                }
+            };
         }
 
-        KvListWithHeading::new(METADATA_HEADING, kv)
+        [
+            KvListWithHeading::new(
+                COCKROACHDB_HEADING,
+                vec![
+                    diff_row!(
+                        cockroachdb_fingerprint,
+                        COCKROACHDB_FINGERPRINT,
+                        display_none_if_empty
+                    ),
+                    diff_row!(
+                        cockroachdb_setting_preserve_downgrade,
+                        COCKROACHDB_PRESERVE_DOWNGRADE,
+                        display_optional_preserve_downgrade
+                    ),
+                ],
+            ),
+            KvListWithHeading::new(
+                METADATA_HEADING,
+                vec![
+                    diff_row!(internal_dns_version, INTERNAL_DNS_VERSION),
+                    diff_row!(external_dns_version, EXTERNAL_DNS_VERSION),
+                ],
+            ),
+        ]
     }
 
     /// Write out physical disk and zone tables for a given `sled_id`
@@ -847,8 +851,27 @@ impl<'diff> fmt::Display for BlueprintDiffDisplay<'diff> {
         }
 
         // Write out metadata diff table
-        writeln!(f, "{}", self.make_metadata_diff_table())?;
+        for table in self.make_metadata_diff_tables() {
+            writeln!(f, "{}", table)?;
+        }
 
         Ok(())
     }
 }
+
+fn display_none_if_empty(value: &str) -> &str {
+    if value.is_empty() {
+        NONE_PARENS
+    } else {
+        value
+    }
+}
+
+fn display_optional_preserve_downgrade(
+    value: &Option<CockroachDbPreserveDowngrade>,
+) -> String {
+    match value {
+        Some(v) => v.to_string(),
+        None => INVALID_VALUE_PARENS.to_string(),
+    }
+}
diff --git a/nexus/types/src/deployment/blueprint_display.rs b/nexus/types/src/deployment/blueprint_display.rs
index fb5c58d513..5d106b6ef3 100644
--- a/nexus/types/src/deployment/blueprint_display.rs
+++ b/nexus/types/src/deployment/blueprint_display.rs
@@ -18,6 +18,10 @@ pub mod constants {
     pub(super) const SUB_LAST: &str = "└─";
 
     pub const ARROW: &str = "->";
+    pub const COCKROACHDB_HEADING: &str = "COCKROACHDB SETTINGS";
+    pub const COCKROACHDB_FINGERPRINT: &str = "state fingerprint";
+    pub const COCKROACHDB_PRESERVE_DOWNGRADE: &str =
+        "cluster.preserve_downgrade_option";
     pub const METADATA_HEADING: &str = "METADATA";
     pub const CREATED_BY: &str = "created by";
     pub const CREATED_AT: &str = "created at";
@@ -29,6 +33,7 @@ pub mod constants {
     pub const NONE_PARENS: &str = "(none)";
     pub const NOT_PRESENT_IN_COLLECTION_PARENS: &str =
         "(not present in collection)";
+    pub const INVALID_VALUE_PARENS: &str = "(invalid value)";
 }
 use constants::*;
 
diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs
index c8cdeec15b..bb74c3655e 100644
--- a/nexus/types/src/deployment/planning_input.rs
+++ b/nexus/types/src/deployment/planning_input.rs
@@ -26,10 +26,12 @@ use omicron_uuid_kinds::OmicronZoneUuid;
 use omicron_uuid_kinds::PhysicalDiskUuid;
 use omicron_uuid_kinds::SledUuid;
 use omicron_uuid_kinds::ZpoolUuid;
+use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
 use std::collections::btree_map::Entry;
 use std::collections::BTreeMap;
+use std::fmt;
 use strum::IntoEnumIterator;
 
 /// Policy and database inputs to the Reconfigurator planner
@@ -59,6 +61,9 @@ pub struct PlanningInput {
     /// current external DNS version
     external_dns_version: Generation,
 
+    /// current CockroachDB settings
+    cockroachdb_settings: CockroachDbSettings,
+
     /// per-sled policy and resources
     sleds: BTreeMap<SledUuid, SledDetails>,
 
@@ -67,18 +72,31 @@ pub struct PlanningInput {
 }
 
 impl PlanningInput {
+    /// current internal DNS version
     pub fn internal_dns_version(&self) -> Generation {
         self.internal_dns_version
     }
 
+    /// current external DNS version
     pub fn external_dns_version(&self) -> Generation {
         self.external_dns_version
     }
 
+    /// current CockroachDB settings
+    pub fn cockroachdb_settings(&self) -> &CockroachDbSettings {
+        &self.cockroachdb_settings
+    }
+
     pub fn target_nexus_zone_count(&self) -> usize {
         self.policy.target_nexus_zone_count
     }
 
+    pub fn target_cockroachdb_cluster_version(
+        &self,
+    ) -> CockroachDbClusterVersion {
+        self.policy.target_cockroachdb_cluster_version
+    }
+
     pub fn service_ip_pool_ranges(&self) -> &[IpRange] {
         &self.policy.service_ip_pool_ranges
     }
@@ -130,12 +148,178 @@ impl PlanningInput {
             policy: self.policy,
             internal_dns_version: self.internal_dns_version,
             external_dns_version: self.external_dns_version,
+            cockroachdb_settings: self.cockroachdb_settings,
             sleds: self.sleds,
             network_resources: self.network_resources,
         }
     }
 }
 
+/// Describes the current values for any CockroachDB settings that we care
+/// about.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct CockroachDbSettings {
+    /// A fingerprint representing the current state of the cluster. This must
+    /// be recorded in a blueprint and passed to the `DataStore` function when
+    /// changing settings.
+    pub state_fingerprint: String,
+
+    /// `version`
+    ///
+    /// WARNING: This value should _not_ be used to set the
+    /// `cluster.preserve_downgrade_option` setting. It can potentially reflect
+    /// an internal, intermediate upgrade version (e.g. "22.1-12").
+    pub version: String,
+    /// `cluster.preserve_downgrade_option`
+    pub preserve_downgrade: String,
+}
+
+impl CockroachDbSettings {
+    pub const fn empty() -> CockroachDbSettings {
+        CockroachDbSettings {
+            state_fingerprint: String::new(),
+            version: String::new(),
+            preserve_downgrade: String::new(),
+        }
+    }
+}
+
+/// CockroachDB cluster versions we are aware of.
+///
+/// CockroachDB can be upgraded from one major version to the next, e.g. v22.1
+/// -> v22.2. Each major version introduces changes in how it stores data on
+/// disk to support new features, and each major version has support for reading
+/// the previous version's data so that it can perform an upgrade. The version
+/// of the data format is called the "cluster version", which is distinct from
+/// but related to the software version that's being run.
+///
+/// While software version v22.2 is using cluster version v22.1, it's possible
+/// to downgrade back to v22.1. Once the cluster version is upgraded, there's no
+/// going back.
+///
+/// To give us some time to evaluate new versions of the software while
+/// retaining a downgrade path, we currently deploy new versions of CockroachDB
+/// across two releases of the Oxide software, in a "tick-tock" model:
+///
+/// - In "tick" releases, we upgrade the version of the
+///   CockroachDB software to a new major version, and update
+///   `CockroachDbClusterVersion::NEWLY_INITIALIZED`. On upgraded racks, the new
+///   version is running with the previous cluster version; on newly-initialized
+///   racks, the new version is running with the new cluster version.
+/// - In "tock" releases, we change `CockroachDbClusterVersion::POLICY` to the
+///   major version we upgraded to in the last "tick" release. This results in a
+///   new blueprint that upgrades the cluster version, destroying the downgrade
+///   path but allowing us to eventually upgrade to the next release.
+///
+/// These variants describe major versions of CockroachDB. They must be listed
+/// in ascending order, so that the first variant is always the earliest
+/// version we are aware of.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    parse_display::Display,
+    parse_display::FromStr,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+)]
+pub enum CockroachDbClusterVersion {
+    #[display("22.1")]
+    V22_1,
+}
+
+impl CockroachDbClusterVersion {
+    /// The hardcoded CockroachDB cluster version we want to be on, used in
+    /// [`Policy`].
+    ///
+    /// /!\ WARNING: If you change this, there is no going back. /!\
+    pub const POLICY: CockroachDbClusterVersion =
+        CockroachDbClusterVersion::V22_1;
+
+    /// The CockroachDB cluster version created as part of newly-initialized
+    /// racks.
+    ///
+    /// CockroachDB knows how to create a new cluster with the current cluster
+    /// version, and how to upgrade the cluster version from the previous major
+    /// release, but it does not have any ability to create a new cluster with
+    /// the previous major release's cluster version.
+    ///
+    /// During "tick" releases, newly-initialized racks will be running
+    /// this cluster version, which will be one major version newer than the
+    /// version specified by `CockroachDbClusterVersion::POLICY`. During "tock"
+    /// releases, these versions are the same.
+    pub const NEWLY_INITIALIZED: CockroachDbClusterVersion =
+        CockroachDbClusterVersion::V22_1;
+}
+
+/// Whether to set `cluster.preserve_downgrade_option` and what to set it to.
+#[derive(
+    Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema,
+)]
+#[serde(tag = "action", content = "data", rename_all = "snake_case")]
+pub enum CockroachDbPreserveDowngrade {
+    /// Do not modify the setting.
+    DoNotModify,
+    /// Ensure the setting is set to an empty string.
+    AllowUpgrade,
+    /// Ensure the setting is set to a given cluster version.
+    Set(CockroachDbClusterVersion),
+}
+
+impl CockroachDbPreserveDowngrade {
+    pub fn from_optional_string(
+        value: &Option<String>,
+    ) -> Result<Self, parse_display::ParseError> {
+        Ok(match value {
+            Some(version) => {
+                if version.is_empty() {
+                    CockroachDbPreserveDowngrade::AllowUpgrade
+                } else {
+                    CockroachDbPreserveDowngrade::Set(version.parse()?)
+                }
+            }
+            None => CockroachDbPreserveDowngrade::DoNotModify,
+        })
+    }
+
+    pub fn to_optional_string(self) -> Option<String> {
+        match self {
+            CockroachDbPreserveDowngrade::DoNotModify => None,
+            CockroachDbPreserveDowngrade::AllowUpgrade => Some(String::new()),
+            CockroachDbPreserveDowngrade::Set(version) => {
+                Some(version.to_string())
+            }
+        }
+    }
+}
+
+impl fmt::Display for CockroachDbPreserveDowngrade {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            CockroachDbPreserveDowngrade::DoNotModify => {
+                write!(f, "(do not modify)")
+            }
+            CockroachDbPreserveDowngrade::AllowUpgrade => {
+                write!(f, "\"\" (allow upgrade)")
+            }
+            CockroachDbPreserveDowngrade::Set(version) => {
+                write!(f, "\"{}\"", version)
+            }
+        }
+    }
+}
+
+impl From<CockroachDbClusterVersion> for CockroachDbPreserveDowngrade {
+    fn from(value: CockroachDbClusterVersion) -> Self {
+        CockroachDbPreserveDowngrade::Set(value)
+    }
+}
+
 /// Describes a single disk already managed by the sled.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SledDisk {
@@ -447,6 +631,11 @@ pub struct Policy {
 
     /// desired total number of deployed Nexus zones
     pub target_nexus_zone_count: usize,
+
+    /// desired CockroachDB `cluster.preserve_downgrade_option` setting.
+    /// At present this is hardcoded based on the version of CockroachDB we
+    /// ship and the tick-tock pattern described in RFD 469.
+    pub target_cockroachdb_cluster_version: CockroachDbClusterVersion,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -483,6 +672,7 @@ pub struct PlanningInputBuilder {
     policy: Policy,
     internal_dns_version: Generation,
     external_dns_version: Generation,
+    cockroachdb_settings: CockroachDbSettings,
     sleds: BTreeMap<SledUuid, SledDetails>,
     network_resources: OmicronZoneNetworkResources,
 }
@@ -494,9 +684,12 @@ impl PlanningInputBuilder {
             policy: Policy {
                 service_ip_pool_ranges: Vec::new(),
                 target_nexus_zone_count: 0,
+                target_cockroachdb_cluster_version:
+                    CockroachDbClusterVersion::POLICY,
             },
             internal_dns_version: Generation::new(),
             external_dns_version: Generation::new(),
+            cockroachdb_settings: CockroachDbSettings::empty(),
             sleds: BTreeMap::new(),
             network_resources: OmicronZoneNetworkResources::new(),
         }
@@ -506,11 +699,13 @@ impl PlanningInputBuilder {
         policy: Policy,
         internal_dns_version: Generation,
         external_dns_version: Generation,
+        cockroachdb_settings: CockroachDbSettings,
     ) -> Self {
         Self {
             policy,
             internal_dns_version,
             external_dns_version,
+            cockroachdb_settings,
             sleds: BTreeMap::new(),
             network_resources: OmicronZoneNetworkResources::new(),
         }
@@ -574,13 +769,53 @@ impl PlanningInputBuilder {
         self.external_dns_version = new_version;
     }
 
+    pub fn set_cockroachdb_settings(
+        &mut self,
+        cockroachdb_settings: CockroachDbSettings,
+    ) {
+        self.cockroachdb_settings = cockroachdb_settings;
+    }
+
     pub fn build(self) -> PlanningInput {
         PlanningInput {
             policy: self.policy,
             internal_dns_version: self.internal_dns_version,
             external_dns_version: self.external_dns_version,
+            cockroachdb_settings: self.cockroachdb_settings,
             sleds: self.sleds,
             network_resources: self.network_resources,
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::CockroachDbClusterVersion;
+
+    #[test]
+    fn cockroachdb_cluster_versions() {
+        // This should always be true.
+        assert!(
+            CockroachDbClusterVersion::POLICY
+                <= CockroachDbClusterVersion::NEWLY_INITIALIZED
+        );
+
+        let cockroachdb_version =
+            include_str!("../../../../tools/cockroachdb_version")
+                .trim_start_matches('v')
+                .rsplit_once('.')
+                .unwrap()
+                .0;
+        assert_eq!(
+            CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(),
+            cockroachdb_version
+        );
+
+        // In the next "tick" release, this version will be stored in a
+        // different file.
+        assert_eq!(
+            CockroachDbClusterVersion::POLICY.to_string(),
+            cockroachdb_version
+        );
+    }
+}
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index ad109a18fa..f9ca60b360 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -1742,6 +1742,18 @@
               "$ref": "#/components/schemas/BlueprintZonesConfig"
             }
           },
+          "cockroachdb_fingerprint": {
+            "description": "CockroachDB state fingerprint when this blueprint was created",
+            "type": "string"
+          },
+          "cockroachdb_setting_preserve_downgrade": {
+            "description": "Whether to set `cluster.preserve_downgrade_option` and what to set it to",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/CockroachDbPreserveDowngrade"
+              }
+            ]
+          },
           "comment": {
             "description": "human-readable string describing why this blueprint was created (for debugging)",
             "type": "string"
@@ -1793,6 +1805,8 @@
         "required": [
           "blueprint_disks",
           "blueprint_zones",
+          "cockroachdb_fingerprint",
+          "cockroachdb_setting_preserve_downgrade",
           "comment",
           "creator",
           "external_dns_version",
@@ -1806,6 +1820,19 @@
         "description": "Describe high-level metadata about a blueprint",
         "type": "object",
         "properties": {
+          "cockroachdb_fingerprint": {
+            "description": "CockroachDB state fingerprint when this blueprint was created",
+            "type": "string"
+          },
+          "cockroachdb_setting_preserve_downgrade": {
+            "nullable": true,
+            "description": "Whether to set `cluster.preserve_downgrade_option` and what to set it to (`None` if this value was retrieved from the database and was invalid)",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/CockroachDbPreserveDowngrade"
+              }
+            ]
+          },
           "comment": {
             "description": "human-readable string describing why this blueprint was created (for debugging)",
             "type": "string"
@@ -1848,6 +1875,7 @@
           }
         },
         "required": [
+          "cockroachdb_fingerprint",
           "comment",
           "creator",
           "external_dns_version",
@@ -2367,6 +2395,67 @@
           "key"
         ]
       },
+      "CockroachDbClusterVersion": {
+        "description": "CockroachDB cluster versions we are aware of.\n\nCockroachDB can be upgraded from one major version to the next, e.g. v22.1 -> v22.2. Each major version introduces changes in how it stores data on disk to support new features, and each major version has support for reading the previous version's data so that it can perform an upgrade. The version of the data format is called the \"cluster version\", which is distinct from but related to the software version that's being run.\n\nWhile software version v22.2 is using cluster version v22.1, it's possible to downgrade back to v22.1. Once the cluster version is upgraded, there's no going back.\n\nTo give us some time to evaluate new versions of the software while retaining a downgrade path, we currently deploy new versions of CockroachDB across two releases of the Oxide software, in a \"tick-tock\" model:\n\n- In \"tick\" releases, we upgrade the version of the CockroachDB software to a new major version, and update `CockroachDbClusterVersion::NEWLY_INITIALIZED`. On upgraded racks, the new version is running with the previous cluster version; on newly-initialized racks, the new version is running with the new cluser version. - In \"tock\" releases, we change `CockroachDbClusterVersion::POLICY` to the major version we upgraded to in the last \"tick\" release. This results in a new blueprint that upgrades the cluster version, destroying the downgrade path but allowing us to eventually upgrade to the next release.\n\nThese presently describe major versions of CockroachDB. The order of these must be maintained in the correct order (the first variant must be the earliest version).",
+        "type": "string",
+        "enum": [
+          "V22_1"
+        ]
+      },
+      "CockroachDbPreserveDowngrade": {
+        "description": "Whether to set `cluster.preserve_downgrade_option` and what to set it to.",
+        "oneOf": [
+          {
+            "description": "Do not modify the setting.",
+            "type": "object",
+            "properties": {
+              "action": {
+                "type": "string",
+                "enum": [
+                  "do_not_modify"
+                ]
+              }
+            },
+            "required": [
+              "action"
+            ]
+          },
+          {
+            "description": "Ensure the setting is set to an empty string.",
+            "type": "object",
+            "properties": {
+              "action": {
+                "type": "string",
+                "enum": [
+                  "allow_upgrade"
+                ]
+              }
+            },
+            "required": [
+              "action"
+            ]
+          },
+          {
+            "description": "Ensure the setting is set to a given cluster version.",
+            "type": "object",
+            "properties": {
+              "action": {
+                "type": "string",
+                "enum": [
+                  "set"
+                ]
+              },
+              "data": {
+                "$ref": "#/components/schemas/CockroachDbClusterVersion"
+              }
+            },
+            "required": [
+              "action",
+              "data"
+            ]
+          }
+        ]
+      },
       "CurrentStatus": {
         "description": "Describes the current status of a background task",
         "oneOf": [
diff --git a/schema/crdb/blueprint-crdb-preserve-downgrade/up1.sql b/schema/crdb/blueprint-crdb-preserve-downgrade/up1.sql
new file mode 100644
index 0000000000..6555cd9cd2
--- /dev/null
+++ b/schema/crdb/blueprint-crdb-preserve-downgrade/up1.sql
@@ -0,0 +1,3 @@
+ALTER TABLE omicron.public.blueprint
+    ADD COLUMN IF NOT EXISTS cockroachdb_fingerprint TEXT NOT NULL DEFAULT '',
+    ADD COLUMN IF NOT EXISTS cockroachdb_setting_preserve_downgrade TEXT;
diff --git a/schema/crdb/blueprint-crdb-preserve-downgrade/up2.sql b/schema/crdb/blueprint-crdb-preserve-downgrade/up2.sql
new file mode 100644
index 0000000000..0388528071
--- /dev/null
+++ b/schema/crdb/blueprint-crdb-preserve-downgrade/up2.sql
@@ -0,0 +1,2 @@
+ALTER TABLE omicron.public.blueprint
+    ALTER COLUMN cockroachdb_fingerprint DROP DEFAULT;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 17ea6d5510..cf4ac4b20b 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -3238,7 +3238,20 @@ CREATE TABLE IF NOT EXISTS omicron.public.blueprint (
     -- identifies the latest internal DNS version when blueprint planning began
     internal_dns_version INT8 NOT NULL,
     -- identifies the latest external DNS version when blueprint planning began
-    external_dns_version INT8 NOT NULL
+    external_dns_version INT8 NOT NULL,
+    -- identifies the CockroachDB state fingerprint when blueprint planning began
+    cockroachdb_fingerprint TEXT NOT NULL,
+
+    -- CockroachDB settings managed by blueprints.
+    --
+    -- We use NULL in these columns to reflect that blueprint execution should
+    -- not modify the option; we're able to do this because CockroachDB settings
+    -- require the value to be the correct type and not NULL. There is no value
+    -- that represents "please reset this setting to the default value"; that is
+    -- represented by the presence of the default value in that field.
+    --
+    -- `cluster.preserve_downgrade_option`
+    cockroachdb_setting_preserve_downgrade TEXT
 );
 
 -- table describing both the current and historical target blueprints of the
@@ -3998,7 +4011,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '65.0.0', NULL)
+    (TRUE, NOW(), NOW(), '66.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index 27435686e7..c39ccffacd 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -93,7 +93,8 @@ use nexus_client::{
 };
 use nexus_types::deployment::{
     Blueprint, BlueprintPhysicalDisksConfig, BlueprintZoneConfig,
-    BlueprintZoneDisposition, BlueprintZonesConfig, InvalidOmicronZoneType,
+    BlueprintZoneDisposition, BlueprintZonesConfig,
+    CockroachDbPreserveDowngrade, InvalidOmicronZoneType,
 };
 use nexus_types::external_api::views::SledState;
 use omicron_common::address::get_sled_address;
@@ -1435,6 +1436,10 @@ pub(crate) fn build_initial_blueprint_from_sled_configs(
         // generation of 1. Nexus will bump this up when it updates external DNS
         // (including creating the recovery silo).
         external_dns_version: Generation::new(),
+        // Nexus will fill in the CockroachDB values during initialization.
+        cockroachdb_fingerprint: String::new(),
+        cockroachdb_setting_preserve_downgrade:
+            CockroachDbPreserveDowngrade::DoNotModify,
         time_created: Utc::now(),
         creator: "RSS".to_string(),
         comment: "initial blueprint from rack setup".to_string(),

From 88221689faaa39975dded56398abeaee2d39bae4 Mon Sep 17 00:00:00 2001
From: Adam Leventhal <ahl@oxide.computer>
Date: Fri, 24 May 2024 21:23:14 -0700
Subject: [PATCH 07/28]  Migrate more uses of ipnetwork types to oxnet types
 (#5817)

- moves uses of IpNetwork, Ipv4Network, and Ipv6Network to IpNet,
Ipv4Net, and Ipv6Net
- adds the `crates` directive to relevant `generate_api!` invocations
to automatically use `oxnet` types (see the sketch below)
- cleans up `generate_api!` uses by removing unneeded derives and
sorting types
- removes the ipnetwork dep from Cargo.toml files where possible and
sorts deps
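
As a rough sketch, the pattern applied to each client crate looks like the
following (the spec path shown here is illustrative; the real invocations
below keep their existing hooks, patches, and `replace` entries):

    progenitor::generate_api!(
        spec = "../../openapi/example-client.json",   // illustrative path
        derives = [schemars::JsonSchema],
        // Tell progenitor to reuse oxnet's IpNet/Ipv4Net/Ipv6Net types
        // instead of generating local ipnetwork-backed equivalents.
        crates = {
            "oxnet" = "0.1.0",
        },
    );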
---
 Cargo.lock                                    |   9 +-
 clients/bootstrap-agent-client/Cargo.toml     |   2 +-
 clients/bootstrap-agent-client/src/lib.rs     |  12 +-
 clients/gateway-client/src/lib.rs             |  18 +--
 .../installinator-artifact-client/src/lib.rs  |   8 +-
 clients/nexus-client/Cargo.toml               |   1 -
 clients/nexus-client/src/lib.rs               |  54 ++-----
 clients/sled-agent-client/Cargo.toml          |   9 +-
 clients/sled-agent-client/src/lib.rs          | 139 ++----------------
 clients/wicketd-client/Cargo.toml             |   3 +-
 clients/wicketd-client/src/lib.rs             |  53 +++----
 common/src/address.rs                         |  22 +--
 common/src/api/internal/shared.rs             |  15 +-
 internal-dns/src/resolver.rs                  |   2 +-
 nexus/reconfigurator/planning/Cargo.toml      |   1 -
 .../background/sync_switch_configuration.rs   |  20 +--
 nexus/src/app/rack.rs                         |  26 ++--
 openapi/bootstrap-agent.json                  |  39 +----
 openapi/nexus-internal.json                   |  39 +----
 openapi/sled-agent.json                       |  41 +-----
 openapi/wicketd.json                          |  37 +----
 schema/rss-sled-plan.json                     |  39 +----
 sled-agent/src/bootstrap/early_networking.rs  |  15 +-
 sled-agent/src/bootstrap/params.rs            |   3 +-
 sled-agent/src/rack_setup/config.rs           |  13 +-
 sled-agent/src/rack_setup/plan/service.rs     |   9 +-
 sled-agent/src/rack_setup/service.rs          |   4 +-
 sled-agent/src/sim/server.rs                  |   3 +-
 sled-agent/src/sim/sled_agent.rs              |   4 +-
 wicket-common/Cargo.toml                      |   7 +-
 wicket-common/src/rack_setup.rs               |   3 +-
 wicketd/Cargo.toml                            |  12 +-
 wicketd/src/preflight_check/uplink.rs         |   8 +-
 wicketd/src/rss_config.rs                     |   8 +-
 34 files changed, 179 insertions(+), 499 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index eba31ceca4..3060a8fae7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -692,9 +692,9 @@ dependencies = [
 name = "bootstrap-agent-client"
 version = "0.1.0"
 dependencies = [
- "ipnetwork",
  "omicron-common",
  "omicron-workspace-hack",
+ "oxnet",
  "progenitor",
  "regress",
  "reqwest",
@@ -4490,7 +4490,6 @@ version = "0.1.0"
 dependencies = [
  "chrono",
  "futures",
- "ipnetwork",
  "nexus-types",
  "omicron-common",
  "omicron-passwords",
@@ -4803,7 +4802,6 @@ dependencies = [
  "indexmap 2.2.6",
  "internal-dns",
  "ipnet",
- "ipnetwork",
  "maplit",
  "nexus-config",
  "nexus-inventory",
@@ -8716,7 +8714,6 @@ dependencies = [
  "anyhow",
  "async-trait",
  "chrono",
- "ipnetwork",
  "omicron-common",
  "omicron-uuid-kinds",
  "omicron-workspace-hack",
@@ -10991,7 +10988,6 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "gateway-client",
- "ipnetwork",
  "maplit",
  "omicron-common",
  "omicron-workspace-hack",
@@ -11064,7 +11060,6 @@ dependencies = [
  "installinator-artifactd",
  "installinator-common",
  "internal-dns",
- "ipnetwork",
  "itertools 0.12.1",
  "maplit",
  "omicron-certificates",
@@ -11076,6 +11071,7 @@ dependencies = [
  "once_cell",
  "openapi-lint",
  "openapiv3",
+ "oxnet",
  "rand 0.8.5",
  "reqwest",
  "schemars",
@@ -11110,7 +11106,6 @@ version = "0.1.0"
 dependencies = [
  "chrono",
  "installinator-common",
- "ipnetwork",
  "omicron-common",
  "omicron-workspace-hack",
  "progenitor",
diff --git a/clients/bootstrap-agent-client/Cargo.toml b/clients/bootstrap-agent-client/Cargo.toml
index 272abdedae..0b1d2fab4b 100644
--- a/clients/bootstrap-agent-client/Cargo.toml
+++ b/clients/bootstrap-agent-client/Cargo.toml
@@ -10,7 +10,6 @@ workspace = true
 [dependencies]
 omicron-common.workspace = true
 progenitor.workspace = true
-ipnetwork.workspace = true
 regress.workspace = true
 reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] }
 schemars.workspace = true
@@ -20,3 +19,4 @@ sled-hardware-types.workspace = true
 slog.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
+oxnet.workspace = true
diff --git a/clients/bootstrap-agent-client/src/lib.rs b/clients/bootstrap-agent-client/src/lib.rs
index be309cc3e2..b29f4e69f4 100644
--- a/clients/bootstrap-agent-client/src/lib.rs
+++ b/clients/bootstrap-agent-client/src/lib.rs
@@ -18,16 +18,12 @@ progenitor::generate_api!(
         slog::debug!(log, "client response"; "result" => ?result);
     }),
     derives = [schemars::JsonSchema],
+    crates = {
+        "oxnet" = "0.1.0",
+    },
     replace = {
-        ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
-        Ipv4Network = ipnetwork::Ipv4Network,
-        Ipv6Network = ipnetwork::Ipv6Network,
-        IpNetwork = ipnetwork::IpNetwork,
-        IpNet = omicron_common::api::external::IpNet,
-        Ipv4Net = omicron_common::api::external::Ipv4Net,
-        Ipv6Net = omicron_common::api::external::Ipv6Net,
-        IpAllowList = omicron_common::api::external::IpAllowList,
         AllowedSourceIps = omicron_common::api::external::AllowedSourceIps,
+        ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
     }
 );
 
diff --git a/clients/gateway-client/src/lib.rs b/clients/gateway-client/src/lib.rs
index 7dbc50eea2..6e932577a7 100644
--- a/clients/gateway-client/src/lib.rs
+++ b/clients/gateway-client/src/lib.rs
@@ -50,15 +50,15 @@ progenitor::generate_api!(
     }),
     derives = [schemars::JsonSchema],
     patch = {
-        SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq, Serialize, Deserialize] },
-        SpIgnition = { derives = [PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        SpIgnitionSystemType = { derives = [Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        SpState = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        RotState = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        RotImageDetails = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        RotSlot = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        ImageVersion = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        HostPhase2RecoveryImageId = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
+        HostPhase2RecoveryImageId = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RotSlot = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RotState = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq] },
+        SpIgnition = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        SpIgnitionSystemType = { derives = [Copy, PartialEq, Eq, PartialOrd, Ord] },
+        SpState = { derives = [PartialEq, Eq, PartialOrd, Ord] },
     },
 );
 
diff --git a/clients/installinator-artifact-client/src/lib.rs b/clients/installinator-artifact-client/src/lib.rs
index de3072a34a..96806c2cab 100644
--- a/clients/installinator-artifact-client/src/lib.rs
+++ b/clients/installinator-artifact-client/src/lib.rs
@@ -19,13 +19,13 @@ progenitor::generate_api!(
     }),
     derives = [schemars::JsonSchema],
     replace = {
+        Duration = std::time::Duration,
         EventReportForInstallinatorSpec = installinator_common::EventReport,
-        StepEventForInstallinatorSpec = installinator_common::StepEvent,
+        M2Slot = installinator_common::M2Slot,
+        ProgressEventForGenericSpec = installinator_common::ProgressEvent<update_engine::NestedSpec>,
         ProgressEventForInstallinatorSpec = installinator_common::ProgressEvent,
         StepEventForGenericSpec = installinator_common::StepEvent<update_engine::NestedSpec>,
-        ProgressEventForGenericSpec = installinator_common::ProgressEvent<update_engine::NestedSpec>,
-        M2Slot = installinator_common::M2Slot,
-        Duration = std::time::Duration,
+        StepEventForInstallinatorSpec = installinator_common::StepEvent,
     }
 );
 
diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml
index b4e299da67..1b64fa24d1 100644
--- a/clients/nexus-client/Cargo.toml
+++ b/clients/nexus-client/Cargo.toml
@@ -10,7 +10,6 @@ workspace = true
 [dependencies]
 chrono.workspace = true
 futures.workspace = true
-ipnetwork.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true
 omicron-passwords.workspace = true
diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs
index bcdd3971c0..6546af8673 100644
--- a/clients/nexus-client/src/lib.rs
+++ b/clients/nexus-client/src/lib.rs
@@ -21,6 +21,9 @@ progenitor::generate_api!(
     post_hook = (|log: &slog::Logger, result: &Result<_, _>| {
         slog::debug!(log, "client response"; "result" => ?result);
     }),
+    crates = {
+        "oxnet" = "0.1.0",
+    },
     replace = {
         // It's kind of unfortunate to pull in such a complex and unstable type
         // as "blueprint" this way, but we have really useful functionality
@@ -28,14 +31,11 @@ progenitor::generate_api!(
         Blueprint = nexus_types::deployment::Blueprint,
         Generation = omicron_common::api::external::Generation,
         ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
-        Ipv4Network = ipnetwork::Ipv4Network,
-        Ipv6Network = ipnetwork::Ipv6Network,
-        IpNetwork = ipnetwork::IpNetwork,
         MacAddr = omicron_common::api::external::MacAddr,
         Name = omicron_common::api::external::Name,
-        NewPasswordHash = omicron_passwords::NewPasswordHash,
         NetworkInterface = omicron_common::api::internal::shared::NetworkInterface,
         NetworkInterfaceKind = omicron_common::api::internal::shared::NetworkInterfaceKind,
+        NewPasswordHash = omicron_passwords::NewPasswordHash,
         TypedUuidForCollectionKind = omicron_uuid_kinds::CollectionUuid,
         TypedUuidForDownstairsKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::DownstairsKind>,
         TypedUuidForSledKind = omicron_uuid_kinds::TypedUuid<omicron_uuid_kinds::SledKind>,
@@ -419,50 +419,16 @@ impl TryFrom<types::ProducerEndpoint>
     }
 }
 
-impl TryFrom<&oxnet::Ipv4Net> for types::Ipv4Net {
-    type Error = String;
-
-    fn try_from(net: &oxnet::Ipv4Net) -> Result<Self, Self::Error> {
-        types::Ipv4Net::try_from(net.to_string()).map_err(|e| e.to_string())
-    }
-}
-
-impl TryFrom<&oxnet::Ipv6Net> for types::Ipv6Net {
-    type Error = String;
-
-    fn try_from(net: &oxnet::Ipv6Net) -> Result<Self, Self::Error> {
-        types::Ipv6Net::try_from(net.to_string()).map_err(|e| e.to_string())
-    }
-}
-
-impl TryFrom<&oxnet::IpNet> for types::IpNet {
-    type Error = String;
-
-    fn try_from(net: &oxnet::IpNet) -> Result<Self, Self::Error> {
-        use oxnet::IpNet;
-        match net {
-            IpNet::V4(v4) => types::Ipv4Net::try_from(v4).map(types::IpNet::V4),
-            IpNet::V6(v6) => types::Ipv6Net::try_from(v6).map(types::IpNet::V6),
-        }
-    }
-}
-
-impl TryFrom<&omicron_common::api::external::AllowedSourceIps>
+impl From<&omicron_common::api::external::AllowedSourceIps>
     for types::AllowedSourceIps
 {
-    type Error = String;
-
-    fn try_from(
-        ips: &omicron_common::api::external::AllowedSourceIps,
-    ) -> Result<Self, Self::Error> {
+    fn from(ips: &omicron_common::api::external::AllowedSourceIps) -> Self {
         use omicron_common::api::external::AllowedSourceIps;
         match ips {
-            AllowedSourceIps::Any => Ok(types::AllowedSourceIps::Any),
-            AllowedSourceIps::List(list) => list
-                .iter()
-                .map(TryInto::try_into)
-                .collect::<Result<Vec<_>, _>>()
-                .map(types::AllowedSourceIps::List),
+            AllowedSourceIps::Any => types::AllowedSourceIps::Any,
+            AllowedSourceIps::List(list) => {
+                types::AllowedSourceIps::List(list.iter().cloned().collect())
+            }
         }
     }
 }
diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml
index caca3c8c73..11cc5adfd7 100644
--- a/clients/sled-agent-client/Cargo.toml
+++ b/clients/sled-agent-client/Cargo.toml
@@ -12,15 +12,14 @@ anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
 omicron-common.workspace = true
+omicron-uuid-kinds.workspace = true
+omicron-workspace-hack.workspace = true
+oxnet.workspace = true
 progenitor.workspace = true
-ipnetwork.workspace = true
 regress.workspace = true
 reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] }
 schemars.workspace = true
 serde.workspace = true
+serde_json.workspace = true
 slog.workspace = true
 uuid.workspace = true
-omicron-workspace-hack.workspace = true
-omicron-uuid-kinds.workspace = true
-oxnet.workspace = true
-serde_json.workspace = true
diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs
index 24bb2a6df8..300e3713ea 100644
--- a/clients/sled-agent-client/src/lib.rs
+++ b/clients/sled-agent-client/src/lib.rs
@@ -16,7 +16,7 @@ use uuid::Uuid;
 
 progenitor::generate_api!(
     spec = "../../openapi/sled-agent.json",
-    derives = [ schemars::JsonSchema, PartialEq ],
+    derives = [schemars::JsonSchema, PartialEq],
     inner_type = slog::Logger,
     pre_hook = (|log: &slog::Logger, request: &reqwest::Request| {
         slog::debug!(log, "client request";
@@ -29,33 +29,31 @@ progenitor::generate_api!(
         slog::debug!(log, "client response"; "result" => ?result);
     }),
     patch = {
-        BfdPeerConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        BgpConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        BgpPeerConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        PortConfigV1 = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        RouteConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        IpNet = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        VirtualNetworkInterfaceHost = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] },
-        OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] },
+        BfdPeerConfig = { derives = [Eq, Hash] },
+        BgpConfig = { derives = [Eq, Hash] },
+        BgpPeerConfig = { derives = [Eq, Hash] },
+        OmicronPhysicalDiskConfig = { derives = [Eq, Hash, PartialOrd, Ord] },
+        PortConfigV1 = { derives = [Eq, Hash] },
+        RouteConfig = { derives = [Eq, Hash] },
+        VirtualNetworkInterfaceHost = { derives = [Eq, Hash] },
+    },
+    crates = {
+        "oxnet" = "0.1.0",
     },
-    //TODO trade the manual transformations later in this file for the
-    //     replace directives below?
     replace = {
         ByteCount = omicron_common::api::external::ByteCount,
         DiskIdentity = omicron_common::disk::DiskIdentity,
         Generation = omicron_common::api::external::Generation,
+        ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
         MacAddr = omicron_common::api::external::MacAddr,
         Name = omicron_common::api::external::Name,
-        SwitchLocation = omicron_common::api::external::SwitchLocation,
-        ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
-        Ipv6Network = ipnetwork::Ipv6Network,
-        IpNetwork = ipnetwork::IpNetwork,
+        NetworkInterface = omicron_common::api::internal::shared::NetworkInterface,
         PortFec = omicron_common::api::internal::shared::PortFec,
         PortSpeed = omicron_common::api::internal::shared::PortSpeed,
         SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig,
-        Vni = omicron_common::api::external::Vni,
-        NetworkInterface = omicron_common::api::internal::shared::NetworkInterface,
+        SwitchLocation = omicron_common::api::external::SwitchLocation,
         TypedUuidForZpoolKind = omicron_uuid_kinds::ZpoolUuid,
+        Vni = omicron_common::api::external::Vni,
         ZpoolKind = omicron_common::zpool_name::ZpoolKind,
         ZpoolName = omicron_common::zpool_name::ZpoolName,
     }
@@ -413,111 +411,6 @@ impl From<types::DiskState> for omicron_common::api::external::DiskState {
     }
 }
 
-impl From<oxnet::Ipv4Net> for types::Ipv4Net {
-    fn from(n: oxnet::Ipv4Net) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<oxnet::Ipv6Net> for types::Ipv6Net {
-    fn from(n: oxnet::Ipv6Net) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<oxnet::IpNet> for types::IpNet {
-    fn from(s: oxnet::IpNet) -> Self {
-        match s {
-            oxnet::IpNet::V4(v4) => Self::V4(v4.into()),
-            oxnet::IpNet::V6(v6) => Self::V6(v6.into()),
-        }
-    }
-}
-
-impl From<ipnetwork::Ipv4Network> for types::Ipv4Net {
-    fn from(n: ipnetwork::Ipv4Network) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<ipnetwork::Ipv4Network> for types::Ipv4Network {
-    fn from(n: ipnetwork::Ipv4Network) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<types::Ipv4Net> for oxnet::Ipv4Net {
-    fn from(n: types::Ipv4Net) -> Self {
-        n.parse().unwrap()
-    }
-}
-
-impl From<oxnet::Ipv4Net> for types::Ipv4Network {
-    fn from(n: oxnet::Ipv4Net) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<ipnetwork::Ipv6Network> for types::Ipv6Net {
-    fn from(n: ipnetwork::Ipv6Network) -> Self {
-        Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<types::Ipv6Net> for ipnetwork::Ipv6Network {
-    fn from(n: types::Ipv6Net) -> Self {
-        n.parse().unwrap()
-    }
-}
-
-impl From<ipnetwork::IpNetwork> for types::IpNet {
-    fn from(n: ipnetwork::IpNetwork) -> Self {
-        use ipnetwork::IpNetwork;
-        match n {
-            IpNetwork::V4(v4) => Self::V4(v4.into()),
-            IpNetwork::V6(v6) => Self::V6(v6.into()),
-        }
-    }
-}
-
-impl From<types::IpNet> for ipnetwork::IpNetwork {
-    fn from(n: types::IpNet) -> Self {
-        match n {
-            types::IpNet::V4(v4) => ipnetwork::IpNetwork::V4(v4.into()),
-            types::IpNet::V6(v6) => ipnetwork::IpNetwork::V6(v6.into()),
-        }
-    }
-}
-
-impl From<types::Ipv4Net> for ipnetwork::Ipv4Network {
-    fn from(n: types::Ipv4Net) -> Self {
-        n.parse().unwrap()
-    }
-}
-
-impl From<std::net::Ipv4Addr> for types::Ipv4Net {
-    fn from(n: std::net::Ipv4Addr) -> Self {
-        Self::try_from(format!("{n}/32"))
-            .unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<std::net::Ipv6Addr> for types::Ipv6Net {
-    fn from(n: std::net::Ipv6Addr) -> Self {
-        Self::try_from(format!("{n}/128"))
-            .unwrap_or_else(|e| panic!("{}: {}", n, e))
-    }
-}
-
-impl From<std::net::IpAddr> for types::IpNet {
-    fn from(s: std::net::IpAddr) -> Self {
-        match s {
-            IpAddr::V4(v4) => Self::V4(v4.into()),
-            IpAddr::V6(v6) => Self::V6(v6.into()),
-        }
-    }
-}
-
 impl From<omicron_common::api::external::L4PortRange> for types::L4PortRange {
     fn from(s: omicron_common::api::external::L4PortRange) -> Self {
         Self::try_from(s.to_string()).unwrap_or_else(|e| panic!("{}: {}", s, e))
@@ -578,7 +471,7 @@ impl From<omicron_common::api::internal::nexus::HostIdentifier>
     fn from(s: omicron_common::api::internal::nexus::HostIdentifier) -> Self {
         use omicron_common::api::internal::nexus::HostIdentifier::*;
         match s {
-            Ip(net) => Self::Ip(net.into()),
+            Ip(net) => Self::Ip(net),
             Vpc(vni) => Self::Vpc(vni),
         }
     }
diff --git a/clients/wicketd-client/Cargo.toml b/clients/wicketd-client/Cargo.toml
index 364cb5ec86..8e50964e59 100644
--- a/clients/wicketd-client/Cargo.toml
+++ b/clients/wicketd-client/Cargo.toml
@@ -10,8 +10,8 @@ workspace = true
 [dependencies]
 chrono.workspace = true
 installinator-common.workspace = true
-ipnetwork.workspace = true
 omicron-common.workspace = true
+omicron-workspace-hack.workspace = true
 progenitor.workspace = true
 regress.workspace = true
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
@@ -23,4 +23,3 @@ slog.workspace = true
 update-engine.workspace = true
 uuid.workspace = true
 wicket-common.workspace = true
-omicron-workspace-hack.workspace = true
diff --git a/clients/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs
index 4248c3719f..8edb797b20 100644
--- a/clients/wicketd-client/src/lib.rs
+++ b/clients/wicketd-client/src/lib.rs
@@ -18,32 +18,28 @@ progenitor::generate_api!(
         slog::debug!(log, "client response"; "result" => ?result);
     }),
     derives = [schemars::JsonSchema],
-    patch =
-        {
-        SpComponentCaboose = { derives = [PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        SpIdentifier = { derives = [Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        SpState = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] },
-        SpComponentInfo= { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        SpIgnition= { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        SpIgnitionSystemType= { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        SpInventory = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        RackV1Inventory = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        RotState = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        RotImageDetails = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        RotInventory = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        RotSlot = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        ImageVersion = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize]},
-        StartUpdateOptions = { derives = [ Serialize, Deserialize, Default ]},
-        Baseboard = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        RackInitId = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        RackResetId = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        RackOperationStatus = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        RackNetworkConfigV1 = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        UplinkConfig = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        CurrentRssUserConfigInsensitive = { derives = [ PartialEq, Serialize, Deserialize ] },
-        CurrentRssUserConfigSensitive = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
-        CurrentRssUserConfig = { derives = [ PartialEq, Serialize, Deserialize ] },
-        GetLocationResponse = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize ] },
+    patch = {
+        CurrentRssUserConfig = { derives = [PartialEq] },
+        CurrentRssUserConfigSensitive = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        GetLocationResponse = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        RackInitId = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RackNetworkConfigV1 = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RackOperationStatus = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RackResetId = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        RackV1Inventory = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        RotInventory = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        RotSlot = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        RotState = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        SpComponentCaboose = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        SpComponentInfo = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        SpIgnition = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        SpIgnitionSystemType= { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        SpInventory = { derives = [PartialEq, Eq, PartialOrd, Ord]},
+        SpState = { derives = [PartialEq, Eq, PartialOrd, Ord] },
+        StartUpdateOptions = { derives = [Default]},
+        UplinkConfig = { derives = [PartialEq, Eq, PartialOrd, Ord] },
     },
     replace = {
         AllowedSourceIps = omicron_common::api::internal::shared::AllowedSourceIps,
@@ -61,10 +57,8 @@ progenitor::generate_api!(
         Duration = std::time::Duration,
         EventReportForWicketdEngineSpec = wicket_common::update_events::EventReport,
         GetBgpAuthKeyInfoResponse = wicket_common::rack_setup::GetBgpAuthKeyInfoResponse,
-        IpNetwork = ipnetwork::IpNetwork,
+        ImportExportPolicy = omicron_common::api::internal::shared::ImportExportPolicy,
         IpRange = omicron_common::address::IpRange,
-        Ipv4Network = ipnetwork::Ipv4Network,
-        Ipv6Network = ipnetwork::Ipv6Network,
         Ipv4Range = omicron_common::address::Ipv4Range,
         Ipv6Range = omicron_common::address::Ipv6Range,
         M2Slot = installinator_common::M2Slot,
@@ -86,7 +80,6 @@ progenitor::generate_api!(
         UserSpecifiedImportExportPolicy = wicket_common::rack_setup::UserSpecifiedImportExportPolicy,
         UserSpecifiedPortConfig = wicket_common::rack_setup::UserSpecifiedPortConfig,
         UserSpecifiedRackNetworkConfig = wicket_common::rack_setup::UserSpecifiedRackNetworkConfig,
-        ImportExportPolicy = omicron_common::api::internal::shared::ImportExportPolicy,
     }
 );
 
diff --git a/common/src/address.rs b/common/src/address.rs
index b7476d6ff4..eddfb996c4 100644
--- a/common/src/address.rs
+++ b/common/src/address.rs
@@ -248,24 +248,16 @@ impl DnsSubnet {
     /// Returns the DNS server address within the subnet.
     ///
     /// This is the first address within the subnet.
-    pub fn dns_address(&self) -> Ipv6Net {
-        Ipv6Net::new(
-            self.subnet.net().nth(DNS_ADDRESS_INDEX as u128).unwrap(),
-            SLED_PREFIX,
-        )
-        .unwrap()
+    pub fn dns_address(&self) -> Ipv6Addr {
+        self.subnet.net().nth(DNS_ADDRESS_INDEX as u128).unwrap()
     }
 
     /// Returns the address which the Global Zone should create
     /// to be able to contact the DNS server.
     ///
     /// This is the second address within the subnet.
-    pub fn gz_address(&self) -> Ipv6Net {
-        Ipv6Net::new(
-            self.subnet.net().nth(GZ_ADDRESS_INDEX as u128).unwrap(),
-            SLED_PREFIX,
-        )
-        .unwrap()
+    pub fn gz_address(&self) -> Ipv6Addr {
+        self.subnet.net().nth(GZ_ADDRESS_INDEX as u128).unwrap()
     }
 }
 
@@ -304,7 +296,7 @@ pub fn get_internal_dns_server_addresses(addr: Ipv6Addr) -> Vec<IpAddr> {
         &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
     dns_subnets
         .iter()
-        .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address().addr()))
+        .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address()))
         .collect()
 }
 
@@ -686,11 +678,11 @@ mod test {
 
        // The DNS address and GZ address should differ only by one.
         assert_eq!(
-            "fd00:1122:3344:0001::1/64".parse::<Ipv6Net>().unwrap(),
+            "fd00:1122:3344:0001::1".parse::<Ipv6Addr>().unwrap(),
             dns_subnets[0].dns_address(),
         );
         assert_eq!(
-            "fd00:1122:3344:0001::2/64".parse::<Ipv6Net>().unwrap(),
+            "fd00:1122:3344:0001::2".parse::<Ipv6Addr>().unwrap(),
             dns_subnets[0].gz_address(),
         );
     }
diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs
index b0d3232eed..9e3e1a71f5 100644
--- a/common/src/api/internal/shared.rs
+++ b/common/src/api/internal/shared.rs
@@ -8,8 +8,7 @@ use crate::{
     address::NUM_SOURCE_NAT_PORTS,
     api::external::{self, BfdMode, ImportExportPolicy, Name},
 };
-use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network};
-use oxnet::IpNet;
+use oxnet::{IpNet, Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -160,7 +159,7 @@ pub type RackNetworkConfig = RackNetworkConfigV1;
 /// Initial network configuration
 #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
 pub struct RackNetworkConfigV1 {
-    pub rack_subnet: Ipv6Network,
+    pub rack_subnet: Ipv6Net,
     // TODO: #3591 Consider making infra-ip ranges implicit for uplinks
     /// First ip address to be used for configuring network infrastructure
     pub infra_ip_first: Ipv4Addr,
@@ -180,7 +179,7 @@ pub struct BgpConfig {
     /// The autonomous system number for the BGP configuration.
     pub asn: u32,
     /// The set of prefixes for the BGP router to originate.
-    pub originate: Vec<Ipv4Network>,
+    pub originate: Vec<Ipv4Net>,
 
     /// Shaper to apply to outgoing messages.
     #[serde(default)]
@@ -292,7 +291,7 @@ pub struct BfdPeerConfig {
 #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)]
 pub struct RouteConfig {
     /// The destination of the route.
-    pub destination: IpNetwork,
+    pub destination: IpNet,
     /// The nexthop/gateway address.
     pub nexthop: IpAddr,
     /// The VLAN id associated with this route.
@@ -305,7 +304,7 @@ pub struct PortConfigV1 {
     /// The set of routes associated with this port.
     pub routes: Vec<RouteConfig>,
     /// This port's addresses.
-    pub addresses: Vec<IpNetwork>,
+    pub addresses: Vec<IpNet>,
     /// Switch the port belongs to.
     pub switch: SwitchLocation,
    /// Name of the port this config applies to.
@@ -356,7 +355,7 @@ pub struct UplinkConfig {
     pub uplink_port_fec: PortFec,
     /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport
     /// (must be in infra_ip pool)
-    pub uplink_cidr: Ipv4Network,
+    pub uplink_cidr: Ipv4Net,
     /// VLAN id to use for uplink
     pub uplink_vid: Option<u16>,
 }
@@ -374,7 +373,7 @@ pub struct HostPortConfig {
 
     /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport
     /// (must be in infra_ip pool)
-    pub addrs: Vec<IpNetwork>,
+    pub addrs: Vec<IpNet>,
 }
 
 impl From<PortConfigV1> for HostPortConfig {
diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs
index 670b4b420c..cf5def01c5 100644
--- a/internal-dns/src/resolver.rs
+++ b/internal-dns/src/resolver.rs
@@ -118,7 +118,7 @@ impl Resolver {
             .get_dns_subnets()
             .into_iter()
             .map(|dns_subnet| {
-                let ip_addr = IpAddr::V6(dns_subnet.dns_address().addr());
+                let ip_addr = IpAddr::V6(dns_subnet.dns_address());
                 SocketAddr::new(ip_addr, DNS_PORT)
             })
             .collect()
diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml
index 7bbc9aa36b..989ad6aa32 100644
--- a/nexus/reconfigurator/planning/Cargo.toml
+++ b/nexus/reconfigurator/planning/Cargo.toml
@@ -14,7 +14,6 @@ gateway-client.workspace = true
 indexmap.workspace = true
 internal-dns.workspace = true
 ipnet.workspace = true
-ipnetwork.workspace = true
 nexus-config.workspace = true
 nexus-inventory.workspace = true
 nexus-types.workspace = true
diff --git a/nexus/src/app/background/sync_switch_configuration.rs b/nexus/src/app/background/sync_switch_configuration.rs
index 7efe9ef92b..54fc5b8be0 100644
--- a/nexus/src/app/background/sync_switch_configuration.rs
+++ b/nexus/src/app/background/sync_switch_configuration.rs
@@ -11,6 +11,7 @@ use crate::app::{
     },
     map_switch_zone_addrs,
 };
+use oxnet::Ipv4Net;
 use slog::o;
 
 use internal_dns::resolver::Resolver;
@@ -50,8 +51,8 @@ use omicron_common::{
 use serde_json::json;
 use sled_agent_client::types::{
     BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig,
-    EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, Ipv4Network,
-    PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig,
+    EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV1,
+    RackNetworkConfigV1, RouteConfig as SledRouteConfig,
 };
 use std::{
     collections::{hash_map::Entry, HashMap, HashSet},
@@ -868,7 +869,7 @@ impl BackgroundTask for SwitchPortSettingsManager {
 
                 // build the desired bootstore config from the records we've fetched
                 let subnet = match rack.rack_subnet {
-                    Some(IpNetwork::V6(subnet)) => subnet,
+                    Some(IpNetwork::V6(subnet)) => subnet.into(),
                     Some(IpNetwork::V4(_)) => {
                         error!(log, "rack subnet must be ipv6"; "rack" => ?rack);
                         continue;
@@ -881,14 +882,13 @@ impl BackgroundTask for SwitchPortSettingsManager {
 
                 // TODO: is this correct? Do we place the BgpConfig for both switches in a single Vec to send to the bootstore?
                 let mut bgp: Vec<SledBgpConfig> = switch_bgp_config.iter().map(|(_location, (_id, config))| {
-                    let announcements: Vec<Ipv4Network> = bgp_announce_prefixes
+                    let announcements = bgp_announce_prefixes
                         .get(&config.bgp_announce_set_id)
                         .expect("bgp config is present but announce set is not populated")
                         .iter()
                         .map(|prefix| {
-                            ipnetwork::Ipv4Network::new(prefix.value, prefix.length)
-                                .expect("Prefix4 and Ipv4Network's value types have diverged")
-                                .into()
+                            Ipv4Net::new(prefix.value, prefix.length)
+                                .expect("Prefix4 and Ipv4Net's value types have diverged")
                         }).collect();
 
                     SledBgpConfig {
@@ -923,7 +923,7 @@ impl BackgroundTask for SwitchPortSettingsManager {
                     };
 
                     let mut port_config = PortConfigV1 {
-                        addresses: info.addresses.iter().map(|a| a.address).collect(),
+                        addresses: info.addresses.iter().map(|a| a.address.into()).collect(),
                         autoneg: info
                             .links
                             .get(0) //TODO breakout support
@@ -962,7 +962,7 @@ impl BackgroundTask for SwitchPortSettingsManager {
                             .routes
                             .iter()
                             .map(|r| SledRouteConfig {
-                                destination: r.dst,
+                                destination: r.dst.into(),
                                 nexthop: r.gw.ip(),
                                 vlan_id: r.vid.map(|x| x.0),
                             })
@@ -1401,7 +1401,7 @@ fn uplinks(
         };
         let config = HostPortConfig {
             port: port.port_name.clone(),
-            addrs: config.addresses.iter().map(|a| a.address).collect(),
+            addrs: config.addresses.iter().map(|a| a.address.into()).collect(),
         };
 
         match uplinks.entry(*location) {
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs
index 1327558dd4..da97c77c04 100644
--- a/nexus/src/app/rack.rs
+++ b/nexus/src/app/rack.rs
@@ -63,6 +63,7 @@ use omicron_common::api::external::ResourceType;
 use omicron_common::api::internal::shared::ExternalPortDiscovery;
 use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::SledUuid;
+use oxnet::IpNet;
 use sled_agent_client::types::AddSledRequest;
 use sled_agent_client::types::StartSledAgentRequest;
 use sled_agent_client::types::StartSledAgentRequestBody;
@@ -286,7 +287,8 @@ impl super::Nexus {
         // The `rack` row is created with the rack ID we know when Nexus starts,
         // but we didn't know the rack subnet until now. Set it.
         let mut rack = self.rack_lookup(opctx, &self.rack_id).await?;
-        rack.rack_subnet = Some(rack_network_config.rack_subnet.into());
+        rack.rack_subnet =
+            Some(IpNet::from(rack_network_config.rack_subnet).into());
         self.datastore().update_rack_subnet(opctx, &rack).await?;
 
         // TODO - https://github.com/oxidecomputer/omicron/pull/3359
@@ -427,8 +429,8 @@ impl super::Nexus {
                             .originate
                             .iter()
                             .map(|o| AddressLotBlockCreate {
-                                first_address: o.network().into(),
-                                last_address: o.broadcast().into(),
+                                first_address: o.first_addr().into(),
+                                last_address: o.last_addr().into(),
                             })
                             .collect(),
                     },
@@ -460,13 +462,13 @@ impl super::Nexus {
                         announcement: bgp_config
                             .originate
                             .iter()
-                            .map(|x| BgpAnnouncementCreate {
+                            .map(|ipv4_net| BgpAnnouncementCreate {
                                 address_lot_block: NameOrId::Name(
                                     format!("as{}", bgp_config.asn)
                                         .parse()
                                         .unwrap(),
                                 ),
-                                network: IpNetwork::from(*x).into(),
+                                network: (*ipv4_net).into(),
                             })
                             .collect(),
                     },
@@ -552,7 +554,7 @@ impl super::Nexus {
                 .iter()
                 .map(|a| Address {
                     address_lot: NameOrId::Name(address_lot_name.clone()),
-                    address: (*a).into(),
+                    address: (*a),
                 })
                 .collect();
 
@@ -563,11 +565,7 @@ impl super::Nexus {
             let routes: Vec<Route> = uplink_config
                 .routes
                 .iter()
-                .map(|r| Route {
-                    dst: r.destination.into(),
-                    gw: r.nexthop,
-                    vid: None,
-                })
+                .map(|r| Route { dst: r.destination, gw: r.nexthop, vid: None })
                 .collect();
 
             port_settings_params
@@ -660,7 +658,8 @@ impl super::Nexus {
             .rack_set_initialized(
                 opctx,
                 RackInit {
-                    rack_subnet: rack_network_config.rack_subnet.into(),
+                    rack_subnet: IpNet::from(rack_network_config.rack_subnet)
+                        .into(),
                     rack_id,
                     blueprint,
                     physical_disks,
@@ -852,8 +851,7 @@ impl super::Nexus {
                             rack_subnet,
                             allocation.subnet_octet.try_into().unwrap(),
                         )
-                        .net()
-                        .into(),
+                        .net(),
                     },
                 },
             },
diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json
index ddfc1e91f8..b09f34ea9e 100644
--- a/openapi/bootstrap-agent.json
+++ b/openapi/bootstrap-agent.json
@@ -334,7 +334,7 @@
             "description": "The set of prefixes for the BGP router to originate.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/Ipv4Network"
+              "$ref": "#/components/schemas/Ipv4Net"
             }
           },
           "shaper": {
@@ -644,27 +644,6 @@
           }
         ]
       },
-      "IpNetwork": {
-        "x-rust-type": "ipnetwork::IpNetwork",
-        "oneOf": [
-          {
-            "title": "v4",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv4Network"
-              }
-            ]
-          },
-          {
-            "title": "v6",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv6Network"
-              }
-            ]
-          }
-        ]
-      },
       "IpRange": {
         "oneOf": [
           {
@@ -697,11 +676,6 @@
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
-      "Ipv4Network": {
-        "x-rust-type": "ipnetwork::Ipv4Network",
-        "type": "string",
-        "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$"
-      },
       "Ipv4Range": {
         "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -732,11 +706,6 @@
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
-      "Ipv6Network": {
-        "x-rust-type": "ipnetwork::Ipv6Network",
-        "type": "string",
-        "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$"
-      },
       "Ipv6Range": {
         "description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -775,7 +744,7 @@
             "description": "This port's addresses.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/IpNetwork"
+              "$ref": "#/components/schemas/IpNet"
             }
           },
           "autoneg": {
@@ -1002,7 +971,7 @@
             }
           },
           "rack_subnet": {
-            "$ref": "#/components/schemas/Ipv6Network"
+            "$ref": "#/components/schemas/Ipv6Net"
           }
         },
         "required": [
@@ -1211,7 +1180,7 @@
             "description": "The destination of the route.",
             "allOf": [
               {
-                "$ref": "#/components/schemas/IpNetwork"
+                "$ref": "#/components/schemas/IpNet"
               }
             ]
           },
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index f9ca60b360..828378eaba 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -1573,7 +1573,7 @@
             "description": "The set of prefixes for the BGP router to originate.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/Ipv4Network"
+              "$ref": "#/components/schemas/Ipv4Net"
             }
           },
           "shaper": {
@@ -3287,27 +3287,6 @@
           }
         ]
       },
-      "IpNetwork": {
-        "x-rust-type": "ipnetwork::IpNetwork",
-        "oneOf": [
-          {
-            "title": "v4",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv4Network"
-              }
-            ]
-          },
-          {
-            "title": "v6",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv6Network"
-              }
-            ]
-          }
-        ]
-      },
       "IpRange": {
         "oneOf": [
           {
@@ -3387,11 +3366,6 @@
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
-      "Ipv4Network": {
-        "x-rust-type": "ipnetwork::Ipv4Network",
-        "type": "string",
-        "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$"
-      },
       "Ipv4Range": {
         "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -3422,11 +3396,6 @@
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
-      "Ipv6Network": {
-        "x-rust-type": "ipnetwork::Ipv6Network",
-        "type": "string",
-        "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$"
-      },
       "Ipv6Range": {
         "description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -3837,7 +3806,7 @@
             "description": "This port's addresses.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/IpNetwork"
+              "$ref": "#/components/schemas/IpNet"
             }
           },
           "autoneg": {
@@ -4226,7 +4195,7 @@
             }
           },
           "rack_subnet": {
-            "$ref": "#/components/schemas/Ipv6Network"
+            "$ref": "#/components/schemas/Ipv6Net"
           }
         },
         "required": [
@@ -4351,7 +4320,7 @@
             "description": "The destination of the route.",
             "allOf": [
               {
-                "$ref": "#/components/schemas/IpNetwork"
+                "$ref": "#/components/schemas/IpNet"
               }
             ]
           },
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index 763a67910f..b975f16484 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -1475,7 +1475,7 @@
             "description": "The set of prefixes for the BGP router to originate.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/Ipv4Network"
+              "$ref": "#/components/schemas/Ipv4Net"
             }
           },
           "shaper": {
@@ -2710,7 +2710,7 @@
             "description": "IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport (must be in infra_ip pool)",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/IpNetwork"
+              "$ref": "#/components/schemas/IpNet"
             }
           },
           "port": {
@@ -3412,27 +3412,6 @@
           }
         ]
       },
-      "IpNetwork": {
-        "x-rust-type": "ipnetwork::IpNetwork",
-        "oneOf": [
-          {
-            "title": "v4",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv4Network"
-              }
-            ]
-          },
-          {
-            "title": "v6",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv6Network"
-              }
-            ]
-          }
-        ]
-      },
       "Ipv4Net": {
         "example": "192.168.1.0/24",
         "title": "An IPv4 subnet",
@@ -3445,11 +3424,6 @@
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
-      "Ipv4Network": {
-        "x-rust-type": "ipnetwork::Ipv4Network",
-        "type": "string",
-        "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$"
-      },
       "Ipv6Net": {
         "example": "fd12:3456::/64",
         "title": "An IPv6 subnet",
@@ -3462,11 +3436,6 @@
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
-      "Ipv6Network": {
-        "x-rust-type": "ipnetwork::Ipv6Network",
-        "type": "string",
-        "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$"
-      },
       "Ipv6Subnet": {
         "description": "Wraps an [`Ipv6Net`] with a compile-time prefix length.",
         "type": "object",
@@ -4087,7 +4056,7 @@
             "description": "This port's addresses.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/IpNetwork"
+              "$ref": "#/components/schemas/IpNet"
             }
           },
           "autoneg": {
@@ -4237,7 +4206,7 @@
             }
           },
           "rack_subnet": {
-            "$ref": "#/components/schemas/Ipv6Network"
+            "$ref": "#/components/schemas/Ipv6Net"
           }
         },
         "required": [
@@ -4255,7 +4224,7 @@
             "description": "The destination of the route.",
             "allOf": [
               {
-                "$ref": "#/components/schemas/IpNetwork"
+                "$ref": "#/components/schemas/IpNet"
               }
             ]
           },
diff --git a/openapi/wicketd.json b/openapi/wicketd.json
index 762fbfade0..fd8e49b6e3 100644
--- a/openapi/wicketd.json
+++ b/openapi/wicketd.json
@@ -1055,7 +1055,7 @@
             "description": "The set of prefixes for the BGP router to originate.",
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/Ipv4Network"
+              "$ref": "#/components/schemas/Ipv4Net"
             }
           },
           "shaper": {
@@ -1690,27 +1690,6 @@
           }
         ]
       },
-      "IpNetwork": {
-        "x-rust-type": "ipnetwork::IpNetwork",
-        "oneOf": [
-          {
-            "title": "v4",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv4Network"
-              }
-            ]
-          },
-          {
-            "title": "v6",
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Ipv6Network"
-              }
-            ]
-          }
-        ]
-      },
       "IpRange": {
         "oneOf": [
           {
@@ -1743,11 +1722,6 @@
         "type": "string",
         "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$"
       },
-      "Ipv4Network": {
-        "x-rust-type": "ipnetwork::Ipv4Network",
-        "type": "string",
-        "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$"
-      },
       "Ipv4Range": {
         "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -1778,11 +1752,6 @@
         "type": "string",
         "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$"
       },
-      "Ipv6Network": {
-        "x-rust-type": "ipnetwork::Ipv6Network",
-        "type": "string",
-        "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$"
-      },
       "Ipv6Range": {
         "description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
         "type": "object",
@@ -2752,7 +2721,7 @@
             "description": "The destination of the route.",
             "allOf": [
               {
-                "$ref": "#/components/schemas/IpNetwork"
+                "$ref": "#/components/schemas/IpNet"
               }
             ]
           },
@@ -5060,7 +5029,7 @@
           "addresses": {
             "type": "array",
             "items": {
-              "$ref": "#/components/schemas/IpNetwork"
+              "$ref": "#/components/schemas/IpNet"
             }
           },
           "autoneg": {
diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json
index 204dddff99..5971235634 100644
--- a/schema/rss-sled-plan.json
+++ b/schema/rss-sled-plan.json
@@ -204,7 +204,7 @@
           "description": "The set of prefixes for the BGP router to originate.",
           "type": "array",
           "items": {
-            "$ref": "#/definitions/Ipv4Network"
+            "$ref": "#/definitions/Ipv4Net"
           }
         },
         "shaper": {
@@ -500,27 +500,6 @@
         "version": "0.1.0"
       }
     },
-    "IpNetwork": {
-      "oneOf": [
-        {
-          "title": "v4",
-          "allOf": [
-            {
-              "$ref": "#/definitions/Ipv4Network"
-            }
-          ]
-        },
-        {
-          "title": "v6",
-          "allOf": [
-            {
-              "$ref": "#/definitions/Ipv6Network"
-            }
-          ]
-        }
-      ],
-      "x-rust-type": "ipnetwork::IpNetwork"
-    },
     "IpRange": {
       "oneOf": [
         {
@@ -555,11 +534,6 @@
         "version": "0.1.0"
       }
     },
-    "Ipv4Network": {
-      "type": "string",
-      "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$",
-      "x-rust-type": "ipnetwork::Ipv4Network"
-    },
     "Ipv4Range": {
       "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
       "type": "object",
@@ -592,11 +566,6 @@
         "version": "0.1.0"
       }
     },
-    "Ipv6Network": {
-      "type": "string",
-      "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$",
-      "x-rust-type": "ipnetwork::Ipv6Network"
-    },
     "Ipv6Range": {
       "description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.",
       "type": "object",
@@ -656,7 +625,7 @@
           "description": "This port's addresses.",
           "type": "array",
           "items": {
-            "$ref": "#/definitions/IpNetwork"
+            "$ref": "#/definitions/IpNet"
           }
         },
         "autoneg": {
@@ -879,7 +848,7 @@
           }
         },
         "rack_subnet": {
-          "$ref": "#/definitions/Ipv6Network"
+          "$ref": "#/definitions/Ipv6Net"
         }
       }
     },
@@ -914,7 +883,7 @@
           "description": "The destination of the route.",
           "allOf": [
             {
-              "$ref": "#/definitions/IpNetwork"
+              "$ref": "#/definitions/IpNet"
             }
           ]
         },
diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs
index 8727a01eae..bd12bb745a 100644
--- a/sled-agent/src/bootstrap/early_networking.rs
+++ b/sled-agent/src/bootstrap/early_networking.rs
@@ -14,7 +14,6 @@ use futures::future;
 use gateway_client::Client as MgsClient;
 use internal_dns::resolver::{ResolveError, Resolver as DnsResolver};
 use internal_dns::ServiceName;
-use ipnetwork::Ipv6Network;
 use mg_admin_client::types::{
     AddStaticRoute4Request, ApplyRequest, BfdPeerConfig, BgpPeerConfig,
     CheckerSource, ImportExportPolicy as MgImportExportPolicy, Prefix, Prefix4,
@@ -34,7 +33,7 @@ use omicron_common::backoff::{
 };
 use omicron_common::OMICRON_DPD_TAG;
 use omicron_ddm_admin_client::DdmError;
-use oxnet::IpNet;
+use oxnet::{IpNet, Ipv6Net};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use slog::Logger;
@@ -579,7 +578,7 @@ impl<'a> EarlyNetworkSetup<'a> {
                     originate: config
                         .originate
                         .iter()
-                        .map(|x| Prefix4 { length: x.prefix(), value: x.ip() })
+                        .map(|x| Prefix4 { length: x.width(), value: x.addr() })
                         .collect(),
                 })
                 .await
@@ -601,9 +600,9 @@ impl<'a> EarlyNetworkSetup<'a> {
                     IpAddr::V4(v4) => v4,
                     IpAddr::V6(_) => continue,
                 };
-                let prefix = match r.destination.ip() {
+                let prefix = match r.destination.addr() {
                     IpAddr::V4(v4) => {
-                        Prefix4 { value: v4, length: r.destination.prefix() }
+                        Prefix4 { value: v4, length: r.destination.width() }
                     }
                     IpAddr::V6(_) => continue,
                 };
@@ -658,7 +657,7 @@ impl<'a> EarlyNetworkSetup<'a> {
             // TODO We're discarding the `uplink_cidr.prefix()` here and only using
             // the IP address; at some point we probably need to give the full CIDR
             // to dendrite?
-            addrs.push(a.ip());
+            addrs.push(a.addr());
         }
 
         let link_settings = LinkSettings {
@@ -886,7 +885,7 @@ impl RackNetworkConfigV0 {
         v0: RackNetworkConfigV0,
     ) -> RackNetworkConfigV1 {
         RackNetworkConfigV1 {
-            rack_subnet: Ipv6Network::new(rack_subnet, 56).unwrap(),
+            rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(),
             infra_ip_first: v0.infra_ip_first,
             infra_ip_last: v0.infra_ip_last,
             ports: v0
@@ -973,7 +972,7 @@ mod tests {
             body: EarlyNetworkConfigBody {
                 ntp_servers: v0.ntp_servers.clone(),
                 rack_network_config: Some(RackNetworkConfigV1 {
-                    rack_subnet: Ipv6Network::new(v0.rack_subnet, 56).unwrap(),
+                    rack_subnet: Ipv6Net::new(v0.rack_subnet, 56).unwrap(),
                     infra_ip_first: v0_rack_network_config.infra_ip_first,
                     infra_ip_last: v0_rack_network_config.infra_ip_last,
                     ports: vec![PortConfigV1 {
diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs
index cc5c0648d6..e458900c53 100644
--- a/sled-agent/src/bootstrap/params.rs
+++ b/sled-agent/src/bootstrap/params.rs
@@ -384,6 +384,7 @@ mod tests {
 
     use super::*;
     use camino::Utf8PathBuf;
+    use oxnet::Ipv6Net;
 
     #[test]
     fn parse_rack_initialization() {
@@ -505,7 +506,7 @@ mod tests {
                 user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(),
             },
             rack_network_config: RackNetworkConfig {
-                rack_subnet: Ipv6Addr::LOCALHOST.into(),
+                rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST),
                 infra_ip_first: Ipv4Addr::LOCALHOST,
                 infra_ip_last: Ipv4Addr::LOCALHOST,
                 ports: Vec::new(),
diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs
index 9fe62d3582..e52ed14304 100644
--- a/sled-agent/src/rack_setup/config.rs
+++ b/sled-agent/src/rack_setup/config.rs
@@ -70,13 +70,15 @@ impl SetupServiceConfig {
     }
 
     pub fn az_subnet(&self) -> Ipv6Subnet<AZ_PREFIX> {
-        Ipv6Subnet::<AZ_PREFIX>::new(self.rack_network_config.rack_subnet.ip())
+        Ipv6Subnet::<AZ_PREFIX>::new(
+            self.rack_network_config.rack_subnet.addr(),
+        )
     }
 
     /// Returns the subnet for our rack.
     pub fn rack_subnet(&self) -> Ipv6Subnet<RACK_PREFIX> {
         Ipv6Subnet::<RACK_PREFIX>::new(
-            self.rack_network_config.rack_subnet.ip(),
+            self.rack_network_config.rack_subnet.addr(),
         )
     }
 
@@ -96,6 +98,7 @@ mod test {
     use omicron_common::address::IpRange;
     use omicron_common::api::internal::shared::AllowedSourceIps;
     use omicron_common::api::internal::shared::RackNetworkConfig;
+    use oxnet::Ipv6Net;
     use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
 
     #[test]
@@ -123,7 +126,11 @@ mod test {
                     .unwrap(),
             },
             rack_network_config: RackNetworkConfig {
-                rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(),
+                rack_subnet: Ipv6Net::new(
+                    "fd00:1122:3344:0100::".parse().unwrap(),
+                    RACK_PREFIX,
+                )
+                .unwrap(),
                 infra_ip_first: Ipv4Addr::LOCALHOST,
                 infra_ip_last: Ipv4Addr::LOCALHOST,
                 ports: Vec::new(),
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index a763d61923..b48e4f18b8 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -384,11 +384,11 @@ impl Plan {
             &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
         let rack_dns_servers = dns_subnets
             .into_iter()
-            .map(|dns_subnet| dns_subnet.dns_address().addr().into())
+            .map(|dns_subnet| dns_subnet.dns_address().into())
             .collect::<Vec<IpAddr>>();
         for i in 0..dns_subnets.len() {
             let dns_subnet = &dns_subnets[i];
-            let ip = dns_subnet.dns_address().addr();
+            let ip = dns_subnet.dns_address();
             let sled = {
                 let which_sled =
                     sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?;
@@ -419,7 +419,7 @@ impl Plan {
                     },
                     http_address,
                     dns_address,
-                    gz_address: dns_subnet.gz_address().addr(),
+                    gz_address: dns_subnet.gz_address(),
                     gz_address_index: i.try_into().expect("Giant indices?"),
                 },
             });
@@ -1156,6 +1156,7 @@ mod tests {
     use omicron_common::address::IpRange;
     use omicron_common::api::internal::shared::AllowedSourceIps;
     use omicron_common::api::internal::shared::RackNetworkConfig;
+    use oxnet::Ipv6Net;
 
     const EXPECTED_RESERVED_ADDRESSES: u16 = 2;
     const EXPECTED_USABLE_ADDRESSES: u16 =
@@ -1251,7 +1252,7 @@ mod tests {
                 user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY".parse().unwrap(),
             },
             rack_network_config: RackNetworkConfig {
-                rack_subnet: Ipv6Addr::LOCALHOST.into(),
+                rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST),
                 infra_ip_first: Ipv4Addr::LOCALHOST,
                 infra_ip_last: Ipv4Addr::LOCALHOST,
                 ports: Vec::new(),
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index c39ccffacd..1d8b3e7ad3 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -752,7 +752,7 @@ impl ServiceInner {
                                 vlan_id: r.vlan_id,
                             })
                             .collect(),
-                        addresses: config.addresses.clone(),
+                        addresses: config.addresses.iter().cloned().map(Into::into).collect(),
                         switch: config.switch.into(),
                         uplink_port_speed: config.uplink_port_speed.into(),
                         uplink_port_fec: config.uplink_port_fec.into(),
@@ -788,7 +788,7 @@ impl ServiceInner {
                     .iter()
                     .map(|config| NexusTypes::BgpConfig {
                         asn: config.asn,
-                        originate: config.originate.clone(),
+                        originate: config.originate.iter().cloned().map(Into::into).collect(),
                         shaper: config.shaper.clone(),
                         checker: config.checker.clone(),
                     })
diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs
index e3ce4ad4e4..ae7f40f5f3 100644
--- a/sled-agent/src/sim/server.rs
+++ b/sled-agent/src/sim/server.rs
@@ -36,6 +36,7 @@ use omicron_common::FileKv;
 use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::SledUuid;
 use omicron_uuid_kinds::ZpoolUuid;
+use oxnet::Ipv6Net;
 use slog::{info, Drain, Logger};
 use std::collections::BTreeMap;
 use std::collections::HashMap;
@@ -527,7 +528,7 @@ pub async fn run_standalone_server(
             HashMap::new(),
         ),
         rack_network_config: NexusTypes::RackNetworkConfigV1 {
-            rack_subnet: Ipv6Addr::LOCALHOST.into(),
+            rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST),
             infra_ip_first: Ipv4Addr::LOCALHOST,
             infra_ip_last: Ipv4Addr::LOCALHOST,
             ports: Vec::new(),
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index d9308bf769..742639350a 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -27,7 +27,6 @@ use anyhow::Context;
 use dropshot::{HttpError, HttpServer};
 use futures::lock::Mutex;
 use illumos_utils::opte::params::VirtualNetworkInterfaceHost;
-use ipnetwork::Ipv6Network;
 use omicron_common::api::external::{
     ByteCount, DiskState, Error, Generation, ResourceType,
 };
@@ -40,6 +39,7 @@ use omicron_common::api::internal::nexus::{
 use omicron_common::api::internal::shared::RackNetworkConfig;
 use omicron_common::disk::DiskIdentity;
 use omicron_uuid_kinds::ZpoolUuid;
+use oxnet::Ipv6Net;
 use propolis_client::{
     types::VolumeConstructionRequest, Client as PropolisClient,
 };
@@ -154,7 +154,7 @@ impl SledAgent {
             body: EarlyNetworkConfigBody {
                 ntp_servers: Vec::new(),
                 rack_network_config: Some(RackNetworkConfig {
-                    rack_subnet: Ipv6Network::new(Ipv6Addr::UNSPECIFIED, 56)
+                    rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56)
                         .unwrap(),
                     infra_ip_first: Ipv4Addr::UNSPECIFIED,
                     infra_ip_last: Ipv4Addr::UNSPECIFIED,
diff --git a/wicket-common/Cargo.toml b/wicket-common/Cargo.toml
index 685514f399..9a82b3d8bd 100644
--- a/wicket-common/Cargo.toml
+++ b/wicket-common/Cargo.toml
@@ -9,12 +9,12 @@ workspace = true
 
 [dependencies]
 anyhow.workspace = true
+gateway-client.workspace = true
+maplit.workspace = true
 omicron-common.workspace = true
+omicron-workspace-hack.workspace = true
 owo-colors.workspace = true
 oxnet.workspace = true
-gateway-client.workspace = true
-ipnetwork.workspace = true
-maplit.workspace = true
 schemars.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -22,7 +22,6 @@ sha2.workspace = true
 sled-hardware-types.workspace = true
 thiserror.workspace = true
 update-engine.workspace = true
-omicron-workspace-hack.workspace = true
 
 [dev-dependencies]
 toml.workspace = true
diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs
index 9221153398..ba88c258a5 100644
--- a/wicket-common/src/rack_setup.rs
+++ b/wicket-common/src/rack_setup.rs
@@ -6,7 +6,6 @@
 
 pub use gateway_client::types::SpIdentifier as GatewaySpIdentifier;
 pub use gateway_client::types::SpType as GatewaySpType;
-use ipnetwork::IpNetwork;
 use omicron_common::address;
 use omicron_common::api::external::ImportExportPolicy;
 use omicron_common::api::external::Name;
@@ -182,7 +181,7 @@ impl UserSpecifiedRackNetworkConfig {
 #[serde(deny_unknown_fields)]
 pub struct UserSpecifiedPortConfig {
     pub routes: Vec<RouteConfig>,
-    pub addresses: Vec<IpNetwork>,
+    pub addresses: Vec<IpNet>,
     pub uplink_port_speed: PortSpeed,
     pub uplink_port_fec: PortFec,
     pub autoneg: bool,
diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml
index fe0fa27e15..bfd8a4cf45 100644
--- a/wicketd/Cargo.toml
+++ b/wicketd/Cargo.toml
@@ -13,8 +13,8 @@ async-trait.workspace = true
 base64.workspace = true
 buf-list.workspace = true
 bytes.workspace = true
-camino.workspace = true
 camino-tempfile.workspace = true
+camino.workspace = true
 clap.workspace = true
 debug-ignore.workspace = true
 display-error-chain.workspace = true
@@ -25,28 +25,28 @@ flume.workspace = true
 futures.workspace = true
 gateway-messages.workspace = true
 hex.workspace = true
-hubtools.workspace = true
 http.workspace = true
+hubtools.workspace = true
 hyper.workspace = true
 illumos-utils.workspace = true
-ipnetwork.workspace = true
 internal-dns.workspace = true
 itertools.workspace = true
 once_cell.workspace = true
+oxnet.workspace = true
 reqwest.workspace = true
 schemars.workspace = true
 serde.workspace = true
-sha2.workspace = true
 serde_json.workspace = true
-slog.workspace = true
+sha2.workspace = true
 slog-dtrace.workspace = true
+slog.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = [ "full" ] }
 tokio-stream.workspace = true
 tokio-util.workspace = true
+toml.workspace = true
 tough.workspace = true
 trust-dns-resolver.workspace = true
-toml.workspace = true
 uuid.workspace = true
 
 bootstrap-agent-client.workspace = true
diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs
index f17580a1de..3a70823b5b 100644
--- a/wicketd/src/preflight_check/uplink.rs
+++ b/wicketd/src/preflight_check/uplink.rs
@@ -16,12 +16,12 @@ use dpd_client::ClientState as DpdClientState;
 use either::Either;
 use illumos_utils::zone::SVCCFG;
 use illumos_utils::PFEXEC;
-use ipnetwork::IpNetwork;
 use omicron_common::address::DENDRITE_PORT;
 use omicron_common::api::internal::shared::PortFec as OmicronPortFec;
 use omicron_common::api::internal::shared::PortSpeed as OmicronPortSpeed;
 use omicron_common::api::internal::shared::SwitchLocation;
 use omicron_common::OMICRON_DPD_TAG;
+use oxnet::IpNet;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
@@ -760,7 +760,7 @@ fn build_port_settings(
 
     let mut port_settings = PortSettings { links: HashMap::new() };
 
-    let addrs = uplink.addresses.iter().map(|a| a.ip()).collect();
+    let addrs = uplink.addresses.iter().map(|a| a.addr()).collect();
 
     port_settings.links.insert(
         link_id.to_string(),
@@ -777,7 +777,7 @@ fn build_port_settings(
     );
 
     for r in &uplink.routes {
-        if let (IpNetwork::V4(_dst), IpAddr::V4(_nexthop)) =
+        if let (IpNet::V4(_dst), IpAddr::V4(_nexthop)) =
             (r.destination, r.nexthop)
         {
             // TODO: do we need to create config for mgd?
@@ -895,7 +895,7 @@ pub(crate) enum UplinkPreflightTerminalError {
     #[error(
         "failed to remove host OS route {destination} -> {nexthop}: {err}"
     )]
-    RemoveHostRoute { err: String, destination: IpNetwork, nexthop: IpAddr },
+    RemoveHostRoute { err: String, destination: IpNet, nexthop: IpAddr },
     #[error("failed to remove uplink SMF property {property:?}: {err}")]
     RemoveSmfProperty { property: String, err: String },
     #[error("failed to refresh uplink service config: {0}")]
diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs
index c90f672500..77e107a129 100644
--- a/wicketd/src/rss_config.rs
+++ b/wicketd/src/rss_config.rs
@@ -631,8 +631,8 @@ fn validate_rack_network_config(
     for (_, _, port_config) in config.iter_uplinks() {
         for addr in &port_config.addresses {
             // ... and check that it contains `uplink_ip`.
-            if addr.ip() < infra_ip_range.first
-                || addr.ip() > infra_ip_range.last
+            if addr.addr() < infra_ip_range.first
+                || addr.addr() > infra_ip_range.last
             {
                 bail!(
                 "`uplink_cidr`'s IP address must be in the range defined by \
@@ -652,7 +652,7 @@ fn validate_rack_network_config(
     // TODO Add more client side checks on `rack_network_config` contents?
 
     Ok(bootstrap_agent_client::types::RackNetworkConfigV1 {
-        rack_subnet: RACK_SUBNET.net().into(),
+        rack_subnet: RACK_SUBNET.net(),
         infra_ip_first: config.infra_ip_first,
         infra_ip_last: config.infra_ip_last,
         ports: config
@@ -704,7 +704,7 @@ fn build_port_config(
                 vlan_id: r.vlan_id,
             })
             .collect(),
-        addresses: config.addresses.clone(),
+        addresses: config.addresses.iter().cloned().map(Into::into).collect(),
         bgp_peers: config
             .bgp_peers
             .iter()

From 32834e3a70a0700f14fce675cdc0b8cf1d5acf85 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Tue, 28 May 2024 15:37:49 -0700
Subject: [PATCH 08/28] fix `clippy::redundant_closure_call` warning (#5827)

The `diff_row!` macro in `blueprint_diff.rs` has recently started
generating [`clippy::redundant_closure_call`][1] warnings, because
invocations without a `display` function pass in a closure that performs
an identity operation (i.e., it just returns the value). Rather than
allowing the warning, which would also have been fine, I've changed this
arm of the macro to pass the named function `std::convert::identity`
instead, which is equivalent but eliminates the closure.

[1]:
https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_call
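
For illustration, here is a minimal self-contained sketch of the pattern
(the macro and names are hypothetical, not the real `diff_row!` arms): an
arm without a display function forwards to the two-argument arm, and
passing `std::convert::identity` instead of `|value| value` keeps the
expansion from containing an immediately-called closure.

```
macro_rules! show {
    // Arm without a display function: forward to the two-argument arm.
    // Passing a closure like `|value| value` here would make the expansion
    // contain `(|value| value)($value)`, which is what trips
    // `clippy::redundant_closure_call`; the named function does not.
    ($value:expr) => {
        show!($value, std::convert::identity)
    };
    // Arm with an explicit display function.
    ($value:expr, $display:expr) => {
        println!("{}", $display($value))
    };
}

fn main() {
    show!("unchanged");                // uses the identity arm
    show!("loud", str::to_uppercase);  // uses an explicit display fn
}
```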
---
 nexus/types/src/deployment/blueprint_diff.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nexus/types/src/deployment/blueprint_diff.rs b/nexus/types/src/deployment/blueprint_diff.rs
index 0ee039b50f..17631e692d 100644
--- a/nexus/types/src/deployment/blueprint_diff.rs
+++ b/nexus/types/src/deployment/blueprint_diff.rs
@@ -667,7 +667,7 @@ impl<'diff> BlueprintDiffDisplay<'diff> {
     ) -> impl IntoIterator<Item = KvListWithHeading> {
         macro_rules! diff_row {
             ($member:ident, $label:expr) => {
-                diff_row!($member, $label, |value| value)
+                diff_row!($member, $label, std::convert::identity)
             };
 
             ($member:ident, $label:expr, $display:expr) => {

From a485a4c6e6e39523d13d07b67ebd444a04291453 Mon Sep 17 00:00:00 2001
From: iliana etaoin <iliana@oxide.computer>
Date: Tue, 28 May 2024 16:27:51 -0700
Subject: [PATCH 09/28] Bump version to 9.0.0 (#5826)

---
 dev-tools/releng/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs
index 445090115d..9bb0cd33bb 100644
--- a/dev-tools/releng/src/main.rs
+++ b/dev-tools/releng/src/main.rs
@@ -41,7 +41,7 @@ use crate::job::Jobs;
 /// to as "v8", "version 8", or "release 8" to customers). The use of semantic
 /// versioning is mostly to hedge for perhaps wanting something more granular in
 /// the future.
-const BASE_VERSION: Version = Version::new(8, 0, 0);
+const BASE_VERSION: Version = Version::new(9, 0, 0);
 
 #[derive(Debug, Clone, Copy)]
 enum InstallMethod {

From 23818526491ee75063b1704a8d746dd25dba5e27 Mon Sep 17 00:00:00 2001
From: James MacMahon <james@oxide.computer>
Date: Wed, 29 May 2024 13:44:22 -0400
Subject: [PATCH 10/28] Don't recurse through VolumeConstructionRequests
 (#5825)

VolumeConstructionRequest objects can be arbitrarily deep, as customers
are not restricted in how they can layer disks and snapshots. A few
functions recurse through these objects: change those to use an
iterative approach instead, to avoid hitting any recursion limits.

Fixes #5815
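
As a reference for the pattern used throughout, here is a minimal
self-contained sketch (the `Node` enum is a simplified, hypothetical
stand-in for `VolumeConstructionRequest`, not the real type): a `VecDeque`
of references takes the place of the call stack, so arbitrarily deep trees
can be walked without deep recursion.

```
use std::collections::VecDeque;

// Hypothetical stand-in for VolumeConstructionRequest: a tree where volumes
// contain sub-volumes and an optional read-only parent.
enum Node {
    Volume { sub_volumes: Vec<Node>, read_only_parent: Option<Box<Node>> },
    Region { target: String },
}

// Iterative traversal: instead of recursing, push child nodes onto a work
// queue and process them until the queue is empty.
fn collect_targets(root: &Node) -> Vec<String> {
    let mut targets = Vec::new();
    let mut parts: VecDeque<&Node> = VecDeque::new();
    parts.push_back(root);

    while let Some(part) = parts.pop_front() {
        match part {
            Node::Volume { sub_volumes, read_only_parent } => {
                for sub_volume in sub_volumes {
                    parts.push_back(sub_volume);
                }
                if let Some(read_only_parent) = read_only_parent {
                    parts.push_back(read_only_parent);
                }
            }
            Node::Region { target } => targets.push(target.clone()),
        }
    }

    targets
}

fn main() {
    let vcr = Node::Volume {
        sub_volumes: vec![Node::Region { target: "[fd00::1]:19000".to_string() }],
        read_only_parent: Some(Box::new(Node::Region {
            target: "[fd00::2]:19000".to_string(),
        })),
    };
    println!("{:?}", collect_targets(&vcr));
}
```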
---
 nexus/db-queries/src/db/datastore/volume.rs | 299 +++++++++-----------
 nexus/src/app/sagas/disk_create.rs          |  89 +++---
 nexus/src/app/sagas/snapshot_create.rs      | 128 ++++-----
 sled-agent/src/sim/sled_agent.rs            |  49 ++--
 4 files changed, 248 insertions(+), 317 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index a7b9273aa8..294cd2decf 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -45,6 +45,7 @@ use serde::Deserialize;
 use serde::Deserializer;
 use serde::Serialize;
 use sled_agent_client::types::VolumeConstructionRequest;
+use std::collections::VecDeque;
 use std::net::SocketAddrV6;
 use uuid::Uuid;
 
@@ -690,78 +691,56 @@ impl DataStore {
     pub fn randomize_ids(
         vcr: &VolumeConstructionRequest,
     ) -> anyhow::Result<VolumeConstructionRequest> {
-        match vcr {
-            VolumeConstructionRequest::Volume {
-                id: _,
-                block_size,
-                sub_volumes,
-                read_only_parent,
-            } => Ok(VolumeConstructionRequest::Volume {
-                id: Uuid::new_v4(),
-                block_size: *block_size,
-                sub_volumes: sub_volumes
-                    .iter()
-                    .map(
-                        |subvol| -> anyhow::Result<VolumeConstructionRequest> {
-                            Self::randomize_ids(&subvol)
-                        },
-                    )
-                    .collect::<anyhow::Result<Vec<VolumeConstructionRequest>>>(
-                    )?,
-                read_only_parent: if let Some(read_only_parent) =
-                    read_only_parent
-                {
-                    Some(Box::new(Self::randomize_ids(read_only_parent)?))
-                } else {
-                    None
-                },
-            }),
+        let mut new_vcr = vcr.clone();
 
-            VolumeConstructionRequest::Url { id: _, block_size, url } => {
-                Ok(VolumeConstructionRequest::Url {
-                    id: Uuid::new_v4(),
-                    block_size: *block_size,
-                    url: url.clone(),
-                })
-            }
+        let mut parts: VecDeque<&mut VolumeConstructionRequest> =
+            VecDeque::new();
+        parts.push_back(&mut new_vcr);
 
-            VolumeConstructionRequest::Region {
-                block_size,
-                blocks_per_extent,
-                extent_count,
-                opts,
-                gen,
-            } => {
-                if !opts.read_only {
-                    // Only one volume can "own" a Region, and that volume's
-                    // UUID is recorded in the region table accordingly. It is
-                    // an error to make a copy of a volume construction request
-                    // that references non-read-only Regions.
-                    bail!(
-                        "only one Volume can reference a Region non-read-only!"
-                    );
+        while let Some(vcr_part) = parts.pop_front() {
+            match vcr_part {
+                VolumeConstructionRequest::Volume {
+                    id,
+                    sub_volumes,
+                    read_only_parent,
+                    ..
+                } => {
+                    *id = Uuid::new_v4();
+
+                    for sub_volume in sub_volumes {
+                        parts.push_back(sub_volume);
+                    }
+
+                    if let Some(read_only_parent) = read_only_parent {
+                        parts.push_back(read_only_parent);
+                    }
                 }
 
-                let mut opts = opts.clone();
-                opts.id = Uuid::new_v4();
+                VolumeConstructionRequest::Url { id, .. } => {
+                    *id = Uuid::new_v4();
+                }
 
-                Ok(VolumeConstructionRequest::Region {
-                    block_size: *block_size,
-                    blocks_per_extent: *blocks_per_extent,
-                    extent_count: *extent_count,
-                    opts,
-                    gen: *gen,
-                })
-            }
+                VolumeConstructionRequest::Region { opts, .. } => {
+                    if !opts.read_only {
+                        // Only one volume can "own" a Region, and that volume's
+                        // UUID is recorded in the region table accordingly. It is
+                        // an error to make a copy of a volume construction request
+                        // that references non-read-only Regions.
+                        bail!(
+                            "only one Volume can reference a Region non-read-only!"
+                        );
+                    }
 
-            VolumeConstructionRequest::File { id: _, block_size, path } => {
-                Ok(VolumeConstructionRequest::File {
-                    id: Uuid::new_v4(),
-                    block_size: *block_size,
-                    path: path.clone(),
-                })
+                    opts.id = Uuid::new_v4();
+                }
+
+                VolumeConstructionRequest::File { id, .. } => {
+                    *id = Uuid::new_v4();
+                }
             }
         }
+
+        Ok(new_vcr)
     }
 
     /// Checkout a copy of the Volume from the database using `volume_checkout`,
@@ -1901,48 +1880,40 @@ pub fn read_only_resources_associated_with_volume(
     vcr: &VolumeConstructionRequest,
     crucible_targets: &mut CrucibleTargets,
 ) {
-    match vcr {
-        VolumeConstructionRequest::Volume {
-            id: _,
-            block_size: _,
-            sub_volumes,
-            read_only_parent,
-        } => {
-            for sub_volume in sub_volumes {
-                read_only_resources_associated_with_volume(
-                    sub_volume,
-                    crucible_targets,
-                );
-            }
+    let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(&vcr);
 
-            if let Some(read_only_parent) = read_only_parent {
-                read_only_resources_associated_with_volume(
-                    read_only_parent,
-                    crucible_targets,
-                );
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume {
+                sub_volumes,
+                read_only_parent,
+                ..
+            } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
+
+                if let Some(read_only_parent) = read_only_parent {
+                    parts.push_back(read_only_parent);
+                }
             }
-        }
 
-        VolumeConstructionRequest::Url { id: _, block_size: _, url: _ } => {
-            // no action required
-        }
+            VolumeConstructionRequest::Url { .. } => {
+                // no action required
+            }
 
-        VolumeConstructionRequest::Region {
-            block_size: _,
-            blocks_per_extent: _,
-            extent_count: _,
-            opts,
-            gen: _,
-        } => {
-            for target in &opts.target {
-                if opts.read_only {
-                    crucible_targets.read_only_targets.push(target.clone());
+            VolumeConstructionRequest::Region { opts, .. } => {
+                for target in &opts.target {
+                    if opts.read_only {
+                        crucible_targets.read_only_targets.push(target.clone());
+                    }
                 }
             }
-        }
 
-        VolumeConstructionRequest::File { id: _, block_size: _, path: _ } => {
-            // no action required
+            VolumeConstructionRequest::File { .. } => {
+                // no action required
+            }
         }
     }
 }
@@ -2005,67 +1976,52 @@ fn replace_region_in_vcr(
     old_region: SocketAddrV6,
     new_region: SocketAddrV6,
 ) -> anyhow::Result<VolumeConstructionRequest> {
-    match vcr {
-        VolumeConstructionRequest::Volume {
-            id,
-            block_size,
-            sub_volumes,
-            read_only_parent,
-        } => Ok(VolumeConstructionRequest::Volume {
-            id: *id,
-            block_size: *block_size,
-            sub_volumes: sub_volumes
-                .iter()
-                .map(|subvol| -> anyhow::Result<VolumeConstructionRequest> {
-                    replace_region_in_vcr(&subvol, old_region, new_region)
-                })
-                .collect::<anyhow::Result<Vec<VolumeConstructionRequest>>>()?,
+    let mut new_vcr = vcr.clone();
 
-            // Only replacing R/W regions
-            read_only_parent: read_only_parent.clone(),
-        }),
+    let mut parts: VecDeque<&mut VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(&mut new_vcr);
 
-        VolumeConstructionRequest::Url { id, block_size, url } => {
-            Ok(VolumeConstructionRequest::Url {
-                id: *id,
-                block_size: *block_size,
-                url: url.clone(),
-            })
-        }
+    let mut old_region_found = false;
 
-        VolumeConstructionRequest::Region {
-            block_size,
-            blocks_per_extent,
-            extent_count,
-            opts,
-            gen,
-        } => {
-            let mut opts = opts.clone();
-
-            for target in &mut opts.target {
-                let parsed_target: SocketAddrV6 = target.parse()?;
-                if parsed_target == old_region {
-                    *target = new_region.to_string();
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume { sub_volumes, .. } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
                 }
+
+                // Skip looking at read-only parent, this function only replaces
+                // R/W regions
             }
 
-            Ok(VolumeConstructionRequest::Region {
-                block_size: *block_size,
-                blocks_per_extent: *blocks_per_extent,
-                extent_count: *extent_count,
-                opts,
-                gen: *gen + 1,
-            })
-        }
+            VolumeConstructionRequest::Url { .. } => {
+                // nothing required
+            }
 
-        VolumeConstructionRequest::File { id, block_size, path } => {
-            Ok(VolumeConstructionRequest::File {
-                id: *id,
-                block_size: *block_size,
-                path: path.clone(),
-            })
+            VolumeConstructionRequest::Region { opts, gen, .. } => {
+                for target in &mut opts.target {
+                    let parsed_target: SocketAddrV6 = target.parse()?;
+                    if parsed_target == old_region {
+                        *target = new_region.to_string();
+                        old_region_found = true;
+                    }
+                }
+
+                // Bump generation number, otherwise update will be rejected
+                *gen = *gen + 1;
+            }
+
+            VolumeConstructionRequest::File { .. } => {
+                // nothing required
+            }
         }
     }
+
+    if !old_region_found {
+        bail!("old region {old_region} not found!");
+    }
+
+    Ok(new_vcr)
 }
 
 /// Find Regions in a Volume's subvolumes list whose target match the argument
@@ -2075,31 +2031,36 @@ fn find_matching_rw_regions_in_volume(
     ip: &std::net::Ipv6Addr,
     matched_targets: &mut Vec<SocketAddrV6>,
 ) -> anyhow::Result<()> {
-    match vcr {
-        VolumeConstructionRequest::Volume { sub_volumes, .. } => {
-            for sub_volume in sub_volumes {
-                find_matching_rw_regions_in_volume(
-                    sub_volume,
-                    ip,
-                    matched_targets,
-                )?;
+    let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(vcr);
+
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume { sub_volumes, .. } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
             }
-        }
 
-        VolumeConstructionRequest::Url { .. } => {}
+            VolumeConstructionRequest::Url { .. } => {
+                // nothing required
+            }
 
-        VolumeConstructionRequest::Region { opts, .. } => {
-            if !opts.read_only {
-                for target in &opts.target {
-                    let parsed_target: SocketAddrV6 = target.parse()?;
-                    if parsed_target.ip() == ip {
-                        matched_targets.push(parsed_target);
+            VolumeConstructionRequest::Region { opts, .. } => {
+                if !opts.read_only {
+                    for target in &opts.target {
+                        let parsed_target: SocketAddrV6 = target.parse()?;
+                        if parsed_target.ip() == ip {
+                            matched_targets.push(parsed_target);
+                        }
                     }
                 }
             }
-        }
 
-        VolumeConstructionRequest::File { .. } => {}
+            VolumeConstructionRequest::File { .. } => {
+                // nothing required
+            }
+        }
     }
 
     Ok(())
diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs
index 5e1d386ed1..ee90f72862 100644
--- a/nexus/src/app/sagas/disk_create.rs
+++ b/nexus/src/app/sagas/disk_create.rs
@@ -22,6 +22,7 @@ use rand::{rngs::StdRng, RngCore, SeedableRng};
 use serde::Deserialize;
 use serde::Serialize;
 use sled_agent_client::types::{CrucibleOpts, VolumeConstructionRequest};
+use std::collections::VecDeque;
 use std::convert::TryFrom;
 use std::net::SocketAddrV6;
 use steno::ActionError;
@@ -769,65 +770,45 @@ async fn sdc_call_pantry_attach_for_disk_undo(
 fn randomize_volume_construction_request_ids(
     input: &VolumeConstructionRequest,
 ) -> anyhow::Result<VolumeConstructionRequest> {
-    match input {
-        VolumeConstructionRequest::Volume {
-            id: _,
-            block_size,
-            sub_volumes,
-            read_only_parent,
-        } => Ok(VolumeConstructionRequest::Volume {
-            id: Uuid::new_v4(),
-            block_size: *block_size,
-            sub_volumes: sub_volumes
-                .iter()
-                .map(|subvol| -> anyhow::Result<VolumeConstructionRequest> {
-                    randomize_volume_construction_request_ids(&subvol)
-                })
-                .collect::<anyhow::Result<Vec<VolumeConstructionRequest>>>()?,
-            read_only_parent: if let Some(read_only_parent) = read_only_parent {
-                Some(Box::new(randomize_volume_construction_request_ids(
-                    read_only_parent,
-                )?))
-            } else {
-                None
-            },
-        }),
+    let mut new_vcr = input.clone();
+
+    let mut parts: VecDeque<&mut VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(&mut new_vcr);
+
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume {
+                id,
+                sub_volumes,
+                read_only_parent,
+                ..
+            } => {
+                *id = Uuid::new_v4();
+
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
 
-        VolumeConstructionRequest::Url { id: _, block_size, url } => {
-            Ok(VolumeConstructionRequest::Url {
-                id: Uuid::new_v4(),
-                block_size: *block_size,
-                url: url.clone(),
-            })
-        }
+                if let Some(read_only_parent) = read_only_parent {
+                    parts.push_back(read_only_parent);
+                }
+            }
 
-        VolumeConstructionRequest::Region {
-            block_size,
-            blocks_per_extent,
-            extent_count,
-            opts,
-            gen,
-        } => {
-            let mut opts = opts.clone();
-            opts.id = Uuid::new_v4();
-
-            Ok(VolumeConstructionRequest::Region {
-                block_size: *block_size,
-                blocks_per_extent: *blocks_per_extent,
-                extent_count: *extent_count,
-                opts,
-                gen: *gen,
-            })
-        }
+            VolumeConstructionRequest::Url { id, .. } => {
+                *id = Uuid::new_v4();
+            }
 
-        VolumeConstructionRequest::File { id: _, block_size, path } => {
-            Ok(VolumeConstructionRequest::File {
-                id: Uuid::new_v4(),
-                block_size: *block_size,
-                path: path.clone(),
-            })
+            VolumeConstructionRequest::Region { opts, .. } => {
+                opts.id = Uuid::new_v4();
+            }
+
+            VolumeConstructionRequest::File { id, .. } => {
+                *id = Uuid::new_v4();
+            }
         }
     }
+
+    Ok(new_vcr)
 }
 
 #[cfg(test)]
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs
index 287571cfd5..cca589b758 100644
--- a/nexus/src/app/sagas/snapshot_create.rs
+++ b/nexus/src/app/sagas/snapshot_create.rs
@@ -117,6 +117,7 @@ use sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody;
 use sled_agent_client::types::VolumeConstructionRequest;
 use slog::info;
 use std::collections::BTreeMap;
+use std::collections::VecDeque;
 use std::net::SocketAddrV6;
 use steno::ActionError;
 use steno::Node;
@@ -1419,7 +1420,7 @@ async fn ssc_create_volume_record(
     let snapshot_volume_construction_request: VolumeConstructionRequest =
         create_snapshot_from_disk(
             &disk_volume_construction_request,
-            Some(&replace_sockets_map),
+            &replace_sockets_map,
         )
         .map_err(|e| {
             ActionError::action_failed(Error::internal_error(&e.to_string()))
@@ -1518,7 +1519,7 @@ async fn ssc_finalize_snapshot_record(
 /// VolumeConstructionRequest and modifying it accordingly.
 fn create_snapshot_from_disk(
     disk: &VolumeConstructionRequest,
-    socket_map: Option<&BTreeMap<String, String>>,
+    socket_map: &BTreeMap<String, String>,
 ) -> anyhow::Result<VolumeConstructionRequest> {
     // When copying a disk's VolumeConstructionRequest to turn it into a
     // snapshot:
@@ -1527,78 +1528,73 @@ fn create_snapshot_from_disk(
     // - set read-only
     // - remove any control sockets
 
-    match disk {
-        VolumeConstructionRequest::Volume {
-            id: _,
-            block_size,
-            sub_volumes,
-            read_only_parent,
-        } => Ok(VolumeConstructionRequest::Volume {
-            id: Uuid::new_v4(),
-            block_size: *block_size,
-            sub_volumes: sub_volumes
-                .iter()
-                .map(|subvol| -> anyhow::Result<VolumeConstructionRequest> {
-                    create_snapshot_from_disk(&subvol, socket_map)
-                })
-                .collect::<anyhow::Result<Vec<VolumeConstructionRequest>>>()?,
-            read_only_parent: if let Some(read_only_parent) = read_only_parent {
-                Some(Box::new(create_snapshot_from_disk(
-                    read_only_parent,
-                    // no socket modification required for read-only parents
-                    None,
-                )?))
-            } else {
-                None
-            },
-        }),
+    let mut new_vcr = disk.clone();
 
-        VolumeConstructionRequest::Url { id: _, block_size, url } => {
-            Ok(VolumeConstructionRequest::Url {
-                id: Uuid::new_v4(),
-                block_size: *block_size,
-                url: url.clone(),
-            })
-        }
+    struct Work<'a> {
+        vcr_part: &'a mut VolumeConstructionRequest,
+        socket_modification_required: bool,
+    }
 
-        VolumeConstructionRequest::Region {
-            block_size,
-            blocks_per_extent,
-            extent_count,
-            opts,
-            gen,
-        } => {
-            let mut opts = opts.clone();
-
-            if let Some(socket_map) = socket_map {
-                for target in &mut opts.target {
-                    target.clone_from(socket_map.get(target).ok_or_else(
-                        || anyhow!("target {} not found in map!", target),
-                    )?);
+    let mut parts: VecDeque<Work> = VecDeque::new();
+    parts.push_back(Work {
+        vcr_part: &mut new_vcr,
+        socket_modification_required: true,
+    });
+
+    while let Some(work) = parts.pop_front() {
+        match work.vcr_part {
+            VolumeConstructionRequest::Volume {
+                id,
+                sub_volumes,
+                read_only_parent,
+                ..
+            } => {
+                *id = Uuid::new_v4();
+
+                for sub_volume in sub_volumes {
+                    parts.push_back(Work {
+                        vcr_part: sub_volume,
+                        // Inherit if socket modification is required from the
+                        // parent layer
+                        socket_modification_required: work
+                            .socket_modification_required,
+                    });
+                }
+
+                if let Some(read_only_parent) = read_only_parent {
+                    parts.push_back(Work {
+                        vcr_part: read_only_parent,
+                        // no socket modification required for read-only parents
+                        socket_modification_required: false,
+                    });
                 }
             }
 
-            opts.id = Uuid::new_v4();
-            opts.read_only = true;
-            opts.control = None;
+            VolumeConstructionRequest::Url { id, .. } => {
+                *id = Uuid::new_v4();
+            }
 
-            Ok(VolumeConstructionRequest::Region {
-                block_size: *block_size,
-                blocks_per_extent: *blocks_per_extent,
-                extent_count: *extent_count,
-                opts,
-                gen: *gen,
-            })
-        }
+            VolumeConstructionRequest::Region { opts, .. } => {
+                opts.id = Uuid::new_v4();
+                opts.read_only = true;
+                opts.control = None;
 
-        VolumeConstructionRequest::File { id: _, block_size, path } => {
-            Ok(VolumeConstructionRequest::File {
-                id: Uuid::new_v4(),
-                block_size: *block_size,
-                path: path.clone(),
-            })
+                if work.socket_modification_required {
+                    for target in &mut opts.target {
+                        target.clone_from(socket_map.get(target).ok_or_else(
+                            || anyhow!("target {} not found in map!", target),
+                        )?);
+                    }
+                }
+            }
+
+            VolumeConstructionRequest::File { id, .. } => {
+                *id = Uuid::new_v4();
+            }
         }
     }
+
+    Ok(new_vcr)
 }
 
 #[cfg(test)]
@@ -1718,7 +1714,7 @@ mod test {
         );
 
         let snapshot =
-            create_snapshot_from_disk(&disk, Some(&replace_sockets)).unwrap();
+            create_snapshot_from_disk(&disk, &replace_sockets).unwrap();
 
         eprintln!("{:?}", serde_json::to_string(&snapshot).unwrap());
 
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 742639350a..d91b9c9a33 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -46,7 +46,7 @@ use propolis_client::{
 use propolis_mock_server::Context as PropolisContext;
 use sled_storage::resources::DisksManagementResult;
 use slog::Logger;
-use std::collections::{HashMap, HashSet};
+use std::collections::{HashMap, HashSet, VecDeque};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
 use std::str::FromStr;
 use std::sync::Arc;
@@ -93,40 +93,33 @@ fn extract_targets_from_volume_construction_request(
     // flush.
 
     let mut res = vec![];
-    match vcr {
-        VolumeConstructionRequest::Volume {
-            id: _,
-            block_size: _,
-            sub_volumes,
-            read_only_parent: _,
-        } => {
-            for sub_volume in sub_volumes.iter() {
-                res.extend(extract_targets_from_volume_construction_request(
-                    sub_volume,
-                )?);
+    let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(&vcr);
+
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume { sub_volumes, .. } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
             }
-        }
 
-        VolumeConstructionRequest::Url { .. } => {
-            // noop
-        }
+            VolumeConstructionRequest::Url { .. } => {
+                // noop
+            }
 
-        VolumeConstructionRequest::Region {
-            block_size: _,
-            blocks_per_extent: _,
-            extent_count: _,
-            opts,
-            gen: _,
-        } => {
-            for target in &opts.target {
-                res.push(SocketAddr::from_str(target)?);
+            VolumeConstructionRequest::Region { opts, .. } => {
+                for target in &opts.target {
+                    res.push(SocketAddr::from_str(&target)?);
+                }
             }
-        }
 
-        VolumeConstructionRequest::File { .. } => {
-            // noop
+            VolumeConstructionRequest::File { .. } => {
+                // noop
+            }
         }
     }
+
     Ok(res)
 }
 

From 0e3e613c8402ce1dac5130d86f48643508cf9507 Mon Sep 17 00:00:00 2001
From: John Gallagher <john@oxidecomputer.com>
Date: Wed, 29 May 2024 14:35:31 -0400
Subject: [PATCH 11/28] Add `cockroach-admin` dropshot server (#5822)

The goal here is to use this server to run `cockroach node decommission`
for expunged cockroach zones. For this initial PR, the only endpoint
provided wraps `cockroach node status`; wrapping `decommission` is not
as trivial, so I figured it should go into a separate PR.

There are currently no callers of this service, but I stood up an a4x2 and
confirmed I could talk to it from the switch zone via `curl`:

```
root@oxz_switch:~# curl http://[fd00:1122:3344:103::3]:32222/node/status
{"all_nodes":[{"node_id":"1","address":"[fd00:1122:3344:103::3]:32221","sql_address":"[fd00:1122:3344:103::3]:32221","build":"v22.1.9-dirty","started_at":"2024-05-24T16:47:33.137256Z","updated_at":"2024-05-24T19:01:11.345263Z","locality":"","is_available":true,"is_live":true},{"node_id":"2","address":"[fd00:1122:3344:102::3]:32221","sql_address":"[fd00:1122:3344:102::3]:32221","build":"v22.1.9-dirty","started_at":"2024-05-24T16:41:23.877326Z","updated_at":"2024-05-24T19:01:10.946872Z","locality":"","is_available":true,"is_live":true},{"node_id":"3","address":"[fd00:1122:3344:102::4]:32221","sql_address":"[fd00:1122:3344:102::4]:32221","build":"v22.1.9-dirty","started_at":"2024-05-24T16:41:24.020025Z","updated_at":"2024-05-24T19:01:11.112721Z","locality":"","is_available":true,"is_live":true},{"node_id":"4","address":"[fd00:1122:3344:101::4]:32221","sql_address":"[fd00:1122:3344:101::4]:32221","build":"v22.1.9-dirty","started_at":"2024-05-24T16:41:42.706769Z","updated_at":"2024-05-24T19:01:10.944673Z","locality":"","is_available":true,"is_live":true},{"node_id":"5","address":"[fd00:1122:3344:101::3]:32221","sql_address":"[fd00:1122:3344:101::3]:32221","build":"v22.1.9-dirty","started_at":"2024-05-24T16:41:43.079549Z","updated_at":"2024-05-24T19:01:11.326557Z","locality":"","is_available":true,"is_live":true}]}
```
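
For illustration only, a hypothetical consumer of this endpoint might look
like the sketch below (it assumes the `reqwest` crate with its `json`
feature, plus `serde`, `tokio`, and `anyhow`; an eventual Nexus caller
would more likely use a generated client). The field names mirror the
`ClusterNodeStatus`/`NodeStatus` types added in this patch, and unknown
JSON fields are ignored by serde's defaults.

```
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct NodeStatus {
    node_id: String,
    is_available: bool,
    is_live: bool,
}

#[derive(Debug, Deserialize)]
struct ClusterNodeStatus {
    all_nodes: Vec<NodeStatus>,
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Address of a cockroach-admin server in the switch zone example above.
    let url = "http://[fd00:1122:3344:103::3]:32222/node/status";
    let status: ClusterNodeStatus = reqwest::get(url).await?.json().await?;
    for node in status.all_nodes {
        println!(
            "node {}: live={} available={}",
            node.node_id, node.is_live, node.is_available
        );
    }
    Ok(())
}
```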
---
 Cargo.lock                                 |  30 ++
 Cargo.toml                                 |   4 +
 cockroach-admin/Cargo.toml                 |  40 ++
 cockroach-admin/build.rs                   |  10 +
 cockroach-admin/src/bin/cockroach-admin.rs |  79 ++++
 cockroach-admin/src/cockroach_cli.rs       | 434 +++++++++++++++++++++
 cockroach-admin/src/config.rs              |  43 ++
 cockroach-admin/src/context.rs             |   9 +
 cockroach-admin/src/http_entrypoints.rs    |  49 +++
 cockroach-admin/src/lib.rs                 |  85 ++++
 common/src/address.rs                      |   1 +
 package-manifest.toml                      |  15 +
 sled-agent/src/profile.rs                  |   9 +-
 sled-agent/src/services.rs                 |  96 +++--
 smf/cockroach-admin/config.toml            |  10 +
 smf/cockroach-admin/manifest.xml           |  45 +++
 smf/cockroach-admin/method_script.sh       |  20 +
 smf/cockroachdb/manifest.xml               |   1 -
 smf/cockroachdb/method_script.sh           |   3 +-
 tufaceous-lib/Cargo.toml                   |   2 +-
 20 files changed, 945 insertions(+), 40 deletions(-)
 create mode 100644 cockroach-admin/Cargo.toml
 create mode 100644 cockroach-admin/build.rs
 create mode 100644 cockroach-admin/src/bin/cockroach-admin.rs
 create mode 100644 cockroach-admin/src/cockroach_cli.rs
 create mode 100644 cockroach-admin/src/config.rs
 create mode 100644 cockroach-admin/src/context.rs
 create mode 100644 cockroach-admin/src/http_entrypoints.rs
 create mode 100644 cockroach-admin/src/lib.rs
 create mode 100644 smf/cockroach-admin/config.toml
 create mode 100644 smf/cockroach-admin/manifest.xml
 create mode 100755 smf/cockroach-admin/method_script.sh

diff --git a/Cargo.lock b/Cargo.lock
index 3060a8fae7..88e9afd8c9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5224,6 +5224,36 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "omicron-cockroach-admin"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "camino",
+ "chrono",
+ "clap",
+ "csv",
+ "dropshot",
+ "http 0.2.12",
+ "illumos-utils",
+ "nexus-test-utils",
+ "omicron-common",
+ "omicron-rpaths",
+ "omicron-test-utils",
+ "omicron-workspace-hack",
+ "pq-sys",
+ "schemars",
+ "serde",
+ "slog",
+ "slog-async",
+ "slog-dtrace",
+ "slog-error-chain",
+ "thiserror",
+ "tokio",
+ "toml 0.8.13",
+ "url",
+]
+
 [[package]]
 name = "omicron-common"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index a350f59f0a..e6b0ffb099 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ members = [
     "clients/oximeter-client",
     "clients/sled-agent-client",
     "clients/wicketd-client",
+    "cockroach-admin",
     "common",
     "dev-tools/crdb-seed",
     "dev-tools/omdb",
@@ -96,6 +97,7 @@ default-members = [
     "clients/oximeter-client",
     "clients/sled-agent-client",
     "clients/wicketd-client",
+    "cockroach-admin",
     "common",
     "dev-tools/crdb-seed",
     "dev-tools/omdb",
@@ -338,6 +340,7 @@ nexus-test-utils = { path = "nexus/test-utils" }
 nexus-types = { path = "nexus/types" }
 num-integer = "0.1.46"
 num = { version = "0.4.3", default-features = false, features = [ "libm" ] }
+omicron-cockroach-admin = { path = "cockroach-admin" }
 omicron-common = { path = "common" }
 omicron-gateway = { path = "gateway" }
 omicron-nexus = { path = "nexus" }
@@ -483,6 +486,7 @@ typed-rng = { path = "typed-rng" }
 unicode-width = "0.1.11"
 update-common = { path = "update-common" }
 update-engine = { path = "update-engine" }
+url = "2.5.0"
 usdt = "0.5.0"
 uuid = { version = "1.8.0", features = ["serde", "v4"] }
 uzers = "0.11"
diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml
new file mode 100644
index 0000000000..e0c02493c2
--- /dev/null
+++ b/cockroach-admin/Cargo.toml
@@ -0,0 +1,40 @@
+[package]
+name = "omicron-cockroach-admin"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[build-dependencies]
+omicron-rpaths.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+camino.workspace = true
+chrono.workspace = true
+clap.workspace = true
+csv.workspace = true
+dropshot.workspace = true
+http.workspace = true
+illumos-utils.workspace = true
+omicron-common.workspace = true
+# See omicron-rpaths for more about the "pq-sys" dependency.
+pq-sys = "*"
+schemars.workspace = true
+slog.workspace = true
+slog-async.workspace = true
+slog-dtrace.workspace = true
+slog-error-chain.workspace = true
+serde.workspace = true
+thiserror.workspace = true
+tokio.workspace = true
+toml.workspace = true
+
+omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+nexus-test-utils.workspace = true
+omicron-test-utils.workspace = true
+url.workspace = true
+
+[lints]
+workspace = true
diff --git a/cockroach-admin/build.rs b/cockroach-admin/build.rs
new file mode 100644
index 0000000000..1ba9acd41c
--- /dev/null
+++ b/cockroach-admin/build.rs
@@ -0,0 +1,10 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// See omicron-rpaths for documentation.
+// NOTE: This file MUST be kept in sync with the other build.rs files in this
+// repository.
+fn main() {
+    omicron_rpaths::configure_default_omicron_rpaths();
+}
diff --git a/cockroach-admin/src/bin/cockroach-admin.rs b/cockroach-admin/src/bin/cockroach-admin.rs
new file mode 100644
index 0000000000..eb28082faa
--- /dev/null
+++ b/cockroach-admin/src/bin/cockroach-admin.rs
@@ -0,0 +1,79 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Executable program to run the Omicron CockroachDb admin interface (not to be
+//! confused with CockroachDb's built-in HTTP API)
+
+use anyhow::anyhow;
+use camino::Utf8PathBuf;
+use clap::Parser;
+use omicron_cockroach_admin::CockroachCli;
+use omicron_cockroach_admin::Config;
+use omicron_common::cmd::fatal;
+use omicron_common::cmd::CmdError;
+use std::net::SocketAddr;
+use std::net::SocketAddrV6;
+
+#[derive(Debug, Parser)]
+#[clap(name = "cockroach-admin", about = "Omicron CRDB cluster admin server")]
+enum Args {
+    /// Print the OpenAPI Spec document and exit
+    Openapi,
+
+    /// Start the CRDB admin server
+    Run {
+        /// Path to the `cockroach` CLI
+        #[clap(long, action)]
+        path_to_cockroach_binary: Utf8PathBuf,
+
+        /// Socket address for a running cockroach server instance
+        #[clap(long, action)]
+        cockroach_address: SocketAddrV6,
+
+        /// Address on which this server should run
+        #[clap(long, action)]
+        http_address: SocketAddrV6,
+
+        /// Path to the server config file
+        #[clap(long, action)]
+        config_file_path: Utf8PathBuf,
+    },
+}
+
+#[tokio::main]
+async fn main() {
+    if let Err(err) = main_impl().await {
+        fatal(err);
+    }
+}
+
+async fn main_impl() -> Result<(), CmdError> {
+    let args = Args::parse();
+
+    match args {
+        Args::Openapi => omicron_cockroach_admin::run_openapi()
+            .map_err(|e| CmdError::Failure(anyhow!(e))),
+        Args::Run {
+            path_to_cockroach_binary,
+            cockroach_address,
+            http_address,
+            config_file_path,
+        } => {
+            let cockroach_cli =
+                CockroachCli::new(path_to_cockroach_binary, cockroach_address);
+            let mut config = Config::from_file(&config_file_path)
+                .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+            config.dropshot.bind_address = SocketAddr::V6(http_address);
+            let server =
+                omicron_cockroach_admin::start_server(cockroach_cli, config)
+                    .await
+                    .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+            server.await.map_err(|err| {
+                CmdError::Failure(anyhow!(
+                    "server failed after starting: {err}"
+                ))
+            })
+        }
+    }
+}
diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs
new file mode 100644
index 0000000000..5b3958546f
--- /dev/null
+++ b/cockroach-admin/src/cockroach_cli.rs
@@ -0,0 +1,434 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use camino::Utf8PathBuf;
+use chrono::DateTime;
+use chrono::NaiveDateTime;
+use chrono::Utc;
+use dropshot::HttpError;
+use illumos_utils::output_to_exec_error;
+use illumos_utils::ExecutionError;
+use schemars::JsonSchema;
+use serde::de;
+use serde::Deserialize;
+use serde::Serialize;
+use slog_error_chain::InlineErrorChain;
+use slog_error_chain::SlogInlineError;
+use std::io;
+use std::net::SocketAddr;
+use std::net::SocketAddrV6;
+use tokio::process::Command;
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum CockroachCliError {
+    #[error("failed to invoke `cockroach {subcommand}`")]
+    InvokeCli {
+        subcommand: &'static str,
+        #[source]
+        err: io::Error,
+    },
+    #[error(transparent)]
+    ExecutionError(#[from] ExecutionError),
+    #[error(
+        "failed to parse `cockroach {subcommand}` output \
+         (stdout: {stdout}, stderr: {stderr})"
+    )]
+    ParseOutput {
+        subcommand: &'static str,
+        stdout: String,
+        stderr: String,
+        #[source]
+        err: csv::Error,
+    },
+}
+
+impl From<CockroachCliError> for HttpError {
+    fn from(err: CockroachCliError) -> Self {
+        match err {
+            CockroachCliError::InvokeCli { .. }
+            | CockroachCliError::ExecutionError(_)
+            | CockroachCliError::ParseOutput { .. } => {
+                let message = InlineErrorChain::new(&err).to_string();
+                HttpError {
+                    status_code: http::StatusCode::INTERNAL_SERVER_ERROR,
+                    error_code: Some(String::from("Internal")),
+                    external_message: message.clone(),
+                    internal_message: message,
+                }
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct CockroachCli {
+    path_to_cockroach_binary: Utf8PathBuf,
+    cockroach_address: SocketAddrV6,
+}
+
+impl CockroachCli {
+    pub fn new(
+        path_to_cockroach_binary: Utf8PathBuf,
+        cockroach_address: SocketAddrV6,
+    ) -> Self {
+        Self { path_to_cockroach_binary, cockroach_address }
+    }
+
+    pub async fn node_status(
+        &self,
+    ) -> Result<Vec<NodeStatus>, CockroachCliError> {
+        let mut command = Command::new(&self.path_to_cockroach_binary);
+        command
+            .arg("node")
+            .arg("status")
+            .arg("--host")
+            .arg(&format!("{}", self.cockroach_address))
+            .arg("--insecure")
+            .arg("--format")
+            .arg("csv");
+        let output = command.output().await.map_err(|err| {
+            CockroachCliError::InvokeCli { subcommand: "node status", err }
+        })?;
+        if !output.status.success() {
+            return Err(output_to_exec_error(command.as_std(), &output).into());
+        }
+        NodeStatus::parse_from_csv(io::Cursor::new(&output.stdout)).map_err(
+            |err| CockroachCliError::ParseOutput {
+                subcommand: "node status",
+                stdout: String::from_utf8_lossy(&output.stdout).to_string(),
+                stderr: String::from_utf8_lossy(&output.stderr).to_string(),
+                err,
+            },
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct NodeStatus {
+    pub node_id: String,
+    pub address: SocketAddr,
+    pub sql_address: SocketAddr,
+    pub build: String,
+    pub started_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub locality: String,
+    pub is_available: bool,
+    pub is_live: bool,
+}
+
+// Slightly different `NodeStatus` that matches what we get from `cockroach`:
+//
+// * `id` column instead of `node_id`
+// * timestamps are a fixed format with no timezone, so we have a custom
+//   deserializer
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
+struct CliNodeStatus {
+    id: String,
+    address: SocketAddr,
+    sql_address: SocketAddr,
+    build: String,
+    #[serde(deserialize_with = "parse_cockroach_cli_timestamp")]
+    started_at: DateTime<Utc>,
+    #[serde(deserialize_with = "parse_cockroach_cli_timestamp")]
+    updated_at: DateTime<Utc>,
+    locality: String,
+    is_available: bool,
+    is_live: bool,
+}
+
+impl From<CliNodeStatus> for NodeStatus {
+    fn from(cli: CliNodeStatus) -> Self {
+        Self {
+            node_id: cli.id,
+            address: cli.address,
+            sql_address: cli.sql_address,
+            build: cli.build,
+            started_at: cli.started_at,
+            updated_at: cli.updated_at,
+            locality: cli.locality,
+            is_available: cli.is_available,
+            is_live: cli.is_live,
+        }
+    }
+}
+
+fn parse_cockroach_cli_timestamp<'de, D>(
+    d: D,
+) -> Result<DateTime<Utc>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    struct CockroachTimestampVisitor;
+    impl<'de> de::Visitor<'de> for CockroachTimestampVisitor {
+        type Value = DateTime<Utc>;
+
+        fn expecting(
+            &self,
+            formatter: &mut std::fmt::Formatter,
+        ) -> std::fmt::Result {
+            formatter.write_str("a Cockroach CLI timestamp")
+        }
+
+        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+        where
+            E: de::Error,
+        {
+            let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f")
+                .map_err(E::custom)?;
+            Ok(DateTime::from_naive_utc_and_offset(dt, Utc))
+        }
+    }
+
+    d.deserialize_str(CockroachTimestampVisitor)
+}
+
+impl NodeStatus {
+    pub fn parse_from_csv<R>(reader: R) -> Result<Vec<Self>, csv::Error>
+    where
+        R: io::Read,
+    {
+        let mut statuses = Vec::new();
+        let mut reader = csv::Reader::from_reader(reader);
+        for result in reader.deserialize() {
+            let record: CliNodeStatus = result?;
+            statuses.push(record.into());
+        }
+        Ok(statuses)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use chrono::NaiveDate;
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_test_utils::dev;
+    use url::Url;
+
+    #[test]
+    fn test_node_status_parse_single_line_from_csv() {
+        let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live
+1,[::1]:42021,[::1]:42021,v22.1.9,2024-05-21 15:19:50.523796,2024-05-21 16:31:28.050069,,true,true"#;
+        let expected = NodeStatus {
+            node_id: "1".to_string(),
+            address: "[::1]:42021".parse().unwrap(),
+            sql_address: "[::1]:42021".parse().unwrap(),
+            build: "v22.1.9".to_string(),
+            started_at: DateTime::from_naive_utc_and_offset(
+                NaiveDate::from_ymd_opt(2024, 5, 21)
+                    .unwrap()
+                    .and_hms_micro_opt(15, 19, 50, 523796)
+                    .unwrap(),
+                Utc,
+            ),
+            updated_at: DateTime::from_naive_utc_and_offset(
+                NaiveDate::from_ymd_opt(2024, 5, 21)
+                    .unwrap()
+                    .and_hms_micro_opt(16, 31, 28, 50069)
+                    .unwrap(),
+                Utc,
+            ),
+            locality: String::new(),
+            is_available: true,
+            is_live: true,
+        };
+
+        let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input))
+            .expect("parsed input");
+        assert_eq!(statuses, vec![expected]);
+    }
+
+    #[test]
+    fn test_node_status_parse_multiple_lines_from_csv() {
+        let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live
+1,[fd00:1122:3344:109::3]:32221,[fd00:1122:3344:109::3]:32221,v22.1.9-dirty,2024-05-18 19:18:00.597145,2024-05-21 15:22:34.290434,,true,true
+2,[fd00:1122:3344:105::3]:32221,[fd00:1122:3344:105::3]:32221,v22.1.9-dirty,2024-05-18 19:17:01.796714,2024-05-21 15:22:34.901268,,true,true
+3,[fd00:1122:3344:10b::3]:32221,[fd00:1122:3344:10b::3]:32221,v22.1.9-dirty,2024-05-18 19:18:52.37564,2024-05-21 15:22:36.341146,,true,true
+4,[fd00:1122:3344:107::3]:32221,[fd00:1122:3344:107::3]:32221,v22.1.9-dirty,2024-05-18 19:16:22.788276,2024-05-21 15:22:34.897047,,true,true
+5,[fd00:1122:3344:108::3]:32221,[fd00:1122:3344:108::3]:32221,v22.1.9-dirty,2024-05-18 19:18:09.196634,2024-05-21 15:22:35.168738,,true,true"#;
+        let expected = vec![
+            NodeStatus {
+                node_id: "1".to_string(),
+                address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(),
+                sql_address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(),
+                build: "v22.1.9-dirty".to_string(),
+                started_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 18)
+                        .unwrap()
+                        .and_hms_micro_opt(19, 18, 0, 597145)
+                        .unwrap(),
+                    Utc,
+                ),
+                updated_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 21)
+                        .unwrap()
+                        .and_hms_micro_opt(15, 22, 34, 290434)
+                        .unwrap(),
+                    Utc,
+                ),
+                locality: String::new(),
+                is_available: true,
+                is_live: true,
+            },
+            NodeStatus {
+                node_id: "2".to_string(),
+                address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(),
+                sql_address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(),
+                build: "v22.1.9-dirty".to_string(),
+                started_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 18)
+                        .unwrap()
+                        .and_hms_micro_opt(19, 17, 1, 796714)
+                        .unwrap(),
+                    Utc,
+                ),
+                updated_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 21)
+                        .unwrap()
+                        .and_hms_micro_opt(15, 22, 34, 901268)
+                        .unwrap(),
+                    Utc,
+                ),
+                locality: String::new(),
+                is_available: true,
+                is_live: true,
+            },
+            NodeStatus {
+                node_id: "3".to_string(),
+                address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(),
+                sql_address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(),
+                build: "v22.1.9-dirty".to_string(),
+                started_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 18)
+                        .unwrap()
+                        .and_hms_micro_opt(19, 18, 52, 375640)
+                        .unwrap(),
+                    Utc,
+                ),
+                updated_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 21)
+                        .unwrap()
+                        .and_hms_micro_opt(15, 22, 36, 341146)
+                        .unwrap(),
+                    Utc,
+                ),
+                locality: String::new(),
+                is_available: true,
+                is_live: true,
+            },
+            NodeStatus {
+                node_id: "4".to_string(),
+                address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(),
+                sql_address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(),
+                build: "v22.1.9-dirty".to_string(),
+                started_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 18)
+                        .unwrap()
+                        .and_hms_micro_opt(19, 16, 22, 788276)
+                        .unwrap(),
+                    Utc,
+                ),
+                updated_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 21)
+                        .unwrap()
+                        .and_hms_micro_opt(15, 22, 34, 897047)
+                        .unwrap(),
+                    Utc,
+                ),
+                locality: String::new(),
+                is_available: true,
+                is_live: true,
+            },
+            NodeStatus {
+                node_id: "5".to_string(),
+                address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(),
+                sql_address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(),
+                build: "v22.1.9-dirty".to_string(),
+                started_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 18)
+                        .unwrap()
+                        .and_hms_micro_opt(19, 18, 9, 196634)
+                        .unwrap(),
+                    Utc,
+                ),
+                updated_at: DateTime::from_naive_utc_and_offset(
+                    NaiveDate::from_ymd_opt(2024, 5, 21)
+                        .unwrap()
+                        .and_hms_micro_opt(15, 22, 35, 168738)
+                        .unwrap(),
+                    Utc,
+                ),
+                locality: String::new(),
+                is_available: true,
+                is_live: true,
+            },
+        ];
+
+        let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input))
+            .expect("parsed input");
+        assert_eq!(statuses.len(), expected.len());
+        for (status, expected) in statuses.iter().zip(&expected) {
+            assert_eq!(status, expected);
+        }
+    }
+
+    // Ensure that if `cockroach node status` changes in a future CRDB version
+    // bump, we have a test that will fail to force us to check whether our
+    // current parsing is still valid.
+    #[tokio::test]
+    async fn test_node_status_compatibility() {
+        let logctx = dev::test_setup_log("test_node_status_compatibility");
+        let mut db = test_setup_database(&logctx.log).await;
+        let db_url = db.listen_url().to_string();
+
+        let expected_headers = "id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live";
+
+        // Manually run cockroach node status to grab just the CSV header line
+        // (which the `csv` crate normally eats on our behalf) and check it's
+        // exactly what we expect.
+        let mut command = Command::new("cockroach");
+        command
+            .arg("node")
+            .arg("status")
+            .arg("--url")
+            .arg(&db_url)
+            .arg("--format")
+            .arg("csv");
+        let output =
+            command.output().await.expect("ran `cockroach node status`");
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let mut lines = stdout.lines();
+        let headers = lines.next().expect("header line");
+        assert_eq!(
+            headers, expected_headers,
+            "`cockroach node status --format csv` headers may have changed?"
+        );
+
+        // We should also be able to run our wrapper against this cockroach.
+        let url: Url = db_url.parse().expect("valid url");
+        let cockroach_address: SocketAddrV6 = format!(
+            "{}:{}",
+            url.host().expect("url has host"),
+            url.port().expect("url has port")
+        )
+        .parse()
+        .expect("valid SocketAddrV6");
+        let cli = CockroachCli::new("cockroach".into(), cockroach_address);
+        let status = cli.node_status().await.expect("got node status");
+
+        // We can't check all the fields exactly, but some we know based on the
+        // fact that our test database is a single node.
+        assert_eq!(status.len(), 1);
+        assert_eq!(status[0].node_id, "1");
+        assert_eq!(status[0].address, SocketAddr::V6(cockroach_address));
+        assert_eq!(status[0].sql_address, SocketAddr::V6(cockroach_address));
+        assert_eq!(status[0].is_available, true);
+        assert_eq!(status[0].is_live, true);
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/cockroach-admin/src/config.rs b/cockroach-admin/src/config.rs
new file mode 100644
index 0000000000..77a624835c
--- /dev/null
+++ b/cockroach-admin/src/config.rs
@@ -0,0 +1,43 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
+use dropshot::ConfigDropshot;
+use dropshot::ConfigLogging;
+use serde::Deserialize;
+use serde::Serialize;
+use slog_error_chain::SlogInlineError;
+use std::io;
+
+#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
+pub struct Config {
+    pub dropshot: ConfigDropshot,
+    pub log: ConfigLogging,
+}
+impl Config {
+    /// Load a `Config` from the given TOML file
+    pub fn from_file(path: &Utf8Path) -> Result<Self, LoadError> {
+        let contents = std::fs::read_to_string(path)
+            .map_err(|err| LoadError::Read { path: path.to_owned(), err })?;
+        toml::de::from_str(&contents)
+            .map_err(|err| LoadError::Parse { path: path.to_owned(), err })
+    }
+}
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum LoadError {
+    #[error("failed to read {path}")]
+    Read {
+        path: Utf8PathBuf,
+        #[source]
+        err: io::Error,
+    },
+    #[error("failed to parse {path} as TOML")]
+    Parse {
+        path: Utf8PathBuf,
+        #[source]
+        err: toml::de::Error,
+    },
+}
diff --git a/cockroach-admin/src/context.rs b/cockroach-admin/src/context.rs
new file mode 100644
index 0000000000..b3f39f463a
--- /dev/null
+++ b/cockroach-admin/src/context.rs
@@ -0,0 +1,9 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::CockroachCli;
+
+pub struct ServerContext {
+    pub cockroach_cli: CockroachCli,
+}
diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs
new file mode 100644
index 0000000000..24d36c9823
--- /dev/null
+++ b/cockroach-admin/src/http_entrypoints.rs
@@ -0,0 +1,49 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::cockroach_cli::NodeStatus;
+use crate::context::ServerContext;
+use dropshot::endpoint;
+use dropshot::HttpError;
+use dropshot::HttpResponseOk;
+use dropshot::RequestContext;
+use schemars::JsonSchema;
+use serde::Deserialize;
+use serde::Serialize;
+use std::sync::Arc;
+
+type CrdbApiDescription = dropshot::ApiDescription<Arc<ServerContext>>;
+
+pub fn api() -> CrdbApiDescription {
+    fn register_endpoints(api: &mut CrdbApiDescription) -> Result<(), String> {
+        api.register(node_status)?;
+        Ok(())
+    }
+
+    let mut api = CrdbApiDescription::new();
+    if let Err(err) = register_endpoints(&mut api) {
+        panic!("failed to register entrypoints: {}", err);
+    }
+    api
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct ClusterNodeStatus {
+    pub all_nodes: Vec<NodeStatus>,
+}
+
+/// Get the status of all nodes in the CRDB cluster
+#[endpoint {
+    method = GET,
+    path = "/node/status",
+}]
+async fn node_status(
+    rqctx: RequestContext<Arc<ServerContext>>,
+) -> Result<HttpResponseOk<ClusterNodeStatus>, HttpError> {
+    let ctx = rqctx.context();
+    let all_nodes =
+        ctx.cockroach_cli.node_status().await.map_err(HttpError::from)?;
+    Ok(HttpResponseOk(ClusterNodeStatus { all_nodes }))
+}
diff --git a/cockroach-admin/src/lib.rs b/cockroach-admin/src/lib.rs
new file mode 100644
index 0000000000..d6c53c8dc6
--- /dev/null
+++ b/cockroach-admin/src/lib.rs
@@ -0,0 +1,85 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use context::ServerContext;
+use omicron_common::FileKv;
+use slog::debug;
+use slog::error;
+use slog::Drain;
+use slog_dtrace::ProbeRegistration;
+use slog_error_chain::SlogInlineError;
+use std::error::Error;
+use std::io;
+use std::sync::Arc;
+
+mod cockroach_cli;
+mod config;
+mod context;
+mod http_entrypoints;
+
+pub use cockroach_cli::CockroachCli;
+pub use cockroach_cli::CockroachCliError;
+pub use config::Config;
+
+/// Run the OpenAPI generator for the API; this emits the OpenAPI spec to
+/// stdout.
+pub fn run_openapi() -> Result<(), String> {
+    http_entrypoints::api()
+        .openapi("Oxide CockroachDb Cluster Admin API", "0.0.1")
+        .description(
+            "API for interacting with the Oxide \
+             control plane's CockroachDb cluster",
+        )
+        .contact_url("https://oxide.computer")
+        .contact_email("api@oxide.computer")
+        .write(&mut std::io::stdout())
+        .map_err(|e| e.to_string())
+}
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum StartError {
+    #[error("failed to initialize logger")]
+    InitializeLogger(#[source] io::Error),
+    #[error("failed to register dtrace probes: {0}")]
+    RegisterDtraceProbes(String),
+    #[error("failed to initialize HTTP server")]
+    InitializeHttpServer(#[source] Box<dyn Error + Send + Sync>),
+}
+
+pub type Server = dropshot::HttpServer<Arc<ServerContext>>;
+
+/// Start the dropshot server
+pub async fn start_server(
+    cockroach_cli: CockroachCli,
+    server_config: Config,
+) -> Result<Server, StartError> {
+    let (drain, registration) = slog_dtrace::with_drain(
+        server_config
+            .log
+            .to_logger("cockroach-admin")
+            .map_err(StartError::InitializeLogger)?,
+    );
+    let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv));
+    match registration {
+        ProbeRegistration::Success => {
+            debug!(log, "registered DTrace probes");
+        }
+        ProbeRegistration::Failed(err) => {
+            let err = StartError::RegisterDtraceProbes(err);
+            error!(log, "failed to register DTrace probes"; &err);
+            return Err(err);
+        }
+    }
+
+    let context = ServerContext { cockroach_cli };
+    let http_server_starter = dropshot::HttpServerStarter::new(
+        &server_config.dropshot,
+        http_entrypoints::api(),
+        Arc::new(context),
+        &log.new(slog::o!("component" => "dropshot")),
+    )
+    .map_err(StartError::InitializeHttpServer)?;
+
+    Ok(http_server_starter.start())
+}
diff --git a/common/src/address.rs b/common/src/address.rs
index eddfb996c4..b246f8f392 100644
--- a/common/src/address.rs
+++ b/common/src/address.rs
@@ -46,6 +46,7 @@ pub const DNS_HTTP_PORT: u16 = 5353;
 pub const SLED_AGENT_PORT: u16 = 12345;
 
 pub const COCKROACH_PORT: u16 = 32221;
+pub const COCKROACH_ADMIN_PORT: u16 = 32222;
 pub const CRUCIBLE_PORT: u16 = 32345;
 pub const CLICKHOUSE_PORT: u16 = 8123;
 pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181;
diff --git a/package-manifest.toml b/package-manifest.toml
index 7f80dacf7c..bffd5be7dc 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -204,6 +204,7 @@ only_for_targets.image = "standard"
 source.type = "composite"
 source.packages = [
   "cockroachdb-service.tar.gz",
+  "omicron-cockroach-admin.tar.gz",
   "internal-dns-cli.tar.gz",
   "zone-setup.tar.gz",
   "zone-network-install.tar.gz"
@@ -224,6 +225,20 @@ output.type = "zone"
 output.intermediate_only = true
 setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries"
 
+[package.omicron-cockroach-admin]
+service_name = "cockroach-admin"
+only_for_targets.image = "standard"
+source.type = "local"
+source.rust.binary_names = ["cockroach-admin"]
+source.rust.release = true
+source.paths = [
+  { from = "smf/cockroach-admin/manifest.xml", to = "/var/svc/manifest/site/cockroach-admin/manifest.xml" },
+  { from = "smf/cockroach-admin/config.toml", to = "/opt/oxide/lib/svc/cockroach-admin/config.toml" },
+  { from = "smf/cockroach-admin/method_script.sh", to = "/opt/oxide/lib/svc/manifest/cockroach-admin.sh" },
+]
+output.type = "zone"
+output.intermediate_only = true
+
 [package.internal-dns-cli]
 service_name = "internal-dns-cli"
 only_for_targets.image = "standard"
diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs
index 1addbca4c9..33e30d1d7b 100644
--- a/sled-agent/src/profile.rs
+++ b/sled-agent/src/profile.rs
@@ -183,7 +183,12 @@ impl PropertyGroupBuilder {
         }
     }
 
-    pub fn add_property(mut self, name: &str, ty: &str, value: &str) -> Self {
+    pub fn add_property<S: Into<String>>(
+        mut self,
+        name: &str,
+        ty: &str,
+        value: S,
+    ) -> Self {
         // The data structures here are oriented around a few goals:
         //
         // - Properties will be written out in the order that they were added.
@@ -209,7 +214,7 @@ impl PropertyGroupBuilder {
             .property_values
             .entry(name.to_string())
             .or_insert_with(Vec::new);
-        values.push(value.to_string());
+        values.push(value.into());
         self
     }
 }
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index ff10d4aed7..7df9f06d53 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -61,7 +61,6 @@ use illumos_utils::{execute, PFEXEC};
 use internal_dns::resolver::Resolver;
 use itertools::Itertools;
 use nexus_config::{ConfigDropshotWithTls, DeploymentConfig};
-use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT;
 use omicron_common::address::CLICKHOUSE_KEEPER_PORT;
 use omicron_common::address::CLICKHOUSE_PORT;
 use omicron_common::address::COCKROACH_PORT;
@@ -78,6 +77,7 @@ use omicron_common::address::WICKETD_NEXUS_PROXY_PORT;
 use omicron_common::address::WICKETD_PORT;
 use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT};
 use omicron_common::address::{AZ_PREFIX, OXIMETER_PORT};
+use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT};
 use omicron_common::api::external::Generation;
 use omicron_common::api::internal::shared::{
     HostPortConfig, RackNetworkConfig,
@@ -1406,7 +1406,7 @@ impl ServiceManager {
         match domain {
             Some(d) => {
                 dns_config_builder =
-                    dns_config_builder.add_property("domain", "astring", &d)
+                    dns_config_builder.add_property("domain", "astring", d)
             }
             None => (),
         }
@@ -1423,10 +1423,11 @@ impl ServiceManager {
     fn zone_network_setup_install(
         gw_addr: &Ipv6Addr,
         zone: &InstalledZone,
-        static_addr: &String,
+        static_addr: &Ipv6Addr,
     ) -> Result<ServiceBuilder, Error> {
         let datalink = zone.get_control_vnic_name();
         let gateway = &gw_addr.to_string();
+        let static_addr = &static_addr.to_string();
 
         let mut config_builder = PropertyGroupBuilder::new("config");
         config_builder = config_builder
@@ -1593,7 +1594,7 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let listen_addr = &underlay_address.to_string();
+                let listen_addr = underlay_address;
                 let listen_port = &CLICKHOUSE_PORT.to_string();
 
                 let nw_setup_service = Self::zone_network_setup_install(
@@ -1605,7 +1606,11 @@ impl ServiceManager {
                 let dns_service = Self::dns_install(info, None, &None).await?;
 
                 let config = PropertyGroupBuilder::new("config")
-                    .add_property("listen_addr", "astring", listen_addr)
+                    .add_property(
+                        "listen_addr",
+                        "astring",
+                        listen_addr.to_string(),
+                    )
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("store", "astring", "/data");
                 let clickhouse_service =
@@ -1642,7 +1647,7 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let listen_addr = &underlay_address.to_string();
+                let listen_addr = underlay_address;
                 let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string();
 
                 let nw_setup_service = Self::zone_network_setup_install(
@@ -1654,7 +1659,11 @@ impl ServiceManager {
                 let dns_service = Self::dns_install(info, None, &None).await?;
 
                 let config = PropertyGroupBuilder::new("config")
-                    .add_property("listen_addr", "astring", listen_addr)
+                    .add_property(
+                        "listen_addr",
+                        "astring",
+                        listen_addr.to_string(),
+                    )
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("store", "astring", "/data");
                 let clickhouse_keeper_service =
@@ -1694,25 +1703,27 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let address = SocketAddr::new(
-                    IpAddr::V6(*underlay_address),
-                    COCKROACH_PORT,
-                );
-                let listen_addr = &address.ip().to_string();
-                let listen_port = &address.port().to_string();
+                let crdb_listen_ip = *underlay_address;
+                let crdb_address =
+                    SocketAddr::new(IpAddr::V6(crdb_listen_ip), COCKROACH_PORT)
+                        .to_string();
+                let admin_address = SocketAddr::new(
+                    IpAddr::V6(crdb_listen_ip),
+                    COCKROACH_ADMIN_PORT,
+                )
+                .to_string();
 
                 let nw_setup_service = Self::zone_network_setup_install(
                     &info.underlay_address,
                     &installed_zone,
-                    listen_addr,
+                    &crdb_listen_ip,
                 )?;
 
                 let dns_service = Self::dns_install(info, None, &None).await?;
 
                 // Configure the CockroachDB service.
                 let cockroachdb_config = PropertyGroupBuilder::new("config")
-                    .add_property("listen_addr", "astring", listen_addr)
-                    .add_property("listen_port", "astring", listen_port)
+                    .add_property("listen_addr", "astring", &crdb_address)
                     .add_property("store", "astring", "/data");
                 let cockroachdb_service =
                     ServiceBuilder::new("oxide/cockroachdb").add_instance(
@@ -1720,10 +1731,26 @@ impl ServiceManager {
                             .add_property_group(cockroachdb_config),
                     );
 
+                // Configure the Omicron cockroach-admin service.
+                let cockroach_admin_config =
+                    PropertyGroupBuilder::new("config")
+                        .add_property(
+                            "cockroach_address",
+                            "astring",
+                            crdb_address,
+                        )
+                        .add_property("http_address", "astring", admin_address);
+                let cockroach_admin_service =
+                    ServiceBuilder::new("oxide/cockroach-admin").add_instance(
+                        ServiceInstanceBuilder::new("default")
+                            .add_property_group(cockroach_admin_config),
+                    );
+
                 let profile = ProfileBuilder::new("omicron")
                     .add_service(nw_setup_service)
                     .add_service(disabled_ssh_service)
                     .add_service(cockroachdb_service)
+                    .add_service(cockroach_admin_service)
                     .add_service(dns_service)
                     .add_service(enabled_dns_client_service);
                 profile
@@ -1747,7 +1774,7 @@ impl ServiceManager {
                 let Some(info) = self.inner.sled_info.get() else {
                     return Err(Error::SledAgentNotReady);
                 };
-                let listen_addr = &underlay_address.to_string();
+                let listen_addr = &underlay_address;
                 let listen_port = &CRUCIBLE_PORT.to_string();
 
                 let nw_setup_service = Self::zone_network_setup_install(
@@ -1764,7 +1791,11 @@ impl ServiceManager {
                 let uuid = &Uuid::new_v4().to_string();
                 let config = PropertyGroupBuilder::new("config")
                     .add_property("dataset", "astring", &dataset_name)
-                    .add_property("listen_addr", "astring", listen_addr)
+                    .add_property(
+                        "listen_addr",
+                        "astring",
+                        listen_addr.to_string(),
+                    )
                     .add_property("listen_port", "astring", listen_port)
                     .add_property("uuid", "astring", uuid)
                     .add_property("store", "astring", "/data");
@@ -1802,7 +1833,7 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let listen_addr = &underlay_address.to_string();
+                let listen_addr = &underlay_address;
                 let listen_port = &CRUCIBLE_PANTRY_PORT.to_string();
 
                 let nw_setup_service = Self::zone_network_setup_install(
@@ -1812,7 +1843,11 @@ impl ServiceManager {
                 )?;
 
                 let config = PropertyGroupBuilder::new("config")
-                    .add_property("listen_addr", "astring", listen_addr)
+                    .add_property(
+                        "listen_addr",
+                        "astring",
+                        listen_addr.to_string(),
+                    )
                     .add_property("listen_port", "astring", listen_port);
 
                 let profile = ProfileBuilder::new("omicron")
@@ -1853,12 +1888,10 @@ impl ServiceManager {
                     OXIMETER_PORT,
                 );
 
-                let listen_addr = &address.ip().to_string();
-
                 let nw_setup_service = Self::zone_network_setup_install(
                     &info.underlay_address,
                     &installed_zone,
-                    listen_addr,
+                    underlay_address,
                 )?;
 
                 let oximeter_config = PropertyGroupBuilder::new("config")
@@ -1896,12 +1929,10 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let static_addr = underlay_address.to_string();
-
                 let nw_setup_service = Self::zone_network_setup_install(
                     &info.underlay_address,
                     &installed_zone,
-                    &static_addr.clone(),
+                    underlay_address,
                 )?;
 
                 // Like Nexus, we need to be reachable externally via
@@ -1925,7 +1956,8 @@ impl ServiceManager {
                     })?;
                 let opte_ip = port.ip();
 
-                let http_addr = format!("[{}]:{}", static_addr, DNS_HTTP_PORT);
+                let http_addr =
+                    format!("[{}]:{}", underlay_address, DNS_HTTP_PORT);
                 let dns_addr = format!("{}:{}", opte_ip, DNS_PORT);
 
                 let external_dns_config = PropertyGroupBuilder::new("config")
@@ -1985,12 +2017,10 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let static_addr = underlay_address.to_string();
-
                 let nw_setup_service = Self::zone_network_setup_install(
                     &info.underlay_address,
                     &installed_zone,
-                    &static_addr.clone(),
+                    underlay_address,
                 )?;
 
                 let is_boundary = matches!(
@@ -2083,7 +2113,7 @@ impl ServiceManager {
                 let nw_setup_service = Self::zone_network_setup_install(
                     gz_address,
                     &installed_zone,
-                    &underlay_address.to_string(),
+                    underlay_address,
                 )?;
 
                 // Internal DNS zones require a special route through
@@ -2163,12 +2193,10 @@ impl ServiceManager {
                     return Err(Error::SledAgentNotReady);
                 };
 
-                let static_addr = underlay_address.to_string();
-
                 let nw_setup_service = Self::zone_network_setup_install(
                     &info.underlay_address,
                     &installed_zone,
-                    &static_addr.clone(),
+                    underlay_address,
                 )?;
 
                 // While Nexus will be reachable via `external_ip`, it
diff --git a/smf/cockroach-admin/config.toml b/smf/cockroach-admin/config.toml
new file mode 100644
index 0000000000..86ee2c5d4b
--- /dev/null
+++ b/smf/cockroach-admin/config.toml
@@ -0,0 +1,10 @@
+[dropshot]
+# 1 MiB; we don't expect any requests of more than nominal size.
+request_body_max_bytes = 1048576
+
+[log]
+# Show log messages of this level and more severe
+level = "info"
+mode = "file"
+path = "/dev/stdout"
+if_exists = "append"
diff --git a/smf/cockroach-admin/manifest.xml b/smf/cockroach-admin/manifest.xml
new file mode 100644
index 0000000000..1d6f7c4861
--- /dev/null
+++ b/smf/cockroach-admin/manifest.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+
+<service_bundle type='manifest' name='cockroach-admin'>
+
+  <service name='oxide/cockroach-admin' type='service' version='1'>
+  <create_default_instance enabled='true' />
+
+  <dependency name='multi_user' grouping='require_all' restart_on='none'
+    type='service'>
+  <service_fmri value='svc:/milestone/multi-user:default' />
+  </dependency>
+
+  <dependency name='zone_network_setup' grouping='require_all' restart_on='none'
+    type='service'>
+  <service_fmri value='svc:/oxide/zone-network-setup:default' />
+  </dependency>
+
+  <exec_method type='method' name='start'
+    exec='/opt/oxide/lib/svc/manifest/cockroach-admin.sh'
+    timeout_seconds='0' />
+  <exec_method type='method' name='stop' exec=':kill' timeout_seconds='0' />
+
+  <property_group name='config' type='application'>
+    <propval name='cockroach_address' type='astring' value='unknown' />
+    <propval name='http_address' type='astring' value='unknown' />
+  </property_group>
+
+  <property_group name='startd' type='framework'>
+    <propval name='duration' type='astring' value='contract' />
+  </property_group>
+
+  <stability value='Unstable' />
+
+  <template>
+    <common_name>
+      <loctext xml:lang='C'>Omicron CockroachDB Admin</loctext>
+    </common_name>
+    <description>
+      <loctext xml:lang='C'>Administering Oxide's Distributed Database</loctext>
+    </description>
+  </template>
+</service>
+
+</service_bundle>
diff --git a/smf/cockroach-admin/method_script.sh b/smf/cockroach-admin/method_script.sh
new file mode 100755
index 0000000000..c5f924223d
--- /dev/null
+++ b/smf/cockroach-admin/method_script.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+set -x
+set -o errexit
+set -o pipefail
+
+. /lib/svc/share/smf_include.sh
+
+COCKROACH_ADDR="$(svcprop -c -p config/cockroach_address "${SMF_FMRI}")"
+HTTP_ADDR="$(svcprop -c -p config/http_address "${SMF_FMRI}")"
+
+args=(
+  'run'
+  '--config-file-path' "/opt/oxide/lib/svc/cockroach-admin/config.toml"
+  '--path-to-cockroach-binary' "/opt/oxide/cockroachdb/bin/cockroach"
+  '--cockroach-address' "$COCKROACH_ADDR"
+  '--http-address' "$HTTP_ADDR"
+)
+
+exec /opt/oxide/cockroach-admin/bin/cockroach-admin "${args[@]}" &
diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml
index 3a9b1a7cb8..67ddbe48b8 100644
--- a/smf/cockroachdb/manifest.xml
+++ b/smf/cockroachdb/manifest.xml
@@ -29,7 +29,6 @@
 
   <property_group name='config' type='application'>
     <propval name='listen_addr' type='astring' value='unknown' />
-    <propval name='listen_port' type='astring' value='unknown' />
     <propval name='store' type='astring' value='unknown' />
   </property_group>
 
diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh
index e8b02eb1eb..1d33ef94a6 100755
--- a/smf/cockroachdb/method_script.sh
+++ b/smf/cockroachdb/method_script.sh
@@ -7,7 +7,6 @@ set -o pipefail
 . /lib/svc/share/smf_include.sh
 
 LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")"
-LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")"
 DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")"
 
 # We need to tell CockroachDB the DNS names or IP addresses of the other nodes
@@ -25,7 +24,7 @@ fi
 
 args=(
   '--insecure'
-  '--listen-addr' "[$LISTEN_ADDR]:$LISTEN_PORT"
+  '--listen-addr' "$LISTEN_ADDR"
   '--http-addr' '127.0.0.1:8080'
   '--store' "$DATASTORE"
   '--join' "$JOIN_ADDRS"
diff --git a/tufaceous-lib/Cargo.toml b/tufaceous-lib/Cargo.toml
index e448ed6db5..61224e6080 100644
--- a/tufaceous-lib/Cargo.toml
+++ b/tufaceous-lib/Cargo.toml
@@ -36,7 +36,7 @@ tar.workspace = true
 tokio.workspace = true
 toml.workspace = true
 tough.workspace = true
-url = "2.5.0"
+url.workspace = true
 zip.workspace = true
 omicron-workspace-hack.workspace = true
 

From 1fe55e9e44f8ff5bd415ba8246a40b68d4ded0d0 Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Wed, 29 May 2024 12:19:58 -0700
Subject: [PATCH 12/28] Add EXPECTORATE tests for
 virtual_provisioning_collection CTE (#5081)

This is a direct follow-up to
https://github.com/oxidecomputer/omicron/pull/5063, focused on the
`virtual_provisioning_collection` CTE.

This PR adds tests which validate the current SQL output before the
query is restructured to use `TypedSqlQuery`.
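
For context, the expectorate crate compares a generated string against a
checked-in golden file and, when the test suite is run with the
EXPECTORATE=overwrite environment variable set, rewrites the file instead
of failing. A minimal sketch of that pattern (the path and SQL string here
are illustrative only; the helper added in this PR also formats the query
via `dev::db::format_sql` before comparing):

    #[test]
    fn expectorate_example() {
        // Illustrative golden-file path and contents, not a real test in
        // this PR.
        let sql = "SELECT 1";
        // Compares `sql` against the file; with EXPECTORATE=overwrite set,
        // the file is rewritten rather than the test failing on a mismatch.
        expectorate::assert_contents("tests/output/example.sql", sql);
    }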
---
 .../src/db/queries/region_allocation.rs       |  25 +--
 .../virtual_provisioning_collection_update.rs |  87 ++++++++++
 nexus/db-queries/src/db/raw_query_builder.rs  |  15 ++
 ...ning_collection_update_delete_instance.sql |  97 +++++++++++
 ...oning_collection_update_delete_storage.sql |  86 ++++++++++
 ...ning_collection_update_insert_instance.sql | 154 ++++++++++++++++++
 ...oning_collection_update_insert_storage.sql | 154 ++++++++++++++++++
 7 files changed, 602 insertions(+), 16 deletions(-)
 create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
 create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
 create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
 create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql

diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs
index cc201dac30..83cc7483c9 100644
--- a/nexus/db-queries/src/db/queries/region_allocation.rs
+++ b/nexus/db-queries/src/db/queries/region_allocation.rs
@@ -369,6 +369,7 @@ mod test {
     use super::*;
     use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD;
     use crate::db::explain::ExplainableAsync;
+    use crate::db::raw_query_builder::expectorate_query_contents;
     use nexus_test_utils::db::test_setup_database;
     use omicron_test_utils::dev;
     use uuid::Uuid;
@@ -395,15 +396,11 @@ mod test {
             },
             REGION_REDUNDANCY_THRESHOLD,
         );
-        let s = dev::db::format_sql(
-            &diesel::debug_query::<Pg, _>(&region_allocate).to_string(),
-        )
-        .await
-        .unwrap();
-        expectorate::assert_contents(
+        expectorate_query_contents(
+            &region_allocate,
             "tests/output/region_allocate_distinct_sleds.sql",
-            &s,
-        );
+        )
+        .await;
 
         // Second structure: "Random"
 
@@ -415,15 +412,11 @@ mod test {
             &RegionAllocationStrategy::Random { seed: Some(1) },
             REGION_REDUNDANCY_THRESHOLD,
         );
-        let s = dev::db::format_sql(
-            &diesel::debug_query::<Pg, _>(&region_allocate).to_string(),
-        )
-        .await
-        .unwrap();
-        expectorate::assert_contents(
+        expectorate_query_contents(
+            &region_allocate,
             "tests/output/region_allocate_random_sleds.sql",
-            &s,
-        );
+        )
+        .await;
     }
 
     // Explain the possible forms of the SQL query to ensure that it
diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
index 7672d5af9a..09798e4e5d 100644
--- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
+++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
@@ -646,3 +646,90 @@ impl Query for VirtualProvisioningCollectionUpdate {
 }
 
 impl RunQueryDsl<DbConnection> for VirtualProvisioningCollectionUpdate {}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::db::raw_query_builder::expectorate_query_contents;
+    use uuid::Uuid;
+
+    // These tests are a bit of a "change detector", but they're here to help
+    // with debugging too. If you change this query, it can be useful to see
+    // exactly how the output SQL has been altered.
+
+    #[tokio::test]
+    async fn expectorate_query_insert_storage() {
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let disk_byte_diff = 2048.try_into().unwrap();
+        let storage_type = crate::db::datastore::StorageType::Disk;
+
+        let query = VirtualProvisioningCollectionUpdate::new_insert_storage(
+            id,
+            disk_byte_diff,
+            project_id,
+            storage_type,
+        );
+        expectorate_query_contents(
+            &query,
+            "tests/output/virtual_provisioning_collection_update_insert_storage.sql",
+        ).await;
+    }
+
+    #[tokio::test]
+    async fn expectorate_query_delete_storage() {
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let disk_byte_diff = 2048.try_into().unwrap();
+
+        let query = VirtualProvisioningCollectionUpdate::new_delete_storage(
+            id,
+            disk_byte_diff,
+            project_id,
+        );
+
+        expectorate_query_contents(
+            &query,
+            "tests/output/virtual_provisioning_collection_update_delete_storage.sql",
+        ).await;
+    }
+
+    #[tokio::test]
+    async fn expectorate_query_insert_instance() {
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let cpus_diff = 4;
+        let ram_diff = 2048.try_into().unwrap();
+
+        let query = VirtualProvisioningCollectionUpdate::new_insert_instance(
+            id, cpus_diff, ram_diff, project_id,
+        );
+
+        expectorate_query_contents(
+            &query,
+            "tests/output/virtual_provisioning_collection_update_insert_instance.sql",
+        ).await;
+    }
+
+    #[tokio::test]
+    async fn expectorate_query_delete_instance() {
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let cpus_diff = 4;
+        let ram_diff = 2048.try_into().unwrap();
+        let max_instance_gen = 0;
+
+        let query = VirtualProvisioningCollectionUpdate::new_delete_instance(
+            id,
+            max_instance_gen,
+            cpus_diff,
+            ram_diff,
+            project_id,
+        );
+
+        expectorate_query_contents(
+            &query,
+            "tests/output/virtual_provisioning_collection_update_delete_instance.sql",
+        ).await;
+    }
+}
diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs
index c7215417c5..d108062833 100644
--- a/nexus/db-queries/src/db/raw_query_builder.rs
+++ b/nexus/db-queries/src/db/raw_query_builder.rs
@@ -181,3 +181,18 @@ impl<T> RunQueryDsl<DbConnection> for TypedSqlQuery<T> {}
 impl<T> Query for TypedSqlQuery<T> {
     type SqlType = T;
 }
+
+#[cfg(test)]
+pub async fn expectorate_query_contents<T: QueryFragment<Pg>>(
+    query: T,
+    path: &str,
+) {
+    use omicron_test_utils::dev;
+
+    let s =
+        dev::db::format_sql(&diesel::debug_query::<Pg, _>(&query).to_string())
+            .await
+            .expect("Failed to format SQL");
+
+    expectorate::assert_contents(path, &s);
+}
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
new file mode 100644
index 0000000000..fcabefef26
--- /dev/null
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
@@ -0,0 +1,97 @@
+WITH
+  parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1),
+  all_collections
+    AS (
+      ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo))
+      UNION (SELECT $3 AS id)
+    ),
+  quotas
+    AS (
+      SELECT
+        silo_quotas.silo_id,
+        silo_quotas.cpus,
+        silo_quotas.memory_bytes AS memory,
+        silo_quotas.storage_bytes AS storage
+      FROM
+        silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id
+    ),
+  silo_provisioned
+    AS (
+      SELECT
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned
+      FROM
+        virtual_provisioning_collection
+        INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id
+    ),
+  do_update
+    AS (
+      SELECT
+        (
+          SELECT
+            count(*)
+          FROM
+            virtual_provisioning_resource
+          WHERE
+            virtual_provisioning_resource.id = $4
+          LIMIT
+            $5
+        )
+        = $6
+          AS update
+    ),
+  unused_cte_arm
+    AS (
+      DELETE FROM
+        virtual_provisioning_resource
+      WHERE
+        virtual_provisioning_resource.id = $7
+        AND virtual_provisioning_resource.id
+          = (
+              SELECT
+                instance.id
+              FROM
+                instance
+              WHERE
+                instance.id = $8 AND instance.state_generation < $9
+              LIMIT
+                $10
+            )
+      RETURNING
+        virtual_provisioning_resource.id,
+        virtual_provisioning_resource.time_modified,
+        virtual_provisioning_resource.resource_type,
+        virtual_provisioning_resource.virtual_disk_bytes_provisioned,
+        virtual_provisioning_resource.cpus_provisioned,
+        virtual_provisioning_resource.ram_provisioned
+    ),
+  virtual_provisioning_collection
+    AS (
+      UPDATE
+        virtual_provisioning_collection
+      SET
+        time_modified = current_timestamp(),
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $11,
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned - $12
+      WHERE
+        virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
+        AND (SELECT do_update.update FROM do_update LIMIT $13)
+      RETURNING
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.time_modified,
+        virtual_provisioning_collection.collection_type,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned
+    )
+SELECT
+  virtual_provisioning_collection.id,
+  virtual_provisioning_collection.time_modified,
+  virtual_provisioning_collection.collection_type,
+  virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+  virtual_provisioning_collection.cpus_provisioned,
+  virtual_provisioning_collection.ram_provisioned
+FROM
+  virtual_provisioning_collection
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
new file mode 100644
index 0000000000..72c0b81e15
--- /dev/null
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
@@ -0,0 +1,86 @@
+WITH
+  parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1),
+  all_collections
+    AS (
+      ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo))
+      UNION (SELECT $3 AS id)
+    ),
+  quotas
+    AS (
+      SELECT
+        silo_quotas.silo_id,
+        silo_quotas.cpus,
+        silo_quotas.memory_bytes AS memory,
+        silo_quotas.storage_bytes AS storage
+      FROM
+        silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id
+    ),
+  silo_provisioned
+    AS (
+      SELECT
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned
+      FROM
+        virtual_provisioning_collection
+        INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id
+    ),
+  do_update
+    AS (
+      SELECT
+        (
+          SELECT
+            count(*)
+          FROM
+            virtual_provisioning_resource
+          WHERE
+            virtual_provisioning_resource.id = $4
+          LIMIT
+            $5
+        )
+        = $6
+          AS update
+    ),
+  unused_cte_arm
+    AS (
+      DELETE FROM
+        virtual_provisioning_resource
+      WHERE
+        virtual_provisioning_resource.id = $7
+      RETURNING
+        virtual_provisioning_resource.id,
+        virtual_provisioning_resource.time_modified,
+        virtual_provisioning_resource.resource_type,
+        virtual_provisioning_resource.virtual_disk_bytes_provisioned,
+        virtual_provisioning_resource.cpus_provisioned,
+        virtual_provisioning_resource.ram_provisioned
+    ),
+  virtual_provisioning_collection
+    AS (
+      UPDATE
+        virtual_provisioning_collection
+      SET
+        time_modified = current_timestamp(),
+        virtual_disk_bytes_provisioned
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $8
+      WHERE
+        virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
+        AND (SELECT do_update.update FROM do_update LIMIT $9)
+      RETURNING
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.time_modified,
+        virtual_provisioning_collection.collection_type,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned
+    )
+SELECT
+  virtual_provisioning_collection.id,
+  virtual_provisioning_collection.time_modified,
+  virtual_provisioning_collection.collection_type,
+  virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+  virtual_provisioning_collection.cpus_provisioned,
+  virtual_provisioning_collection.ram_provisioned
+FROM
+  virtual_provisioning_collection
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
new file mode 100644
index 0000000000..753b7f09f3
--- /dev/null
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
@@ -0,0 +1,154 @@
+WITH
+  parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1),
+  all_collections
+    AS (
+      ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo))
+      UNION (SELECT $3 AS id)
+    ),
+  quotas
+    AS (
+      SELECT
+        silo_quotas.silo_id,
+        silo_quotas.cpus,
+        silo_quotas.memory_bytes AS memory,
+        silo_quotas.storage_bytes AS storage
+      FROM
+        silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id
+    ),
+  silo_provisioned
+    AS (
+      SELECT
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned
+      FROM
+        virtual_provisioning_collection
+        INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id
+    ),
+  do_update
+    AS (
+      SELECT
+        (
+          (
+            (
+              SELECT
+                count(*)
+              FROM
+                virtual_provisioning_resource
+              WHERE
+                virtual_provisioning_resource.id = $4
+              LIMIT
+                $5
+            )
+            = $6
+            AND CAST(
+                IF(
+                  (
+                    $7 = $8
+                    OR (SELECT quotas.cpus FROM quotas LIMIT $9)
+                      >= (
+                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10)
+                          + $11
+                        )
+                  ),
+                  'TRUE',
+                  'Not enough cpus'
+                )
+                  AS BOOL
+              )
+          )
+          AND CAST(
+              IF(
+                (
+                  $12 = $13
+                  OR (SELECT quotas.memory FROM quotas LIMIT $14)
+                    >= (
+                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15)
+                        + $16
+                      )
+                ),
+                'TRUE',
+                'Not enough memory'
+              )
+                AS BOOL
+            )
+        )
+        AND CAST(
+            IF(
+              (
+                $17 = $18
+                OR (SELECT quotas.storage FROM quotas LIMIT $19)
+                  >= (
+                      (
+                        SELECT
+                          silo_provisioned.virtual_disk_bytes_provisioned
+                        FROM
+                          silo_provisioned
+                        LIMIT
+                          $20
+                      )
+                      + $21
+                    )
+              ),
+              'TRUE',
+              'Not enough storage'
+            )
+              AS BOOL
+          )
+          AS update
+    ),
+  unused_cte_arm
+    AS (
+      INSERT
+      INTO
+        virtual_provisioning_resource
+          (
+            id,
+            time_modified,
+            resource_type,
+            virtual_disk_bytes_provisioned,
+            cpus_provisioned,
+            ram_provisioned
+          )
+      VALUES
+        ($22, DEFAULT, $23, $24, $25, $26)
+      ON CONFLICT
+      DO
+        NOTHING
+      RETURNING
+        virtual_provisioning_resource.id,
+        virtual_provisioning_resource.time_modified,
+        virtual_provisioning_resource.resource_type,
+        virtual_provisioning_resource.virtual_disk_bytes_provisioned,
+        virtual_provisioning_resource.cpus_provisioned,
+        virtual_provisioning_resource.ram_provisioned
+    ),
+  virtual_provisioning_collection
+    AS (
+      UPDATE
+        virtual_provisioning_collection
+      SET
+        time_modified = current_timestamp(),
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $27,
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned + $28
+      WHERE
+        virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
+        AND (SELECT do_update.update FROM do_update LIMIT $29)
+      RETURNING
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.time_modified,
+        virtual_provisioning_collection.collection_type,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned
+    )
+SELECT
+  virtual_provisioning_collection.id,
+  virtual_provisioning_collection.time_modified,
+  virtual_provisioning_collection.collection_type,
+  virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+  virtual_provisioning_collection.cpus_provisioned,
+  virtual_provisioning_collection.ram_provisioned
+FROM
+  virtual_provisioning_collection
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql
new file mode 100644
index 0000000000..040a5dc20c
--- /dev/null
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql
@@ -0,0 +1,154 @@
+WITH
+  parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1),
+  all_collections
+    AS (
+      ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo))
+      UNION (SELECT $3 AS id)
+    ),
+  quotas
+    AS (
+      SELECT
+        silo_quotas.silo_id,
+        silo_quotas.cpus,
+        silo_quotas.memory_bytes AS memory,
+        silo_quotas.storage_bytes AS storage
+      FROM
+        silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id
+    ),
+  silo_provisioned
+    AS (
+      SELECT
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned
+      FROM
+        virtual_provisioning_collection
+        INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id
+    ),
+  do_update
+    AS (
+      SELECT
+        (
+          (
+            (
+              SELECT
+                count(*)
+              FROM
+                virtual_provisioning_resource
+              WHERE
+                virtual_provisioning_resource.id = $4
+              LIMIT
+                $5
+            )
+            = $6
+            AND CAST(
+                IF(
+                  (
+                    $7 = $8
+                    OR (SELECT quotas.cpus FROM quotas LIMIT $9)
+                      >= (
+                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10)
+                          + $11
+                        )
+                  ),
+                  'TRUE',
+                  'Not enough cpus'
+                )
+                  AS BOOL
+              )
+          )
+          AND CAST(
+              IF(
+                (
+                  $12 = $13
+                  OR (SELECT quotas.memory FROM quotas LIMIT $14)
+                    >= (
+                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15)
+                        + $16
+                      )
+                ),
+                'TRUE',
+                'Not enough memory'
+              )
+                AS BOOL
+            )
+        )
+        AND CAST(
+            IF(
+              (
+                $17 = $18
+                OR (SELECT quotas.storage FROM quotas LIMIT $19)
+                  >= (
+                      (
+                        SELECT
+                          silo_provisioned.virtual_disk_bytes_provisioned
+                        FROM
+                          silo_provisioned
+                        LIMIT
+                          $20
+                      )
+                      + $21
+                    )
+              ),
+              'TRUE',
+              'Not enough storage'
+            )
+              AS BOOL
+          )
+          AS update
+    ),
+  unused_cte_arm
+    AS (
+      INSERT
+      INTO
+        virtual_provisioning_resource
+          (
+            id,
+            time_modified,
+            resource_type,
+            virtual_disk_bytes_provisioned,
+            cpus_provisioned,
+            ram_provisioned
+          )
+      VALUES
+        ($22, DEFAULT, $23, $24, $25, $26)
+      ON CONFLICT
+      DO
+        NOTHING
+      RETURNING
+        virtual_provisioning_resource.id,
+        virtual_provisioning_resource.time_modified,
+        virtual_provisioning_resource.resource_type,
+        virtual_provisioning_resource.virtual_disk_bytes_provisioned,
+        virtual_provisioning_resource.cpus_provisioned,
+        virtual_provisioning_resource.ram_provisioned
+    ),
+  virtual_provisioning_collection
+    AS (
+      UPDATE
+        virtual_provisioning_collection
+      SET
+        time_modified = current_timestamp(),
+        virtual_disk_bytes_provisioned
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $27
+      WHERE
+        virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
+        AND (SELECT do_update.update FROM do_update LIMIT $28)
+      RETURNING
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.time_modified,
+        virtual_provisioning_collection.collection_type,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned
+    )
+SELECT
+  virtual_provisioning_collection.id,
+  virtual_provisioning_collection.time_modified,
+  virtual_provisioning_collection.collection_type,
+  virtual_provisioning_collection.virtual_disk_bytes_provisioned,
+  virtual_provisioning_collection.cpus_provisioned,
+  virtual_provisioning_collection.ram_provisioned
+FROM
+  virtual_provisioning_collection

From 7e84fd91b53d66d3d38d53ec274332c3d156e4c1 Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Wed, 29 May 2024 14:25:31 -0700
Subject: [PATCH 13/28] [db-queries] Convert virtual provisioning CTE to use
 raw SQL (#5089)

Builds on:
- https://github.com/oxidecomputer/omicron/pull/5063
- https://github.com/oxidecomputer/omicron/pull/5081

Converts the virtual provisioning CTE to raw SQL built with
`QueryBuilder`/`TypedSqlQuery`, rather than composing it from diesel
subquery helpers.
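
The general shape of the conversion is to write each arm of the CTE as a
raw SQL string with typed bind parameters instead of assembling it from
typed DSL combinators. A rough sketch of that style using plain diesel,
mirroring the `parent_silo` arm of the CTE (illustrative only; the PR goes
through the internal `QueryBuilder`/`TypedSqlQuery` wrapper shown in the
diff below rather than `diesel::sql_query`, and assumes diesel's `uuid`
support is enabled):

    use diesel::sql_types;
    use uuid::Uuid;

    fn parent_silo_sketch(project_id: Uuid) {
        // Raw SQL with an explicitly typed bind parameter, instead of a
        // query assembled from typed DSL fragments.
        let _query = diesel::sql_query(
            "SELECT project.silo_id AS id FROM project WHERE project.id = $1",
        )
        .bind::<sql_types::Uuid, _>(project_id);
    }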
---
 nexus/db-model/src/lib.rs                     |   1 -
 nexus/db-model/src/queries/mod.rs             |   7 -
 .../virtual_provisioning_collection_update.rs |  60 --
 nexus/db-queries/src/db/alias.rs              |  84 --
 nexus/db-queries/src/db/mod.rs                |   1 -
 .../virtual_provisioning_collection_update.rs | 991 +++++++++---------
 ...ning_collection_update_delete_instance.sql |  16 +-
 ...oning_collection_update_delete_storage.sql |  10 +-
 ...ning_collection_update_insert_instance.sql |  35 +-
 ...oning_collection_update_insert_storage.sql |  33 +-
 10 files changed, 527 insertions(+), 711 deletions(-)
 delete mode 100644 nexus/db-model/src/queries/mod.rs
 delete mode 100644 nexus/db-model/src/queries/virtual_provisioning_collection_update.rs
 delete mode 100644 nexus/db-queries/src/db/alias.rs

diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index c57836a567..51fd0f6c9e 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -62,7 +62,6 @@ mod v2p_mapping;
 mod deployment;
 mod ipv4_nat_entry;
 mod omicron_zone_config;
-pub mod queries;
 mod quota;
 mod rack;
 mod region;
diff --git a/nexus/db-model/src/queries/mod.rs b/nexus/db-model/src/queries/mod.rs
deleted file mode 100644
index e138508f84..0000000000
--- a/nexus/db-model/src/queries/mod.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
-//! Subqueries used in CTEs.
-
-pub mod virtual_provisioning_collection_update;
diff --git a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs
deleted file mode 100644
index 124ffe4db6..0000000000
--- a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs
+++ /dev/null
@@ -1,60 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
-//! Describes the resource provisioning update CTE
-//!
-//! Refer to <nexus/src/db/queries/virtual_provisioning_collection_update.rs>
-//! for the construction of this query.
-
-use crate::schema::silo;
-use crate::schema::silo_quotas;
-use crate::schema::virtual_provisioning_collection;
-
-table! {
-    parent_silo {
-        id -> Uuid,
-    }
-}
-
-table! {
-    all_collections {
-        id -> Uuid,
-    }
-}
-
-table! {
-    do_update (update) {
-        update -> Bool,
-    }
-}
-
-table! {
-    quotas (silo_id) {
-        silo_id -> Uuid,
-        cpus -> Int8,
-        memory -> Int8,
-        storage -> Int8,
-    }
-}
-
-table! {
-    silo_provisioned {
-        id -> Uuid,
-        virtual_disk_bytes_provisioned -> Int8,
-        cpus_provisioned -> Int8,
-        ram_provisioned -> Int8,
-    }
-}
-
-diesel::allow_tables_to_appear_in_same_query!(silo, parent_silo,);
-
-diesel::allow_tables_to_appear_in_same_query!(
-    virtual_provisioning_collection,
-    silo_quotas,
-    parent_silo,
-    all_collections,
-    do_update,
-    quotas,
-    silo_provisioned
-);
diff --git a/nexus/db-queries/src/db/alias.rs b/nexus/db-queries/src/db/alias.rs
deleted file mode 100644
index 0a5bcca743..0000000000
--- a/nexus/db-queries/src/db/alias.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
-//! Tools for creating aliases in diesel.
-
-use diesel::pg::Pg;
-use diesel::query_builder::AstPass;
-use diesel::query_builder::QueryFragment;
-use diesel::Expression;
-use diesel::SelectableExpression;
-
-/// Allows an [`diesel::Expression`] to be referenced by a new name.
-///
-/// This generates an `<expression> AS <name>` SQL fragment.
-///
-///
-/// For example:
-///
-/// ```ignore
-/// diesel::sql_function!(fn gen_random_uuid() -> Uuid);
-///
-/// let query = sleds.select(
-///     (
-///         ExpressionAlias::<schema::services::dsl::id>(gen_random_uuid()),
-///         ExpressionAlias::<schema::services::dsl::sled_id>(gen_random_uuid()),
-///     ),
-/// );
-/// ```
-///
-/// Produces the following SQL:
-///
-/// ```sql
-/// SELECT
-///   gen_random_uuid() as id,
-///   gen_random_uuid() as sled_id,
-/// FROM sleds
-/// ```
-#[derive(diesel::expression::ValidGrouping, diesel::query_builder::QueryId)]
-pub struct ExpressionAlias<E> {
-    expr: E,
-    name: &'static str,
-}
-
-impl<E> ExpressionAlias<E>
-where
-    E: Expression,
-{
-    pub fn new<C: diesel::Column>(expr: E) -> Self {
-        Self { expr, name: C::NAME }
-    }
-}
-
-impl<E> Expression for ExpressionAlias<E>
-where
-    E: Expression,
-{
-    type SqlType = E::SqlType;
-}
-
-impl<E, QS> diesel::AppearsOnTable<QS> for ExpressionAlias<E> where
-    E: diesel::AppearsOnTable<QS>
-{
-}
-
-impl<E, T> SelectableExpression<T> for ExpressionAlias<E> where
-    E: SelectableExpression<T>
-{
-}
-
-impl<E> QueryFragment<Pg> for ExpressionAlias<E>
-where
-    E: QueryFragment<Pg>,
-{
-    fn walk_ast<'a>(
-        &'a self,
-        mut out: AstPass<'_, 'a, Pg>,
-    ) -> diesel::QueryResult<()> {
-        self.expr.walk_ast(out.reborrow())?;
-        out.push_sql(" AS ");
-        out.push_sql(&self.name);
-        Ok(())
-    }
-}
diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs
index 9b3d71970c..7ce6890a4d 100644
--- a/nexus/db-queries/src/db/mod.rs
+++ b/nexus/db-queries/src/db/mod.rs
@@ -4,7 +4,6 @@
 
 //! Facilities for working with the Omicron database
 
-pub(crate) mod alias;
 // This is not intended to be public, but this is necessary to use it from
 // doctests
 pub mod collection_attach;
diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
index 09798e4e5d..156691866e 100644
--- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
+++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
@@ -4,32 +4,27 @@
 
 //! Implementation of queries for updating resource provisioning info.
 
-use crate::db::alias::ExpressionAlias;
+use crate::db::column_walker::AllColumnsOf;
 use crate::db::model::ByteCount;
 use crate::db::model::ResourceTypeProvisioned;
 use crate::db::model::VirtualProvisioningCollection;
 use crate::db::model::VirtualProvisioningResource;
-use crate::db::pool::DbConnection;
+use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery};
 use crate::db::schema::virtual_provisioning_collection;
 use crate::db::schema::virtual_provisioning_resource;
-use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery};
 use crate::db::true_or_cast_error::matches_sentinel;
-use crate::db::true_or_cast_error::TrueOrCastError;
-use db_macros::Subquery;
+use const_format::concatcp;
 use diesel::pg::Pg;
-use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId};
 use diesel::result::Error as DieselError;
-use diesel::{
-    sql_types, BoolExpressionMethods, CombineDsl, ExpressionMethods, IntoSql,
-    JoinOnDsl, NullableExpressionMethods, QueryDsl, RunQueryDsl,
-    SelectableHelper,
-};
-use nexus_db_model::queries::virtual_provisioning_collection_update::{
-    all_collections, do_update, parent_silo, quotas, silo_provisioned,
-};
+use diesel::sql_types;
 use omicron_common::api::external;
 use omicron_common::api::external::MessagePair;
 
+type AllColumnsOfVirtualResource =
+    AllColumnsOf<virtual_provisioning_resource::table>;
+type AllColumnsOfVirtualCollection =
+    AllColumnsOf<virtual_provisioning_collection::table>;
+
 const NOT_ENOUGH_CPUS_SENTINEL: &'static str = "Not enough cpus";
 const NOT_ENOUGH_MEMORY_SENTINEL: &'static str = "Not enough memory";
 const NOT_ENOUGH_STORAGE_SENTINEL: &'static str = "Not enough storage";
@@ -77,319 +72,33 @@ pub fn from_diesel(e: DieselError) -> external::Error {
     error::public_error_from_diesel(e, error::ErrorHandler::Server)
 }
 
-#[derive(Subquery, QueryId)]
-#[subquery(name = parent_silo)]
-struct ParentSilo {
-    query: Box<dyn CteQuery<SqlType = parent_silo::SqlType>>,
-}
-
-impl ParentSilo {
-    fn new(project_id: uuid::Uuid) -> Self {
-        use crate::db::schema::project::dsl;
-        Self {
-            query: Box::new(
-                dsl::project.filter(dsl::id.eq(project_id)).select((
-                    ExpressionAlias::new::<parent_silo::dsl::id>(dsl::silo_id),
-                )),
-            ),
-        }
-    }
-}
-
-#[derive(Subquery, QueryId)]
-#[subquery(name = all_collections)]
-struct AllCollections {
-    query: Box<dyn CteQuery<SqlType = all_collections::SqlType>>,
-}
-
-impl AllCollections {
-    fn new(
-        project_id: uuid::Uuid,
-        parent_silo: &ParentSilo,
-        fleet_id: uuid::Uuid,
-    ) -> Self {
-        let project_id = project_id.into_sql::<sql_types::Uuid>();
-        let fleet_id = fleet_id.into_sql::<sql_types::Uuid>();
-        Self {
-            query: Box::new(
-                diesel::select((ExpressionAlias::new::<
-                    all_collections::dsl::id,
-                >(project_id),))
-                .union(parent_silo.query_source().select((
-                    ExpressionAlias::new::<all_collections::dsl::id>(
-                        parent_silo::id,
-                    ),
-                )))
-                .union(diesel::select((ExpressionAlias::new::<
-                    all_collections::dsl::id,
-                >(fleet_id),))),
-            ),
-        }
-    }
-}
-
-#[derive(Subquery, QueryId)]
-#[subquery(name = do_update)]
-struct DoUpdate {
-    query: Box<dyn CteQuery<SqlType = do_update::SqlType>>,
-}
-
-impl DoUpdate {
-    fn new_for_insert(
-        silo_provisioned: &SiloProvisioned,
-        quotas: &Quotas,
-        resource: VirtualProvisioningResource,
-    ) -> Self {
-        use virtual_provisioning_resource::dsl;
-
-        let cpus_provisioned_delta =
-            resource.cpus_provisioned.into_sql::<sql_types::BigInt>();
-        let memory_provisioned_delta =
-            i64::from(resource.ram_provisioned).into_sql::<sql_types::BigInt>();
-        let storage_provisioned_delta =
-            i64::from(resource.virtual_disk_bytes_provisioned)
-                .into_sql::<sql_types::BigInt>();
-
-        let not_allocted = dsl::virtual_provisioning_resource
-            .find(resource.id)
-            .count()
-            .single_value()
-            .assume_not_null()
-            .eq(0);
-
-        let has_sufficient_cpus = quotas
-            .query_source()
-            .select(quotas::cpus)
-            .single_value()
-            .assume_not_null()
-            .ge(silo_provisioned
-                .query_source()
-                .select(silo_provisioned::cpus_provisioned)
-                .single_value()
-                .assume_not_null()
-                + cpus_provisioned_delta);
-
-        let has_sufficient_memory = quotas
-            .query_source()
-            .select(quotas::memory)
-            .single_value()
-            .assume_not_null()
-            .ge(silo_provisioned
-                .query_source()
-                .select(silo_provisioned::ram_provisioned)
-                .single_value()
-                .assume_not_null()
-                + memory_provisioned_delta);
-
-        let has_sufficient_storage = quotas
-            .query_source()
-            .select(quotas::storage)
-            .single_value()
-            .assume_not_null()
-            .ge(silo_provisioned
-                .query_source()
-                .select(silo_provisioned::virtual_disk_bytes_provisioned)
-                .single_value()
-                .assume_not_null()
-                + storage_provisioned_delta);
-
-        Self {
-            query: Box::new(diesel::select((ExpressionAlias::new::<
-                do_update::update,
-            >(
-                not_allocted
-                    .and(TrueOrCastError::new(
-                        cpus_provisioned_delta.eq(0).or(has_sufficient_cpus),
-                        NOT_ENOUGH_CPUS_SENTINEL,
-                    ))
-                    .and(TrueOrCastError::new(
-                        memory_provisioned_delta
-                            .eq(0)
-                            .or(has_sufficient_memory),
-                        NOT_ENOUGH_MEMORY_SENTINEL,
-                    ))
-                    .and(TrueOrCastError::new(
-                        storage_provisioned_delta
-                            .eq(0)
-                            .or(has_sufficient_storage),
-                        NOT_ENOUGH_STORAGE_SENTINEL,
-                    )),
-            ),))),
-        }
-    }
-
-    fn new_for_delete(id: uuid::Uuid) -> Self {
-        use virtual_provisioning_resource::dsl;
-
-        let already_allocated = dsl::virtual_provisioning_resource
-            .find(id)
-            .count()
-            .single_value()
-            .assume_not_null()
-            .eq(1);
-
-        Self {
-            query: Box::new(diesel::select((ExpressionAlias::new::<
-                do_update::update,
-            >(already_allocated),))),
-        }
-    }
-}
-
-#[derive(Subquery, QueryId)]
-#[subquery(name = virtual_provisioning_collection)]
-struct UpdatedProvisions {
-    query:
-        Box<dyn CteQuery<SqlType = virtual_provisioning_collection::SqlType>>,
-}
-
-impl UpdatedProvisions {
-    fn new<V>(
-        all_collections: &AllCollections,
-        do_update: &DoUpdate,
-        values: V,
-    ) -> Self
-    where
-        V: diesel::AsChangeset<Target = virtual_provisioning_collection::table>,
-        <V as diesel::AsChangeset>::Changeset:
-            QueryFragment<Pg> + Send + 'static,
-    {
-        use virtual_provisioning_collection::dsl;
-
-        Self {
-            query: Box::new(
-                diesel::update(dsl::virtual_provisioning_collection)
-                    .set(values)
-                    .filter(
-                        dsl::id.eq_any(
-                            all_collections
-                                .query_source()
-                                .select(all_collections::id),
-                        ),
-                    )
-                    .filter(
-                        do_update
-                            .query_source()
-                            .select(do_update::update)
-                            .single_value()
-                            .assume_not_null(),
-                    )
-                    .returning(virtual_provisioning_collection::all_columns),
-            ),
-        }
-    }
-}
-
-#[derive(Subquery, QueryId)]
-#[subquery(name = quotas)]
-struct Quotas {
-    query: Box<dyn CteQuery<SqlType = quotas::SqlType>>,
-}
-
-impl Quotas {
-    // TODO: We could potentially skip this in cases where we know we're removing a resource instead of inserting
-    fn new(parent_silo: &ParentSilo) -> Self {
-        use crate::db::schema::silo_quotas::dsl;
-        Self {
-            query: Box::new(
-                dsl::silo_quotas
-                    .inner_join(
-                        parent_silo
-                            .query_source()
-                            .on(dsl::silo_id.eq(parent_silo::id)),
-                    )
-                    .select((
-                        dsl::silo_id,
-                        dsl::cpus,
-                        ExpressionAlias::new::<quotas::dsl::memory>(
-                            dsl::memory_bytes,
-                        ),
-                        ExpressionAlias::new::<quotas::dsl::storage>(
-                            dsl::storage_bytes,
-                        ),
-                    )),
-            ),
-        }
-    }
-}
-
-#[derive(Subquery, QueryId)]
-#[subquery(name = silo_provisioned)]
-struct SiloProvisioned {
-    query: Box<dyn CteQuery<SqlType = silo_provisioned::SqlType>>,
-}
-
-impl SiloProvisioned {
-    fn new(parent_silo: &ParentSilo) -> Self {
-        use virtual_provisioning_collection::dsl;
-        Self {
-            query: Box::new(
-                dsl::virtual_provisioning_collection
-                    .inner_join(
-                        parent_silo
-                            .query_source()
-                            .on(dsl::id.eq(parent_silo::id)),
-                    )
-                    .select((
-                        dsl::id,
-                        dsl::cpus_provisioned,
-                        dsl::ram_provisioned,
-                        dsl::virtual_disk_bytes_provisioned,
-                    )),
-            ),
-        }
-    }
-}
-
-// This structure wraps a query, such that it can be used within a CTE.
-//
-// It generates a name that can be used by the "CteBuilder", but does not
-// implement "AsQuerySource". This basically means:
-// - It can be used to add data-modifying statements to the CTE
-// - The result of the query cannot be referenced by subsequent queries
-//
-// NOTE: The name for each CTE arm should be unique, so this shouldn't be used
-// multiple times within a single CTE. This restriction could be removed by
-// generating unique identifiers.
-struct UnreferenceableSubquery<Q>(Q);
-
-impl<Q> QueryFragment<Pg> for UnreferenceableSubquery<Q>
-where
-    Q: QueryFragment<Pg> + Send + 'static,
-{
-    fn walk_ast<'a>(
-        &'a self,
-        mut out: diesel::query_builder::AstPass<'_, 'a, Pg>,
-    ) -> diesel::QueryResult<()> {
-        out.push_identifier("unused_cte_arm")?;
-        Ok(())
-    }
-}
-
-impl<Q> crate::db::subquery::Subquery for UnreferenceableSubquery<Q>
-where
-    Q: QueryFragment<Pg> + Send + 'static,
-{
-    fn query(&self) -> &dyn QueryFragment<Pg> {
-        &self.0
-    }
-}
-
 /// The virtual resource collection is only updated when a resource is inserted
 /// or deleted from the resource provisioning table. By probing for the presence
 /// or absence of a resource, we can update collections at the same time as we
 /// create or destroy the resource, which helps make the operation idempotent.
+#[derive(Clone)]
 enum UpdateKind {
-    Insert(VirtualProvisioningResource),
-    Delete(uuid::Uuid),
+    InsertStorage(VirtualProvisioningResource),
+    DeleteStorage {
+        id: uuid::Uuid,
+        disk_byte_diff: ByteCount,
+    },
+    InsertInstance(VirtualProvisioningResource),
+    DeleteInstance {
+        id: uuid::Uuid,
+        max_instance_gen: i64,
+        cpus_diff: i64,
+        ram_diff: ByteCount,
+    },
 }
 
+type SelectableSql<T> = <
+    <T as diesel::Selectable<Pg>>::SelectExpression as diesel::Expression
+>::SqlType;
+
 /// Constructs a CTE for updating resource provisioning information in all
 /// collections for a particular object.
-#[derive(QueryId)]
-pub struct VirtualProvisioningCollectionUpdate {
-    cte: Cte,
-}
+pub struct VirtualProvisioningCollectionUpdate {}
 
 impl VirtualProvisioningCollectionUpdate {
     // Generic utility for updating all collections including this resource,
@@ -399,66 +108,336 @@ impl VirtualProvisioningCollectionUpdate {
     // - Project
     // - Silo
     // - Fleet
-    //
-    // Arguments:
-    // - do_update: A boolean SQL query to answer the question: "Should this update
-    // be applied"? This query is necessary for idempotency.
-    // - update: A SQL query to actually modify the resource record. Generally
-    // this is an "INSERT", "UPDATE", or "DELETE".
-    // - project_id: The project to which the resource belongs.
-    // - values: The updated values to propagate through collections (iff
-    // "do_update" evaluates to "true").
-    fn apply_update<U, V>(
+    fn apply_update(
         update_kind: UpdateKind,
-        update: U,
         project_id: uuid::Uuid,
-        values: V,
-    ) -> Self
-    where
-        U: QueryFragment<Pg> + crate::db::subquery::Subquery + Send + 'static,
-        V: diesel::AsChangeset<Target = virtual_provisioning_collection::table>,
-        <V as diesel::AsChangeset>::Changeset:
-            QueryFragment<Pg> + Send + 'static,
-    {
-        let parent_silo = ParentSilo::new(project_id);
-        let all_collections = AllCollections::new(
-            project_id,
-            &parent_silo,
-            *crate::db::fixed_data::FLEET_ID,
-        );
-
-        let quotas = Quotas::new(&parent_silo);
-        let silo_provisioned = SiloProvisioned::new(&parent_silo);
+    ) -> TypedSqlQuery<SelectableSql<VirtualProvisioningCollection>> {
+        let query = QueryBuilder::new().sql("
+WITH
+  parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = ").param().sql("),")
+            .bind::<sql_types::Uuid, _>(project_id).sql("
+  all_collections
+    AS (
+      ((SELECT ").param().sql(" AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo))
+      UNION (SELECT ").param().sql(" AS id)
+    ),")
+            .bind::<sql_types::Uuid, _>(project_id)
+            .bind::<sql_types::Uuid, _>(*crate::db::fixed_data::FLEET_ID)
+            .sql("
+  quotas
+    AS (
+      SELECT
+        silo_quotas.silo_id,
+        silo_quotas.cpus,
+        silo_quotas.memory_bytes AS memory,
+        silo_quotas.storage_bytes AS storage
+      FROM
+        silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id
+    ),
+  silo_provisioned
+    AS (
+      SELECT
+        virtual_provisioning_collection.id,
+        virtual_provisioning_collection.cpus_provisioned,
+        virtual_provisioning_collection.ram_provisioned,
+        virtual_provisioning_collection.virtual_disk_bytes_provisioned
+      FROM
+        virtual_provisioning_collection
+        INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id
+    ),");
+
+        let query = match update_kind.clone() {
+            UpdateKind::InsertInstance(resource) | UpdateKind::InsertStorage(resource) => {
+                query.sql("
+  do_update
+    AS (
+      SELECT
+        (
+          (
+            (
+              SELECT count(*)
+              FROM virtual_provisioning_resource
+              WHERE virtual_provisioning_resource.id = ").param().sql("
+              LIMIT 1
+            )
+            = 0
+            AND CAST(
+                IF(
+                  (
+                    ").param().sql(" = 0
+                    OR (SELECT quotas.cpus FROM quotas LIMIT 1)
+                      >= (
+                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1)
+                          + ").param().sql(concatcp!("
+                        )
+                  ),
+                  'TRUE',
+                  '", NOT_ENOUGH_CPUS_SENTINEL, "'
+                )
+                  AS BOOL
+              )
+          )
+          AND CAST(
+              IF(
+                (
+                  ")).param().sql(" = 0
+                  OR (SELECT quotas.memory FROM quotas LIMIT 1)
+                    >= (
+                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1)
+                        + ").param().sql(concatcp!("
+                      )
+                ),
+                'TRUE',
+                '", NOT_ENOUGH_MEMORY_SENTINEL, "'
+              )
+                AS BOOL
+            )
+        )
+        AND CAST(
+            IF(
+              (
+                ")).param().sql(" = 0
+                OR (SELECT quotas.storage FROM quotas LIMIT 1)
+                  >= (
+                      (
+                        SELECT
+                          silo_provisioned.virtual_disk_bytes_provisioned
+                        FROM
+                          silo_provisioned
+                        LIMIT
+                          1
+                      )
+                      + ").param().sql(concatcp!("
+                    )
+              ),
+              'TRUE',
+              '", NOT_ENOUGH_STORAGE_SENTINEL, "'
+            )
+              AS BOOL
+          )
+          AS update
+    ),"))
+                .bind::<sql_types::Uuid, _>(resource.id)
+                .bind::<sql_types::BigInt, _>(resource.cpus_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.cpus_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.ram_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.ram_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.virtual_disk_bytes_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.virtual_disk_bytes_provisioned)
+            },
+            UpdateKind::DeleteInstance { id, .. } | UpdateKind::DeleteStorage { id, .. } => {
+                query.sql("
+  do_update
+    AS (
+      SELECT
+        (
+          SELECT
+            count(*)
+          FROM
+            virtual_provisioning_resource
+          WHERE
+            virtual_provisioning_resource.id = ").param().sql("
+          LIMIT
+            1
+        )
+        = 1
+          AS update
+    ),")
+                .bind::<sql_types::Uuid, _>(id)
+            },
+        };
 
-        let do_update = match update_kind {
-            UpdateKind::Insert(resource) => {
-                DoUpdate::new_for_insert(&silo_provisioned, &quotas, resource)
+        let query = match update_kind.clone() {
+            UpdateKind::InsertInstance(resource)
+            | UpdateKind::InsertStorage(resource) => query
+                .sql(
+                    "
+  unused_cte_arm
+    AS (
+      INSERT
+      INTO
+        virtual_provisioning_resource
+          (
+            id,
+            time_modified,
+            resource_type,
+            virtual_disk_bytes_provisioned,
+            cpus_provisioned,
+            ram_provisioned
+          )
+      VALUES
+        (",
+                )
+                .param()
+                .sql(", DEFAULT, ")
+                .param()
+                .sql(", ")
+                .param()
+                .sql(", ")
+                .param()
+                .sql(", ")
+                .param()
+                .sql(
+                    ")
+      ON CONFLICT
+      DO
+        NOTHING
+      RETURNING ",
+                )
+                .sql(AllColumnsOfVirtualResource::with_prefix(
+                    "virtual_provisioning_resource",
+                ))
+                .sql("),")
+                .bind::<sql_types::Uuid, _>(resource.id)
+                .bind::<sql_types::Text, _>(resource.resource_type)
+                .bind::<sql_types::BigInt, _>(
+                    resource.virtual_disk_bytes_provisioned,
+                )
+                .bind::<sql_types::BigInt, _>(resource.cpus_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.ram_provisioned),
+            UpdateKind::DeleteInstance { id, max_instance_gen, .. } => {
+                // The filter condition here ensures that the provisioning record is
+                // only deleted if the corresponding instance has a generation
+                // number less than the supplied `max_instance_gen`. This allows a
+                // caller that is about to apply an instance update that will stop
+                // the instance and that bears generation G to avoid deleting
+                // resources if the instance generation was already advanced to or
+                // past G.
+                //
+                // If the relevant instance ID is not in the database, then some
+                // other operation must have ensured the instance was previously
+                // stopped (because that's the only way it could have been deleted),
+                // and that operation should have cleaned up the resources already,
+                // in which case there's nothing to do here.
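+                //
+                // For example (illustrative numbers): if the caller bears
+                // generation G = 5 (so `max_instance_gen` = 5) but the
+                // instance row has already advanced to state_generation = 6,
+                // the inner SELECT returns no row, the DELETE matches
+                // nothing, and the newer provisioning state is left intact.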
+                query
+                    .sql(
+                        "
+  unused_cte_arm
+    AS (
+      DELETE FROM
+        virtual_provisioning_resource
+      WHERE
+        virtual_provisioning_resource.id = ",
+                    )
+                    .param()
+                    .sql(
+                        "
+        AND
+        virtual_provisioning_resource.id = (
+            SELECT instance.id FROM instance WHERE
+                instance.id = ",
+                    )
+                    .param()
+                    .sql(
+                        " AND
+                instance.state_generation < ",
+                    )
+                    .param()
+                    .sql(
+                        " LIMIT 1)
+      RETURNING ",
+                    )
+                    .sql(AllColumnsOfVirtualResource::with_prefix(
+                        "virtual_provisioning_resource",
+                    ))
+                    .sql("),")
+                    .bind::<sql_types::Uuid, _>(id)
+                    .bind::<sql_types::Uuid, _>(id)
+                    .bind::<sql_types::BigInt, _>(max_instance_gen)
             }
-            UpdateKind::Delete(id) => DoUpdate::new_for_delete(id),
+            UpdateKind::DeleteStorage { id, .. } => query
+                .sql(
+                    "
+  unused_cte_arm
+    AS (
+      DELETE FROM
+        virtual_provisioning_resource
+      WHERE
+        virtual_provisioning_resource.id = ",
+                )
+                .param()
+                .sql(
+                    "
+      RETURNING ",
+                )
+                .sql(AllColumnsOfVirtualResource::with_prefix(
+                    "virtual_provisioning_resource",
+                ))
+                .sql("),")
+                .bind::<sql_types::Uuid, _>(id),
         };
 
-        let updated_collections =
-            UpdatedProvisions::new(&all_collections, &do_update, values);
-
-        // TODO: Do we want to select from "all_collections" instead? Seems more
-        // idempotent; it'll work even when we don't update anything...
-        let final_select = Box::new(
-            updated_collections
-                .query_source()
-                .select(VirtualProvisioningCollection::as_select()),
+        let query = query.sql(
+            "
+  virtual_provisioning_collection
+    AS (
+      UPDATE
+        virtual_provisioning_collection
+      SET",
         );
+        let query = match update_kind.clone() {
+            UpdateKind::InsertInstance(resource) => query
+                .sql(
+                    "
+        time_modified = current_timestamp(),
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + ",
+                )
+                .param()
+                .sql(
+                    ",
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned + ",
+                )
+                .param()
+                .bind::<sql_types::BigInt, _>(resource.cpus_provisioned)
+                .bind::<sql_types::BigInt, _>(resource.ram_provisioned),
+            UpdateKind::InsertStorage(resource) => query
+                .sql(
+                    "
+        time_modified = current_timestamp(),
+        virtual_disk_bytes_provisioned
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned + ",
+                )
+                .param()
+                .bind::<sql_types::BigInt, _>(
+                    resource.virtual_disk_bytes_provisioned,
+                ),
+            UpdateKind::DeleteInstance { cpus_diff, ram_diff, .. } => query
+                .sql(
+                    "
+        time_modified = current_timestamp(),
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - ",
+                )
+                .param()
+                .sql(
+                    ",
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned - ",
+                )
+                .param()
+                .bind::<sql_types::BigInt, _>(cpus_diff)
+                .bind::<sql_types::BigInt, _>(ram_diff),
+            UpdateKind::DeleteStorage { disk_byte_diff, .. } => query
+                .sql(
+                    "
+        time_modified = current_timestamp(),
+        virtual_disk_bytes_provisioned
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned - ",
+                )
+                .param()
+                .bind::<sql_types::BigInt, _>(disk_byte_diff),
+        };
 
-        let cte = CteBuilder::new()
-            .add_subquery(parent_silo)
-            .add_subquery(all_collections)
-            .add_subquery(quotas)
-            .add_subquery(silo_provisioned)
-            .add_subquery(do_update)
-            .add_subquery(update)
-            .add_subquery(updated_collections)
-            .build(final_select);
-
-        Self { cte }
+        query.sql("
+      WHERE
+        virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
+      RETURNING "
+        ).sql(AllColumnsOfVirtualCollection::with_prefix("virtual_provisioning_collection")).sql("
+    )
+SELECT "
+    ).sql(AllColumnsOfVirtualCollection::with_prefix("virtual_provisioning_collection")).sql("
+FROM
+  virtual_provisioning_collection
+").query()
     }
 
     pub fn new_insert_storage(
@@ -466,62 +445,22 @@ impl VirtualProvisioningCollectionUpdate {
         disk_byte_diff: ByteCount,
         project_id: uuid::Uuid,
         storage_type: crate::db::datastore::StorageType,
-    ) -> Self {
-        use virtual_provisioning_collection::dsl as collection_dsl;
-        use virtual_provisioning_resource::dsl as resource_dsl;
-
+    ) -> TypedSqlQuery<SelectableSql<VirtualProvisioningCollection>> {
         let mut provision =
             VirtualProvisioningResource::new(id, storage_type.into());
         provision.virtual_disk_bytes_provisioned = disk_byte_diff;
 
-        Self::apply_update(
-            UpdateKind::Insert(provision.clone()),
-            // The query to actually insert the record.
-            UnreferenceableSubquery(
-                diesel::insert_into(
-                    resource_dsl::virtual_provisioning_resource,
-                )
-                .values(provision)
-                .on_conflict_do_nothing()
-                .returning(virtual_provisioning_resource::all_columns),
-            ),
-            // Within this project, silo, fleet...
-            project_id,
-            // ... We add the disk usage.
-            (
-                collection_dsl::time_modified.eq(diesel::dsl::now),
-                collection_dsl::virtual_disk_bytes_provisioned
-                    .eq(collection_dsl::virtual_disk_bytes_provisioned
-                        + disk_byte_diff),
-            ),
-        )
+        Self::apply_update(UpdateKind::InsertStorage(provision), project_id)
     }
 
     pub fn new_delete_storage(
         id: uuid::Uuid,
         disk_byte_diff: ByteCount,
         project_id: uuid::Uuid,
-    ) -> Self {
-        use virtual_provisioning_collection::dsl as collection_dsl;
-        use virtual_provisioning_resource::dsl as resource_dsl;
-
+    ) -> TypedSqlQuery<SelectableSql<VirtualProvisioningCollection>> {
         Self::apply_update(
-            UpdateKind::Delete(id),
-            // The query to actually delete the record.
-            UnreferenceableSubquery(
-                diesel::delete(resource_dsl::virtual_provisioning_resource)
-                    .filter(resource_dsl::id.eq(id))
-                    .returning(virtual_provisioning_resource::all_columns),
-            ),
-            // Within this project, silo, fleet...
+            UpdateKind::DeleteStorage { id, disk_byte_diff },
             project_id,
-            // ... We subtract the disk usage.
-            (
-                collection_dsl::time_modified.eq(diesel::dsl::now),
-                collection_dsl::virtual_disk_bytes_provisioned
-                    .eq(collection_dsl::virtual_disk_bytes_provisioned
-                        - disk_byte_diff),
-            ),
         )
     }
 
@@ -530,10 +469,7 @@ impl VirtualProvisioningCollectionUpdate {
         cpus_diff: i64,
         ram_diff: ByteCount,
         project_id: uuid::Uuid,
-    ) -> Self {
-        use virtual_provisioning_collection::dsl as collection_dsl;
-        use virtual_provisioning_resource::dsl as resource_dsl;
-
+    ) -> TypedSqlQuery<SelectableSql<VirtualProvisioningCollection>> {
         let mut provision = VirtualProvisioningResource::new(
             id,
             ResourceTypeProvisioned::Instance,
@@ -541,28 +477,7 @@ impl VirtualProvisioningCollectionUpdate {
         provision.cpus_provisioned = cpus_diff;
         provision.ram_provisioned = ram_diff;
 
-        Self::apply_update(
-            UpdateKind::Insert(provision.clone()),
-            // The query to actually insert the record.
-            UnreferenceableSubquery(
-                diesel::insert_into(
-                    resource_dsl::virtual_provisioning_resource,
-                )
-                .values(provision)
-                .on_conflict_do_nothing()
-                .returning(virtual_provisioning_resource::all_columns),
-            ),
-            // Within this project, silo, fleet...
-            project_id,
-            // ... We update the resource usage.
-            (
-                collection_dsl::time_modified.eq(diesel::dsl::now),
-                collection_dsl::cpus_provisioned
-                    .eq(collection_dsl::cpus_provisioned + cpus_diff),
-                collection_dsl::ram_provisioned
-                    .eq(collection_dsl::ram_provisioned + ram_diff),
-            ),
-        )
+        Self::apply_update(UpdateKind::InsertInstance(provision), project_id)
     }
 
     pub fn new_delete_instance(
@@ -571,86 +486,26 @@ impl VirtualProvisioningCollectionUpdate {
         cpus_diff: i64,
         ram_diff: ByteCount,
         project_id: uuid::Uuid,
-    ) -> Self {
-        use crate::db::schema::instance::dsl as instance_dsl;
-        use virtual_provisioning_collection::dsl as collection_dsl;
-        use virtual_provisioning_resource::dsl as resource_dsl;
-
+    ) -> TypedSqlQuery<SelectableSql<VirtualProvisioningCollection>> {
         Self::apply_update(
-            UpdateKind::Delete(id),
-            // The query to actually delete the record.
-            //
-            // The filter condition here ensures that the provisioning record is
-            // only deleted if the corresponding instance has a generation
-            // number less than the supplied `max_instance_gen`. This allows a
-            // caller that is about to apply an instance update that will stop
-            // the instance and that bears generation G to avoid deleting
-            // resources if the instance generation was already advanced to or
-            // past G.
-            //
-            // If the relevant instance ID is not in the database, then some
-            // other operation must have ensured the instance was previously
-            // stopped (because that's the only way it could have been deleted),
-            // and that operation should have cleaned up the resources already,
-            // in which case there's nothing to do here.
-            //
-            // There is an additional "direct" filter on the target resource ID
-            // to avoid a full scan of the resource table.
-            UnreferenceableSubquery(
-                diesel::delete(resource_dsl::virtual_provisioning_resource)
-                    .filter(resource_dsl::id.eq(id))
-                    .filter(
-                        resource_dsl::id.nullable().eq(instance_dsl::instance
-                            .filter(instance_dsl::id.eq(id))
-                            .filter(
-                                instance_dsl::state_generation
-                                    .lt(max_instance_gen),
-                            )
-                            .select(instance_dsl::id)
-                            .single_value()),
-                    )
-                    .returning(virtual_provisioning_resource::all_columns),
-            ),
-            // Within this project, silo, fleet...
+            UpdateKind::DeleteInstance {
+                id,
+                max_instance_gen,
+                cpus_diff,
+                ram_diff,
+            },
             project_id,
-            // ... We update the resource usage.
-            (
-                collection_dsl::time_modified.eq(diesel::dsl::now),
-                collection_dsl::cpus_provisioned
-                    .eq(collection_dsl::cpus_provisioned - cpus_diff),
-                collection_dsl::ram_provisioned
-                    .eq(collection_dsl::ram_provisioned - ram_diff),
-            ),
         )
     }
 }
 
-impl QueryFragment<Pg> for VirtualProvisioningCollectionUpdate {
-    fn walk_ast<'a>(
-        &'a self,
-        mut out: AstPass<'_, 'a, Pg>,
-    ) -> diesel::QueryResult<()> {
-        out.unsafe_to_cache_prepared();
-
-        self.cte.walk_ast(out.reborrow())?;
-        Ok(())
-    }
-}
-
-type SelectableSql<T> = <
-    <T as diesel::Selectable<Pg>>::SelectExpression as diesel::Expression
->::SqlType;
-
-impl Query for VirtualProvisioningCollectionUpdate {
-    type SqlType = SelectableSql<VirtualProvisioningCollection>;
-}
-
-impl RunQueryDsl<DbConnection> for VirtualProvisioningCollectionUpdate {}
-
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::db::explain::ExplainableAsync;
     use crate::db::raw_query_builder::expectorate_query_contents;
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_test_utils::dev;
     use uuid::Uuid;
 
     // These tests are a bit of a "change detector", but they're here to help
@@ -732,4 +587,120 @@ mod test {
             "tests/output/virtual_provisioning_collection_update_delete_instance.sql",
         ).await;
     }
+
+    // Run EXPLAIN over each possible form of the SQL query to ensure that
+    // each one produces valid SQL.
+
+    #[tokio::test]
+    async fn explain_insert_storage() {
+        let logctx = dev::test_setup_log("explain_insert_storage");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let cfg = crate::db::Config { url: db.pg_config().clone() };
+        let pool = crate::db::Pool::new(&logctx.log, &cfg);
+        let conn = pool.pool().get().await.unwrap();
+
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let disk_byte_diff = 2048.try_into().unwrap();
+        let storage_type = crate::db::datastore::StorageType::Disk;
+
+        let query = VirtualProvisioningCollectionUpdate::new_insert_storage(
+            id,
+            disk_byte_diff,
+            project_id,
+            storage_type,
+        );
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn explain_delete_storage() {
+        let logctx = dev::test_setup_log("explain_delete_storage");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let cfg = crate::db::Config { url: db.pg_config().clone() };
+        let pool = crate::db::Pool::new(&logctx.log, &cfg);
+        let conn = pool.pool().get().await.unwrap();
+
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let disk_byte_diff = 2048.try_into().unwrap();
+
+        let query = VirtualProvisioningCollectionUpdate::new_delete_storage(
+            id,
+            disk_byte_diff,
+            project_id,
+        );
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn explain_insert_instance() {
+        let logctx = dev::test_setup_log("explain_insert_instance");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let cfg = crate::db::Config { url: db.pg_config().clone() };
+        let pool = crate::db::Pool::new(&logctx.log, &cfg);
+        let conn = pool.pool().get().await.unwrap();
+
+        let id = Uuid::nil();
+        let project_id = Uuid::nil();
+        let cpus_diff = 16.try_into().unwrap();
+        let ram_diff = 2048.try_into().unwrap();
+
+        let query = VirtualProvisioningCollectionUpdate::new_insert_instance(
+            id, cpus_diff, ram_diff, project_id,
+        );
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn explain_delete_instance() {
+        let logctx = dev::test_setup_log("explain_delete_instance");
+        let log = logctx.log.new(o!());
+        let mut db = test_setup_database(&log).await;
+        let cfg = crate::db::Config { url: db.pg_config().clone() };
+        let pool = crate::db::Pool::new(&logctx.log, &cfg);
+        let conn = pool.pool().get().await.unwrap();
+
+        let id = Uuid::nil();
+        let max_instance_gen = 0;
+        let project_id = Uuid::nil();
+        let cpus_diff = 16.try_into().unwrap();
+        let ram_diff = 2048.try_into().unwrap();
+
+        let query = VirtualProvisioningCollectionUpdate::new_delete_instance(
+            id,
+            max_instance_gen,
+            cpus_diff,
+            ram_diff,
+            project_id,
+        );
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
index fcabefef26..48094a8371 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
@@ -37,9 +37,9 @@ WITH
           WHERE
             virtual_provisioning_resource.id = $4
           LIMIT
-            $5
+            1
         )
-        = $6
+        = 1
           AS update
     ),
   unused_cte_arm
@@ -47,7 +47,7 @@ WITH
       DELETE FROM
         virtual_provisioning_resource
       WHERE
-        virtual_provisioning_resource.id = $7
+        virtual_provisioning_resource.id = $5
         AND virtual_provisioning_resource.id
           = (
               SELECT
@@ -55,9 +55,9 @@ WITH
               FROM
                 instance
               WHERE
-                instance.id = $8 AND instance.state_generation < $9
+                instance.id = $6 AND instance.state_generation < $7
               LIMIT
-                $10
+                1
             )
       RETURNING
         virtual_provisioning_resource.id,
@@ -73,11 +73,11 @@ WITH
         virtual_provisioning_collection
       SET
         time_modified = current_timestamp(),
-        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $11,
-        ram_provisioned = virtual_provisioning_collection.ram_provisioned - $12
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $8,
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned - $9
       WHERE
         virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
-        AND (SELECT do_update.update FROM do_update LIMIT $13)
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_collection.id,
         virtual_provisioning_collection.time_modified,
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
index 72c0b81e15..b607ac4185 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
@@ -37,9 +37,9 @@ WITH
           WHERE
             virtual_provisioning_resource.id = $4
           LIMIT
-            $5
+            1
         )
-        = $6
+        = 1
           AS update
     ),
   unused_cte_arm
@@ -47,7 +47,7 @@ WITH
       DELETE FROM
         virtual_provisioning_resource
       WHERE
-        virtual_provisioning_resource.id = $7
+        virtual_provisioning_resource.id = $5
       RETURNING
         virtual_provisioning_resource.id,
         virtual_provisioning_resource.time_modified,
@@ -63,10 +63,10 @@ WITH
       SET
         time_modified = current_timestamp(),
         virtual_disk_bytes_provisioned
-          = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $8
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $6
       WHERE
         virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
-        AND (SELECT do_update.update FROM do_update LIMIT $9)
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_collection.id,
         virtual_provisioning_collection.time_modified,
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
index 753b7f09f3..38f10a7148 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql
@@ -39,17 +39,17 @@ WITH
               WHERE
                 virtual_provisioning_resource.id = $4
               LIMIT
-                $5
+                1
             )
-            = $6
+            = 0
             AND CAST(
                 IF(
                   (
-                    $7 = $8
-                    OR (SELECT quotas.cpus FROM quotas LIMIT $9)
+                    $5 = 0
+                    OR (SELECT quotas.cpus FROM quotas LIMIT 1)
                       >= (
-                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10)
-                          + $11
+                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1)
+                          + $6
                         )
                   ),
                   'TRUE',
@@ -61,11 +61,10 @@ WITH
           AND CAST(
               IF(
                 (
-                  $12 = $13
-                  OR (SELECT quotas.memory FROM quotas LIMIT $14)
+                  $7 = 0
+                  OR (SELECT quotas.memory FROM quotas LIMIT 1)
                     >= (
-                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15)
-                        + $16
+                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1) + $8
                       )
                 ),
                 'TRUE',
@@ -77,8 +76,8 @@ WITH
         AND CAST(
             IF(
               (
-                $17 = $18
-                OR (SELECT quotas.storage FROM quotas LIMIT $19)
+                $9 = 0
+                OR (SELECT quotas.storage FROM quotas LIMIT 1)
                   >= (
                       (
                         SELECT
@@ -86,9 +85,9 @@ WITH
                         FROM
                           silo_provisioned
                         LIMIT
-                          $20
+                          1
                       )
-                      + $21
+                      + $10
                     )
               ),
               'TRUE',
@@ -112,7 +111,7 @@ WITH
             ram_provisioned
           )
       VALUES
-        ($22, DEFAULT, $23, $24, $25, $26)
+        ($11, DEFAULT, $12, $13, $14, $15)
       ON CONFLICT
       DO
         NOTHING
@@ -130,11 +129,11 @@ WITH
         virtual_provisioning_collection
       SET
         time_modified = current_timestamp(),
-        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $27,
-        ram_provisioned = virtual_provisioning_collection.ram_provisioned + $28
+        cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $16,
+        ram_provisioned = virtual_provisioning_collection.ram_provisioned + $17
       WHERE
         virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
-        AND (SELECT do_update.update FROM do_update LIMIT $29)
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_collection.id,
         virtual_provisioning_collection.time_modified,
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql
index 040a5dc20c..87cd227ed9 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql
@@ -39,17 +39,17 @@ WITH
               WHERE
                 virtual_provisioning_resource.id = $4
               LIMIT
-                $5
+                1
             )
-            = $6
+            = 0
             AND CAST(
                 IF(
                   (
-                    $7 = $8
-                    OR (SELECT quotas.cpus FROM quotas LIMIT $9)
+                    $5 = 0
+                    OR (SELECT quotas.cpus FROM quotas LIMIT 1)
                       >= (
-                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10)
-                          + $11
+                          (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1)
+                          + $6
                         )
                   ),
                   'TRUE',
@@ -61,11 +61,10 @@ WITH
           AND CAST(
               IF(
                 (
-                  $12 = $13
-                  OR (SELECT quotas.memory FROM quotas LIMIT $14)
+                  $7 = 0
+                  OR (SELECT quotas.memory FROM quotas LIMIT 1)
                     >= (
-                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15)
-                        + $16
+                        (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1) + $8
                       )
                 ),
                 'TRUE',
@@ -77,8 +76,8 @@ WITH
         AND CAST(
             IF(
               (
-                $17 = $18
-                OR (SELECT quotas.storage FROM quotas LIMIT $19)
+                $9 = 0
+                OR (SELECT quotas.storage FROM quotas LIMIT 1)
                   >= (
                       (
                         SELECT
@@ -86,9 +85,9 @@ WITH
                         FROM
                           silo_provisioned
                         LIMIT
-                          $20
+                          1
                       )
-                      + $21
+                      + $10
                     )
               ),
               'TRUE',
@@ -112,7 +111,7 @@ WITH
             ram_provisioned
           )
       VALUES
-        ($22, DEFAULT, $23, $24, $25, $26)
+        ($11, DEFAULT, $12, $13, $14, $15)
       ON CONFLICT
       DO
         NOTHING
@@ -131,10 +130,10 @@ WITH
       SET
         time_modified = current_timestamp(),
         virtual_disk_bytes_provisioned
-          = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $27
+          = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $16
       WHERE
         virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections)
-        AND (SELECT do_update.update FROM do_update LIMIT $28)
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_collection.id,
         virtual_provisioning_collection.time_modified,

From 7633d1728931adc0545c99fa04925b3301de7eff Mon Sep 17 00:00:00 2001
From: James MacMahon <james@oxide.computer>
Date: Wed, 29 May 2024 17:26:17 -0400
Subject: [PATCH 14/28] [#3886 2/4] Region replacement omdb commands (#5820)

This commit adds some commands to omdb related to the new region
replacement logic:

    $ ./target/debug/omdb db region-replacement
Query for information about region replacements, optionally manually
triggering one

    Usage: omdb db region-replacement [OPTIONS] <COMMAND>

    Commands:
      list     List region replacement requests
      status   Show current region replacements and their status
      info     Show detailed information for a region replacement
      request  Manually request a region replacement
      help     Print this message or the help of the given subcommand(s)

`list` will list all region replacement requests, along with their
request time and state.

`status` will show a summary of all non-Complete region replacements,
along with their state and progress.

`info` will show a detailed view of a region replacement, starting with
the details that the `status` summary shows, then showing all related
notifications and steps taken to drive the replacement forward.

Finally, `request` will request that a region be replaced, and return
the ID of the replacement.
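
As a rough sketch (the IDs below are placeholders, and `request` also
requires omdb's opt-in for destructive operations), a troubleshooting
session might look like:

    $ ./target/debug/omdb db region-replacement list
    $ ./target/debug/omdb db region-replacement status
    $ ./target/debug/omdb db region-replacement info <REPLACEMENT_ID>
    $ ./target/debug/omdb db region-replacement request <REGION_ID>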
---
 Cargo.lock                                    |   1 +
 dev-tools/omdb/Cargo.toml                     |   1 +
 dev-tools/omdb/src/bin/omdb/db.rs             | 413 ++++++++++++++++++
 dev-tools/omdb/tests/usage_errors.out         |   4 +
 .../src/db/datastore/region_replacement.rs    |  18 +
 nexus/db-queries/src/db/datastore/volume.rs   |  67 +++
 6 files changed, 504 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index 88e9afd8c9..7b8326fb8d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5553,6 +5553,7 @@ dependencies = [
  "gateway-messages",
  "gateway-test-utils",
  "humantime",
+ "indicatif",
  "internal-dns",
  "ipnetwork",
  "multimap",
diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml
index 3c466b1683..9cdf03093c 100644
--- a/dev-tools/omdb/Cargo.toml
+++ b/dev-tools/omdb/Cargo.toml
@@ -55,6 +55,7 @@ uuid.workspace = true
 ipnetwork.workspace = true
 omicron-workspace-hack.workspace = true
 multimap.workspace = true
+indicatif.workspace = true
 
 [dev-dependencies]
 expectorate.workspace = true
diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 549f289ad0..be4e1e8696 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -15,6 +15,7 @@
 // NOTE: emanates from Tabled macros
 #![allow(clippy::useless_vec)]
 
+use crate::check_allow_destructive::DestructiveOperationToken;
 use crate::helpers::CONNECTION_OPTIONS_HEADING;
 use crate::helpers::DATABASE_OPTIONS_HEADING;
 use crate::Omdb;
@@ -25,7 +26,9 @@ use async_bb8_diesel::AsyncConnection;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use async_bb8_diesel::AsyncSimpleConnection;
 use camino::Utf8PathBuf;
+use chrono::DateTime;
 use chrono::SecondsFormat;
+use chrono::Utc;
 use clap::ArgAction;
 use clap::Args;
 use clap::Subcommand;
@@ -39,6 +42,9 @@ use diesel::NullableExpressionMethods;
 use diesel::OptionalExtension;
 use diesel::TextExpressionMethods;
 use gateway_client::types::SpType;
+use indicatif::ProgressBar;
+use indicatif::ProgressDrawTarget;
+use indicatif::ProgressStyle;
 use ipnetwork::IpNetwork;
 use nexus_config::PostgresConfigWithUrl;
 use nexus_db_model::Dataset;
@@ -59,12 +65,18 @@ use nexus_db_model::NetworkInterfaceKind;
 use nexus_db_model::Probe;
 use nexus_db_model::Project;
 use nexus_db_model::Region;
+use nexus_db_model::RegionReplacement;
+use nexus_db_model::RegionReplacementState;
+use nexus_db_model::RegionReplacementStep;
+use nexus_db_model::RegionReplacementStepType;
 use nexus_db_model::RegionSnapshot;
 use nexus_db_model::Sled;
 use nexus_db_model::Snapshot;
 use nexus_db_model::SnapshotState;
 use nexus_db_model::SwCaboose;
 use nexus_db_model::SwRotPage;
+use nexus_db_model::UpstairsRepairNotification;
+use nexus_db_model::UpstairsRepairProgress;
 use nexus_db_model::Vmm;
 use nexus_db_model::Volume;
 use nexus_db_model::VpcSubnet;
@@ -270,6 +282,9 @@ enum DbCommands {
     Inventory(InventoryArgs),
     /// Save the current Reconfigurator inputs to a file
     ReconfiguratorSave(ReconfiguratorSaveArgs),
+    /// Query for information about region replacements, optionally manually
+    /// triggering one.
+    RegionReplacement(RegionReplacementArgs),
     /// Print information about sleds
     Sleds(SledsArgs),
     /// Print information about customer instances
@@ -434,6 +449,47 @@ struct SledsArgs {
     filter: Option<SledFilter>,
 }
 
+#[derive(Debug, Args)]
+struct RegionReplacementArgs {
+    #[command(subcommand)]
+    command: RegionReplacementCommands,
+}
+
+#[derive(Debug, Subcommand)]
+enum RegionReplacementCommands {
+    /// List region replacement requests
+    List(RegionReplacementListArgs),
+    /// Show current region replacements and their status
+    Status,
+    /// Show detailed information for a region replacement
+    Info(RegionReplacementInfoArgs),
+    /// Manually request a region replacement
+    Request(RegionReplacementRequestArgs),
+}
+
+#[derive(Debug, Args)]
+struct RegionReplacementListArgs {
+    /// Only show region replacement requests in this state
+    #[clap(long)]
+    state: Option<RegionReplacementState>,
+
+    /// Only show region replacement requests after a certain date
+    #[clap(long)]
+    after: Option<DateTime<Utc>>,
+}
+
+#[derive(Debug, Args)]
+struct RegionReplacementInfoArgs {
+    /// The UUID of the region replacement request
+    replacement_id: Uuid,
+}
+
+#[derive(Debug, Args)]
+struct RegionReplacementRequestArgs {
+    /// The UUID of the region to replace
+    region_id: Uuid,
+}
+
 #[derive(Debug, Args)]
 struct NetworkArgs {
     #[command(subcommand)]
@@ -542,6 +598,40 @@ impl DbArgs {
                 )
                 .await
             }
+            DbCommands::RegionReplacement(RegionReplacementArgs {
+                command: RegionReplacementCommands::List(args),
+            }) => {
+                cmd_db_region_replacement_list(
+                    &datastore,
+                    &self.fetch_opts,
+                    args,
+                )
+                .await
+            }
+            DbCommands::RegionReplacement(RegionReplacementArgs {
+                command: RegionReplacementCommands::Status,
+            }) => {
+                cmd_db_region_replacement_status(
+                    &opctx,
+                    &datastore,
+                    &self.fetch_opts,
+                )
+                .await
+            }
+            DbCommands::RegionReplacement(RegionReplacementArgs {
+                command: RegionReplacementCommands::Info(args),
+            }) => {
+                cmd_db_region_replacement_info(&opctx, &datastore, args).await
+            }
+            DbCommands::RegionReplacement(RegionReplacementArgs {
+                command: RegionReplacementCommands::Request(args),
+            }) => {
+                let token = omdb.check_allow_destructive()?;
+                cmd_db_region_replacement_request(
+                    &opctx, &datastore, args, token,
+                )
+                .await
+            }
             DbCommands::Sleds(args) => {
                 cmd_db_sleds(&opctx, &datastore, &self.fetch_opts, args).await
             }
@@ -1426,6 +1516,329 @@ async fn cmd_db_snapshot_info(
     Ok(())
 }
 
+/// List all region replacement requests
+async fn cmd_db_region_replacement_list(
+    datastore: &DataStore,
+    fetch_opts: &DbFetchOptions,
+    args: &RegionReplacementListArgs,
+) -> Result<(), anyhow::Error> {
+    let ctx = || "listing region replacement requests".to_string();
+    let limit = fetch_opts.fetch_limit;
+
+    let requests: Vec<RegionReplacement> = {
+        let conn = datastore.pool_connection_for_tests().await?;
+
+        use db::schema::region_replacement::dsl;
+
+        match (args.state, args.after) {
+            (Some(state), Some(after)) => {
+                dsl::region_replacement
+                    .filter(dsl::replacement_state.eq(state))
+                    .filter(dsl::request_time.gt(after))
+                    .limit(i64::from(u32::from(limit)))
+                    .select(RegionReplacement::as_select())
+                    .get_results_async(&*conn)
+                    .await?
+            }
+
+            (Some(state), None) => {
+                dsl::region_replacement
+                    .filter(dsl::replacement_state.eq(state))
+                    .limit(i64::from(u32::from(limit)))
+                    .select(RegionReplacement::as_select())
+                    .get_results_async(&*conn)
+                    .await?
+            }
+
+            (None, Some(after)) => {
+                dsl::region_replacement
+                    .filter(dsl::request_time.gt(after))
+                    .limit(i64::from(u32::from(limit)))
+                    .select(RegionReplacement::as_select())
+                    .get_results_async(&*conn)
+                    .await?
+            }
+
+            (None, None) => {
+                dsl::region_replacement
+                    .limit(i64::from(u32::from(limit)))
+                    .select(RegionReplacement::as_select())
+                    .get_results_async(&*conn)
+                    .await?
+            }
+        }
+    };
+
+    check_limit(&requests, limit, ctx);
+
+    #[derive(Tabled)]
+    #[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
+    struct Row {
+        pub id: Uuid,
+        pub request_time: DateTime<Utc>,
+        pub replacement_state: String,
+    }
+
+    let mut rows = Vec::with_capacity(requests.len());
+
+    for request in requests {
+        rows.push(Row {
+            id: request.id,
+            request_time: request.request_time,
+            replacement_state: format!("{:?}", request.replacement_state),
+        });
+    }
+
+    let table = tabled::Table::new(rows)
+        .with(tabled::settings::Style::empty())
+        .with(tabled::settings::Padding::new(0, 1, 0, 0))
+        .with(tabled::settings::Panel::header("Region replacement requests"))
+        .to_string();
+
+    println!("{}", table);
+
+    Ok(())
+}
+
+/// Display all non-complete region replacements
+async fn cmd_db_region_replacement_status(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    fetch_opts: &DbFetchOptions,
+) -> Result<(), anyhow::Error> {
+    let ctx = || "listing region replacement requests".to_string();
+    let limit = fetch_opts.fetch_limit;
+
+    let requests: Vec<RegionReplacement> = {
+        let conn = datastore.pool_connection_for_tests().await?;
+
+        use db::schema::region_replacement::dsl;
+
+        dsl::region_replacement
+            .filter(dsl::replacement_state.ne(RegionReplacementState::Complete))
+            .limit(i64::from(u32::from(limit)))
+            .select(RegionReplacement::as_select())
+            .get_results_async(&*conn)
+            .await?
+    };
+
+    check_limit(&requests, limit, ctx);
+
+    for request in requests {
+        println!("{}:", request.id);
+        println!();
+
+        println!("      started: {}", request.request_time);
+        println!("        state: {:?}", request.replacement_state);
+        println!("old region id: {}", request.old_region_id);
+        println!("new region id: {:?}", request.new_region_id);
+        println!();
+
+        if let Some(new_region_id) = request.new_region_id {
+            // Find the most recent upstairs repair notification where the
+            // downstairs being repaired is a "new" region id. This will give us
+            // the most recent repair id.
+            let maybe_repair: Option<UpstairsRepairNotification> = datastore
+                .most_recent_started_repair_notification(opctx, new_region_id)
+                .await?;
+
+            if let Some(repair) = maybe_repair {
+                let maybe_repair_progress: Option<UpstairsRepairProgress> =
+                    datastore
+                        .most_recent_repair_progress(
+                            opctx,
+                            repair.repair_id.into(),
+                        )
+                        .await?;
+
+                if let Some(repair_progress) = maybe_repair_progress {
+                    let bar = ProgressBar::with_draw_target(
+                        Some(repair_progress.total_items as u64),
+                        ProgressDrawTarget::stdout(),
+                    )
+                    .with_style(ProgressStyle::with_template(
+                        "progress:\t{wide_bar:.green} [{pos:>7}/{len:>7}]",
+                    )?)
+                    .with_position(repair_progress.current_item as u64);
+
+                    bar.abandon();
+
+                    println!();
+                }
+            }
+        }
+
+        println!();
+    }
+
+    Ok(())
+}
+
+/// Show details for a single region replacement
+async fn cmd_db_region_replacement_info(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    args: &RegionReplacementInfoArgs,
+) -> Result<(), anyhow::Error> {
+    let request = datastore
+        .get_region_replacement_request_by_id(opctx, args.replacement_id)
+        .await?;
+
+    // Show details
+    println!("      started: {}", request.request_time);
+    println!("        state: {:?}", request.replacement_state);
+    println!("old region id: {}", request.old_region_id);
+    println!("new region id: {:?}", request.new_region_id);
+    println!();
+
+    if let Some(new_region_id) = request.new_region_id {
+        // Find all related notifications
+        let notifications: Vec<UpstairsRepairNotification> = datastore
+            .repair_notifications_for_region(opctx, new_region_id)
+            .await?;
+
+        #[derive(Tabled)]
+        #[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
+        struct Row {
+            pub time: DateTime<Utc>,
+
+            pub repair_id: String,
+            pub repair_type: String,
+
+            pub upstairs_id: String,
+            pub session_id: String,
+
+            pub notification_type: String,
+        }
+
+        let mut rows = Vec::with_capacity(notifications.len());
+
+        for notification in &notifications {
+            rows.push(Row {
+                time: notification.time,
+                repair_id: notification.repair_id.to_string(),
+                repair_type: format!("{:?}", notification.repair_type),
+                upstairs_id: notification.upstairs_id.to_string(),
+                session_id: notification.session_id.to_string(),
+                notification_type: format!(
+                    "{:?}",
+                    notification.notification_type
+                ),
+            });
+        }
+
+        let table = tabled::Table::new(rows)
+            .with(tabled::settings::Style::empty())
+            .with(tabled::settings::Padding::new(0, 1, 0, 0))
+            .with(tabled::settings::Panel::header("Repair notifications"))
+            .to_string();
+
+        println!("{}", table);
+
+        println!();
+
+        // Use the most recent notification to get the most recent repair ID,
+        // and use that to search for progress.
+
+        let maybe_repair: Option<UpstairsRepairNotification> = datastore
+            .most_recent_started_repair_notification(opctx, new_region_id)
+            .await?;
+
+        if let Some(repair) = maybe_repair {
+            let maybe_repair_progress: Option<UpstairsRepairProgress> =
+                datastore
+                    .most_recent_repair_progress(opctx, repair.repair_id.into())
+                    .await?;
+
+            if let Some(repair_progress) = maybe_repair_progress {
+                let bar = ProgressBar::with_draw_target(
+                    Some(repair_progress.total_items as u64),
+                    ProgressDrawTarget::stdout(),
+                )
+                .with_style(ProgressStyle::with_template(
+                    "progress:\t{wide_bar:.green} [{pos:>7}/{len:>7}]",
+                )?)
+                .with_position(repair_progress.current_item as u64);
+
+                bar.abandon();
+
+                println!();
+            }
+        }
+
+        // Find the steps that the driver saga has committed to the DB.
+
+        let steps: Vec<RegionReplacementStep> = datastore
+            .region_replacement_request_steps(opctx, args.replacement_id)
+            .await?;
+
+        #[derive(Tabled)]
+        #[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
+        struct StepRow {
+            pub time: DateTime<Utc>,
+            pub step_type: String,
+            pub details: String,
+        }
+
+        let mut rows = Vec::with_capacity(steps.len());
+
+        for step in steps {
+            rows.push(StepRow {
+                time: step.step_time,
+                step_type: format!("{:?}", step.step_type),
+                details: match step.step_type {
+                    RegionReplacementStepType::Propolis => {
+                        format!(
+                            "instance {:?} vmm {:?}",
+                            step.step_associated_instance_id,
+                            step.step_associated_vmm_id,
+                        )
+                    }
+
+                    RegionReplacementStepType::Pantry => {
+                        format!(
+                            "address {:?}:{:?} job {:?}",
+                            step.step_associated_pantry_ip,
+                            step.step_associated_pantry_port,
+                            step.step_associated_pantry_job_id,
+                        )
+                    }
+                },
+            });
+        }
+
+        println!();
+
+        let table = tabled::Table::new(rows)
+            .with(tabled::settings::Style::empty())
+            .with(tabled::settings::Padding::new(0, 1, 0, 0))
+            .with(tabled::settings::Panel::header("Repair steps"))
+            .to_string();
+
+        println!("{}", table);
+    }
+
+    Ok(())
+}
+
+/// Manually request a region replacement
+async fn cmd_db_region_replacement_request(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    args: &RegionReplacementRequestArgs,
+    _destruction_token: DestructiveOperationToken,
+) -> Result<(), anyhow::Error> {
+    let region = datastore.get_region(args.region_id).await?;
+
+    let request_id = datastore
+        .create_region_replacement_request_for_region(opctx, &region)
+        .await?;
+
+    println!("region replacement {request_id} created");
+
+    Ok(())
+}
+
 // SLEDS
 
 #[derive(Tabled)]
diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out
index 15fc9d322e..563d23d6f3 100644
--- a/dev-tools/omdb/tests/usage_errors.out
+++ b/dev-tools/omdb/tests/usage_errors.out
@@ -107,6 +107,8 @@ Commands:
   dns                  Print information about internal and external DNS
   inventory            Print information about collected hardware/software inventory
   reconfigurator-save  Save the current Reconfigurator inputs to a file
+  region-replacement   Query for information about region replacements, optionally manually
+                           triggering one
   sleds                Print information about sleds
   instances            Print information about customer instances
   network              Print information about the network
@@ -145,6 +147,8 @@ Commands:
   dns                  Print information about internal and external DNS
   inventory            Print information about collected hardware/software inventory
   reconfigurator-save  Save the current Reconfigurator inputs to a file
+  region-replacement   Query for information about region replacements, optionally manually
+                           triggering one
   sleds                Print information about sleds
   instances            Print information about customer instances
   network              Print information about the network
diff --git a/nexus/db-queries/src/db/datastore/region_replacement.rs b/nexus/db-queries/src/db/datastore/region_replacement.rs
index d12d123e7e..56e73d2b2c 100644
--- a/nexus/db-queries/src/db/datastore/region_replacement.rs
+++ b/nexus/db-queries/src/db/datastore/region_replacement.rs
@@ -522,6 +522,24 @@ impl DataStore {
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
+    /// Return all steps for a region replacement request
+    pub async fn region_replacement_request_steps(
+        &self,
+        opctx: &OpContext,
+        id: Uuid,
+    ) -> Result<Vec<RegionReplacementStep>, Error> {
+        use db::schema::region_replacement_step::dsl;
+
+        dsl::region_replacement_step
+            .filter(dsl::replacement_id.eq(id))
+            .order_by(dsl::step_time.desc())
+            .get_results_async::<RegionReplacementStep>(
+                &*self.pool_connection_authorized(opctx).await?,
+            )
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
     /// Record a step taken to drive a region replacement forward
     pub async fn add_region_replacement_request_step(
         &self,
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index 294cd2decf..4e929b9b4b 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -1407,6 +1407,73 @@ impl DataStore {
 
         Ok(())
     }
+
+    /// For a downstairs being repaired, find the most recent repair
+    /// notification
+    pub async fn most_recent_started_repair_notification(
+        &self,
+        opctx: &OpContext,
+        region_id: Uuid,
+    ) -> Result<Option<UpstairsRepairNotification>, Error> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        use db::schema::upstairs_repair_notification::dsl;
+
+        dsl::upstairs_repair_notification
+            .filter(dsl::region_id.eq(region_id))
+            .filter(
+                dsl::notification_type
+                    .eq(UpstairsRepairNotificationType::Started),
+            )
+            .order_by(dsl::time.desc())
+            .limit(1)
+            .first_async(&*conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// For a downstairs being repaired, return all related repair notifications
+    /// in order of notification time.
+    pub async fn repair_notifications_for_region(
+        &self,
+        opctx: &OpContext,
+        region_id: Uuid,
+    ) -> Result<Vec<UpstairsRepairNotification>, Error> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        use db::schema::upstairs_repair_notification::dsl;
+
+        dsl::upstairs_repair_notification
+            .filter(dsl::region_id.eq(region_id))
+            .order_by(dsl::time.asc())
+            .select(UpstairsRepairNotification::as_select())
+            .get_results_async(&*conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    /// For a repair ID, find the most recent progress notification
+    pub async fn most_recent_repair_progress(
+        &self,
+        opctx: &OpContext,
+        repair_id: TypedUuid<UpstairsRepairKind>,
+    ) -> Result<Option<UpstairsRepairProgress>, Error> {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        use db::schema::upstairs_repair_progress::dsl;
+
+        dsl::upstairs_repair_progress
+            .filter(
+                dsl::repair_id.eq(nexus_db_model::to_db_typed_uuid(repair_id)),
+            )
+            .order_by(dsl::time.desc())
+            .limit(1)
+            .first_async(&*conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
 
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]

From acbeb27672e24cff2540bfd5c203deadd97a84bd Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Wed, 29 May 2024 18:38:49 -0700
Subject: [PATCH 15/28] [nexus] Add virtual provisioning idempotency tests,
 prevent underflow (#5830)

Builds on https://github.com/oxidecomputer/omicron/pull/5081 and
https://github.com/oxidecomputer/omicron/pull/5089 , but more out of
convenience than necessity.

# Summary

This PR validates that the "virtual provisioning collection {insert,
delete}" operations are idempotent. Currently, our usage of
`max_instance_gen` only **partially** prevents updates during instance
provisioning deletions:
- If `max_instance_gen` is smaller than the observed instance generation
number...
- ... we avoid deleting the `virtual_provisioning_resource` record
(which is great)
- ... but we still decrement the `virtual_provisioning_collection`
values (which is really not great).

In practice this means a stale or repeated delete can decrement the
project/silo/fleet usage values arbitrarily while making no other
changes. This has been, mechanically, the root cause of our observed
underflows (e.g., https://github.com/oxidecomputer/omicron/issues/5525).
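
To make the failure mode concrete, here is a toy illustration (not part
of the patch; the numbers are invented) of what a stale, repeated delete
used to do to a collection counter:

    fn main() {
        // Hypothetical counter mirroring `cpus_provisioned` for a collection.
        let mut cpus_provisioned: i64 = 12;

        // A valid delete decrements the collection once.
        cpus_provisioned -= 12;

        // Before this change, a late delete (carrying a stale instance
        // generation) skipped removing the resource record but still ran
        // the decrement, driving the counter negative.
        cpus_provisioned -= 12;

        assert_eq!(cpus_provisioned, -12); // the observed underflow
    }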

# Details of this change

- All the changes in
`nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs`
are tests validating the idempotency of these operations (the key
property they assert is sketched below).
- All the changes in
`nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs`
are changes to the query itself that alter its behavior. Their objective
is to make these operations idempotent (as validated by the newly added
tests) and to prevent undercounting of virtual provisioning resources.
If these changes are reverted, the newly added tests start failing,
which demonstrates the added coverage.
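
A hedged sketch of the property the new tests assert (test scaffolding
elided; the method name and argument order match the datastore function
exercised in the tests below):

    // A delete carrying a stale generation number must be a no-op: the
    // collection counters stay where they were.
    let stale_gen: i64 = 0;
    datastore
        .virtual_provisioning_collection_delete_instance(
            &opctx, instance_id, project_id, cpus, ram, stale_gen,
        )
        .await
        .unwrap();
    // project/silo/fleet usage is unchanged here

    // A delete with a sufficiently high generation takes effect, and
    // repeating it afterwards is also a no-op (idempotent).
    let max_instance_gen: i64 = 1000;
    datastore
        .virtual_provisioning_collection_delete_instance(
            &opctx, instance_id, project_id, cpus, ram, max_instance_gen,
        )
        .await
        .unwrap();
    // project/silo/fleet usage is now zero, and stays zero on repeat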
---
 .../virtual_provisioning_collection.rs        | 505 ++++++++++++++++++
 .../virtual_provisioning_collection_update.rs |  99 ++--
 ...ning_collection_update_delete_instance.sql |  16 +-
 ...oning_collection_update_delete_storage.sql |   2 +-
 4 files changed, 556 insertions(+), 66 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
index 348d277ddf..3630231b63 100644
--- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
+++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
@@ -325,3 +325,508 @@ impl DataStore {
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    use crate::db::datastore::test_utils::datastore_test;
+    use crate::db::fixed_data;
+    use crate::db::lookup::LookupPath;
+    use nexus_db_model::Instance;
+    use nexus_db_model::Project;
+    use nexus_db_model::SiloQuotasUpdate;
+    use nexus_test_utils::db::test_setup_database;
+    use nexus_types::external_api::params;
+    use omicron_common::api::external::IdentityMetadataCreateParams;
+    use omicron_test_utils::dev;
+    use uuid::Uuid;
+
+    async fn verify_collection_usage(
+        datastore: &DataStore,
+        opctx: &OpContext,
+        id: Uuid,
+        expected_cpus: i64,
+        expected_memory: i64,
+        expected_storage: i64,
+    ) {
+        let collection = datastore
+            .virtual_provisioning_collection_get(opctx, id)
+            .await
+            .expect("Could not lookup collection");
+
+        assert_eq!(collection.cpus_provisioned, expected_cpus);
+        assert_eq!(
+            collection.ram_provisioned.0.to_bytes(),
+            expected_memory as u64
+        );
+        assert_eq!(
+            collection.virtual_disk_bytes_provisioned.0.to_bytes(),
+            expected_storage as u64
+        );
+    }
+
+    struct TestData {
+        project_id: Uuid,
+        silo_id: Uuid,
+        fleet_id: Uuid,
+        authz_project: crate::authz::Project,
+    }
+
+    impl TestData {
+        fn ids(&self) -> [Uuid; 3] {
+            [self.project_id, self.silo_id, self.fleet_id]
+        }
+    }
+
+    // Use the default fleet and silo, but create a new project.
+    async fn setup_collections(
+        datastore: &DataStore,
+        opctx: &OpContext,
+    ) -> TestData {
+        let fleet_id = *fixed_data::FLEET_ID;
+        let silo_id = *fixed_data::silo::DEFAULT_SILO_ID;
+        let project_id = Uuid::new_v4();
+
+        let (authz_project, _project) = datastore
+            .project_create(
+                &opctx,
+                Project::new_with_id(
+                    project_id,
+                    silo_id,
+                    params::ProjectCreate {
+                        identity: IdentityMetadataCreateParams {
+                            name: "myproject".parse().unwrap(),
+                            description: "It's a project".into(),
+                        },
+                    },
+                ),
+            )
+            .await
+            .unwrap();
+
+        // Ensure the silo has a quota that can fit our requested instance.
+        //
+        // This also acts as a guard against a change in the default silo quota
+        // -- we overwrite it for the test unconditionally.
+
+        let quotas_update = SiloQuotasUpdate {
+            cpus: Some(24),
+            memory: Some(1 << 40),
+            storage: Some(1 << 50),
+            time_modified: chrono::Utc::now(),
+        };
+        let authz_silo = LookupPath::new(&opctx, &datastore)
+            .silo_id(silo_id)
+            .lookup_for(crate::authz::Action::Modify)
+            .await
+            .unwrap()
+            .0;
+        datastore
+            .silo_update_quota(&opctx, &authz_silo, quotas_update)
+            .await
+            .unwrap();
+
+        TestData { fleet_id, silo_id, project_id, authz_project }
+    }
+
+    async fn create_instance_record(
+        datastore: &DataStore,
+        opctx: &OpContext,
+        authz_project: &crate::authz::Project,
+        instance_id: Uuid,
+        project_id: Uuid,
+        cpus: i64,
+        memory: ByteCount,
+    ) {
+        datastore
+            .project_create_instance(
+                &opctx,
+                &authz_project,
+                Instance::new(
+                    instance_id,
+                    project_id,
+                    &params::InstanceCreate {
+                        identity: IdentityMetadataCreateParams {
+                            name: "myinstance".parse().unwrap(),
+                            description: "It's an instance".into(),
+                        },
+                        ncpus: cpus.try_into().unwrap(),
+                        memory: memory.try_into().unwrap(),
+                        hostname: "myhostname".try_into().unwrap(),
+                        user_data: Vec::new(),
+                        network_interfaces:
+                            params::InstanceNetworkInterfaceAttachment::None,
+                        external_ips: Vec::new(),
+                        disks: Vec::new(),
+                        ssh_public_keys: None,
+                        start: false,
+                    },
+                ),
+            )
+            .await
+            .unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_instance_create_and_delete() {
+        let logctx = dev::test_setup_log("test_instance_create_and_delete");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let test_data = setup_collections(&datastore, &opctx).await;
+        let ids = test_data.ids();
+        let project_id = test_data.project_id;
+        let authz_project = test_data.authz_project;
+
+        // Actually provision the instance
+
+        let instance_id = Uuid::new_v4();
+        let cpus = 12;
+        let ram = ByteCount::try_from(1 << 30).unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        create_instance_record(
+            &datastore,
+            &opctx,
+            &authz_project,
+            instance_id,
+            project_id,
+            cpus,
+            ram,
+        )
+        .await;
+
+        datastore
+            .virtual_provisioning_collection_insert_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 12, 1 << 30, 0)
+                .await;
+        }
+
+        // Delete the instance
+
+        // Make this value outrageously high, so that as a "max" it is ignored.
+        let max_instance_gen: i64 = 1000;
+        datastore
+            .virtual_provisioning_collection_delete_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+                max_instance_gen,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_instance_create_and_delete_twice() {
+        let logctx =
+            dev::test_setup_log("test_instance_create_and_delete_twice");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let test_data = setup_collections(&datastore, &opctx).await;
+        let ids = test_data.ids();
+        let project_id = test_data.project_id;
+        let authz_project = test_data.authz_project;
+
+        // Actually provision the instance
+
+        let instance_id = Uuid::new_v4();
+        let cpus = 12;
+        let ram = ByteCount::try_from(1 << 30).unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        create_instance_record(
+            &datastore,
+            &opctx,
+            &authz_project,
+            instance_id,
+            project_id,
+            cpus,
+            ram,
+        )
+        .await;
+
+        datastore
+            .virtual_provisioning_collection_insert_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 12, 1 << 30, 0)
+                .await;
+        }
+
+        // Attempt to provision that same instance once more.
+        //
+        // The "virtual_provisioning_collection_insert" call should succeed for
+        // idempotency reasons, but we should not be double-dipping on the
+        // instance object's provisioning accounting.
+
+        datastore
+            .virtual_provisioning_collection_insert_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+            )
+            .await
+            .unwrap();
+
+        // Verify that the usage is the same as before the call
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 12, 1 << 30, 0)
+                .await;
+        }
+
+        // Delete the instance
+
+        // If the "instance gen" is too low, the delete operation should be
+        // dropped. This mimics circumstances where an instance update arrives
+        // late to the query.
+        let max_instance_gen = 0;
+        datastore
+            .virtual_provisioning_collection_delete_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+                max_instance_gen,
+            )
+            .await
+            .unwrap();
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 12, 1 << 30, 0)
+                .await;
+        }
+
+        // Make this value outrageously high, so that as a "max" it is ignored.
+        let max_instance_gen = 1000;
+        datastore
+            .virtual_provisioning_collection_delete_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+                max_instance_gen,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        // Attempt to delete the same instance once more.
+        //
+        // Just like "double-adding", double deletion should be an idempotent
+        // operation.
+
+        datastore
+            .virtual_provisioning_collection_delete_instance(
+                &opctx,
+                instance_id,
+                project_id,
+                cpus,
+                ram,
+                max_instance_gen,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_storage_create_and_delete() {
+        let logctx = dev::test_setup_log("test_storage_create_and_delete");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let test_data = setup_collections(&datastore, &opctx).await;
+        let ids = test_data.ids();
+        let project_id = test_data.project_id;
+
+        // Actually provision storage
+
+        let disk_id = Uuid::new_v4();
+        let disk_byte_diff = ByteCount::try_from(1 << 30).unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        datastore
+            .virtual_provisioning_collection_insert_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+                StorageType::Disk,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 1 << 30)
+                .await;
+        }
+
+        // Delete the disk
+
+        datastore
+            .virtual_provisioning_collection_delete_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_storage_create_and_delete_twice() {
+        let logctx =
+            dev::test_setup_log("test_storage_create_and_delete_twice");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        let test_data = setup_collections(&datastore, &opctx).await;
+        let ids = test_data.ids();
+        let project_id = test_data.project_id;
+
+        // Actually provision the disk
+
+        let disk_id = Uuid::new_v4();
+        let disk_byte_diff = ByteCount::try_from(1 << 30).unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        datastore
+            .virtual_provisioning_collection_insert_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+                StorageType::Disk,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 1 << 30)
+                .await;
+        }
+
+        // Attempt to provision that same disk once more.
+        //
+        // The "virtual_provisioning_collection_insert" call should succeed for
+        // idempotency reasons, but we should not be double-dipping on the
+        // disk object's provisioning accounting.
+
+        datastore
+            .virtual_provisioning_collection_insert_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+                StorageType::Disk,
+            )
+            .await
+            .unwrap();
+
+        // Verify that the usage is the same as before the call
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 1 << 30)
+                .await;
+        }
+
+        // Delete the disk
+
+        datastore
+            .virtual_provisioning_collection_delete_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        // Attempt to delete the same disk once more.
+        //
+        // Just like "double-adding", double deletion should be an idempotent
+        // operation.
+
+        datastore
+            .virtual_provisioning_collection_delete_storage(
+                &opctx,
+                disk_id,
+                project_id,
+                disk_byte_diff,
+            )
+            .await
+            .unwrap();
+
+        for id in ids {
+            verify_collection_usage(&datastore, &opctx, id, 0, 0, 0).await;
+        }
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
index 156691866e..895fee2092 100644
--- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
+++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
@@ -225,7 +225,7 @@ WITH
                 .bind::<sql_types::BigInt, _>(resource.virtual_disk_bytes_provisioned)
                 .bind::<sql_types::BigInt, _>(resource.virtual_disk_bytes_provisioned)
             },
-            UpdateKind::DeleteInstance { id, .. } | UpdateKind::DeleteStorage { id, .. } => {
+            UpdateKind::DeleteStorage { id, .. } => {
                 query.sql("
   do_update
     AS (
@@ -239,11 +239,52 @@ WITH
             virtual_provisioning_resource.id = ").param().sql("
           LIMIT
             1
+        ) = 1
+          AS update
+    ),")
+                .bind::<sql_types::Uuid, _>(id)
+            },
+            UpdateKind::DeleteInstance { id, max_instance_gen, .. } => {
+                // The filter condition here ensures that the provisioning record is
+                // only deleted if the corresponding instance has a generation
+                // number less than the supplied `max_instance_gen`. This allows a
+                // caller that is about to apply an instance update that will stop
+                // the instance and that bears generation G to avoid deleting
+                // resources if the instance generation was already advanced to or
+                // past G.
+                //
+                // If the relevant instance ID is not in the database, then some
+                // other operation must have ensured the instance was previously
+                // stopped (because that's the only way it could have been deleted),
+                // and that operation should have cleaned up the resources already,
+                // in which case there's nothing to do here.
+                query.sql("
+  do_update
+    AS (
+      SELECT
+        (
+          SELECT
+            count(*)
+          FROM
+            virtual_provisioning_resource
+          WHERE
+            virtual_provisioning_resource.id = ").param().sql("
+          LIMIT
+            1
+        ) = 1 AND
+        EXISTS (
+          SELECT 1
+          FROM
+            instance
+          WHERE
+            instance.id = ").param().sql(" AND instance.state_generation < ").param().sql("
+          LIMIT 1
         )
-        = 1
           AS update
     ),")
                 .bind::<sql_types::Uuid, _>(id)
+                .bind::<sql_types::Uuid, _>(id)
+                .bind::<sql_types::BigInt, _>(max_instance_gen)
             },
         };
 
@@ -295,57 +336,8 @@ WITH
                 )
                 .bind::<sql_types::BigInt, _>(resource.cpus_provisioned)
                 .bind::<sql_types::BigInt, _>(resource.ram_provisioned),
-            UpdateKind::DeleteInstance { id, max_instance_gen, .. } => {
-                // The filter condition here ensures that the provisioning record is
-                // only deleted if the corresponding instance has a generation
-                // number less than the supplied `max_instance_gen`. This allows a
-                // caller that is about to apply an instance update that will stop
-                // the instance and that bears generation G to avoid deleting
-                // resources if the instance generation was already advanced to or
-                // past G.
-                //
-                // If the relevant instance ID is not in the database, then some
-                // other operation must have ensured the instance was previously
-                // stopped (because that's the only way it could have been deleted),
-                // and that operation should have cleaned up the resources already,
-                // in which case there's nothing to do here.
-                query
-                    .sql(
-                        "
-  unused_cte_arm
-    AS (
-      DELETE FROM
-        virtual_provisioning_resource
-      WHERE
-        virtual_provisioning_resource.id = ",
-                    )
-                    .param()
-                    .sql(
-                        "
-        AND
-        virtual_provisioning_resource.id = (
-            SELECT instance.id FROM instance WHERE
-                instance.id = ",
-                    )
-                    .param()
-                    .sql(
-                        " AND
-                instance.state_generation < ",
-                    )
-                    .param()
-                    .sql(
-                        " LIMIT 1)
-      RETURNING ",
-                    )
-                    .sql(AllColumnsOfVirtualResource::with_prefix(
-                        "virtual_provisioning_resource",
-                    ))
-                    .sql("),")
-                    .bind::<sql_types::Uuid, _>(id)
-                    .bind::<sql_types::Uuid, _>(id)
-                    .bind::<sql_types::BigInt, _>(max_instance_gen)
-            }
-            UpdateKind::DeleteStorage { id, .. } => query
+            UpdateKind::DeleteInstance { id, .. }
+            | UpdateKind::DeleteStorage { id, .. } => query
                 .sql(
                     "
   unused_cte_arm
@@ -358,6 +350,7 @@ WITH
                 .param()
                 .sql(
                     "
+        AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING ",
                 )
                 .sql(AllColumnsOfVirtualResource::with_prefix(
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
index 48094a8371..3c97b7efc7 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql
@@ -40,6 +40,9 @@ WITH
             1
         )
         = 1
+        AND EXISTS(
+            SELECT 1 FROM instance WHERE instance.id = $5 AND instance.state_generation < $6 LIMIT 1
+          )
           AS update
     ),
   unused_cte_arm
@@ -47,18 +50,7 @@ WITH
       DELETE FROM
         virtual_provisioning_resource
       WHERE
-        virtual_provisioning_resource.id = $5
-        AND virtual_provisioning_resource.id
-          = (
-              SELECT
-                instance.id
-              FROM
-                instance
-              WHERE
-                instance.id = $6 AND instance.state_generation < $7
-              LIMIT
-                1
-            )
+        virtual_provisioning_resource.id = $7 AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_resource.id,
         virtual_provisioning_resource.time_modified,
diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
index b607ac4185..b372a62003 100644
--- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
+++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql
@@ -47,7 +47,7 @@ WITH
       DELETE FROM
         virtual_provisioning_resource
       WHERE
-        virtual_provisioning_resource.id = $5
+        virtual_provisioning_resource.id = $5 AND (SELECT do_update.update FROM do_update LIMIT 1)
       RETURNING
         virtual_provisioning_resource.id,
         virtual_provisioning_resource.time_modified,

From b07382fd933b67e3e5e121c8b8094170da637be4 Mon Sep 17 00:00:00 2001
From: James MacMahon <james@oxide.computer>
Date: Thu, 30 May 2024 11:25:13 -0400
Subject: [PATCH 16/28] Do not retry indefinitely if service is gone (#5789)

If there's a call to an external service, saga execution cannot move
forward until the result of that call is known, in the sense that Nexus
received a result. If there are transient problems, Nexus must retry
until a known result is returned.

This is problematic when the destination service is gone - Nexus will
retry indefinitely and saga execution will halt. Worse, when a saga that
calls the volume delete subsaga halts this way, subsequent sagas that
also call volume delete will halt as well.

With the introduction of a physical disk policy, Nexus can know when to
stop retrying a call - the destination service is gone, so the known
result is an error.

This commit adds a `ProgenitorOperationRetry` object that takes an
operation to retry plus a "gone" check, and on each retry iteration
checks whether the destination is gone. If it is, the retry loop bails
out; otherwise any errors seen are assumed to be transient.
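
A minimal usage sketch (illustrative only, not lifted from this patch):
the operation closure wraps a hypothetical progenitor client call, and
the gone check reuses the `dataset_physical_disk_in_service` lookup
added below; `client`, `region_id`, `datastore`, `dataset_id`, and `log`
are assumed to exist in the caller.

    let result = ProgenitorOperationRetry::new(
        // Idempotent operation whose outcome must be known before the
        // saga can move forward.
        || async { client.region_get(&region_id).await },
        // Gone check: stop retrying permanently once the backing
        // physical disk has been expunged.
        || async {
            datastore
                .dataset_physical_disk_in_service(dataset_id)
                .await
                .map(|in_service| !in_service)
        },
    )
    .run(&log)
    .await;

    match result {
        Ok(_region) => { /* known result, proceed */ }
        Err(e) if e.is_gone() => { /* destination is gone; treat as done */ }
        Err(_e) => { /* gone-check failure or permanent progenitor error */ }
    }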

Further work is required to deprecate the `retry_until_known_result`
function, as retrying indefinitely is a bad pattern.

Fixes #4331
Fixes #5022

---------

Co-authored-by: Eliza Weisman <eliza@elizas.website>
---
 common/src/api/external/error.rs             |  17 +-
 common/src/lib.rs                            |  86 +-
 common/src/progenitor_operation_retry.rs     | 182 ++++
 nexus/db-queries/src/db/datastore/dataset.rs |  50 +
 nexus/src/app/crucible.rs                    | 962 +++++++++++++++++++
 nexus/src/app/mod.rs                         |   1 +
 nexus/src/app/sagas/common_storage.rs        | 728 +-------------
 nexus/src/app/sagas/disk_create.rs           |  37 +-
 nexus/src/app/sagas/snapshot_create.rs       |  71 +-
 nexus/src/app/sagas/volume_delete.rs         |  36 +-
 nexus/src/app/session.rs                     |   5 +-
 nexus/tests/integration_tests/disks.rs       |  84 ++
 sled-agent/src/sim/sled_agent.rs             |   4 +
 sled-agent/src/sim/storage.rs                |  12 +
 14 files changed, 1411 insertions(+), 864 deletions(-)
 create mode 100644 common/src/progenitor_operation_retry.rs
 create mode 100644 nexus/src/app/crucible.rs

diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs
index 6b3b93187f..10731c61c3 100644
--- a/common/src/api/external/error.rs
+++ b/common/src/api/external/error.rs
@@ -85,6 +85,11 @@ pub enum Error {
     /// ObjectNotFound instead.
     #[error("Not found: {}", .message.display_internal())]
     NotFound { message: MessagePair },
+
+    /// Access to the target resource is no longer available, and this condition
+    /// is likely to be permanent.
+    #[error("Gone")]
+    Gone,
 }
 
 /// Represents an error message which has an external component, along with
@@ -214,7 +219,8 @@ impl Error {
             | Error::InternalError { .. }
             | Error::TypeVersionMismatch { .. }
             | Error::NotFound { .. }
-            | Error::Conflict { .. } => false,
+            | Error::Conflict { .. }
+            | Error::Gone => false,
         }
     }
 
@@ -335,7 +341,8 @@ impl Error {
         match self {
             Error::ObjectNotFound { .. }
             | Error::ObjectAlreadyExists { .. }
-            | Error::Forbidden => self,
+            | Error::Forbidden
+            | Error::Gone => self,
             Error::InvalidRequest { message } => Error::InvalidRequest {
                 message: message.with_internal_context(context),
             },
@@ -513,6 +520,12 @@ impl From<Error> for HttpError {
                     internal_message,
                 }
             }
+
+            Error::Gone => HttpError::for_client_error(
+                Some(String::from("Gone")),
+                http::StatusCode::GONE,
+                String::from("Gone"),
+            ),
         }
     }
 }
diff --git a/common/src/lib.rs b/common/src/lib.rs
index a92237adfa..e4f53cbfab 100644
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -26,6 +26,7 @@ pub mod backoff;
 pub mod cmd;
 pub mod disk;
 pub mod ledger;
+pub mod progenitor_operation_retry;
 pub mod update;
 pub mod vlan;
 pub mod zpool_name;
@@ -79,83 +80,40 @@ impl slog::KV for FileKv {
 
 pub const OMICRON_DPD_TAG: &str = "omicron";
 
-use futures::Future;
-use slog::warn;
+use crate::api::external::Error;
+use crate::progenitor_operation_retry::ProgenitorOperationRetry;
+use crate::progenitor_operation_retry::ProgenitorOperationRetryError;
+use std::future::Future;
 
 /// Retry a progenitor client operation until a known result is returned.
 ///
-/// Saga execution relies on the outcome of an external call being known: since
-/// they are idempotent, reissue the external call until a known result comes
-/// back. Retry if a communication error is seen, or if another retryable error
-/// is seen.
-///
-/// Note that retrying is only valid if the call itself is idempotent.
+/// See [`ProgenitorOperationRetry`] for more information.
+// TODO mark this deprecated, `never_bail` is a bad idea
 pub async fn retry_until_known_result<F, T, E, Fut>(
     log: &slog::Logger,
-    mut f: F,
+    f: F,
 ) -> Result<T, progenitor_client::Error<E>>
 where
     F: FnMut() -> Fut,
     Fut: Future<Output = Result<T, progenitor_client::Error<E>>>,
     E: std::fmt::Debug,
 {
-    backoff::retry_notify(
-        backoff::retry_policy_internal_service(),
-        move || {
-            let fut = f();
-            async move {
-                match fut.await {
-                    Err(progenitor_client::Error::CommunicationError(e)) => {
-                        warn!(
-                            log,
-                            "saw transient communication error {}, retrying...",
-                            e,
-                        );
-
-                        Err(backoff::BackoffError::transient(
-                            progenitor_client::Error::CommunicationError(e),
-                        ))
-                    }
-
-                    Err(progenitor_client::Error::ErrorResponse(
-                        response_value,
-                    )) => {
-                        match response_value.status() {
-                            // Retry on 503 or 429
-                            http::StatusCode::SERVICE_UNAVAILABLE
-                            | http::StatusCode::TOO_MANY_REQUESTS => {
-                                Err(backoff::BackoffError::transient(
-                                    progenitor_client::Error::ErrorResponse(
-                                        response_value,
-                                    ),
-                                ))
-                            }
-
-                            // Anything else is a permanent error
-                            _ => Err(backoff::BackoffError::Permanent(
-                                progenitor_client::Error::ErrorResponse(
-                                    response_value,
-                                ),
-                            )),
-                        }
-                    }
-
-                    Err(e) => {
-                        warn!(log, "saw permanent error {}, aborting", e,);
+    match ProgenitorOperationRetry::new(f, never_bail).run(log).await {
+        Ok(v) => Ok(v),
 
-                        Err(backoff::BackoffError::Permanent(e))
-                    }
+        Err(e) => match e {
+            ProgenitorOperationRetryError::ProgenitorError(e) => Err(e),
 
-                    Ok(v) => Ok(v),
-                }
+            ProgenitorOperationRetryError::Gone
+            | ProgenitorOperationRetryError::GoneCheckError(_) => {
+                // ProgenitorOperationRetry::new called with `never_bail` as the
+                // bail check should never return these variants!
+                unreachable!();
             }
         },
-        |error: progenitor_client::Error<_>, delay| {
-            warn!(
-                log,
-                "failed external call ({:?}), will retry in {:?}", error, delay,
-            );
-        },
-    )
-    .await
+    }
+}
+
+async fn never_bail() -> Result<bool, Error> {
+    Ok(false)
 }
diff --git a/common/src/progenitor_operation_retry.rs b/common/src/progenitor_operation_retry.rs
new file mode 100644
index 0000000000..6007e117b3
--- /dev/null
+++ b/common/src/progenitor_operation_retry.rs
@@ -0,0 +1,182 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use futures::Future;
+use slog::warn;
+use slog::Logger;
+
+use crate::api::external::Error;
+use crate::backoff::retry_notify;
+use crate::backoff::retry_policy_internal_service;
+use crate::backoff::BackoffError;
+
+#[derive(Debug)]
+pub enum ProgenitorOperationRetryError<E> {
+    /// Nexus determined that the operation will never return a known result
+    /// because the remote server is gone.
+    Gone,
+
+    /// Attempting to check if the retry loop should be stopped failed
+    GoneCheckError(Error),
+
+    /// The retry loop progenitor operation saw a permanent client error
+    ProgenitorError(progenitor_client::Error<E>),
+}
+
+impl<E> ProgenitorOperationRetryError<E> {
+    pub fn is_not_found(&self) -> bool {
+        match &self {
+            ProgenitorOperationRetryError::ProgenitorError(e) => match e {
+                progenitor_client::Error::ErrorResponse(rv) => {
+                    match rv.status() {
+                        http::StatusCode::NOT_FOUND => true,
+
+                        _ => false,
+                    }
+                }
+
+                _ => false,
+            },
+
+            _ => false,
+        }
+    }
+
+    pub fn is_gone(&self) -> bool {
+        matches!(&self, ProgenitorOperationRetryError::Gone)
+    }
+}
+
+/// Retry a progenitor client operation until a known result is returned, or
+/// until something tells us that we should stop trying.
+///
+/// Saga execution relies on the outcome of an external call being known: since
+/// they are idempotent, reissue the external call until a known result comes
+/// back. Retry if a communication error is seen, or if another retryable error
+/// is seen.
+///
+/// During the retry loop, call the supplied `gone_check` function to see if the
+/// retry loop should be aborted: in the cases where Nexus can _know_ that a
+/// request will never complete, the retry loop must be aborted. Otherwise,
+/// Nexus will indefinitely retry until some known result is returned.
+///
+/// Note that retrying is only valid if the `operation` itself is idempotent.
+pub struct ProgenitorOperationRetry<
+    T,
+    E: std::fmt::Debug,
+    F: FnMut() -> Fut,
+    Fut: Future<Output = Result<T, progenitor_client::Error<E>>>,
+    BF: FnMut() -> BFut,
+    BFut: Future<Output = Result<bool, Error>>,
+> {
+    operation: F,
+
+    /// If Nexus knows that the supplied operation will never successfully
+    /// complete, then `gone_check` should return true.
+    gone_check: BF,
+}
+
+impl<T, E, F, Fut, BF, BFut> ProgenitorOperationRetry<T, E, F, Fut, BF, BFut>
+where
+    E: std::fmt::Debug,
+    F: FnMut() -> Fut,
+    Fut: Future<Output = Result<T, progenitor_client::Error<E>>>,
+    BF: FnMut() -> BFut,
+    BFut: Future<Output = Result<bool, Error>>,
+{
+    pub fn new(operation: F, gone_check: BF) -> Self {
+        Self { operation, gone_check }
+    }
+
+    pub async fn run(
+        mut self,
+        log: &Logger,
+    ) -> Result<T, ProgenitorOperationRetryError<E>> {
+        retry_notify(
+            retry_policy_internal_service(),
+            move || {
+                let gone_check = (self.gone_check)();
+                let f = (self.operation)();
+
+                async move {
+                    match gone_check.await {
+                        Ok(dest_is_gone) => {
+                            if dest_is_gone {
+                                return Err(BackoffError::Permanent(
+                                    ProgenitorOperationRetryError::Gone
+                                ));
+                            }
+                        }
+
+                        Err(e) => {
+                            return Err(BackoffError::Permanent(
+                                ProgenitorOperationRetryError::GoneCheckError(e)
+                            ));
+                        }
+                    }
+
+                    match f.await {
+                        Err(progenitor_client::Error::CommunicationError(e)) => {
+                            warn!(
+                                log,
+                                "saw transient communication error {}, retrying...",
+                                e,
+                            );
+
+                            Err(BackoffError::transient(
+                                ProgenitorOperationRetryError::ProgenitorError(
+                                    progenitor_client::Error::CommunicationError(e)
+                                )
+                            ))
+                        }
+
+                        Err(progenitor_client::Error::ErrorResponse(
+                            response_value,
+                        )) => {
+                            match response_value.status() {
+                                // Retry on 503 or 429
+                                http::StatusCode::SERVICE_UNAVAILABLE
+                                | http::StatusCode::TOO_MANY_REQUESTS => {
+                                    Err(BackoffError::transient(
+                                        ProgenitorOperationRetryError::ProgenitorError(
+                                            progenitor_client::Error::ErrorResponse(
+                                                response_value
+                                            )
+                                        )
+                                    ))
+                                }
+
+                                // Anything else is a permanent error
+                                _ => Err(BackoffError::Permanent(
+                                    ProgenitorOperationRetryError::ProgenitorError(
+                                        progenitor_client::Error::ErrorResponse(
+                                            response_value
+                                        )
+                                    )
+                                ))
+                            }
+                        }
+
+                        Err(e) => {
+                            warn!(log, "saw permanent error {}, aborting", e,);
+
+                            Err(BackoffError::Permanent(
+                                ProgenitorOperationRetryError::ProgenitorError(e)
+                            ))
+                        }
+
+                        Ok(v) => Ok(v),
+                    }
+                }
+            },
+            |error: ProgenitorOperationRetryError<E>, delay| {
+                warn!(
+                    log,
+                    "failed external call ({:?}), will retry in {:?}", error, delay,
+                );
+            },
+        )
+        .await
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs
index 3617f6d7fc..3f1df24e45 100644
--- a/nexus/db-queries/src/db/datastore/dataset.rs
+++ b/nexus/db-queries/src/db/datastore/dataset.rs
@@ -15,6 +15,8 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::identity::Asset;
 use crate::db::model::Dataset;
+use crate::db::model::PhysicalDisk;
+use crate::db::model::PhysicalDiskPolicy;
 use crate::db::model::Zpool;
 use crate::db::pagination::paginated;
 use crate::db::pagination::Paginator;
@@ -180,6 +182,54 @@ impl DataStore {
 
         Ok(all_datasets)
     }
+
+    pub async fn dataset_physical_disk_in_service(
+        &self,
+        dataset_id: Uuid,
+    ) -> LookupResult<bool> {
+        let conn = self.pool_connection_unauthorized().await?;
+
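+        // Walk dataset -> zpool -> physical disk to find the disk's policy.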
+        let dataset = {
+            use db::schema::dataset::dsl;
+
+            dsl::dataset
+                .filter(dsl::id.eq(dataset_id))
+                .select(Dataset::as_select())
+                .first_async::<Dataset>(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+        };
+
+        let zpool = {
+            use db::schema::zpool::dsl;
+
+            dsl::zpool
+                .filter(dsl::id.eq(dataset.pool_id))
+                .select(Zpool::as_select())
+                .first_async::<Zpool>(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+        };
+
+        let physical_disk = {
+            use db::schema::physical_disk::dsl;
+
+            dsl::physical_disk
+                .filter(dsl::id.eq(zpool.physical_disk_id))
+                .select(PhysicalDisk::as_select())
+                .first_async::<PhysicalDisk>(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+        };
+
+        Ok(physical_disk.disk_policy == PhysicalDiskPolicy::InService)
+    }
 }
 
 #[cfg(test)]
diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs
new file mode 100644
index 0000000000..9acb4ee492
--- /dev/null
+++ b/nexus/src/app/crucible.rs
@@ -0,0 +1,962 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Functions common to interacting with Crucible agents
+
+use super::*;
+
+use anyhow::anyhow;
+use crucible_agent_client::types::CreateRegion;
+use crucible_agent_client::types::GetSnapshotResponse;
+use crucible_agent_client::types::Region;
+use crucible_agent_client::types::RegionId;
+use crucible_agent_client::types::State as RegionState;
+use crucible_agent_client::Client as CrucibleAgentClient;
+use futures::StreamExt;
+use nexus_db_queries::db;
+use nexus_db_queries::db::identity::Asset;
+use omicron_common::api::external::Error;
+use omicron_common::backoff::{self, BackoffError};
+use omicron_common::progenitor_operation_retry::ProgenitorOperationRetry;
+use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError;
+use slog::Logger;
+
+// Arbitrary limit on concurrency, for operations issued on multiple regions
+// within a disk at the same time.
+const MAX_CONCURRENT_REGION_REQUESTS: usize = 3;
+
+/// Provides a way (via BackoffError) for Permanent errors to have a different
+/// error type than Transient errors.
+#[derive(Debug, thiserror::Error)]
+enum WaitError {
+    #[error("Transient error: {0}")]
+    Transient(#[from] anyhow::Error),
+
+    #[error("Permanent error: {0}")]
+    Permanent(#[from] Error),
+}
+
+/// Convert an error returned from the ProgenitorOperationRetry loops in this
+/// file into an external Error
+fn into_external_error(
+    e: ProgenitorOperationRetryError<crucible_agent_client::types::Error>,
+) -> Error {
+    match e {
+        ProgenitorOperationRetryError::Gone => Error::Gone,
+
+        ProgenitorOperationRetryError::GoneCheckError(e) => {
+            Error::internal_error(&format!(
+                "insufficient permission for crucible_agent_gone_check: {e}"
+            ))
+        }
+
+        ProgenitorOperationRetryError::ProgenitorError(e) => match e {
+            crucible_agent_client::Error::ErrorResponse(rv) => {
+                if rv.status().is_client_error() {
+                    Error::invalid_request(&rv.message)
+                } else {
+                    Error::internal_error(&rv.message)
+                }
+            }
+
+            _ => Error::internal_error(&format!("unexpected failure: {e}")),
+        },
+    }
+}
+
+impl super::Nexus {
+    fn crucible_agent_client_for_dataset(
+        &self,
+        dataset: &db::model::Dataset,
+    ) -> CrucibleAgentClient {
+        CrucibleAgentClient::new_with_client(
+            &format!("http://{}", dataset.address()),
+            self.reqwest_client.clone(),
+        )
+    }
+
+    /// Return whether the Crucible agent is expected to be gone: true means
+    /// it is not expected to answer Nexus, and the caller should bail out of
+    /// the ProgenitorOperationRetry loop.
+    async fn crucible_agent_gone_check(
+        &self,
+        dataset_id: Uuid,
+    ) -> Result<bool, Error> {
+        let on_in_service_physical_disk = self
+            .datastore()
+            .dataset_physical_disk_in_service(dataset_id)
+            .await?;
+
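+        // If the physical disk is no longer in service, the agent is gone.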
+        Ok(!on_in_service_physical_disk)
+    }
+
+    /// Call out to Crucible agent and perform region creation.
+    async fn ensure_region_in_dataset(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region: &db::model::Region,
+    ) -> Result<Region, Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let Ok(extent_count) = u32::try_from(region.extent_count()) else {
+            return Err(Error::internal_error(
+                "Extent count out of range for a u32",
+            ));
+        };
+
+        let region_request = CreateRegion {
+            block_size: region.block_size().to_bytes(),
+            extent_count,
+            extent_size: region.blocks_per_extent(),
+            // TODO: Can we avoid casting from UUID to string?
+            // NOTE: This'll require updating the crucible agent client.
+            id: RegionId(region.id().to_string()),
+            encrypted: region.encrypted(),
+            cert_pem: None,
+            key_pem: None,
+            root_pem: None,
+            source: None,
+        };
+
+        let create_region = || async {
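+            // Ask the agent to create the region; a gone agent or a permanent
+            // failure also aborts the outer retry loop below.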
+            let region = match ProgenitorOperationRetry::new(
+                || async { client.region_create(&region_request).await },
+                || async { self.crucible_agent_gone_check(dataset_id).await },
+            )
+            .run(log)
+            .await
+            {
+                Ok(v) => Ok(v),
+
+                Err(e) => {
+                    error!(
+                        log,
+                        "region_create saw {:?}",
+                        e;
+                        "region_id" => %region.id(),
+                        "dataset_id" => %dataset_id,
+                    );
+
+                    // Return an error if Nexus is unable to create the
+                    // requested region
+                    Err(BackoffError::Permanent(WaitError::Permanent(
+                        into_external_error(e),
+                    )))
+                }
+            }?;
+
+            match region.state {
+                RegionState::Requested => {
+                    Err(BackoffError::transient(WaitError::Transient(anyhow!(
+                        "Region creation in progress"
+                    ))))
+                }
+
+                RegionState::Created => Ok(region),
+
+                _ => Err(BackoffError::Permanent(WaitError::Permanent(
+                    Error::internal_error(&format!(
+                        "Failed to create region, unexpected state: {:?}",
+                        region.state
+                    )),
+                ))),
+            }
+        };
+
+        let log_create_failure = |_, delay| {
+            warn!(
+                log,
+                "Region requested, not yet created. Retrying in {:?}",
+                delay;
+                "dataset" => %dataset.id(),
+                "region" => %region.id(),
+            );
+        };
+
+        let region = backoff::retry_notify(
+            backoff::retry_policy_internal_service(),
+            create_region,
+            log_create_failure,
+        )
+        .await
+        .map_err(|e| match e {
+            WaitError::Transient(e) => {
+                // The backoff crate can be configured with a maximum elapsed
+                // time before giving up, which means that Transient could be
+                // returned here. Our current policies do **not** set this
+                // though.
+                Error::internal_error(&e.to_string())
+            }
+
+            WaitError::Permanent(e) => e,
+        })?;
+
+        Ok(region.into_inner())
+    }
+
+    /// Returns Ok(Some(Region)) if a region with id {region_id} exists,
+    /// Ok(None) if it does not (a 404 was seen), and Err otherwise.
+    async fn maybe_get_crucible_region(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+    ) -> Result<Option<Region>, Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let result = ProgenitorOperationRetry::new(
+            || async {
+                client.region_get(&RegionId(region_id.to_string())).await
+            },
+            || async { self.crucible_agent_gone_check(dataset_id).await },
+        )
+        .run(log)
+        .await;
+
+        match result {
+            Ok(v) => Ok(Some(v.into_inner())),
+
+            Err(e) => {
+                if e.is_not_found() {
+                    // A 404 Not Found is ok for this function, just return None
+                    Ok(None)
+                } else {
+                    error!(
+                        log,
+                        "region_get saw {:?}",
+                        e;
+                        "region_id" => %region_id,
+                        "dataset_id" => %dataset_id,
+                    );
+
+                    // Return an error if Nexus is unable to query the dataset's
+                    // agent for the requested region
+                    Err(into_external_error(e))
+                }
+            }
+        }
+    }
+
+    async fn get_crucible_region_snapshots(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+    ) -> Result<GetSnapshotResponse, Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let result = ProgenitorOperationRetry::new(
+            || async {
+                client
+                    .region_get_snapshots(&RegionId(region_id.to_string()))
+                    .await
+            },
+            || async { self.crucible_agent_gone_check(dataset_id).await },
+        )
+        .run(log)
+        .await;
+
+        match result {
+            Ok(v) => Ok(v.into_inner()),
+
+            Err(e) => {
+                error!(
+                    log,
+                    "region_get_snapshots saw {:?}",
+                    e;
+                    "region_id" => %region_id,
+                    "dataset_id" => %dataset_id,
+                );
+
+                // Return an error if Nexus is unable to query the dataset's
+                // agent for the requested region's snapshots
+                Err(into_external_error(e))
+            }
+        }
+    }
+
+    /// Send a region deletion request
+    async fn request_crucible_region_delete(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+    ) -> Result<(), Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let result = ProgenitorOperationRetry::new(
+            || async {
+                client.region_delete(&RegionId(region_id.to_string())).await
+            },
+            || async { self.crucible_agent_gone_check(dataset_id).await },
+        )
+        .run(log)
+        .await;
+
+        match result {
+            Ok(_) => Ok(()),
+
+            Err(e) => {
+                if e.is_gone() {
+                    // Return Ok if the dataset's agent is gone, no delete call
+                    // is required.
+                    Ok(())
+                } else {
+                    error!(
+                        log,
+                        "region_delete saw {:?}",
+                        e;
+                        "region_id" => %region_id,
+                        "dataset_id" => %dataset.id(),
+                    );
+
+                    Err(into_external_error(e))
+                }
+            }
+        }
+    }
+
+    /// Send a running snapshot deletion request
+    async fn request_crucible_running_snapshot_delete(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let result = ProgenitorOperationRetry::new(
+            || async {
+                client
+                    .region_delete_running_snapshot(
+                        &RegionId(region_id.to_string()),
+                        &snapshot_id.to_string(),
+                    )
+                    .await
+            },
+            || async { self.crucible_agent_gone_check(dataset_id).await },
+        )
+        .run(log)
+        .await;
+
+        match result {
+            Ok(_) => Ok(()),
+
+            Err(e) => {
+                if e.is_gone() {
+                    // Return Ok if the dataset's agent is gone, no delete call
+                    // is required.
+                    Ok(())
+                } else {
+                    error!(
+                        log,
+                        "region_delete_running_snapshot saw {:?}",
+                        e;
+                        "dataset_id" => %dataset_id,
+                        "region_id" => %region_id,
+                        "snapshot_id" => %snapshot_id,
+                    );
+
+                    Err(into_external_error(e))
+                }
+            }
+        }
+    }
+
+    /// Send a snapshot deletion request
+    async fn request_crucible_snapshot_delete(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        let client = self.crucible_agent_client_for_dataset(dataset);
+        let dataset_id = dataset.id();
+
+        let result = ProgenitorOperationRetry::new(
+            || async {
+                client
+                    .region_delete_snapshot(
+                        &RegionId(region_id.to_string()),
+                        &snapshot_id.to_string(),
+                    )
+                    .await
+            },
+            || async { self.crucible_agent_gone_check(dataset_id).await },
+        )
+        .run(log)
+        .await;
+
+        match result {
+            Ok(_) => Ok(()),
+
+            Err(e) => {
+                if e.is_gone() {
+                    // Return Ok if the dataset's agent is gone, no delete call
+                    // is required.
+                    Ok(())
+                } else {
+                    error!(
+                        log,
+                        "region_delete_snapshot saw {:?}",
+                        e;
+                        "dataset_id" => %dataset_id,
+                        "region_id" => %region_id,
+                        "snapshot_id" => %snapshot_id,
+                    );
+
+                    Err(into_external_error(e))
+                }
+            }
+        }
+    }
+
+    /// Call out to a Crucible agent to delete a region
+    async fn delete_crucible_region(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+    ) -> Result<(), Error> {
+        // If the region never existed, then a `GET` will return 404, and so
+        // will a `DELETE`. Catch this case, and return Ok if the region never
+        // existed.  This can occur if an `ensure_all_datasets_and_regions`
+        // partially fails.
+
+        match self.maybe_get_crucible_region(log, dataset, region_id).await {
+            Ok(Some(_)) => {
+                // region found, proceed with deleting
+            }
+
+            Ok(None) => {
+                // region never existed, return Ok
+                return Ok(());
+            }
+
+            // Return Ok if the dataset's agent is gone, no delete call
+            // is required.
+            Err(Error::Gone) => {
+                warn!(
+                    log,
+                    "dataset is gone";
+                    "dataset_id" => %dataset.id(),
+                );
+
+                return Ok(());
+            }
+
+            Err(e) => return Err(e),
+        }
+
+        // Past here, the region exists (or existed at some point): ensure it is
+        // deleted. Request the deletion (which is idempotent), then wait for
+        // the appropriate state change.
+
+        self.request_crucible_region_delete(log, dataset, region_id).await?;
+
+        // Wait until the region is deleted
+
+        backoff::retry_notify(
+            backoff::retry_policy_internal_service_aggressive(),
+            || async {
+                let region = match self
+                    .maybe_get_crucible_region(log, dataset, region_id)
+                    .await
+                {
+                    Ok(None) => Err(BackoffError::Permanent(
+                        WaitError::Permanent(Error::internal_error(&format!(
+                            "dataset {} region {region_id} is missing now!",
+                            dataset.id(),
+                        ))),
+                    )),
+
+                    Ok(Some(v)) => Ok(v),
+
+                    // Return Ok if the dataset's agent is gone, no
+                    // delete call is required.
+                    Err(Error::Gone) => {
+                        warn!(
+                            log,
+                            "dataset is gone";
+                            "dataset_id" => %dataset.id(),
+                        );
+
+                        return Ok(());
+                    }
+
+                    Err(e) => {
+                        Err(BackoffError::Permanent(WaitError::Permanent(e)))
+                    }
+                }?;
+
+                match region.state {
+                    RegionState::Tombstoned => Err(BackoffError::transient(
+                        WaitError::Transient(anyhow!("region not deleted yet")),
+                    )),
+
+                    RegionState::Destroyed => {
+                        info!(
+                            log,
+                            "region deleted";
+                            "region_id" => %region_id,
+                        );
+
+                        Ok(())
+                    }
+
+                    _ => Err(BackoffError::transient(WaitError::Transient(
+                        anyhow!("region unexpected state {:?}", region.state),
+                    ))),
+                }
+            },
+            |e: WaitError, delay| {
+                info!(
+                    log,
+                    "{:?}, trying again in {:?}",
+                    e,
+                    delay;
+                    "region_id" => %region_id,
+                );
+            },
+        )
+        .await
+        .map_err(|e| match e {
+            WaitError::Transient(e) => {
+                // The backoff crate can be configured with a maximum elapsed time
+                // before giving up, which means that Transient could be returned
+                // here. Our current policies do **not** set this though.
+                Error::internal_error(&e.to_string())
+            }
+
+            WaitError::Permanent(e) => e,
+        })
+    }
+
+    async fn delete_crucible_running_snapshot_impl(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        // request running snapshot deletion
+
+        self.request_crucible_running_snapshot_delete(
+            log,
+            dataset,
+            region_id,
+            snapshot_id,
+        )
+        .await?;
+
+        // `region_delete_running_snapshot` is only a request: wait until
+        // running snapshot is deleted
+        backoff::retry_notify(
+            backoff::retry_policy_internal_service_aggressive(),
+            || async {
+                let response = match self
+                    .get_crucible_region_snapshots(log, dataset, region_id)
+                    .await
+                {
+                    Ok(v) => Ok(v),
+
+                    // Return Ok if the dataset's agent is gone, no
+                    // delete call is required.
+                    Err(Error::Gone) => {
+                        warn!(
+                            log,
+                            "dataset is gone";
+                            "dataset_id" => %dataset.id(),
+                        );
+
+                        return Ok(());
+                    }
+
+                    Err(e) => {
+                        Err(BackoffError::Permanent(WaitError::Permanent(e)))
+                    }
+                }?;
+
+                match response.running_snapshots.get(&snapshot_id.to_string()) {
+                    Some(running_snapshot) => {
+                        info!(
+                            log,
+                            "running_snapshot is Some, state is {}",
+                            running_snapshot.state.to_string();
+                            "region_id" => %region_id,
+                            "snapshot_id" => %snapshot_id,
+                        );
+
+                        match running_snapshot.state {
+                            RegionState::Tombstoned => {
+                                Err(BackoffError::transient(
+                                    WaitError::Transient(anyhow!(
+                                        "running_snapshot tombstoned, not deleted yet",
+                                    )
+                                )))
+                            }
+
+                            RegionState::Destroyed => {
+                                info!(
+                                    log,
+                                    "running_snapshot deleted",
+                                );
+
+                                Ok(())
+                            }
+
+                            _ => {
+                                Err(BackoffError::transient(
+                                    WaitError::Transient(anyhow!(
+                                        "running_snapshot unexpected state",
+                                    )
+                                )))
+                            }
+                        }
+                    }
+
+                    None => {
+                        // deleted?
+                        info!(
+                            log,
+                            "running_snapshot is None";
+                            "region_id" => %region_id,
+                            "snapshot_id" => %snapshot_id,
+                        );
+
+                        // break here - it's possible that the running snapshot
+                        // record was GCed, and it won't come back.
+                        Ok(())
+                    }
+                }
+            },
+            |e: WaitError, delay| {
+                info!(
+                    log,
+                    "{:?}, trying again in {:?}",
+                    e,
+                    delay;
+                    "region_id" => %region_id,
+                    "snapshot_id" => %snapshot_id,
+                );
+            }
+        )
+        .await
+        .map_err(|e| match e {
+            WaitError::Transient(e) => {
+                // The backoff crate can be configured with a maximum elapsed time
+                // before giving up, which means that Transient could be returned
+                // here. Our current policies do **not** set this though.
+                Error::internal_error(&e.to_string())
+            }
+
+            WaitError::Permanent(e) => {
+                e
+            }
+        })
+    }
+
+    pub async fn delete_crucible_snapshot(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        self.delete_crucible_snapshot_impl(log, dataset, region_id, snapshot_id)
+            .await
+    }
+
+    async fn delete_crucible_snapshot_impl(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        // Unlike other Crucible agent endpoints, this one is synchronous in that it
+        // is not only a request to the Crucible agent: `zfs destroy` is performed
+        // right away. However, this is still a request to illumos that may not take
+        // effect right away. Wait until the snapshot no longer appears in the list
+        // of region snapshots, meaning it was not returned from `zfs list`.
+
+        let dataset_id = dataset.id();
+
+        info!(
+            log,
+            "requesting region snapshot delete";
+            "dataset_id" => %dataset_id,
+            "region_id" => %region_id,
+            "snapshot_id" => %snapshot_id,
+        );
+
+        self.request_crucible_snapshot_delete(
+            log,
+            dataset,
+            region_id,
+            snapshot_id,
+        )
+        .await?;
+
+        backoff::retry_notify(
+            backoff::retry_policy_internal_service_aggressive(),
+            || async {
+                let response = match self
+                    .get_crucible_region_snapshots(log, dataset, region_id)
+                    .await
+                {
+                    Ok(v) => Ok(v),
+
+                    // Return Ok if the dataset's agent is gone, no
+                    // delete call is required.
+                    Err(Error::Gone) => {
+                        warn!(
+                            log,
+                            "dataset is gone";
+                            "dataset_id" => %dataset.id(),
+                        );
+
+                        return Ok(());
+                    }
+
+                    Err(e) => {
+                        Err(BackoffError::Permanent(WaitError::Permanent(e)))
+                    }
+                }?;
+
+                if response
+                    .snapshots
+                    .iter()
+                    .any(|x| x.name == snapshot_id.to_string())
+                {
+                    info!(
+                        log,
+                        "snapshot still exists, waiting";
+                        "dataset_id" => %dataset_id,
+                        "region_id" => %region_id,
+                        "snapshot_id" => %snapshot_id,
+                    );
+
+                    Err(BackoffError::transient(WaitError::Transient(anyhow!(
+                        "snapshot not deleted yet",
+                    ))))
+                } else {
+                    info!(
+                        log,
+                        "snapshot deleted";
+                        "dataset_id" => %dataset_id,
+                        "region_id" => %region_id,
+                        "snapshot_id" => %snapshot_id,
+                    );
+
+                    Ok(())
+                }
+            },
+            |e: WaitError, delay| {
+                info!(
+                    log,
+                    "{:?}, trying again in {:?}",
+                    e,
+                    delay;
+                    "dataset_id" => %dataset_id,
+                    "region_id" => %region_id,
+                    "snapshot_id" => %snapshot_id,
+                );
+            },
+        )
+        .await
+        .map_err(|e| match e {
+            WaitError::Transient(e) => {
+                // The backoff crate can be configured with a maximum elapsed time
+                // before giving up, which means that Transient could be returned
+                // here. Our current policies do **not** set this though.
+                Error::internal_error(&e.to_string())
+            }
+
+            WaitError::Permanent(e) => e,
+        })
+    }
+
+    // PUBLIC API
+
+    pub async fn ensure_all_datasets_and_regions(
+        &self,
+        log: &Logger,
+        datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>,
+    ) -> Result<Vec<(db::model::Dataset, Region)>, Error> {
+        let request_count = datasets_and_regions.len();
+        if request_count == 0 {
+            return Ok(vec![]);
+        }
+
+        // Allocate regions, and additionally return the dataset that the region was
+        // allocated in.
+        let datasets_and_regions: Vec<(db::model::Dataset, Region)> =
+            futures::stream::iter(datasets_and_regions)
+                .map(|(dataset, region)| async move {
+                    match self
+                        .ensure_region_in_dataset(log, &dataset, &region)
+                        .await
+                    {
+                        Ok(result) => Ok((dataset, result)),
+                        Err(e) => Err(e),
+                    }
+                })
+                // Execute the allocation requests concurrently.
+                .buffer_unordered(std::cmp::min(
+                    request_count,
+                    MAX_CONCURRENT_REGION_REQUESTS,
+                ))
+                .collect::<Vec<Result<(db::model::Dataset, Region), Error>>>()
+                .await
+                .into_iter()
+                .collect::<Result<Vec<(db::model::Dataset, Region)>, Error>>(
+                )?;
+
+        // Assert each region has the same block size, otherwise Volume creation
+        // will fail.
+        let all_region_have_same_block_size = datasets_and_regions
+            .windows(2)
+            .all(|w| w[0].1.block_size == w[1].1.block_size);
+
+        if !all_region_have_same_block_size {
+            return Err(Error::internal_error(
+                "volume creation will fail due to block size mismatch",
+            ));
+        }
+
+        Ok(datasets_and_regions)
+    }
+
+    /// Given a list of datasets and regions, send DELETE calls to the dataset's
+    /// corresponding Crucible Agent for each region.
+    pub async fn delete_crucible_regions(
+        &self,
+        log: &Logger,
+        datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>,
+    ) -> Result<(), Error> {
+        let request_count = datasets_and_regions.len();
+        if request_count == 0 {
+            return Ok(());
+        }
+
+        futures::stream::iter(datasets_and_regions)
+            .map(|(dataset, region)| async move {
+                self.delete_crucible_region(log, &dataset, region.id()).await
+            })
+            // Execute the requests concurrently.
+            .buffer_unordered(std::cmp::min(
+                request_count,
+                MAX_CONCURRENT_REGION_REQUESTS,
+            ))
+            .collect::<Vec<Result<_, _>>>()
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(())
+    }
+
+    /// Ensure that a Crucible "running snapshot" is deleted.
+    pub async fn delete_crucible_running_snapshot(
+        &self,
+        log: &Logger,
+        dataset: &db::model::Dataset,
+        region_id: Uuid,
+        snapshot_id: Uuid,
+    ) -> Result<(), Error> {
+        self.delete_crucible_running_snapshot_impl(
+            log,
+            dataset,
+            region_id,
+            snapshot_id,
+        )
+        .await
+    }
+
+    /// Given a list of datasets and region snapshots, send DELETE calls to the
+    /// dataset's corresponding Crucible Agent for each running read-only
+    /// downstairs corresponding to the snapshot.
+    pub async fn delete_crucible_running_snapshots(
+        &self,
+        log: &Logger,
+        datasets_and_snapshots: Vec<(
+            db::model::Dataset,
+            db::model::RegionSnapshot,
+        )>,
+    ) -> Result<(), Error> {
+        let request_count = datasets_and_snapshots.len();
+        if request_count == 0 {
+            return Ok(());
+        }
+
+        futures::stream::iter(datasets_and_snapshots)
+            .map(|(dataset, region_snapshot)| async move {
+                self.delete_crucible_running_snapshot_impl(
+                    &log,
+                    &dataset,
+                    region_snapshot.region_id,
+                    region_snapshot.snapshot_id,
+                )
+                .await
+            })
+            // Execute the requests concurrently.
+            .buffer_unordered(std::cmp::min(
+                request_count,
+                MAX_CONCURRENT_REGION_REQUESTS,
+            ))
+            .collect::<Vec<Result<(), Error>>>()
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(())
+    }
+
+    /// Given a list of datasets and region snapshots, send DELETE calls to the
+    /// dataset's corresponding Crucible Agent for each snapshot.
+    pub async fn delete_crucible_snapshots(
+        &self,
+        log: &Logger,
+        datasets_and_snapshots: Vec<(
+            db::model::Dataset,
+            db::model::RegionSnapshot,
+        )>,
+    ) -> Result<(), Error> {
+        let request_count = datasets_and_snapshots.len();
+        if request_count == 0 {
+            return Ok(());
+        }
+
+        futures::stream::iter(datasets_and_snapshots)
+            .map(|(dataset, region_snapshot)| async move {
+                self.delete_crucible_snapshot_impl(
+                    &log,
+                    &dataset,
+                    region_snapshot.region_id,
+                    region_snapshot.snapshot_id,
+                )
+                .await
+            })
+            // Execute the requests concurrently.
+            .buffer_unordered(std::cmp::min(
+                request_count,
+                MAX_CONCURRENT_REGION_REQUESTS,
+            ))
+            .collect::<Vec<Result<(), Error>>>()
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok(())
+    }
+}
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index 3083a8e761..263ab24c70 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -44,6 +44,7 @@ pub(crate) mod background;
 mod bfd;
 mod bgp;
 mod certificate;
+mod crucible;
 mod deployment;
 mod device_auth;
 mod disk;
diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs
index 51e9648592..1fe8d76783 100644
--- a/nexus/src/app/sagas/common_storage.rs
+++ b/nexus/src/app/sagas/common_storage.rs
@@ -7,740 +7,16 @@
 use super::*;
 
 use crate::Nexus;
-use anyhow::anyhow;
-use crucible_agent_client::{
-    types::{CreateRegion, RegionId, State as RegionState},
-    Client as CrucibleAgentClient,
-};
-use futures::StreamExt;
 use internal_dns::ServiceName;
 use nexus_db_queries::authz;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db;
 use nexus_db_queries::db::lookup::LookupPath;
 use omicron_common::api::external::Error;
-use omicron_common::backoff::{self, BackoffError};
 use omicron_common::retry_until_known_result;
-use slog::Logger;
 use std::net::SocketAddrV6;
 
-// Arbitrary limit on concurrency, for operations issued on multiple regions
-// within a disk at the same time.
-const MAX_CONCURRENT_REGION_REQUESTS: usize = 3;
-
-/// Call out to Crucible agent and perform region creation.
-pub(crate) async fn ensure_region_in_dataset(
-    log: &Logger,
-    dataset: &db::model::Dataset,
-    region: &db::model::Region,
-) -> Result<crucible_agent_client::types::Region, Error> {
-    let url = format!("http://{}", dataset.address());
-    let client = CrucibleAgentClient::new(&url);
-    let Ok(extent_count) = u32::try_from(region.extent_count()) else {
-        return Err(Error::internal_error(
-            "Extent count out of range for a u32",
-        ));
-    };
-    let region_request = CreateRegion {
-        block_size: region.block_size().to_bytes(),
-        extent_count,
-        extent_size: region.blocks_per_extent(),
-        // TODO: Can we avoid casting from UUID to string?
-        // NOTE: This'll require updating the crucible agent client.
-        id: RegionId(region.id().to_string()),
-        encrypted: region.encrypted(),
-        cert_pem: None,
-        key_pem: None,
-        root_pem: None,
-        source: None,
-    };
-
-    let create_region = || async {
-        let region = client
-            .region_create(&region_request)
-            .await
-            .map_err(|e| BackoffError::Permanent(e.into()))?;
-        match region.state {
-            RegionState::Requested => Err(BackoffError::transient(anyhow!(
-                "Region creation in progress"
-            ))),
-
-            RegionState::Created => Ok(region),
-
-            _ => Err(BackoffError::Permanent(anyhow!(
-                "Failed to create region, unexpected state: {:?}",
-                region.state
-            ))),
-        }
-    };
-
-    let log_create_failure = |_, delay| {
-        warn!(
-            log,
-            "Region requested, not yet created. Retrying in {:?}",
-            delay;
-            "region" => %region.id(),
-        );
-    };
-
-    let region = backoff::retry_notify(
-        backoff::retry_policy_internal_service(),
-        create_region,
-        log_create_failure,
-    )
-    .await
-    .map_err(|e| Error::internal_error(&e.to_string()))?;
-
-    Ok(region.into_inner())
-}
-
-pub(crate) async fn ensure_all_datasets_and_regions(
-    log: &Logger,
-    datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>,
-) -> Result<
-    Vec<(db::model::Dataset, crucible_agent_client::types::Region)>,
-    ActionError,
-> {
-    let request_count = datasets_and_regions.len();
-
-    // Allocate regions, and additionally return the dataset that the region was
-    // allocated in.
-    let datasets_and_regions: Vec<(
-        db::model::Dataset,
-        crucible_agent_client::types::Region,
-    )> = futures::stream::iter(datasets_and_regions)
-        .map(|(dataset, region)| async move {
-            match ensure_region_in_dataset(log, &dataset, &region).await {
-                Ok(result) => Ok((dataset, result)),
-                Err(e) => Err(e),
-            }
-        })
-        // Execute the allocation requests concurrently.
-        .buffer_unordered(std::cmp::min(
-            request_count,
-            MAX_CONCURRENT_REGION_REQUESTS,
-        ))
-        .collect::<Vec<
-            Result<
-                (db::model::Dataset, crucible_agent_client::types::Region),
-                Error,
-            >,
-        >>()
-        .await
-        .into_iter()
-        .collect::<Result<
-            Vec<(db::model::Dataset, crucible_agent_client::types::Region)>,
-            Error,
-        >>()
-        .map_err(ActionError::action_failed)?;
-
-    // Assert each region has the same block size, otherwise Volume creation
-    // will fail.
-    let all_region_have_same_block_size = datasets_and_regions
-        .windows(2)
-        .all(|w| w[0].1.block_size == w[1].1.block_size);
-
-    if !all_region_have_same_block_size {
-        return Err(ActionError::action_failed(Error::internal_error(
-            "volume creation will fail due to block size mismatch",
-        )));
-    }
-
-    Ok(datasets_and_regions)
-}
-
-pub(super) async fn delete_crucible_region(
-    log: &Logger,
-    client: &CrucibleAgentClient,
-    region_id: Uuid,
-) -> Result<(), Error> {
-    // If the region never existed, then a `GET` will return 404, and so will a
-    // `DELETE`. Catch this case, and return Ok if the region never existed.
-    // This can occur if an `ensure_all_datasets_and_regions` partially fails.
-    let result = retry_until_known_result(log, || async {
-        client.region_get(&RegionId(region_id.to_string())).await
-    })
-    .await;
-
-    if let Err(e) = result {
-        error!(
-            log,
-            "delete_crucible_region: region_get saw {:?}",
-            e;
-            "region_id" => %region_id,
-        );
-        match e {
-            crucible_agent_client::Error::ErrorResponse(rv) => {
-                match rv.status() {
-                    http::StatusCode::NOT_FOUND => {
-                        // Bail out here!
-                        return Ok(());
-                    }
-
-                    status if status.is_client_error() => {
-                        return Err(Error::invalid_request(&rv.message));
-                    }
-
-                    _ => {
-                        return Err(Error::internal_error(&rv.message));
-                    }
-                }
-            }
-
-            _ => {
-                return Err(Error::internal_error(
-                    "unexpected failure during `region_get`",
-                ));
-            }
-        }
-    }
-
-    // Past here, the region exists: ensure it is deleted.
-
-    retry_until_known_result(log, || async {
-        client.region_delete(&RegionId(region_id.to_string())).await
-    })
-    .await
-    .map_err(|e| {
-        error!(
-            log,
-            "delete_crucible_region: region_delete saw {:?}",
-            e;
-            "region_id" => %region_id,
-        );
-        match e {
-            crucible_agent_client::Error::ErrorResponse(rv) => {
-                match rv.status() {
-                    status if status.is_client_error() => {
-                        Error::invalid_request(&rv.message)
-                    }
-                    _ => Error::internal_error(&rv.message),
-                }
-            }
-            _ => Error::internal_error(
-                "unexpected failure during `region_delete`",
-            ),
-        }
-    })?;
-
-    #[derive(Debug, thiserror::Error)]
-    enum WaitError {
-        #[error("Transient error: {0}")]
-        Transient(#[from] anyhow::Error),
-
-        #[error("Permanent error: {0}")]
-        Permanent(#[from] Error),
-    }
-
-    // `region_delete` is only a request: wait until the region is
-    // deleted
-    backoff::retry_notify(
-        backoff::retry_policy_internal_service_aggressive(),
-        || async {
-            let region = retry_until_known_result(log, || async {
-                client.region_get(&RegionId(region_id.to_string())).await
-            })
-            .await
-            .map_err(|e| {
-                error!(
-                    log,
-                    "delete_crucible_region: region_get saw {:?}",
-                    e;
-                    "region_id" => %region_id,
-                );
-
-                match e {
-                    crucible_agent_client::Error::ErrorResponse(rv) => {
-                        match rv.status() {
-                            status if status.is_client_error() => {
-                                BackoffError::Permanent(WaitError::Permanent(
-                                    Error::invalid_request(&rv.message),
-                                ))
-                            }
-                            _ => BackoffError::Permanent(WaitError::Permanent(
-                                Error::internal_error(&rv.message),
-                            )),
-                        }
-                    }
-                    _ => BackoffError::Permanent(WaitError::Permanent(
-                        Error::internal_error(
-                            "unexpected failure during `region_get`",
-                        ),
-                    )),
-                }
-            })?;
-
-            match region.state {
-                RegionState::Tombstoned => Err(BackoffError::transient(
-                    WaitError::Transient(anyhow!("region not deleted yet")),
-                )),
-
-                RegionState::Destroyed => {
-                    info!(
-                        log,
-                        "region deleted";
-                        "region_id" => %region_id,
-                    );
-
-                    Ok(())
-                }
-
-                _ => Err(BackoffError::transient(WaitError::Transient(
-                    anyhow!("region unexpected state {:?}", region.state),
-                ))),
-            }
-        },
-        |e: WaitError, delay| {
-            info!(
-                log,
-                "{:?}, trying again in {:?}",
-                e,
-                delay;
-                "region_id" => %region_id,
-            );
-        },
-    )
-    .await
-    .map_err(|e| match e {
-        WaitError::Transient(e) => {
-            // The backoff crate can be configured with a maximum elapsed time
-            // before giving up, which means that Transient could be returned
-            // here. Our current policies do **not** set this though.
-            Error::internal_error(&e.to_string())
-        }
-
-        WaitError::Permanent(e) => e,
-    })
-}
-
-// Given a list of datasets and regions, send DELETE calls to the datasets
-// corresponding Crucible Agent for each region.
-pub(super) async fn delete_crucible_regions(
-    log: &Logger,
-    datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>,
-) -> Result<(), Error> {
-    let request_count = datasets_and_regions.len();
-    if request_count == 0 {
-        return Ok(());
-    }
-
-    futures::stream::iter(datasets_and_regions)
-        .map(|(dataset, region)| async move {
-            let url = format!("http://{}", dataset.address());
-            let client = CrucibleAgentClient::new(&url);
-
-            delete_crucible_region(&log, &client, region.id()).await
-        })
-        // Execute the requests concurrently.
-        .buffer_unordered(std::cmp::min(
-            request_count,
-            MAX_CONCURRENT_REGION_REQUESTS,
-        ))
-        .collect::<Vec<Result<_, _>>>()
-        .await
-        .into_iter()
-        .collect::<Result<Vec<_>, _>>()?;
-
-    Ok(())
-}
-
-pub(super) async fn delete_crucible_running_snapshot(
-    log: &Logger,
-    client: &CrucibleAgentClient,
-    region_id: Uuid,
-    snapshot_id: Uuid,
-) -> Result<(), Error> {
-    // delete running snapshot
-    retry_until_known_result(log, || async {
-        client
-            .region_delete_running_snapshot(
-                &RegionId(region_id.to_string()),
-                &snapshot_id.to_string(),
-            )
-            .await
-    })
-    .await
-    .map_err(|e| {
-        error!(
-            log,
-            "delete_crucible_running_snapshot: region_delete_running_snapshot saw {:?}",
-            e;
-            "region_id" => %region_id,
-            "snapshot_id" => %snapshot_id,
-        );
-        match e {
-            crucible_agent_client::Error::ErrorResponse(rv) => {
-                match rv.status() {
-                    status if status.is_client_error() => {
-                        Error::invalid_request(&rv.message)
-                    }
-                    _ => Error::internal_error(&rv.message),
-                }
-            }
-            _ => Error::internal_error(
-                "unexpected failure during `region_delete_running_snapshot`",
-            ),
-        }
-    })?;
-
-    #[derive(Debug, thiserror::Error)]
-    enum WaitError {
-        #[error("Transient error: {0}")]
-        Transient(#[from] anyhow::Error),
-
-        #[error("Permanent error: {0}")]
-        Permanent(#[from] Error),
-    }
-
-    // `region_delete_running_snapshot` is only a request: wait until
-    // running snapshot is deleted
-    backoff::retry_notify(
-        backoff::retry_policy_internal_service_aggressive(),
-        || async {
-            let snapshot = retry_until_known_result(log, || async {
-                    client.region_get_snapshots(
-                        &RegionId(region_id.to_string()),
-                    ).await
-                })
-                .await
-                .map_err(|e| {
-                    error!(
-                        log,
-                        "delete_crucible_running_snapshot: region_get_snapshots saw {:?}",
-                        e;
-                        "region_id" => %region_id,
-                        "snapshot_id" => %snapshot_id,
-                    );
-
-                    match e {
-                        crucible_agent_client::Error::ErrorResponse(rv) => {
-                            match rv.status() {
-                                status if status.is_client_error() => {
-                                    BackoffError::Permanent(
-                                        WaitError::Permanent(
-                                            Error::invalid_request(&rv.message)
-                                        )
-                                    )
-                                }
-                                _ => BackoffError::Permanent(
-                                    WaitError::Permanent(
-                                        Error::internal_error(&rv.message)
-                                    )
-                                )
-                            }
-                        }
-                        _ => BackoffError::Permanent(
-                            WaitError::Permanent(
-                                Error::internal_error(
-                                    "unexpected failure during `region_get_snapshots`",
-                                )
-                            )
-                        )
-                    }
-                })?;
-
-            match snapshot.running_snapshots.get(&snapshot_id.to_string()) {
-                Some(running_snapshot) => {
-                    info!(
-                        log,
-                        "running_snapshot is Some, state is {}",
-                        running_snapshot.state.to_string();
-                        "region_id" => %region_id,
-                        "snapshot_id" => %snapshot_id,
-                    );
-
-                    match running_snapshot.state {
-                        RegionState::Tombstoned => {
-                            Err(BackoffError::transient(
-                                WaitError::Transient(anyhow!(
-                                    "running_snapshot tombstoned, not deleted yet",
-                                )
-                            )))
-                        }
-
-                        RegionState::Destroyed => {
-                            info!(
-                                log,
-                                "running_snapshot deleted",
-                            );
-
-                            Ok(())
-                        }
-
-                        _ => {
-                            Err(BackoffError::transient(
-                                WaitError::Transient(anyhow!(
-                                    "running_snapshot unexpected state",
-                                )
-                            )))
-                        }
-                    }
-                }
-
-                None => {
-                    // deleted?
-                    info!(
-                        log,
-                        "running_snapshot is None";
-                        "region_id" => %region_id,
-                        "snapshot_id" => %snapshot_id,
-                    );
-
-                    // break here - it's possible that the running snapshot
-                    // record was GCed, and it won't come back.
-                    Ok(())
-                }
-            }
-        },
-        |e: WaitError, delay| {
-            info!(
-                log,
-                "{:?}, trying again in {:?}",
-                e,
-                delay;
-                "region_id" => %region_id,
-                "snapshot_id" => %snapshot_id,
-            );
-        }
-    )
-    .await
-    .map_err(|e| match e {
-        WaitError::Transient(e) => {
-            // The backoff crate can be configured with a maximum elapsed time
-            // before giving up, which means that Transient could be returned
-            // here. Our current policies do **not** set this though.
-            Error::internal_error(&e.to_string())
-        }
-
-        WaitError::Permanent(e) => {
-            e
-        }
-    })
-}
-
-pub(super) async fn delete_crucible_snapshot(
-    log: &Logger,
-    client: &CrucibleAgentClient,
-    region_id: Uuid,
-    snapshot_id: Uuid,
-) -> Result<(), Error> {
-    // Unlike other Crucible agent endpoints, this one is synchronous in that it
-    // is not only a request to the Crucible agent: `zfs destroy` is performed
-    // right away. However this is still a request to illumos that may not take
-    // effect right away. Wait until the snapshot no longer appears in the list
-    // of region snapshots, meaning it was not returned from `zfs list`.
-
-    info!(log, "deleting region {region_id} snapshot {snapshot_id}");
-
-    retry_until_known_result(log, || async {
-        client
-            .region_delete_snapshot(
-                &RegionId(region_id.to_string()),
-                &snapshot_id.to_string(),
-            )
-            .await
-    })
-    .await
-    .map_err(|e| {
-        error!(
-            log,
-            "delete_crucible_snapshot: region_delete_snapshot saw {:?}",
-            e;
-            "region_id" => %region_id,
-            "snapshot_id" => %snapshot_id,
-        );
-        match e {
-            crucible_agent_client::Error::ErrorResponse(rv) => {
-                match rv.status() {
-                    status if status.is_client_error() => {
-                        Error::invalid_request(&rv.message)
-                    }
-                    _ => Error::internal_error(&rv.message),
-                }
-            }
-            _ => Error::internal_error(
-                "unexpected failure during `region_delete_snapshot`",
-            ),
-        }
-    })?;
-
-    #[derive(Debug, thiserror::Error)]
-    enum WaitError {
-        #[error("Transient error: {0}")]
-        Transient(#[from] anyhow::Error),
-
-        #[error("Permanent error: {0}")]
-        Permanent(#[from] Error),
-    }
-
-    backoff::retry_notify(
-        backoff::retry_policy_internal_service_aggressive(),
-        || async {
-            let response = retry_until_known_result(log, || async {
-                client
-                    .region_get_snapshots(&RegionId(region_id.to_string()))
-                    .await
-            })
-            .await
-            .map_err(|e| {
-                error!(
-                    log,
-                    "delete_crucible_snapshot: region_get_snapshots saw {:?}",
-                    e;
-                    "region_id" => %region_id,
-                    "snapshot_id" => %snapshot_id,
-                );
-                match e {
-                    crucible_agent_client::Error::ErrorResponse(rv) => {
-                        match rv.status() {
-                            status if status.is_client_error() => {
-                                BackoffError::Permanent(WaitError::Permanent(
-                                    Error::invalid_request(&rv.message),
-                                ))
-                            }
-                            _ => BackoffError::Permanent(WaitError::Permanent(
-                                Error::internal_error(&rv.message),
-                            )),
-                        }
-                    }
-                    _ => BackoffError::Permanent(WaitError::Permanent(
-                        Error::internal_error(
-                            "unexpected failure during `region_get_snapshots`",
-                        ),
-                    )),
-                }
-            })?;
-
-            if response
-                .snapshots
-                .iter()
-                .any(|x| x.name == snapshot_id.to_string())
-            {
-                info!(
-                    log,
-                    "snapshot still exists, waiting";
-                    "region_id" => %region_id,
-                    "snapshot_id" => %snapshot_id,
-                );
-
-                Err(BackoffError::transient(WaitError::Transient(anyhow!(
-                    "snapshot not deleted yet",
-                ))))
-            } else {
-                info!(
-                    log,
-                    "snapshot deleted";
-                    "region_id" => %region_id,
-                    "snapshot_id" => %snapshot_id,
-                );
-
-                Ok(())
-            }
-        },
-        |e: WaitError, delay| {
-            info!(
-                log,
-                "{:?}, trying again in {:?}",
-                e,
-                delay;
-                "region_id" => %region_id,
-                "snapshot_id" => %snapshot_id,
-            );
-        },
-    )
-    .await
-    .map_err(|e| match e {
-        WaitError::Transient(e) => {
-            // The backoff crate can be configured with a maximum elapsed time
-            // before giving up, which means that Transient could be returned
-            // here. Our current policies do **not** set this though.
-            Error::internal_error(&e.to_string())
-        }
-
-        WaitError::Permanent(e) => e,
-    })
-}
-
-// Given a list of datasets and region snapshots, send DELETE calls to the
-// datasets corresponding Crucible Agent for each snapshot.
-pub(super) async fn delete_crucible_snapshots(
-    log: &Logger,
-    datasets_and_snapshots: Vec<(
-        db::model::Dataset,
-        db::model::RegionSnapshot,
-    )>,
-) -> Result<(), Error> {
-    let request_count = datasets_and_snapshots.len();
-    if request_count == 0 {
-        return Ok(());
-    }
-
-    futures::stream::iter(datasets_and_snapshots)
-        .map(|(dataset, region_snapshot)| async move {
-            let url = format!("http://{}", dataset.address());
-            let client = CrucibleAgentClient::new(&url);
-
-            delete_crucible_snapshot(
-                &log,
-                &client,
-                region_snapshot.region_id,
-                region_snapshot.snapshot_id,
-            )
-            .await
-        })
-        // Execute the requests concurrently.
-        .buffer_unordered(std::cmp::min(
-            request_count,
-            MAX_CONCURRENT_REGION_REQUESTS,
-        ))
-        .collect::<Vec<Result<(), Error>>>()
-        .await
-        .into_iter()
-        .collect::<Result<Vec<_>, _>>()?;
-
-    Ok(())
-}
-
-// Given a list of datasets and region snapshots, send DELETE calls to the
-// datasets corresponding Crucible Agent for each running read-only downstairs
-// corresponding to the snapshot.
-pub(super) async fn delete_crucible_running_snapshots(
-    log: &Logger,
-    datasets_and_snapshots: Vec<(
-        db::model::Dataset,
-        db::model::RegionSnapshot,
-    )>,
-) -> Result<(), Error> {
-    let request_count = datasets_and_snapshots.len();
-    if request_count == 0 {
-        return Ok(());
-    }
-
-    futures::stream::iter(datasets_and_snapshots)
-        .map(|(dataset, region_snapshot)| async move {
-            let url = format!("http://{}", dataset.address());
-            let client = CrucibleAgentClient::new(&url);
-
-            delete_crucible_running_snapshot(
-                &log,
-                &client,
-                region_snapshot.region_id,
-                region_snapshot.snapshot_id,
-            )
-            .await
-        })
-        // Execute the requests concurrently.
-        .buffer_unordered(std::cmp::min(
-            request_count,
-            MAX_CONCURRENT_REGION_REQUESTS,
-        ))
-        .collect::<Vec<Result<(), Error>>>()
-        .await
-        .into_iter()
-        .collect::<Result<Vec<_>, _>>()?;
-
-    Ok(())
-}
+// Common Pantry operations
 
 pub(crate) async fn get_pantry_address(
     nexus: &Arc<Nexus>,
@@ -754,8 +30,6 @@ pub(crate) async fn get_pantry_address(
         .map_err(ActionError::action_failed)
 }
 
-// Common Pantry operations
-
 pub(crate) async fn call_pantry_attach_for_disk(
     log: &slog::Logger,
     opctx: &OpContext,
diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs
index ee90f72862..ff0cc63d00 100644
--- a/nexus/src/app/sagas/disk_create.rs
+++ b/nexus/src/app/sagas/disk_create.rs
@@ -5,7 +5,6 @@
 use super::{
     common_storage::{
         call_pantry_attach_for_disk, call_pantry_detach_for_disk,
-        delete_crucible_regions, ensure_all_datasets_and_regions,
         get_pantry_address,
     },
     ActionRegistry, NexusActionContext, NexusSaga, SagaInitError,
@@ -346,16 +345,20 @@ async fn sdc_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> {
 async fn sdc_regions_ensure(
     sagactx: NexusActionContext,
 ) -> Result<String, ActionError> {
-    let log = sagactx.user_data().log();
+    let osagactx = sagactx.user_data();
+    let log = osagactx.log();
     let disk_id = sagactx.lookup::<Uuid>("disk_id")?;
 
-    let datasets_and_regions = ensure_all_datasets_and_regions(
-        &log,
-        sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
-            "datasets_and_regions",
-        )?,
-    )
-    .await?;
+    let datasets_and_regions = osagactx
+        .nexus()
+        .ensure_all_datasets_and_regions(
+            &log,
+            sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
+                "datasets_and_regions",
+            )?,
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
 
     let block_size = datasets_and_regions[0].1.block_size;
     let blocks_per_extent = datasets_and_regions[0].1.extent_size;
@@ -551,13 +554,15 @@ async fn sdc_regions_ensure_undo(
 
     warn!(log, "sdc_regions_ensure_undo: Deleting crucible regions");
 
-    let result = delete_crucible_regions(
-        log,
-        sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
-            "datasets_and_regions",
-        )?,
-    )
-    .await;
+    let result = osagactx
+        .nexus()
+        .delete_crucible_regions(
+            log,
+            sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
+                "datasets_and_regions",
+            )?,
+        )
+        .await;
 
     match result {
         Err(e) => {
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs
index cca589b758..f8b56b3522 100644
--- a/nexus/src/app/sagas/snapshot_create.rs
+++ b/nexus/src/app/sagas/snapshot_create.rs
@@ -91,8 +91,6 @@
 use super::{
     common_storage::{
         call_pantry_attach_for_disk, call_pantry_detach_for_disk,
-        delete_crucible_regions, delete_crucible_running_snapshot,
-        delete_crucible_snapshot, ensure_all_datasets_and_regions,
         get_pantry_address,
     },
     ActionRegistry, NexusActionContext, NexusSaga, SagaInitError,
@@ -381,13 +379,16 @@ async fn ssc_regions_ensure(
     let destination_volume_id =
         sagactx.lookup::<Uuid>("destination_volume_id")?;
 
-    let datasets_and_regions = ensure_all_datasets_and_regions(
-        &log,
-        sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
-            "datasets_and_regions",
-        )?,
-    )
-    .await?;
+    let datasets_and_regions = osagactx
+        .nexus()
+        .ensure_all_datasets_and_regions(
+            &log,
+            sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
+                "datasets_and_regions",
+            )?,
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
 
     let block_size = datasets_and_regions[0].1.block_size;
     let blocks_per_extent = datasets_and_regions[0].1.extent_size;
@@ -459,15 +460,18 @@ async fn ssc_regions_ensure(
 async fn ssc_regions_ensure_undo(
     sagactx: NexusActionContext,
 ) -> Result<(), anyhow::Error> {
-    let log = sagactx.user_data().log();
+    let osagactx = sagactx.user_data();
+    let log = osagactx.log();
     warn!(log, "ssc_regions_ensure_undo: Deleting crucible regions");
-    delete_crucible_regions(
-        log,
-        sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
-            "datasets_and_regions",
-        )?,
-    )
-    .await?;
+    osagactx
+        .nexus()
+        .delete_crucible_regions(
+            log,
+            sagactx.lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
+                "datasets_and_regions",
+            )?,
+        )
+        .await?;
     info!(log, "ssc_regions_ensure_undo: Deleted crucible regions");
     Ok(())
 }
@@ -765,10 +769,9 @@ async fn ssc_send_snapshot_request_to_sled_agent_undo(
 
     // ... and instruct each of those regions to delete the snapshot.
     for (dataset, region) in datasets_and_regions {
-        let url = format!("http://{}", dataset.address());
-        let client = CrucibleAgentClient::new(&url);
-
-        delete_crucible_snapshot(log, &client, region.id(), snapshot_id)
+        osagactx
+            .nexus()
+            .delete_crucible_snapshot(log, &dataset, region.id(), snapshot_id)
             .await?;
     }
 
@@ -1091,10 +1094,9 @@ async fn ssc_call_pantry_snapshot_for_disk_undo(
 
     // ... and instruct each of those regions to delete the snapshot.
     for (dataset, region) in datasets_and_regions {
-        let url = format!("http://{}", dataset.address());
-        let client = CrucibleAgentClient::new(&url);
-
-        delete_crucible_snapshot(log, &client, region.id(), snapshot_id)
+        osagactx
+            .nexus()
+            .delete_crucible_snapshot(log, &dataset, region.id(), snapshot_id)
             .await?;
     }
     Ok(())
@@ -1351,16 +1353,15 @@ async fn ssc_start_running_snapshot_undo(
 
     // ... and instruct each of those regions to delete the running snapshot.
     for (dataset, region) in datasets_and_regions {
-        let url = format!("http://{}", dataset.address());
-        let client = CrucibleAgentClient::new(&url);
-
-        delete_crucible_running_snapshot(
-            &log,
-            &client,
-            region.id(),
-            snapshot_id,
-        )
-        .await?;
+        osagactx
+            .nexus()
+            .delete_crucible_running_snapshot(
+                &log,
+                &dataset,
+                region.id(),
+                snapshot_id,
+            )
+            .await?;
 
         osagactx
             .datastore()
diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs
index 22425a0b99..bfd8e6616c 100644
--- a/nexus/src/app/sagas/volume_delete.rs
+++ b/nexus/src/app/sagas/volume_delete.rs
@@ -23,9 +23,6 @@
 //! resources, and when they are inserted or deleted the accounting needs to
 //! change. Saga nodes must be idempotent in order to work correctly.
 
-use super::common_storage::delete_crucible_regions;
-use super::common_storage::delete_crucible_running_snapshots;
-use super::common_storage::delete_crucible_snapshots;
 use super::ActionRegistry;
 use super::NexusActionContext;
 use super::NexusSaga;
@@ -45,17 +42,11 @@ pub(crate) struct Params {
     pub serialized_authn: authn::saga::Serialized,
     pub volume_id: Uuid,
 }
+
 // volume delete saga: actions
 
 declare_saga_actions! {
     volume_delete;
-    // TODO(https://github.com/oxidecomputer/omicron/issues/612):
-    //
-    // We need a way to deal with this operation failing, aside from
-    // propagating the error to the user.
-    //
-    // What if the Sled goes offline? Nexus must ultimately be
-    // responsible for reconciling this scenario.
     DECREASE_CRUCIBLE_RESOURCE_COUNT -> "crucible_resources_to_delete" {
         + svd_decrease_crucible_resource_count
     }
@@ -169,14 +160,16 @@ async fn svd_delete_crucible_regions(
             ))
         })?;
 
-    delete_crucible_regions(log, datasets_and_regions.clone()).await.map_err(
-        |e| {
+    osagactx
+        .nexus()
+        .delete_crucible_regions(log, datasets_and_regions.clone())
+        .await
+        .map_err(|e| {
             ActionError::action_failed(format!(
                 "failed to delete_crucible_regions: {:?}",
                 e,
             ))
-        },
-    )?;
+        })?;
 
     // Remove DB records
     let region_ids_to_delete =
@@ -226,7 +219,9 @@ async fn svd_delete_crucible_running_snapshots(
             ))
         })?;
 
-    delete_crucible_running_snapshots(log, datasets_and_snapshots.clone())
+    osagactx
+        .nexus()
+        .delete_crucible_running_snapshots(log, datasets_and_snapshots.clone())
         .await
         .map_err(|e| {
             ActionError::action_failed(format!(
@@ -267,7 +262,9 @@ async fn svd_delete_crucible_snapshots(
             ))
         })?;
 
-    delete_crucible_snapshots(log, datasets_and_snapshots.clone())
+    osagactx
+        .nexus()
+        .delete_crucible_snapshots(log, datasets_and_snapshots.clone())
         .await
         .map_err(|e| {
             ActionError::action_failed(format!(
@@ -439,7 +436,12 @@ async fn svd_delete_freed_crucible_regions(
         }
 
         // Send DELETE calls to the corresponding Crucible agents
-        delete_crucible_regions(log, vec![(dataset.clone(), region.clone())])
+        osagactx
+            .nexus()
+            .delete_crucible_regions(
+                log,
+                vec![(dataset.clone(), region.clone())],
+            )
             .await
             .map_err(|e| {
                 ActionError::action_failed(format!(
diff --git a/nexus/src/app/session.rs b/nexus/src/app/session.rs
index dd3665161a..fba4b2f0b7 100644
--- a/nexus/src/app/session.rs
+++ b/nexus/src/app/session.rs
@@ -157,9 +157,8 @@ impl super::Nexus {
                 | Error::InsufficientCapacity { .. }
                 | Error::TypeVersionMismatch { .. }
                 | Error::Conflict { .. }
-                | Error::NotFound { .. } => {
-                    Reason::UnknownError { source: error }
-                }
+                | Error::NotFound { .. }
+                | Error::Gone => Reason::UnknownError { source: error },
             })?;
         Ok(db_silo_user.silo_id)
     }
diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs
index ed4fd59277..78ebd83973 100644
--- a/nexus/tests/integration_tests/disks.rs
+++ b/nexus/tests/integration_tests/disks.rs
@@ -11,6 +11,7 @@ use dropshot::HttpErrorResponseBody;
 use http::method::Method;
 use http::StatusCode;
 use nexus_config::RegionAllocationStrategy;
+use nexus_db_model::PhysicalDiskPolicy;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db::datastore::REGION_REDUNDANCY_THRESHOLD;
 use nexus_db_queries::db::fixed_data::{silo::DEFAULT_SILO_ID, FLEET_ID};
@@ -39,11 +40,13 @@ use omicron_common::api::external::NameOrId;
 use omicron_nexus::app::{MAX_DISK_SIZE_BYTES, MIN_DISK_SIZE_BYTES};
 use omicron_nexus::Nexus;
 use omicron_nexus::TestInterfaces as _;
+use omicron_uuid_kinds::GenericUuid;
 use oximeter::types::Datum;
 use oximeter::types::Measurement;
 use sled_agent_client::TestInterfaces as _;
 use std::collections::HashSet;
 use std::sync::Arc;
+use tokio::sync::oneshot;
 use uuid::Uuid;
 
 type ControlPlaneTestContext =
@@ -2457,6 +2460,87 @@ async fn test_region_allocation_after_delete(
     assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD);
 }
 
+#[nexus_test]
+async fn test_no_halt_disk_delete_one_region_on_expunged_agent(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let nexus = &cptestctx.server.server_context().nexus;
+    let datastore = nexus.datastore();
+    let opctx =
+        OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone());
+
+    // Create the regular three 10 GiB zpools, each with one dataset.
+    let disk_test = DiskTest::new(&cptestctx).await;
+
+    // Create a disk
+    let client = &cptestctx.external_client;
+    let _project_id = create_project_and_pool(client).await;
+
+    let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await;
+
+    // Grab the db record now, before the delete
+    let (.., db_disk) = LookupPath::new(&opctx, datastore)
+        .disk_id(disk.identity.id)
+        .fetch()
+        .await
+        .unwrap();
+
+    // Choose one of the datasets, and drop the simulated Crucible agent
+    let zpool = &disk_test.zpools[0];
+    let dataset = &zpool.datasets[0];
+
+    cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id).await;
+
+    // Spawn a task that tries to delete the disk
+    let disk_url = get_disk_url(DISK_NAME);
+    let client = client.clone();
+
+    let (task_started_tx, task_started_rx) = oneshot::channel();
+
+    let jh = tokio::spawn(async move {
+        task_started_tx.send(()).unwrap();
+
+        NexusRequest::object_delete(&client, &disk_url)
+            .authn_as(AuthnMode::PrivilegedUser)
+            .execute()
+            .await
+            .expect("failed to delete disk");
+    });
+
+    // Wait until the task starts
+    task_started_rx.await.unwrap();
+
+    // It won't finish until the dataset is expunged.
+    assert!(!jh.is_finished());
+
+    // Expunge the physical disk
+    let (_, db_zpool) = LookupPath::new(&opctx, datastore)
+        .zpool_id(zpool.id.into_untyped_uuid())
+        .fetch()
+        .await
+        .unwrap();
+
+    datastore
+        .physical_disk_update_policy(
+            &opctx,
+            db_zpool.physical_disk_id,
+            PhysicalDiskPolicy::Expunged,
+        )
+        .await
+        .unwrap();
+
+    // Now, the delete call will finish Ok
+    jh.await.unwrap();
+
+    // Ensure that the disk was properly deleted and all the regions are gone -
+    // Nexus should hard delete the region records in this case.
+
+    let datasets_and_regions =
+        datastore.get_allocated_regions(db_disk.volume_id).await.unwrap();
+
+    assert!(datasets_and_regions.is_empty());
+}
+
 async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk {
     NexusRequest::object_get(client, disk_url)
         .authn_as(AuthnMode::PrivilegedUser)
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index d91b9c9a33..e2a52bf983 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -854,4 +854,8 @@ impl SledAgent {
     ) {
         *self.fake_zones.lock().await = requested_zones;
     }
+
+    pub async fn drop_dataset(&self, zpool_id: ZpoolUuid, dataset_id: Uuid) {
+        self.storage.lock().await.drop_dataset(zpool_id, dataset_id)
+    }
 }
diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs
index 6a688f6101..dac2a4cb48 100644
--- a/sled-agent/src/sim/storage.rs
+++ b/sled-agent/src/sim/storage.rs
@@ -543,6 +543,10 @@ impl Zpool {
 
         None
     }
+
+    pub fn drop_dataset(&mut self, id: Uuid) {
+        let _ = self.datasets.remove(&id).expect("Failed to get the dataset");
+    }
 }
 
 /// Simulated representation of all storage on a sled.
@@ -642,6 +646,7 @@ impl Storage {
     pub fn zpools(&self) -> &HashMap<ZpoolUuid, Zpool> {
         &self.zpools
     }
+
     /// Adds a Dataset to the sled's simulated storage.
     pub async fn insert_dataset(
         &mut self,
@@ -757,6 +762,13 @@ impl Storage {
 
         None
     }
+
+    pub fn drop_dataset(&mut self, zpool_id: ZpoolUuid, dataset_id: Uuid) {
+        self.zpools
+            .get_mut(&zpool_id)
+            .expect("Zpool does not exist")
+            .drop_dataset(dataset_id)
+    }
 }
 
 /// Simulated crucible pantry

From d2af4e414d989f088e6078d81220fcfe6ec9e43e Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Thu, 30 May 2024 09:34:42 -0700
Subject: [PATCH 17/28] [nexus] Remove zones on expunged disks (#5599)

Expunges zones for which the corresponding physical disk, holding
durable data, has been removed.

A follow-up PR will be necessary to expunge zones which have had their
transient zone filesystems removed, but Nexus is not yet aware of where
zone placement decisions are made.

Partial fix of #5372
---
 .../planning/src/blueprint_builder/builder.rs | 133 +++++++++++-------
 nexus/reconfigurator/planning/src/planner.rs  | 122 +++++++++++++---
 nexus/types/src/deployment/zone_type.rs       |  30 ++++
 3 files changed, 218 insertions(+), 67 deletions(-)

diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index 7e98b3906d..c822e87a8f 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -5,6 +5,7 @@
 //! Low-level facility for generating Blueprints
 
 use crate::ip_allocator::IpAllocator;
+use crate::planner::zone_needs_expungement;
 use crate::planner::ZoneExpungeReason;
 use anyhow::anyhow;
 use internal_dns::config::Host;
@@ -25,6 +26,7 @@ use nexus_types::deployment::DiskFilter;
 use nexus_types::deployment::OmicronZoneDataset;
 use nexus_types::deployment::OmicronZoneExternalFloatingIp;
 use nexus_types::deployment::PlanningInput;
+use nexus_types::deployment::SledDetails;
 use nexus_types::deployment::SledFilter;
 use nexus_types::deployment::SledResources;
 use nexus_types::deployment::ZpoolName;
@@ -351,33 +353,72 @@ impl<'a> BlueprintBuilder<'a> {
         self.comments.push(String::from(comment));
     }
 
-    /// Expunges all zones from a sled.
+    /// Expunges all zones requiring expungement from a sled.
     ///
     /// Returns a list of zone IDs expunged (excluding zones that were already
     /// expunged). If the list is empty, then the operation was a no-op.
-    pub(crate) fn expunge_all_zones_for_sled(
+    pub(crate) fn expunge_zones_for_sled(
         &mut self,
         sled_id: SledUuid,
-        reason: ZoneExpungeReason,
-    ) -> Result<BTreeSet<OmicronZoneUuid>, Error> {
+        sled_details: &SledDetails,
+    ) -> Result<BTreeMap<OmicronZoneUuid, ZoneExpungeReason>, Error> {
         let log = self.log.new(o!(
             "sled_id" => sled_id.to_string(),
         ));
 
         // Do any zones need to be marked expunged?
-        let mut zones_to_expunge = BTreeSet::new();
+        let mut zones_to_expunge = BTreeMap::new();
 
         let sled_zones = self.zones.current_sled_zones(sled_id);
-        for (z, state) in sled_zones {
+        for (zone_config, state) in sled_zones {
+            let zone_id = zone_config.id;
+            let log = log.new(o!(
+                "zone_id" => zone_id.to_string()
+            ));
+
+            let Some(reason) =
+                zone_needs_expungement(sled_details, zone_config)
+            else {
+                continue;
+            };
+
             let is_expunged =
-                is_already_expunged(z, state).map_err(|error| {
+                is_already_expunged(zone_config, state).map_err(|error| {
                     Error::Planner(anyhow!(error).context(format!(
                         "for sled {sled_id}, error computing zones to expunge"
                     )))
                 })?;
 
             if !is_expunged {
-                zones_to_expunge.insert(z.id);
+                match reason {
+                    ZoneExpungeReason::DiskExpunged => {
+                        info!(
+                            &log,
+                            "expunged disk with non-expunged zone was found"
+                        );
+                    }
+                    ZoneExpungeReason::SledDecommissioned => {
+                        // A sled marked as decommissioned should have no resources
+                        // allocated to it. If it does, it's an illegal state, possibly
+                        // introduced by a bug elsewhere in the system -- we need to
+                        // produce a loud warning (i.e. an ERROR-level log message) on
+                        // this, while still removing the zones.
+                        error!(
+                            &log,
+                            "sled has state Decommissioned, yet has zone \
+                             allocated to it; will expunge it"
+                        );
+                    }
+                    ZoneExpungeReason::SledExpunged => {
+                        // This is the expected situation.
+                        info!(
+                            &log,
+                            "expunged sled with non-expunged zone found"
+                        );
+                    }
+                }
+
+                zones_to_expunge.insert(zone_id, reason);
             }
         }
 
@@ -389,51 +430,43 @@ impl<'a> BlueprintBuilder<'a> {
             return Ok(zones_to_expunge);
         }
 
-        match reason {
-            ZoneExpungeReason::SledDecommissioned { policy } => {
-                // A sled marked as decommissioned should have no resources
-                // allocated to it. If it does, it's an illegal state, possibly
-                // introduced by a bug elsewhere in the system -- we need to
-                // produce a loud warning (i.e. an ERROR-level log message) on
-                // this, while still removing the zones.
-                error!(
-                    &log,
-                    "sled has state Decommissioned, yet has zones \
-                     allocated to it; will expunge them \
-                     (sled policy is \"{policy:?}\")"
-                );
-            }
-            ZoneExpungeReason::SledExpunged => {
-                // This is the expected situation.
-                info!(
-                    &log,
-                    "expunged sled with {} non-expunged zones found \
-                     (will expunge all zones)",
-                    zones_to_expunge.len()
-                );
-            }
-        }
-
         // Now expunge all the zones that need it.
         let change = self.zones.change_sled_zones(sled_id);
-        change.expunge_zones(zones_to_expunge.clone()).map_err(|error| {
-            anyhow!(error)
-                .context(format!("for sled {sled_id}, error expunging zones"))
-        })?;
-
-        // Finally, add a comment describing what happened.
-        let reason = match reason {
-            ZoneExpungeReason::SledDecommissioned { .. } => {
-                "sled state is decommissioned"
+        change
+            .expunge_zones(zones_to_expunge.keys().cloned().collect())
+            .map_err(|error| {
+                anyhow!(error).context(format!(
+                    "for sled {sled_id}, error expunging zones"
+                ))
+            })?;
+
+        // Finally, add comments describing what happened.
+        //
+        // Group the zones by their reason for expungement.
+        let mut count_disk_expunged = 0;
+        let mut count_sled_decommissioned = 0;
+        let mut count_sled_expunged = 0;
+        for reason in zones_to_expunge.values() {
+            match reason {
+                ZoneExpungeReason::DiskExpunged => count_disk_expunged += 1,
+                ZoneExpungeReason::SledDecommissioned => {
+                    count_sled_decommissioned += 1;
+                }
+                ZoneExpungeReason::SledExpunged => count_sled_expunged += 1,
+            };
+        }
+        let count_and_reason = [
+            (count_disk_expunged, "zone was using expunged disk"),
+            (count_sled_decommissioned, "sled state is decommissioned"),
+            (count_sled_expunged, "sled policy is expunged"),
+        ];
+        for (count, reason) in count_and_reason {
+            if count > 0 {
+                self.comment(format!(
+                    "sled {sled_id} ({reason}): {count} zones expunged",
+                ));
             }
-            ZoneExpungeReason::SledExpunged => "sled policy is expunged",
-        };
-
-        self.comment(format!(
-            "sled {} ({reason}): {} zones expunged",
-            sled_id,
-            zones_to_expunge.len(),
-        ));
+        }
 
         Ok(zones_to_expunge)
     }
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index 6ed81cbb63..7f7b4f61ec 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -12,11 +12,13 @@ use crate::blueprint_builder::EnsureMultiple;
 use crate::blueprint_builder::Error;
 use crate::planner::omicron_zone_placement::PlacementError;
 use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::BlueprintZoneConfig;
 use nexus_types::deployment::BlueprintZoneDisposition;
 use nexus_types::deployment::CockroachDbClusterVersion;
 use nexus_types::deployment::CockroachDbPreserveDowngrade;
 use nexus_types::deployment::CockroachDbSettings;
 use nexus_types::deployment::PlanningInput;
+use nexus_types::deployment::SledDetails;
 use nexus_types::deployment::SledFilter;
 use nexus_types::deployment::ZpoolFilter;
 use nexus_types::external_api::views::SledPolicy;
@@ -170,15 +172,8 @@ impl<'a> Planner<'a> {
         {
             commissioned_sled_ids.insert(sled_id);
 
-            // Does this sled need zone expungement based on the details?
-            let Some(reason) =
-                needs_zone_expungement(sled_details.state, sled_details.policy)
-            else {
-                continue;
-            };
-
-            // Perform the expungement.
-            self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?;
+            // Perform the expungement, for any zones that might need it.
+            self.blueprint.expunge_zones_for_sled(sled_id, sled_details)?;
         }
 
         // Check for any decommissioned sleds (i.e., sleds for which our
@@ -558,7 +553,7 @@ impl<'a> Planner<'a> {
 
 /// Returns `Some(reason)` if the sled needs its zones to be expunged,
 /// based on the policy and state.
-fn needs_zone_expungement(
+fn sled_needs_all_zones_expunged(
     state: SledState,
     policy: SledPolicy,
 ) -> Option<ZoneExpungeReason> {
@@ -569,7 +564,7 @@ fn needs_zone_expungement(
             // an illegal state, but representable. If we see a sled in this
             // state, we should still expunge all zones in it, but parent code
             // should warn on it.
-            return Some(ZoneExpungeReason::SledDecommissioned { policy });
+            return Some(ZoneExpungeReason::SledDecommissioned);
         }
     }
 
@@ -579,13 +574,36 @@ fn needs_zone_expungement(
     }
 }
 
+pub(crate) fn zone_needs_expungement(
+    sled_details: &SledDetails,
+    zone_config: &BlueprintZoneConfig,
+) -> Option<ZoneExpungeReason> {
+    // Should we expunge the zone because the sled is gone?
+    if let Some(reason) =
+        sled_needs_all_zones_expunged(sled_details.state, sled_details.policy)
+    {
+        return Some(reason);
+    }
+
+    // Should we expunge the zone because durable storage is gone?
+    if let Some(durable_storage_zpool) = zone_config.zone_type.zpool() {
+        let zpool_id = durable_storage_zpool.id();
+        if !sled_details.resources.zpool_is_provisionable(&zpool_id) {
+            return Some(ZoneExpungeReason::DiskExpunged);
+        }
+    };
+
+    None
+}
+
 /// The reason a sled's zones need to be expunged.
 ///
 /// This is used only for introspection and logging -- it's not part of the
 /// logical flow.
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub(crate) enum ZoneExpungeReason {
-    SledDecommissioned { policy: SledPolicy },
+    DiskExpunged,
+    SledDecommissioned,
     SledExpunged,
 }
 
@@ -611,6 +629,9 @@ mod test {
     use nexus_types::deployment::CockroachDbPreserveDowngrade;
     use nexus_types::deployment::CockroachDbSettings;
     use nexus_types::deployment::OmicronZoneNetworkResources;
+    use nexus_types::deployment::SledDisk;
+    use nexus_types::external_api::views::PhysicalDiskPolicy;
+    use nexus_types::external_api::views::PhysicalDiskState;
     use nexus_types::external_api::views::SledPolicy;
     use nexus_types::external_api::views::SledProvisionPolicy;
     use nexus_types::external_api::views::SledState;
@@ -1032,7 +1053,7 @@ mod test {
         // Make generated disk ids deterministic
         let mut disk_rng =
             TypedUuidRng::from_seed(TEST_NAME, "NewPhysicalDisks");
-        let mut new_sled_disk = |policy| nexus_types::deployment::SledDisk {
+        let mut new_sled_disk = |policy| SledDisk {
             disk_identity: DiskIdentity {
                 vendor: "test-vendor".to_string(),
                 serial: "test-serial".to_string(),
@@ -1040,7 +1061,7 @@ mod test {
             },
             disk_id: PhysicalDiskUuid::from(disk_rng.next()),
             policy,
-            state: nexus_types::external_api::views::PhysicalDiskState::Active,
+            state: PhysicalDiskState::Active,
         };
 
         let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
@@ -1057,13 +1078,13 @@ mod test {
         for _ in 0..NEW_IN_SERVICE_DISKS {
             sled_details.resources.zpools.insert(
                 ZpoolUuid::from(zpool_rng.next()),
-                new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::InService),
+                new_sled_disk(PhysicalDiskPolicy::InService),
             );
         }
         for _ in 0..NEW_EXPUNGED_DISKS {
             sled_details.resources.zpools.insert(
                 ZpoolUuid::from(zpool_rng.next()),
-                new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::Expunged),
+                new_sled_disk(PhysicalDiskPolicy::Expunged),
             );
         }
 
@@ -1096,6 +1117,73 @@ mod test {
         logctx.cleanup_successful();
     }
 
+    #[test]
+    fn test_disk_expungement_removes_zones() {
+        static TEST_NAME: &str = "planner_disk_expungement_removes_zones";
+        let logctx = test_setup_log(TEST_NAME);
+
+        // Create an example system with a single sled
+        let (collection, input, blueprint1) =
+            example(&logctx.log, TEST_NAME, 1);
+
+        let mut builder = input.into_builder();
+
+        // Aside: Avoid churning on the quantity of Nexus zones - we're okay
+        // staying at one.
+        builder.policy_mut().target_nexus_zone_count = 1;
+
+        // The example system should be assigning crucible zones to each
+        // in-service disk. When we expunge one of these disks, the planner
+        // should remove the associated zone.
+        let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
+        let (_, disk) =
+            sled_details.resources.zpools.iter_mut().next().unwrap();
+        disk.policy = PhysicalDiskPolicy::Expunged;
+
+        let input = builder.build();
+
+        let blueprint2 = Planner::new_based_on(
+            logctx.log.clone(),
+            &blueprint1,
+            &input,
+            "test: expunge a disk",
+            &collection,
+        )
+        .expect("failed to create planner")
+        .with_rng_seed((TEST_NAME, "bp2"))
+        .plan()
+        .expect("failed to plan");
+
+        let diff = blueprint2.diff_since_blueprint(&blueprint1);
+        println!("1 -> 2 (expunge a disk):\n{}", diff.display());
+        assert_eq!(diff.sleds_added.len(), 0);
+        assert_eq!(diff.sleds_removed.len(), 0);
+        assert_eq!(diff.sleds_modified.len(), 1);
+
+        // We should be removing a single zone, associated with the Crucible
+        // using that device.
+        assert_eq!(diff.zones.added.len(), 0);
+        assert_eq!(diff.zones.removed.len(), 0);
+        assert_eq!(diff.zones.modified.len(), 1);
+
+        let (_zone_id, modified_zones) =
+            diff.zones.modified.iter().next().unwrap();
+        assert_eq!(modified_zones.zones.len(), 1);
+        let modified_zone = &modified_zones.zones.first().unwrap().zone;
+        assert!(
+            matches!(modified_zone.kind(), ZoneKind::Crucible),
+            "Expected the modified zone to be a Crucible zone, but it was: {:?}",
+            modified_zone.kind()
+        );
+        assert_eq!(
+            modified_zone.disposition(),
+            BlueprintZoneDisposition::Expunged,
+            "Should have expunged this zone"
+        );
+
+        logctx.cleanup_successful();
+    }
+
     /// Check that the planner will skip non-provisionable sleds when allocating
     /// extra Nexus zones
     #[test]
diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs
index 9f663015cd..5b14f1ee3c 100644
--- a/nexus/types/src/deployment/zone_type.rs
+++ b/nexus/types/src/deployment/zone_type.rs
@@ -33,6 +33,36 @@ pub enum BlueprintZoneType {
 }
 
 impl BlueprintZoneType {
+    /// Returns the zpool being used by this zone, if any.
+    pub fn zpool(&self) -> Option<&omicron_common::zpool_name::ZpoolName> {
+        match self {
+            BlueprintZoneType::ExternalDns(
+                blueprint_zone_type::ExternalDns { dataset, .. },
+            )
+            | BlueprintZoneType::Clickhouse(
+                blueprint_zone_type::Clickhouse { dataset, .. },
+            )
+            | BlueprintZoneType::ClickhouseKeeper(
+                blueprint_zone_type::ClickhouseKeeper { dataset, .. },
+            )
+            | BlueprintZoneType::CockroachDb(
+                blueprint_zone_type::CockroachDb { dataset, .. },
+            )
+            | BlueprintZoneType::Crucible(blueprint_zone_type::Crucible {
+                dataset,
+                ..
+            })
+            | BlueprintZoneType::InternalDns(
+                blueprint_zone_type::InternalDns { dataset, .. },
+            ) => Some(&dataset.pool_name),
+            BlueprintZoneType::BoundaryNtp(_)
+            | BlueprintZoneType::InternalNtp(_)
+            | BlueprintZoneType::Nexus(_)
+            | BlueprintZoneType::Oximeter(_)
+            | BlueprintZoneType::CruciblePantry(_) => None,
+        }
+    }
+
     pub fn external_networking(
         &self,
     ) -> Option<(OmicronZoneExternalIp, &NetworkInterface)> {

From 7cbb7da250e747b651d66e3edc0a5747d8faa567 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Thu, 30 May 2024 13:55:19 -0700
Subject: [PATCH 18/28] [nexus] add instance-updater lock (#5831)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to simplify the instance lifecycle state machine and ensure
that instance state updates are processed reliably, we intend to perform
instance state updates in a saga (which will be added in PR #5749). This
saga will require a notion of mutual exclusion between update sagas for
the same instance, in order to avoid race conditions like the following:

1. Sagas `S1` and `S2` start at the same time and observe the same
instance/VMM states, which indicate that the instance’s active VMM has
shut down
2. `S1` clears all the resources/provisioning counters and marks the
instance as `Stopped`
3. User restarts the instance
4. `S2` clears the same instance provisioning counters again

Presently, these races are avoided by the fact that instance state
updates are performed partially in `sled-agent`, which serves as an
"actor" with exclusive ownership over the state transition. Moving these
state transitions to Nexus requires introducing mutual exclusion.

This commit adds a distributed lock on instance state transitions to the
datastore. We add the following fields to the `instance` table:

- `updater_id`, which is the UUID of the saga currently holding the
update lock on the instance (or `NULL` if no saga has locked the
instance)
- `updater_gen`, a generation counter that is incremented each time the
lock is acquired by a new saga
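
To make this encoding concrete, a hypothetical helper (illustration only,
not part of this PR) that classifies the lock state as seen by one saga
might look like:

    use uuid::Uuid;

    /// Illustration: how a saga whose generated lock ID is `saga_lock_id`
    /// interprets the `updater_id` column it has just read.
    fn lock_state(updater_id: Option<Uuid>, saga_lock_id: Uuid) -> &'static str {
        match updater_id {
            // NULL column: nobody holds the lock.
            None => "unlocked",
            // Our own ID: we already hold the lock.
            Some(id) if id == saga_lock_id => "locked by this saga",
            // Any other ID: another saga holds the lock.
            Some(_) => "locked by another saga",
        }
    }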

Using these fields, we can add new datastore methods to try and acquire
an instance update lock by doing the following:

1. Generate a UUID for the saga, `saga_lock_id`. This will be performed
in the saga itself and isn't part of this PR.
2. Read the instance record and interpret the value of the `updater_id`
field as follows:
- `NULL`: lock not held, we can acquire it by incrementing the
`updater_gen` field and setting the `updater_id` field to the saga's
UUID.
- `updater_id == saga_id`: the saga already holds the lock, we can
proceed with the update.
- `updater_id != saga_id`: another saga holds the lock, we can't proceed
with the update. Fail the operation.
3. Attempt to write back the updated instance record with the generation
incremented and the `updater_id` set to the saga's UUID, conditional on
the `updater_gen` field being equal to the generation that was read when
the instance record was fetched in step (2). This is equivalent to the
atomic compare-and-swap operation that one might use to implement a
non-distributed lock in a single address space.
- If this fails because the generation number is outdated, another saga
won the race; try again (i.e. goto (2)).
- If this succeeds, the lock was acquired successfully.

Additionally, we can add a method for unlocking an instance record by
clearing the `updater_id` field and incrementing the `updater_gen`. This
query is conditional on the `updater_id` field being equal to the saga's
UUID, to prevent cases where we accidentally unlock an instance that was
locked by a different saga.
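
As a rough sketch of the intended calling pattern (hypothetical action
names, imports elided; the real update saga arrives in #5749), a saga
action and its reverse action would use these methods roughly as follows:

    // `UpdaterLock` and `UpdaterLockError` are the types added in this change.
    async fn siu_lock_instance(
        datastore: &DataStore,
        opctx: &OpContext,
        authz_instance: &authz::Instance,
        saga_lock_id: Uuid,
    ) -> Result<UpdaterLock, UpdaterLockError> {
        // Acquire the lock, or re-acquire it if this action is retried.
        datastore
            .instance_updater_lock(opctx, authz_instance, saga_lock_id)
            .await
    }

    async fn siu_lock_instance_undo(
        datastore: &DataStore,
        opctx: &OpContext,
        authz_instance: &authz::Instance,
        lock: UpdaterLock,
    ) -> Result<(), Error> {
        // Release the lock while unwinding; `Ok(false)` only means it was
        // already released, which is fine for an idempotent reverse action.
        let _was_held = datastore
            .instance_updater_unlock(opctx, authz_instance, lock)
            .await?;
        Ok(())
    }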

Introducing this distributed lock is considered fairly safe, as it will
only ever be acquired in a saga, and the reverse action for the saga
action that acquires the lock will ensure that the lock is released if
the saga unwinds. Essentially, this is equivalent to a RAII guard
releasing a lock when a thread panics in a single-threaded Rust program.

Presently, none of these methods are actually used. The saga that uses
them will be added in PR #5749. I've factored out this change into its
own PR so that we can merge the foundation needed for that branch.
Hopefully this makes the diff a bit smaller and easier to review, as
well as decreasing merge conflict churn with the schema changes.
---
 nexus/db-model/src/instance.rs                |  22 +
 nexus/db-model/src/schema.rs                  |   2 +
 nexus/db-model/src/schema_versions.rs         |   3 +-
 nexus/db-queries/src/db/datastore/instance.rs | 544 ++++++++++++++++++
 nexus/db-queries/src/db/datastore/mod.rs      |   2 +-
 schema/crdb/add-instance-updater-lock/up.sql  |   5 +
 schema/crdb/dbinit.sql                        |  12 +-
 7 files changed, 586 insertions(+), 4 deletions(-)
 create mode 100644 schema/crdb/add-instance-updater-lock/up.sql

diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs
index 286c68ac7c..8f110aff71 100644
--- a/nexus/db-model/src/instance.rs
+++ b/nexus/db-model/src/instance.rs
@@ -179,6 +179,24 @@ pub struct InstanceRuntimeState {
     /// This field is guarded by the instance's `gen`.
     #[diesel(column_name = migration_id)]
     pub migration_id: Option<Uuid>,
+
+    /// A UUID identifying the saga currently holding the update lock on this
+    /// instance. If this is [`None`] the instance is not locked. Otherwise, if
+    /// this is [`Some`], the instance is locked by the saga owning this UUID.
+    /// Note that this is not (presently) the UUID *of* the locking saga, but
+    /// rather, a UUID *generated by* that saga. Therefore, it may not be
+    /// useable to look up which saga holds the lock.
+    ///
+    /// This field is guarded by the instance's `updater_gen`
+    #[diesel(column_name = updater_id)]
+    pub updater_id: Option<Uuid>,
+
+    /// The generation number for the updater lock. This is updated whenever the
+    /// lock is acquired or released, and is used in attempts to set the
+    /// `updater_id` field to ensure that the snapshot which indicated that the
+    /// lock was not held is still valid when setting the lock ID.
+    #[diesel(column_name = updater_gen)]
+    pub updater_gen: Generation,
 }
 
 impl InstanceRuntimeState {
@@ -190,6 +208,8 @@ impl InstanceRuntimeState {
             dst_propolis_id: None,
             migration_id: None,
             gen: Generation::new(),
+            updater_gen: Generation::new(),
+            updater_id: None,
         }
     }
 }
@@ -213,6 +233,8 @@ impl From<omicron_common::api::internal::nexus::InstanceRuntimeState>
             propolis_id: state.propolis_id,
             dst_propolis_id: state.dst_propolis_id,
             migration_id: state.migration_id,
+            updater_gen: Generation::new(),
+            updater_id: None,
         }
     }
 }
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 94e699443c..22ef60483d 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -430,6 +430,8 @@ table! {
         active_propolis_id -> Nullable<Uuid>,
         target_propolis_id -> Nullable<Uuid>,
         migration_id -> Nullable<Uuid>,
+        updater_id -> Nullable<Uuid>,
+        updater_gen -> Int8,
     }
 }
 
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index 75e1d7e440..b417570a6c 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema.  Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(66, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(67, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(67, "add-instance-updater-lock"),
         KnownVersion::new(66, "blueprint-crdb-preserve-downgrade"),
         KnownVersion::new(65, "region-replacement"),
         KnownVersion::new(64, "add-view-for-v2p-mappings"),
diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs
index ce40e20501..60fd5c9dc3 100644
--- a/nexus/db-queries/src/db/datastore/instance.rs
+++ b/nexus/db-queries/src/db/datastore/instance.rs
@@ -18,6 +18,7 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::identity::Resource;
 use crate::db::lookup::LookupPath;
+use crate::db::model::Generation;
 use crate::db::model::Instance;
 use crate::db::model::InstanceRuntimeState;
 use crate::db::model::Name;
@@ -26,6 +27,7 @@ use crate::db::model::Sled;
 use crate::db::model::Vmm;
 use crate::db::pagination::paginated;
 use crate::db::update_and_check::UpdateAndCheck;
+use crate::db::update_and_check::UpdateAndQueryResult;
 use crate::db::update_and_check::UpdateStatus;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
@@ -114,6 +116,29 @@ impl From<InstanceAndActiveVmm> for omicron_common::api::external::Instance {
     }
 }
 
+/// A token which represents that a saga holds the instance-updater lock on a
+/// particular instance.
+///
+/// This is returned by [`DataStore::instance_updater_lock`] if the lock is
+/// successfully acquired, and passed to [`DataStore::instance_updater_unlock`]
+/// when the lock is released.
+#[derive(Debug, serde::Serialize, serde::Deserialize)]
+pub struct UpdaterLock {
+    saga_lock_id: Uuid,
+    locked_gen: Generation,
+}
+
+/// Errors returned by [`DataStore::instance_updater_lock`].
+#[derive(Debug, thiserror::Error, PartialEq)]
+pub enum UpdaterLockError {
+    /// The instance was already locked by another saga.
+    #[error("instance already locked by another saga")]
+    AlreadyLocked,
+    /// An error occurred executing the query.
+    #[error("error locking instance: {0}")]
+    Query(#[from] Error),
+}
+
 impl DataStore {
     /// Idempotently insert a database record for an Instance
     ///
@@ -529,4 +554,523 @@ impl DataStore {
 
         Ok(())
     }
+
+    /// Attempts to lock an instance's record to apply state updates in an
+    /// instance-update saga, returning the state of the instance when the lock
+    /// was acquired.
+    ///
+    /// # Notes
+    ///
+    /// This method MUST only be called from the context of a saga! The
+    /// calling saga must ensure that the reverse action for the action that
+    /// acquires the lock calls [`DataStore::instance_updater_unlock`] so
+    /// that the lock is always released if the saga unwinds.
+    ///
+    /// This method is idempotent: if the instance is already locked by the same
+    /// saga, it will succeed, as though the lock was acquired.
+    ///
+    /// # Arguments
+    ///
+    /// - `authz_instance`: the instance to attempt to lock
+    /// - `saga_lock_id`: the UUID of the saga that's attempting to lock this
+    ///   instance.
+    ///
+    /// # Returns
+    ///
+    /// - [`Ok`]`(`[`UpdaterLock`]`)` if the lock was acquired.
+    /// - [`Err`]`([`UpdaterLockError::AlreadyLocked`])` if the instance was
+    ///   locked by another saga.
+    /// - [`Err`]`([`UpdaterLockError::Query`]`(...))` if the query to fetch
+    ///   the instance or lock it returned another error (such as if the
+    ///   instance no longer exists, or if the database connection failed).
+    pub async fn instance_updater_lock(
+        &self,
+        opctx: &OpContext,
+        authz_instance: &authz::Instance,
+        saga_lock_id: Uuid,
+    ) -> Result<UpdaterLock, UpdaterLockError> {
+        use db::schema::instance::dsl;
+
+        let mut instance = self.instance_refetch(opctx, authz_instance).await?;
+        let instance_id = instance.id();
+        // `true` if the instance was locked by *this* call to
+        // `instance_updater_lock`, *false* in the (rare) case that it was
+        // previously locked by *this* saga's ID. This is used only for logging,
+        // as this method is idempotent --- if the instance's current updater ID
+        // matches the provided saga ID, this method completes successfully.
+        //
+        // XXX(eliza): I *think* this is the right behavior for sagas, since
+        // saga actions are expected to be idempotent...but it also means that a
+        // UUID collision would allow two sagas to lock the instance. But...(1)
+        // a UUID collision is extremely unlikely, and (2), if a UUID collision
+        // *did* occur, the odds are even lower that the same UUID would be
+        // assigned to two instance-update sagas which both try to update the
+        // *same* instance at the same time. So, idempotency is probably more
+        // important than handling that extremely unlikely edge case.
+        let mut did_lock = false;
+        loop {
+            match instance.runtime_state.updater_id {
+                // If the `updater_id` field is not null and the ID equals this
+                // saga's ID, we already have the lock. We're done here!
+                Some(lock_id) if lock_id == saga_lock_id => {
+                    slog::info!(
+                        &opctx.log,
+                        "instance updater lock acquired!";
+                        "instance_id" => %instance_id,
+                        "saga_id" => %saga_lock_id,
+                        "already_locked" => !did_lock,
+                    );
+                    return Ok(UpdaterLock {
+                        saga_lock_id,
+                        locked_gen: instance.runtime_state.updater_gen,
+                    });
+                }
+                // The `updater_id` field is set, but it's not our ID. The instance
+                // is locked by a different saga, so give up.
+                Some(lock_id) => {
+                    slog::info!(
+                        &opctx.log,
+                        "instance is locked by another saga";
+                        "instance_id" => %instance_id,
+                        "locked_by" => %lock_id,
+                        "saga_id" => %saga_lock_id,
+                    );
+                    return Err(UpdaterLockError::AlreadyLocked);
+                }
+                // No saga's ID is set as the instance's `updater_id`. We can
+                // attempt to lock it.
+                None => {}
+            }
+
+            // Okay, now attempt to acquire the lock
+            let current_gen = instance.runtime_state.updater_gen;
+            slog::debug!(
+                &opctx.log,
+                "attempting to acquire instance updater lock";
+                "instance_id" => %instance_id,
+                "saga_id" => %saga_lock_id,
+                "current_gen" => ?current_gen,
+            );
+
+            (instance, did_lock) = diesel::update(dsl::instance)
+                .filter(dsl::time_deleted.is_null())
+                .filter(dsl::id.eq(instance_id))
+                // If the generation is the same as the captured generation when we
+                // read the instance record to check if it was not locked, we can
+                // lock this instance. This is because changing the `updater_id`
+                // field always increments the generation number. Therefore, we
+                // want the update query to succeed if and only if the
+                // generation number remains the same as the generation when we
+                // last fetched the instance. This query is used equivalently to
+                // an atomic compare-and-swap instruction in the implementation
+                // of a non-distributed, single-process mutex.
+                .filter(dsl::updater_gen.eq(current_gen))
+                .set((
+                    dsl::updater_gen.eq(dsl::updater_gen + 1),
+                    dsl::updater_id.eq(Some(saga_lock_id)),
+                ))
+                .check_if_exists::<Instance>(instance_id)
+                .execute_and_check(
+                    &*self.pool_connection_authorized(opctx).await?,
+                )
+                .await
+                .map(|r| {
+                    // If we successfully updated the instance record, we have
+                    // acquired the lock; otherwise, we haven't --- either because
+                    // our generation is stale, or because the instance is already locked.
+                    let locked = match r.status {
+                        UpdateStatus::Updated => true,
+                        UpdateStatus::NotUpdatedButExists => false,
+                    };
+                    (r.found, locked)
+                })
+                .map_err(|e| {
+                    public_error_from_diesel(
+                        e,
+                        ErrorHandler::NotFoundByLookup(
+                            ResourceType::Instance,
+                            LookupType::ById(instance_id),
+                        ),
+                    )
+                })?;
+        }
+    }
+
+    /// Release the instance-updater lock acquired by
+    /// [`DataStore::instance_updater_lock`].
+    ///
+    /// This method will unlock the instance if (and only if) the lock is
+    /// currently held by the provided `saga_lock_id`. If the lock is held by a
+    /// different saga UUID, the instance will remain locked. If the instance
+    /// has already been unlocked, this method will return `false`.
+    ///
+    /// # Arguments
+    ///
+    /// - `authz_instance`: the instance to attempt to unlock
+    /// - `updater_lock`: an [`UpdaterLock`] token representing the acquired
+    ///   lock to release.
+    pub async fn instance_updater_unlock(
+        &self,
+        opctx: &OpContext,
+        authz_instance: &authz::Instance,
+        UpdaterLock { saga_lock_id, locked_gen }: UpdaterLock,
+    ) -> Result<bool, Error> {
+        use db::schema::instance::dsl;
+
+        let instance_id = authz_instance.id();
+
+        let result = diesel::update(dsl::instance)
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::id.eq(instance_id))
+            // Only unlock the instance if:
+            // - the provided updater ID matches that of the saga that has
+            //   currently locked this instance.
+            .filter(dsl::updater_id.eq(Some(saga_lock_id)))
+            // - the provided updater generation matches the current updater
+            //   generation.
+            .filter(dsl::updater_gen.eq(locked_gen))
+            .set((
+                dsl::updater_gen.eq(Generation(locked_gen.0.next())),
+                dsl::updater_id.eq(None::<Uuid>),
+            ))
+            .check_if_exists::<Instance>(instance_id)
+            .execute_and_check(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| {
+                public_error_from_diesel(
+                    e,
+                    ErrorHandler::NotFoundByLookup(
+                        ResourceType::Instance,
+                        LookupType::ById(instance_id),
+                    ),
+                )
+            })?;
+
+        match result {
+            // If we updated the record, the lock has been released! Return
+            // `Ok(true)` to indicate that we released the lock successfully.
+            UpdateAndQueryResult { status: UpdateStatus::Updated, .. } => {
+                Ok(true)
+            }
+            // The generation has advanced past the generation at which the
+            // lock was held. This means that we have already released the
+            // lock. Return `Ok(false)` here for idempotency.
+            UpdateAndQueryResult {
+                status: UpdateStatus::NotUpdatedButExists,
+                ref found,
+            } if found.runtime_state.updater_gen > locked_gen => Ok(false),
+            // The instance exists, but the lock ID doesn't match our lock ID.
+            // This means we were trying to release a lock we never held, which
+            // is almost certainly a programmer error.
+            UpdateAndQueryResult { ref found, .. } => {
+                match found.runtime_state.updater_id {
+                    Some(lock_holder) => {
+                        debug_assert_ne!(lock_holder, saga_lock_id);
+                        Err(Error::internal_error(
+                            "attempted to release a lock held by another saga! this is a bug!",
+                        ))
+                    },
+                    None => Err(Error::internal_error(
+                            "attempted to release a lock on an instance that is not locked! this is a bug!",
+                        )),
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::db::datastore::test_utils::datastore_test;
+    use crate::db::fixed_data;
+    use crate::db::lookup::LookupPath;
+    use nexus_db_model::Project;
+    use nexus_test_utils::db::test_setup_database;
+    use nexus_types::external_api::params;
+    use omicron_common::api::external::ByteCount;
+    use omicron_common::api::external::IdentityMetadataCreateParams;
+    use omicron_test_utils::dev;
+
+    async fn test_setup(
+        datastore: &DataStore,
+        opctx: &OpContext,
+    ) -> authz::Instance {
+        let silo_id = *fixed_data::silo::DEFAULT_SILO_ID;
+        let project_id = Uuid::new_v4();
+        let instance_id = Uuid::new_v4();
+
+        let (authz_project, _project) = datastore
+            .project_create(
+                &opctx,
+                Project::new_with_id(
+                    project_id,
+                    silo_id,
+                    params::ProjectCreate {
+                        identity: IdentityMetadataCreateParams {
+                            name: "stuff".parse().unwrap(),
+                            description: "Where I keep my stuff".into(),
+                        },
+                    },
+                ),
+            )
+            .await
+            .expect("project must be created successfully");
+
+        let _ = datastore
+            .project_create_instance(
+                &opctx,
+                &authz_project,
+                Instance::new(
+                    instance_id,
+                    project_id,
+                    &params::InstanceCreate {
+                        identity: IdentityMetadataCreateParams {
+                            name: "myinstance".parse().unwrap(),
+                            description: "It's an instance".into(),
+                        },
+                        ncpus: 2i64.try_into().unwrap(),
+                        memory: ByteCount::from_gibibytes_u32(16),
+                        hostname: "myhostname".try_into().unwrap(),
+                        user_data: Vec::new(),
+                        network_interfaces:
+                            params::InstanceNetworkInterfaceAttachment::None,
+                        external_ips: Vec::new(),
+                        disks: Vec::new(),
+                        ssh_public_keys: None,
+                        start: false,
+                    },
+                ),
+            )
+            .await
+            .expect("instance must be created successfully");
+
+        let (.., authz_instance) = LookupPath::new(&opctx, &datastore)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::Modify)
+            .await
+            .expect("instance must exist");
+        authz_instance
+    }
+
+    #[tokio::test]
+    async fn test_instance_updater_acquires_lock() {
+        // Setup
+        let logctx = dev::test_setup_log("test_instance_updater_acquires_lock");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+        let saga1 = Uuid::new_v4();
+        let saga2 = Uuid::new_v4();
+        let authz_instance = test_setup(&datastore, &opctx).await;
+
+        macro_rules! assert_locked {
+            ($id:expr) => {{
+                let lock = dbg!(
+                    datastore
+                        .instance_updater_lock(&opctx, &authz_instance, $id)
+                        .await
+                )
+                .expect(concat!(
+                    "instance must be locked by ",
+                    stringify!($id)
+                ));
+                assert_eq!(
+                    lock.saga_lock_id,
+                    $id,
+                    "instance's `updater_id` must be set to {}",
+                    stringify!($id),
+                );
+                lock
+            }};
+        }
+
+        macro_rules! assert_not_locked {
+            ($id:expr) => {
+                let err = dbg!(datastore
+                    .instance_updater_lock(&opctx, &authz_instance, $id)
+                    .await)
+                    .expect_err("attempting to lock the instance while it is already locked must fail");
+                assert_eq!(
+                    err,
+                    UpdaterLockError::AlreadyLocked,
+                );
+            };
+        }
+
+        // attempt to lock the instance from saga 1
+        let lock1 = assert_locked!(saga1);
+
+        // now, also attempt to lock the instance from saga 2. this must fail.
+        assert_not_locked!(saga2);
+
+        // unlock the instance from saga 1
+        let unlocked = datastore
+            .instance_updater_unlock(&opctx, &authz_instance, lock1)
+            .await
+            .expect("instance must be unlocked by saga 1");
+        assert!(unlocked, "instance must actually be unlocked");
+
+        // now, locking the instance from saga 2 should succeed.
+        let lock2 = assert_locked!(saga2);
+
+        // trying to lock the instance again from saga 1 should fail
+        assert_not_locked!(saga1);
+
+        // unlock the instance from saga 2
+        let unlocked = datastore
+            .instance_updater_unlock(&opctx, &authz_instance, lock2)
+            .await
+            .expect("instance must be unlocked by saga 2");
+        assert!(unlocked, "instance must actually be unlocked");
+
+        // Clean up.
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_instance_updater_lock_is_idempotent() {
+        // Setup
+        let logctx =
+            dev::test_setup_log("test_instance_updater_lock_is_idempotent");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+        let authz_instance = test_setup(&datastore, &opctx).await;
+        let saga1 = Uuid::new_v4();
+
+        // attempt to lock the instance once.
+        let lock1 = dbg!(
+            datastore
+                .instance_updater_lock(&opctx, &authz_instance, saga1)
+                .await
+        )
+        .expect("instance should be locked");
+        assert_eq!(lock1.saga_lock_id, saga1);
+
+        // doing it again should be fine.
+        let lock2 = dbg!(
+            datastore
+                .instance_updater_lock(&opctx, &authz_instance, saga1)
+                .await
+        )
+        .expect(
+            "instance_updater_lock should succeed again with the same saga ID",
+        );
+        assert_eq!(lock2.saga_lock_id, saga1);
+        // the generation should not have changed as a result of the second
+        // update.
+        assert_eq!(lock1.locked_gen, lock2.locked_gen);
+
+        // now, unlock the instance.
+        let unlocked = dbg!(
+            datastore
+                .instance_updater_unlock(&opctx, &authz_instance, lock1)
+                .await
+        )
+        .expect("instance should unlock");
+        assert!(unlocked, "instance should have unlocked");
+
+        // unlocking it again should also succeed...
+        let unlocked = dbg!(
+            datastore
+                .instance_updater_unlock(&opctx, &authz_instance, lock2)
+                .await
+        )
+        .expect("instance should unlock again");
+        // ...but the `locked` bool should now be false.
+        assert!(!unlocked, "instance should already have been unlocked");
+
+        // Clean up.
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn test_instance_updater_unlocking_someone_elses_instance_errors() {
+        // Setup
+        let logctx = dev::test_setup_log(
+            "test_instance_updater_unlocking_someone_elses_instance_errors",
+        );
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+        let authz_instance = test_setup(&datastore, &opctx).await;
+        let saga1 = Uuid::new_v4();
+        let saga2 = Uuid::new_v4();
+
+        // lock the instance once.
+        let lock1 = dbg!(
+            datastore
+                .instance_updater_lock(&opctx, &authz_instance, saga1)
+                .await
+        )
+        .expect("instance should be locked");
+
+        // attempting to unlock with a different saga ID should be an error.
+        let err = dbg!(
+            datastore
+                .instance_updater_unlock(
+                    &opctx,
+                    &authz_instance,
+                    // N.B. that the `UpdaterLock` type's fields are private
+                    // specifically to *prevent* callers from accidentally doing
+                    // what we're doing here. But this simulates a case where
+                    // an incorrect one is constructed, or a raw database query
+                    // attempts an invalid unlock operation.
+                    UpdaterLock {
+                        saga_lock_id: saga2,
+                        locked_gen: lock1.locked_gen,
+                    },
+                )
+                .await
+        )
+        .expect_err(
+            "unlocking the instance with someone else's ID should fail",
+        );
+        assert_eq!(
+            err,
+            Error::internal_error(
+                "attempted to release a lock held by another saga! \
+                this is a bug!",
+            ),
+        );
+        let next_gen = Generation(lock1.locked_gen.0.next());
+
+        // unlocking with the correct ID should succeed.
+        let unlocked = dbg!(
+            datastore
+                .instance_updater_unlock(&opctx, &authz_instance, lock1)
+                .await
+        )
+        .expect("instance should unlock");
+        assert!(unlocked, "instance should have unlocked");
+
+        // unlocking with the lock holder's ID *again* at a new generation
+        // (where the lock is no longer held) should fail.
+        let err = dbg!(
+            datastore
+                .instance_updater_unlock(
+                    &opctx,
+                    &authz_instance,
+                    // Again, these fields are private specifically to prevent
+                    // you from doing this exact thing. But, we should still
+                    // test that we handle it gracefully.
+                    UpdaterLock { saga_lock_id: saga1, locked_gen: next_gen },
+                )
+                .await
+        )
+        .expect_err(
+            "unlocking an instance that is no longer locked should fail",
+        );
+        assert_eq!(
+            err,
+            Error::internal_error(
+                "attempted to release a lock on an instance \
+                that is not locked! this is a bug!"
+            ),
+        );
+
+        // Clean up.
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index b5cb749162..b90f81affb 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -64,7 +64,7 @@ mod dns;
 mod external_ip;
 mod identity_provider;
 mod image;
-mod instance;
+pub mod instance;
 mod inventory;
 mod ip_pool;
 mod ipv4_nat_entry;
diff --git a/schema/crdb/add-instance-updater-lock/up.sql b/schema/crdb/add-instance-updater-lock/up.sql
new file mode 100644
index 0000000000..d3401527e2
--- /dev/null
+++ b/schema/crdb/add-instance-updater-lock/up.sql
@@ -0,0 +1,5 @@
+ALTER TABLE omicron.public.instance
+    ADD COLUMN IF NOT EXISTS updater_id UUID
+        DEFAULT NULL,
+    ADD COLUMN IF NOT EXISTS updater_gen INT
+        NOT NULL DEFAULT 0;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index cf4ac4b20b..d254c00138 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -1007,7 +1007,15 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance (
     ncpus INT NOT NULL,
     memory INT NOT NULL,
     hostname STRING(63) NOT NULL,
-    boot_on_fault BOOL NOT NULL DEFAULT false
+    boot_on_fault BOOL NOT NULL DEFAULT false,
+
+    /* ID of the instance update saga that has locked this instance for
+     * updating, if one exists. */
+    updater_id UUID,
+
+    /* Generation of the instance updater lock */
+    updater_gen INT NOT NULL DEFAULT 0
+
 );
 
 -- Names for instances within a project should be unique
@@ -4011,7 +4019,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '66.0.0', NULL)
+    (TRUE, NOW(), NOW(), '67.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;

From 0cd39427a8146203a9797872aa476fc8df90f62c Mon Sep 17 00:00:00 2001
From: artemis everfree <artemis@oxidecomputer.com>
Date: Thu, 30 May 2024 13:57:06 -0700
Subject: [PATCH 19/28] add `oxlog services` to print filterable service names
 (#5829)

`oxlog logs` can take a service name to filter its output to logs from a single service in a zone. This PR adds `oxlog services`, which makes it easier to discover those service names: run `oxlog services <zone>` to list the service names in a zone, and pass those names to `oxlog logs`.
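
For example (the zone and service names below are illustrative, not taken
from a real system):

    $ oxlog services oxz_switch
    dendrite
    mg-ddm

    $ oxlog logs oxz_switch mg-ddm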
---
 dev-tools/oxlog/src/bin/oxlog.rs | 34 +++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs
index ed1c1a1fc8..4a90f5b773 100644
--- a/dev-tools/oxlog/src/bin/oxlog.rs
+++ b/dev-tools/oxlog/src/bin/oxlog.rs
@@ -6,6 +6,7 @@
 
 use clap::{ArgAction, Args, Parser, Subcommand};
 use oxlog::{Filter, LogFile, Zones};
+use std::collections::BTreeSet;
 
 #[derive(Debug, Parser)]
 #[command(version)]
@@ -21,7 +22,7 @@ enum Commands {
 
     /// List logs for a given service
     Logs {
-        // The name of the zone
+        /// The name of the zone
         zone: String,
 
         /// The name of the service to list logs for
@@ -34,6 +35,14 @@ enum Commands {
         #[command(flatten)]
         filter: FilterArgs,
     },
+
+    /// List the names of all services in a zone, from the perspective of oxlog.
+    /// Use these names with `oxlog logs` to filter output to logs from a
+    /// specific service.
+    Services {
+        /// The name of the zone
+        zone: String,
+    },
 }
 
 #[derive(Args, Debug)]
@@ -124,5 +133,28 @@ fn main() -> Result<(), anyhow::Error> {
             }
             Ok(())
         }
+        Commands::Services { zone } => {
+            let zones = Zones::load()?;
+
+            // We want all logs that exist, anywhere, so we can find their
+            // service names.
+            let filter = Filter {
+                current: true,
+                archived: true,
+                extra: true,
+                show_empty: true,
+            };
+
+            // Collect a unique set of services, based on the logs in the
+            // specified zone
+            let services: BTreeSet<String> =
+                zones.zone_logs(&zone, filter).into_keys().collect();
+
+            for svc in services {
+                println!("{}", svc);
+            }
+
+            Ok(())
+        }
     }
 }

From 82027322d93b18e93617838bd4a3e4a49a519785 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 15:18:09 -0700
Subject: [PATCH 20/28] Update Rust crate either to 1.12.0 (#5803)

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index e6b0ffb099..0065e26618 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -261,7 +261,7 @@ dns-service-client = { path = "clients/dns-service-client" }
 dpd-client = { path = "clients/dpd-client" }
 dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] }
 dyn-clone = "1.0.17"
-either = "1.11.0"
+either = "1.12.0"
 expectorate = "1.1.0"
 fatfs = "0.3.6"
 filetime = "0.2.23"

From 63fc73b54df9f2d759153f14a46f269f819627d5 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 15:18:21 -0700
Subject: [PATCH 21/28] Update Rust crate serde to v1.0.203 (#5787)

---
 Cargo.lock                | 8 ++++----
 workspace-hack/Cargo.toml | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7b8326fb8d..ce10771d8d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8372,9 +8372,9 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.202"
+version = "1.0.203"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
+checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
 dependencies = [
  "serde_derive",
 ]
@@ -8419,9 +8419,9 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.202"
+version = "1.0.203"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
+checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 3b5e1917d0..ab5d08d711 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -92,7 +92,7 @@ ring = { version = "0.17.8", features = ["std"] }
 schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] }
 scopeguard = { version = "1.2.0" }
 semver = { version = "1.0.23", features = ["serde"] }
-serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] }
+serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] }
 serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] }
 sha2 = { version = "0.10.8", features = ["oid"] }
 similar = { version = "2.5.0", features = ["inline", "unicode"] }
@@ -197,7 +197,7 @@ ring = { version = "0.17.8", features = ["std"] }
 schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] }
 scopeguard = { version = "1.2.0" }
 semver = { version = "1.0.23", features = ["serde"] }
-serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] }
+serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] }
 serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] }
 sha2 = { version = "0.10.8", features = ["oid"] }
 similar = { version = "2.5.0", features = ["inline", "unicode"] }

From d79a51d57bdf324947275841ac849f2b37edff3a Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 15:18:56 -0700
Subject: [PATCH 22/28] Update Rust crate libc to 0.2.155 (#5819)

---
 Cargo.lock                | 6 +++---
 Cargo.toml                | 2 +-
 workspace-hack/Cargo.toml | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ce10771d8d..a0878a0c32 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3956,9 +3956,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 
 [[package]]
 name = "libc"
-version = "0.2.153"
+version = "0.2.155"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
 
 [[package]]
 name = "libdlpi-sys"
@@ -4024,7 +4024,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
 dependencies = [
  "cfg-if",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 0065e26618..fc8811e9b5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -306,7 +306,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] }
 ispf = { git = "https://github.com/oxidecomputer/ispf" }
 key-manager = { path = "key-manager" }
 kstat-rs = "0.2.3"
-libc = "0.2.153"
+libc = "0.2.155"
 libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" }
 libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" }
 linear-map = "1.2.0"
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index ab5d08d711..ee4dcccb70 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -68,7 +68,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" }
 itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" }
 lalrpop-util = { version = "0.19.12" }
 lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] }
-libc = { version = "0.2.153", features = ["extra_traits"] }
+libc = { version = "0.2.155", features = ["extra_traits"] }
 log = { version = "0.4.21", default-features = false, features = ["std"] }
 managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] }
 memchr = { version = "2.7.2" }
@@ -173,7 +173,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" }
 itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" }
 lalrpop-util = { version = "0.19.12" }
 lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] }
-libc = { version = "0.2.153", features = ["extra_traits"] }
+libc = { version = "0.2.155", features = ["extra_traits"] }
 log = { version = "0.4.21", default-features = false, features = ["std"] }
 managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] }
 memchr = { version = "2.7.2" }

From aade5ade080e85238f7175ed281b38385a425a2c Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Fri, 31 May 2024 04:14:11 +0000
Subject: [PATCH 23/28] Update taiki-e/install-action digest to 51b8ba0 (#5840)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action)
| action | digest | [`7491b90` ->
`51b8ba0`](https://togithub.com/taiki-e/install-action/compare/7491b90...51b8ba0)
|

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone
America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone
America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the
rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update
again.

---

- [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check
this box

---

This PR has been generated by [Renovate
Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 236b9b5023..ed2615f655 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@7491b900536dd0dae2e47ce7c17f140e46328dc4 # v2
+        uses: taiki-e/install-action@51b8ba088c63d8750c618764ff2030742da0ec19 # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date

From b0dfd535386cb93361e08a17d926e91867d9cb08 Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Fri, 31 May 2024 08:48:43 -0700
Subject: [PATCH 24/28] [xtask] Convert ci_download bash scripts to Rust
 (#5481)

Implements `cargo xtask download`, which provides options to replace the
existing scripts:

- ci_download_clickhouse
- ci_download_cockroachdb
- ci_download_console
- ci_download_dendrite_openapi
- ci_download_dendrite_stub
- ci_download_maghemite_mgd
- ci_download_maghemite_openapi
- ci_download_softnpu_machinery
- ci_download_thundermuffin
- ci_download_transceiver_control

This PR additionally introduces the `cargo xtask download all` option,
which attempts to download all artifacts concurrently.
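
For example, the intended invocations look roughly like this (using the
default `--output-dir` of "out" and `--versions-dir` of "tools"):

    # download a single artifact into ./out
    $ cargo xtask download cockroach

    # download all artifacts concurrently
    $ cargo xtask download all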

Somewhat related to https://github.com/oxidecomputer/omicron/issues/3939
---
 Cargo.lock                      |  12 +
 dev-tools/xtask/Cargo.toml      |  12 +
 dev-tools/xtask/src/download.rs | 873 ++++++++++++++++++++++++++++++++
 dev-tools/xtask/src/main.rs     |   9 +-
 4 files changed, 904 insertions(+), 2 deletions(-)
 create mode 100644 dev-tools/xtask/src/download.rs

diff --git a/Cargo.lock b/Cargo.lock
index a0878a0c32..15ebba75ae 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11416,10 +11416,22 @@ dependencies = [
  "cargo_metadata",
  "cargo_toml",
  "clap",
+ "flate2",
  "fs-err",
+ "futures",
  "macaddr",
+ "md5",
+ "reqwest",
  "serde",
+ "sha2",
+ "slog",
+ "slog-async",
+ "slog-bunyan",
+ "slog-term",
+ "strum",
  "swrite",
+ "tar",
+ "tokio",
  "toml 0.8.13",
 ]
 
diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml
index 2aecde57e5..745e16dea6 100644
--- a/dev-tools/xtask/Cargo.toml
+++ b/dev-tools/xtask/Cargo.toml
@@ -13,8 +13,20 @@ camino.workspace = true
 cargo_toml = "0.20"
 cargo_metadata.workspace = true
 clap.workspace = true
+flate2.workspace = true
+futures.workspace = true
 macaddr.workspace = true
+md5 = "0.7.0"
+reqwest = { workspace = true, features = [ "default-tls" ] }
 serde.workspace = true
+sha2.workspace = true
+slog.workspace = true
+slog-async.workspace = true
+slog-bunyan.workspace = true
+slog-term.workspace = true
+strum.workspace = true
+tar.workspace = true
+tokio = { workspace = true, features = ["full"] }
 toml.workspace = true
 fs-err.workspace = true
 swrite.workspace = true
diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs
new file mode 100644
index 0000000000..ccfe8b2bc9
--- /dev/null
+++ b/dev-tools/xtask/src/download.rs
@@ -0,0 +1,873 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Subcommand: cargo xtask download
+
+use anyhow::{bail, Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use clap::Parser;
+use clap::ValueEnum;
+use flate2::bufread::GzDecoder;
+use futures::StreamExt;
+use sha2::Digest;
+use slog::{info, o, warn, Drain, Logger};
+use std::collections::{BTreeSet, HashMap};
+use std::io::Write;
+use std::os::unix::fs::PermissionsExt;
+use strum::EnumIter;
+use strum::IntoEnumIterator;
+use tar::Archive;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::process::Command;
+
+const BUILDOMAT_URL: &'static str =
+    "https://buildomat.eng.oxide.computer/public/file";
+
+/// What is being downloaded?
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    Hash,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    ValueEnum,
+    EnumIter,
+)]
+enum Target {
+    /// Download all targets
+    All,
+
+    /// Clickhouse binary
+    Clickhouse,
+
+    /// CockroachDB binary
+    Cockroach,
+
+    /// Web console assets
+    Console,
+
+    /// Dendrite OpenAPI spec
+    DendriteOpenapi,
+
+    /// Stub Dendrite binary tarball
+    DendriteStub,
+
+    /// Maghemite mgd binary
+    MaghemiteMgd,
+
+    /// SoftNPU, an admin program (scadm) and a pre-compiled P4 program.
+    Softnpu,
+
+    /// Transceiver Control binary
+    TransceiverControl,
+}
+
+#[derive(Parser)]
+pub struct DownloadArgs {
+    /// The targets to be downloaded. This list is additive.
+    #[clap(required = true)]
+    targets: Vec<Target>,
+
+    /// The path to the "out" directory of omicron.
+    #[clap(long, default_value = "out")]
+    output_dir: Utf8PathBuf,
+
+    /// The path to the versions and checksums directory.
+    #[clap(long, default_value = "tools")]
+    versions_dir: Utf8PathBuf,
+}
+
+pub async fn run_cmd(args: DownloadArgs) -> Result<()> {
+    let mut targets = BTreeSet::new();
+
+    for target in args.targets {
+        match target {
+            Target::All => {
+                // Add all targets, then remove the "All" variant because that
+                // isn't a real thing we can download.
+                let mut all = BTreeSet::from_iter(Target::iter());
+                all.remove(&Target::All);
+                targets.append(&mut all);
+            }
+            _ => _ = targets.insert(target),
+        }
+    }
+
+    let decorator = slog_term::TermDecorator::new().build();
+    let drain = slog_term::FullFormat::new(decorator).build().fuse();
+    let drain = slog_async::Async::new(drain).build().fuse();
+    let log = Logger::root(drain, o!());
+
+    let mut all_downloads = targets
+        .into_iter()
+        .map(|target| {
+            let log = log.new(o!("target" => format!("{target:?}")));
+            let output_dir = args.output_dir.clone();
+            let versions_dir = args.versions_dir.clone();
+            tokio::task::spawn(async move {
+                info!(&log, "Starting download");
+
+                let downloader = Downloader::new(
+                    log.clone(),
+                    &output_dir,
+                    &versions_dir,
+                );
+
+                match target {
+                    Target::All => {
+                        bail!("We should have already filtered this 'All' target out?");
+                    }
+                    Target::Clickhouse => downloader.download_clickhouse().await,
+                    Target::Cockroach => downloader.download_cockroach().await,
+                    Target::Console => downloader.download_console().await,
+                    Target::DendriteOpenapi => {
+                        downloader.download_dendrite_openapi().await
+                    }
+                    Target::DendriteStub => downloader.download_dendrite_stub().await,
+                    Target::MaghemiteMgd => downloader.download_maghemite_mgd().await,
+                    Target::Softnpu => downloader.download_softnpu().await,
+                    Target::TransceiverControl => {
+                        downloader.download_transceiver_control().await
+                    }
+                }.with_context(|| format!("Failed to download {target:?}"))?;
+
+                info!(&log, "Download complete");
+                Ok(())
+            })
+        })
+        .collect::<futures::stream::FuturesUnordered<_>>();
+
+    while let Some(result) = all_downloads.next().await {
+        result??;
+    }
+
+    Ok(())
+}
+
+enum Os {
+    Illumos,
+    Linux,
+    Mac,
+}
+
+impl Os {
+    fn env_name(&self) -> &'static str {
+        match self {
+            Os::Illumos => "ILLUMOS",
+            Os::Linux => "LINUX",
+            Os::Mac => "DARWIN",
+        }
+    }
+}
+
+fn os_name() -> Result<Os> {
+    let os = match std::env::consts::OS {
+        "linux" => Os::Linux,
+        "macos" => Os::Mac,
+        "solaris" | "illumos" => Os::Illumos,
+        other => bail!("OS not supported: {other}"),
+    };
+    Ok(os)
+}
+
+struct Downloader<'a> {
+    log: Logger,
+
+    /// The path to the "out" directory of omicron.
+    output_dir: &'a Utf8Path,
+
+    /// The path to the versions and checksums directory.
+    versions_dir: &'a Utf8Path,
+}
+
+impl<'a> Downloader<'a> {
+    fn new(
+        log: Logger,
+        output_dir: &'a Utf8Path,
+        versions_dir: &'a Utf8Path,
+    ) -> Self {
+        Self { log, output_dir, versions_dir }
+    }
+}
+
+/// Parses a file of the format:
+///
+/// ```ignore
+/// KEY1="value1"
+/// KEY2="value2"
+/// ```
+///
+/// And returns an array of the values in the same order as keys.
+async fn get_values_from_file<const N: usize>(
+    keys: [&str; N],
+    path: &Utf8Path,
+) -> Result<[String; N]> {
+    // Map of "key" => "Position in output".
+    let mut keys: HashMap<&str, usize> =
+        keys.into_iter().enumerate().map(|(i, s)| (s, i)).collect();
+
+    const EMPTY_STRING: String = String::new();
+    let mut values = [EMPTY_STRING; N];
+
+    let content = tokio::fs::read_to_string(&path)
+        .await
+        .context("Failed to read {path}")?;
+    for line in content.lines() {
+        let line = line.trim();
+        let Some((key, value)) = line.split_once("=") else {
+            continue;
+        };
+        let value = value.trim_matches('"');
+        if let Some(i) = keys.remove(key) {
+            values[i] = value.to_string();
+        }
+    }
+    if !keys.is_empty() {
+        bail!("Could not find keys: {:?}", keys.keys().collect::<Vec<_>>(),);
+    }
+    Ok(values)
+}
+
+/// Send a GET request to `url`, downloading the contents to `path`.
+///
+/// Writes the response to the file as it is received.
+async fn streaming_download(url: &str, path: &Utf8Path) -> Result<()> {
+    let mut response = reqwest::get(url).await?;
+    let mut tarball = tokio::fs::File::create(&path).await?;
+    while let Some(chunk) = response.chunk().await? {
+        tarball.write_all(chunk.as_ref()).await?;
+    }
+    Ok(())
+}
+
+/// Returns the hex, lowercase md5 checksum of a file at `path`.
+async fn md5_checksum(path: &Utf8Path) -> Result<String> {
+    let mut buf = vec![0u8; 65536];
+    let mut file = tokio::fs::File::open(path).await?;
+    let mut ctx = md5::Context::new();
+    loop {
+        let n = file.read(&mut buf).await?;
+        if n == 0 {
+            break;
+        }
+        ctx.write_all(&buf[0..n])?;
+    }
+
+    let digest = ctx.compute();
+    Ok(format!("{digest:x}"))
+}
+
+/// Returns the hex, lowercase sha2 checksum of a file at `path`.
+async fn sha2_checksum(path: &Utf8Path) -> Result<String> {
+    let mut buf = vec![0u8; 65536];
+    let mut file = tokio::fs::File::open(path).await?;
+    let mut ctx = sha2::Sha256::new();
+    loop {
+        let n = file.read(&mut buf).await?;
+        if n == 0 {
+            break;
+        }
+        ctx.write_all(&buf[0..n])?;
+    }
+
+    let digest = ctx.finalize();
+    Ok(format!("{digest:x}"))
+}
+
+async fn unpack_tarball(
+    log: &Logger,
+    tarball_path: &Utf8Path,
+    destination_dir: &Utf8Path,
+) -> Result<()> {
+    info!(log, "Unpacking {tarball_path} to {destination_dir}");
+    let tarball_path = tarball_path.to_owned();
+    let destination_dir = destination_dir.to_owned();
+
+    let task = tokio::task::spawn_blocking(move || {
+        let reader = std::fs::File::open(tarball_path)?;
+        let buf_reader = std::io::BufReader::new(reader);
+        let gz = GzDecoder::new(buf_reader);
+        let mut archive = Archive::new(gz);
+        archive.unpack(&destination_dir)?;
+        Ok(())
+    });
+    task.await?
+}
+
+async fn unpack_gzip(
+    log: &Logger,
+    gzip_path: &Utf8Path,
+    destination: &Utf8Path,
+) -> Result<()> {
+    info!(log, "Unpacking {gzip_path} to {destination}");
+    let gzip_path = gzip_path.to_owned();
+    let destination = destination.to_owned();
+
+    let task = tokio::task::spawn_blocking(move || {
+        let reader = std::fs::File::open(gzip_path)?;
+        let buf_reader = std::io::BufReader::new(reader);
+        let mut gz = GzDecoder::new(buf_reader);
+
+        let mut destination = std::fs::File::create(destination)?;
+        std::io::copy(&mut gz, &mut destination)?;
+        Ok(())
+    });
+    task.await?
+}
+
+async fn clickhouse_confirm_binary_works(binary: &Utf8Path) -> Result<()> {
+    let mut cmd = Command::new(binary);
+    cmd.args(["server", "--version"]);
+
+    let output =
+        cmd.output().await.context(format!("Failed to run {binary}"))?;
+    if !output.status.success() {
+        let stderr =
+            String::from_utf8(output.stderr).unwrap_or_else(|_| String::new());
+        bail!("{binary} failed: {} (stderr: {stderr})", output.status);
+    }
+    Ok(())
+}
+
+async fn cockroach_confirm_binary_works(binary: &Utf8Path) -> Result<()> {
+    let mut cmd = Command::new(binary);
+    cmd.arg("version");
+
+    let output =
+        cmd.output().await.context(format!("Failed to run {binary}"))?;
+    if !output.status.success() {
+        let stderr =
+            String::from_utf8(output.stderr).unwrap_or_else(|_| String::new());
+        bail!("{binary} failed: {} (stderr: {stderr})", output.status);
+    }
+    Ok(())
+}
+
+fn copy_dir_all(src: &Utf8Path, dst: &Utf8Path) -> Result<()> {
+    std::fs::create_dir_all(&dst)?;
+    for entry in src.read_dir_utf8()? {
+        let entry = entry?;
+        let ty = entry.file_type()?;
+        if ty.is_dir() {
+            copy_dir_all(entry.path(), &dst.join(entry.file_name()))?;
+        } else {
+            std::fs::copy(entry.path(), &dst.join(entry.file_name()))?;
+        }
+    }
+    Ok(())
+}
+
+async fn set_permissions(path: &Utf8Path, mode: u32) -> Result<()> {
+    let mut p = tokio::fs::metadata(&path).await?.permissions();
+    p.set_mode(mode);
+    tokio::fs::set_permissions(&path, p).await?;
+    Ok(())
+}
+
+enum ChecksumAlgorithm {
+    Md5,
+    Sha2,
+}
+
+impl ChecksumAlgorithm {
+    async fn checksum(&self, path: &Utf8Path) -> Result<String> {
+        match self {
+            ChecksumAlgorithm::Md5 => md5_checksum(path).await,
+            ChecksumAlgorithm::Sha2 => sha2_checksum(path).await,
+        }
+    }
+}
+
+/// Downloads a file and verifies the checksum.
+///
+/// If the file already exists and the checksum matches,
+/// avoids performing the download altogether.
+async fn download_file_and_verify(
+    log: &Logger,
+    path: &Utf8Path,
+    url: &str,
+    algorithm: ChecksumAlgorithm,
+    checksum: &str,
+) -> Result<()> {
+    let do_download = if path.exists() {
+        info!(log, "Already downloaded ({path})");
+        if algorithm.checksum(&path).await? == checksum {
+            info!(
+                log,
+                "Checksum matches already downloaded file - skipping download"
+            );
+            false
+        } else {
+            warn!(log, "Checksum mismatch - retrying download");
+            true
+        }
+    } else {
+        true
+    };
+
+    if do_download {
+        info!(log, "Downloading {path}");
+        streaming_download(&url, &path).await?;
+    }
+
+    let observed_checksum = algorithm.checksum(&path).await?;
+    if observed_checksum != checksum {
+        bail!(
+            "Checksum mismatch (saw {observed_checksum}, expected {checksum})"
+        );
+    }
+    Ok(())
+}
+
+impl<'a> Downloader<'a> {
+    async fn download_clickhouse(&self) -> Result<()> {
+        let os = os_name()?;
+
+        let download_dir = self.output_dir.join("downloads");
+        let destination_dir = self.output_dir.join("clickhouse");
+
+        let checksums_path = self.versions_dir.join("clickhouse_checksums");
+        let [checksum] = get_values_from_file(
+            [&format!("CIDL_MD5_{}", os.env_name())],
+            &checksums_path,
+        )
+        .await?;
+
+        let versions_path = self.versions_dir.join("clickhouse_version");
+        let version = tokio::fs::read_to_string(&versions_path)
+            .await
+            .context("Failed to read version from {versions_path}")?;
+        let version = version.trim();
+
+        const S3_BUCKET: &'static str =
+            "https://oxide-clickhouse-build.s3.us-west-2.amazonaws.com";
+
+        let platform = match os {
+            Os::Illumos => "illumos",
+            Os::Linux => "linux",
+            Os::Mac => "macos",
+        };
+        let tarball_filename =
+            format!("clickhouse-{version}.{platform}.tar.gz");
+        let tarball_url = format!("{S3_BUCKET}/{tarball_filename}");
+
+        let tarball_path = download_dir.join(tarball_filename);
+
+        tokio::fs::create_dir_all(&download_dir).await?;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+
+        download_file_and_verify(
+            &self.log,
+            &tarball_path,
+            &tarball_url,
+            ChecksumAlgorithm::Md5,
+            &checksum,
+        )
+        .await?;
+
+        unpack_tarball(&self.log, &tarball_path, &destination_dir).await?;
+        let clickhouse_binary = destination_dir.join("clickhouse");
+
+        info!(self.log, "Checking that binary works");
+        clickhouse_confirm_binary_works(&clickhouse_binary).await?;
+
+        Ok(())
+    }
+
+    async fn download_cockroach(&self) -> Result<()> {
+        let os = os_name()?;
+
+        let download_dir = self.output_dir.join("downloads");
+        let destination_dir = self.output_dir.join("cockroachdb");
+
+        let checksums_path = self.versions_dir.join("cockroachdb_checksums");
+        let [checksum] = get_values_from_file(
+            [&format!("CIDL_SHA256_{}", os.env_name())],
+            &checksums_path,
+        )
+        .await?;
+
+        let versions_path = self.versions_dir.join("cockroachdb_version");
+        let version = tokio::fs::read_to_string(&versions_path)
+            .await
+            .context("Failed to read version from {versions_path}")?;
+        let version = version.trim();
+
+        let (url_base, suffix) = match os {
+            Os::Illumos => ("https://illumos.org/downloads", "tar.gz"),
+            Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"),
+        };
+        let build = match os {
+            Os::Illumos => "illumos",
+            Os::Linux => "linux-amd64",
+            Os::Mac => "darwin-10.9-amd64",
+        };
+
+        let version_directory = format!("cockroach-{version}");
+        let tarball_name = format!("{version_directory}.{build}");
+        let tarball_filename = format!("{tarball_name}.{suffix}");
+        let tarball_url = format!("{url_base}/{tarball_filename}");
+
+        let tarball_path = download_dir.join(tarball_filename);
+
+        tokio::fs::create_dir_all(&download_dir).await?;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+
+        download_file_and_verify(
+            &self.log,
+            &tarball_path,
+            &tarball_url,
+            ChecksumAlgorithm::Sha2,
+            &checksum,
+        )
+        .await?;
+
+        // We unpack the tarball in the download directory to emulate the old
+        // behavior. This could be a little more consistent with Clickhouse.
+        info!(self.log, "tarball path: {tarball_path}");
+        unpack_tarball(&self.log, &tarball_path, &download_dir).await?;
+
+        // This is where the binary will end up eventually
+        let cockroach_binary = destination_dir.join("bin/cockroach");
+
+        // Re-shuffle the downloaded tarball to our "destination" location.
+        //
+        // This ensures some uniformity, even though different platforms bundle
+        // the Cockroach package differently.
+        let binary_dir = destination_dir.join("bin");
+        tokio::fs::create_dir_all(&binary_dir).await?;
+        match os {
+            Os::Illumos => {
+                let src = tarball_path.with_file_name(version_directory);
+                let dst = &destination_dir;
+                info!(self.log, "Copying from {src} to {dst}");
+                copy_dir_all(&src, &dst)?;
+            }
+            Os::Linux | Os::Mac => {
+                let src =
+                    tarball_path.with_file_name(tarball_name).join("cockroach");
+                tokio::fs::copy(src, &cockroach_binary).await?;
+            }
+        }
+
+        info!(self.log, "Checking that binary works");
+        cockroach_confirm_binary_works(&cockroach_binary).await?;
+
+        Ok(())
+    }
+
+    async fn download_console(&self) -> Result<()> {
+        let download_dir = self.output_dir.join("downloads");
+        let tarball_path = download_dir.join("console.tar.gz");
+
+        let checksums_path = self.versions_dir.join("console_version");
+        let [commit, checksum] =
+            get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?;
+
+        tokio::fs::create_dir_all(&download_dir).await?;
+        let tarball_url = format!(
+            "https://dl.oxide.computer/releases/console/{commit}.tar.gz"
+        );
+        download_file_and_verify(
+            &self.log,
+            &tarball_path,
+            &tarball_url,
+            ChecksumAlgorithm::Sha2,
+            &checksum,
+        )
+        .await?;
+
+        let destination_dir = self.output_dir.join("console-assets");
+        let _ = tokio::fs::remove_dir_all(&destination_dir).await;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+
+        unpack_tarball(&self.log, &tarball_path, &destination_dir).await?;
+
+        Ok(())
+    }
+
+    async fn download_dendrite_openapi(&self) -> Result<()> {
+        let download_dir = self.output_dir.join("downloads");
+
+        let checksums_path = self.versions_dir.join("dendrite_openapi_version");
+        let [commit, checksum] =
+            get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?;
+
+        let url = format!(
+            "{BUILDOMAT_URL}/oxidecomputer/dendrite/openapi/{commit}/dpd.json"
+        );
+        let path = download_dir.join(format!("dpd-{commit}.json"));
+
+        tokio::fs::create_dir_all(&download_dir).await?;
+        download_file_and_verify(
+            &self.log,
+            &path,
+            &url,
+            ChecksumAlgorithm::Sha2,
+            &checksum,
+        )
+        .await?;
+
+        Ok(())
+    }
+
+    async fn download_dendrite_stub(&self) -> Result<()> {
+        let download_dir = self.output_dir.join("downloads");
+        let destination_dir = self.output_dir.join("dendrite-stub");
+
+        let stub_checksums_path =
+            self.versions_dir.join("dendrite_stub_checksums");
+
+        // NOTE: This seems odd to me -- the "dendrite_openapi_version" file also
+        // contains a SHA2, but we're ignoring it?
+        //
+        // Regardless, this is currently the one that actually matches, for
+        // any host OS.
+        let [sha2, dpd_sha2, swadm_sha2] = get_values_from_file(
+            [
+                "CIDL_SHA256_ILLUMOS",
+                "CIDL_SHA256_LINUX_DPD",
+                "CIDL_SHA256_LINUX_SWADM",
+            ],
+            &stub_checksums_path,
+        )
+        .await?;
+        let checksums_path = self.versions_dir.join("dendrite_openapi_version");
+        let [commit, _sha2] =
+            get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?;
+
+        let tarball_file = "dendrite-stub.tar.gz";
+        let tarball_path = download_dir.join(tarball_file);
+        let repo = "oxidecomputer/dendrite";
+        let url_base = format!("{BUILDOMAT_URL}/{repo}/image/{commit}");
+
+        tokio::fs::create_dir_all(&download_dir).await?;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+
+        download_file_and_verify(
+            &self.log,
+            &tarball_path,
+            &format!("{url_base}/{tarball_file}"),
+            ChecksumAlgorithm::Sha2,
+            &sha2,
+        )
+        .await?;
+
+        // Unpack in the download directory, then copy everything into the
+        // destination directory.
+        unpack_tarball(&self.log, &tarball_path, &download_dir).await?;
+
+        let _ = tokio::fs::remove_dir_all(&destination_dir).await;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+        let destination_root = destination_dir.join("root");
+        tokio::fs::create_dir_all(&destination_root).await?;
+        copy_dir_all(&download_dir.join("root"), &destination_root)?;
+
+        let bin_dir = destination_dir.join("root/opt/oxide/dendrite/bin");
+
+        // Symbolic links for backwards compatibility with existing setups
+        std::os::unix::fs::symlink(
+            bin_dir.canonicalize()?,
+            destination_dir.canonicalize()?.join("bin"),
+        )
+        .context("Failed to create a symlink to dendrite's bin directory")?;
+
+        match os_name()? {
+            Os::Linux => {
+                let base_url =
+                    format!("{BUILDOMAT_URL}/{repo}/linux-bin/{commit}");
+                let filename = "dpd";
+                let path = download_dir.join(filename);
+                download_file_and_verify(
+                    &self.log,
+                    &path,
+                    &format!("{base_url}/{filename}"),
+                    ChecksumAlgorithm::Sha2,
+                    &dpd_sha2,
+                )
+                .await?;
+                set_permissions(&path, 0o755).await?;
+                tokio::fs::copy(path, bin_dir.join(filename)).await?;
+
+                let filename = "swadm";
+                let path = download_dir.join(filename);
+                download_file_and_verify(
+                    &self.log,
+                    &path,
+                    &format!("{base_url}/{filename}"),
+                    ChecksumAlgorithm::Sha2,
+                    &swadm_sha2,
+                )
+                .await?;
+                set_permissions(&path, 0o755).await?;
+                tokio::fs::copy(path, bin_dir.join(filename)).await?;
+            }
+            Os::Illumos => {}
+            Os::Mac => {
+                warn!(self.log, "WARNING: Dendrite not available for Mac");
+                warn!(self.log, "Network APIs will be unavailable");
+
+                let path = bin_dir.join("dpd");
+                tokio::fs::write(&path, "echo 'unsupported os' && exit 1")
+                    .await?;
+                set_permissions(&path, 0o755).await?;
+            }
+        }
+
+        Ok(())
+    }
+
+    async fn download_maghemite_mgd(&self) -> Result<()> {
+        let download_dir = self.output_dir.join("downloads");
+        tokio::fs::create_dir_all(&download_dir).await?;
+
+        let checksums_path = self.versions_dir.join("maghemite_mgd_checksums");
+        let [mgd_sha2, mgd_linux_sha2] = get_values_from_file(
+            ["CIDL_SHA256", "MGD_LINUX_SHA256"],
+            &checksums_path,
+        )
+        .await?;
+        let commit_path =
+            self.versions_dir.join("maghemite_mg_openapi_version");
+        let [commit] = get_values_from_file(["COMMIT"], &commit_path).await?;
+
+        let repo = "oxidecomputer/maghemite";
+        let base_url = format!("{BUILDOMAT_URL}/{repo}/image/{commit}");
+
+        let filename = "mgd.tar.gz";
+        let tarball_path = download_dir.join(filename);
+        download_file_and_verify(
+            &self.log,
+            &tarball_path,
+            &format!("{base_url}/{filename}"),
+            ChecksumAlgorithm::Sha2,
+            &mgd_sha2,
+        )
+        .await?;
+        unpack_tarball(&self.log, &tarball_path, &download_dir).await?;
+
+        let destination_dir = self.output_dir.join("mgd");
+        let _ = tokio::fs::remove_dir_all(&destination_dir).await;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+        copy_dir_all(
+            &download_dir.join("root"),
+            &destination_dir.join("root"),
+        )?;
+
+        let binary_dir = destination_dir.join("root/opt/oxide/mgd/bin");
+
+        match os_name()? {
+            Os::Linux => {
+                let filename = "mgd";
+                let path = download_dir.join(filename);
+                download_file_and_verify(
+                    &self.log,
+                    &path,
+                    &format!(
+                        "{BUILDOMAT_URL}/{repo}/linux/{commit}/{filename}"
+                    ),
+                    ChecksumAlgorithm::Sha2,
+                    &mgd_linux_sha2,
+                )
+                .await?;
+                set_permissions(&path, 0o755).await?;
+                tokio::fs::copy(path, binary_dir.join(filename)).await?;
+            }
+            _ => (),
+        }
+
+        Ok(())
+    }
+
+    async fn download_softnpu(&self) -> Result<()> {
+        let destination_dir = self.output_dir.join("npuzone");
+        tokio::fs::create_dir_all(&destination_dir).await?;
+
+        let repo = "oxidecomputer/softnpu";
+
+        // TODO: This should probably live in a separate file, but
+        // at the moment we're just building parity with
+        // "ci_download_softnpu_machinery".
+        let commit = "3203c51cf4473d30991b522062ac0df2e045c2f2";
+
+        let filename = "npuzone";
+        let base_url = format!("{BUILDOMAT_URL}/{repo}/image/{commit}");
+        let artifact_url = format!("{base_url}/{filename}");
+        let sha2_url = format!("{base_url}/{filename}.sha256.txt");
+        let sha2 = reqwest::get(sha2_url).await?.text().await?;
+        let sha2 = sha2.trim();
+
+        let path = destination_dir.join(filename);
+        download_file_and_verify(
+            &self.log,
+            &path,
+            &artifact_url,
+            ChecksumAlgorithm::Sha2,
+            &sha2,
+        )
+        .await?;
+        set_permissions(&path, 0o755).await?;
+
+        Ok(())
+    }
+
+    async fn download_transceiver_control(&self) -> Result<()> {
+        let destination_dir = self.output_dir.join("transceiver-control");
+        let download_dir = self.output_dir.join("downloads");
+        tokio::fs::create_dir_all(&download_dir).await?;
+
+        let [commit, sha2] = get_values_from_file(
+            ["COMMIT", "CIDL_SHA256_ILLUMOS"],
+            &self.versions_dir.join("transceiver_control_version"),
+        )
+        .await?;
+
+        let repo = "oxidecomputer/transceiver-control";
+        let base_url = format!("{BUILDOMAT_URL}/{repo}/bins/{commit}");
+
+        let filename_gz = "xcvradm.gz";
+        let filename = "xcvradm";
+        let gzip_path = download_dir.join(filename_gz);
+        download_file_and_verify(
+            &self.log,
+            &gzip_path,
+            &format!("{base_url}/{filename_gz}"),
+            ChecksumAlgorithm::Sha2,
+            &sha2,
+        )
+        .await?;
+
+        let download_bin_dir = download_dir.join("root/opt/oxide/bin");
+        tokio::fs::create_dir_all(&download_bin_dir).await?;
+        let path = download_bin_dir.join(filename);
+        unpack_gzip(&self.log, &gzip_path, &path).await?;
+        set_permissions(&path, 0o755).await?;
+
+        let _ = tokio::fs::remove_dir_all(&destination_dir).await;
+        tokio::fs::create_dir_all(&destination_dir).await?;
+        copy_dir_all(
+            &download_dir.join("root"),
+            &destination_dir.join("root"),
+        )?;
+
+        match os_name()? {
+            Os::Illumos => (),
+            _ => {
+                let binary_dir = destination_dir.join("opt/oxide/bin");
+                tokio::fs::create_dir_all(&binary_dir).await?;
+
+                let path = binary_dir.join(filename);
+                warn!(self.log, "Unsupported OS for transceiver-control - Creating stub"; "path" => %path);
+                tokio::fs::write(&path, "echo 'unsupported os' && exit 1")
+                    .await?;
+                set_permissions(&path, 0o755).await?;
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs
index 9f1131e758..22e5a22632 100644
--- a/dev-tools/xtask/src/main.rs
+++ b/dev-tools/xtask/src/main.rs
@@ -12,6 +12,7 @@ use clap::{Parser, Subcommand};
 
 mod check_workspace_deps;
 mod clippy;
+mod download;
 #[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
 mod external;
 
@@ -38,10 +39,13 @@ enum Cmds {
     CheckWorkspaceDeps,
     /// Run configured clippy checks
     Clippy(clippy::ClippyArgs),
+    /// Download binaries, OpenAPI specs, and other out-of-repo utilities.
+    Download(download::DownloadArgs),
 
     #[cfg(target_os = "illumos")]
     /// Build a TUF repo
     Releng(external::External),
+
     /// Verify we are not leaking library bindings outside of intended
     /// crates
     #[cfg(target_os = "illumos")]
@@ -61,12 +65,13 @@ enum Cmds {
     VirtualHardware,
 }
 
-fn main() -> Result<()> {
+#[tokio::main]
+async fn main() -> Result<()> {
     let args = Args::parse();
     match args.cmd {
         Cmds::Clippy(args) => clippy::run_cmd(args),
         Cmds::CheckWorkspaceDeps => check_workspace_deps::run_cmd(),
-
+        Cmds::Download(args) => download::run_cmd(args).await,
         #[cfg(target_os = "illumos")]
         Cmds::Releng(external) => {
             external.cargo_args(["--release"]).exec("omicron-releng")

From 6dee6ee4bfb44d79e21500c0d8957f9ec115b742 Mon Sep 17 00:00:00 2001
From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com>
Date: Fri, 31 May 2024 17:38:20 -0500
Subject: [PATCH 25/28] filter vmm table for active instances (#5845)

We needed to add a filter so that v2p mappings for inactive instances
no longer show up in the v2p_mapping view.
---
 nexus/db-model/src/schema_versions.rs         |  3 +-
 schema/crdb/dbinit.sql                        |  3 +-
 .../up01.sql                                  |  1 +
 .../up02.sql                                  | 43 +++++++++++++++++++
 4 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql
 create mode 100644 schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql

diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index b417570a6c..ebc9d0173a 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema.  Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(67, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(68, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"),
         KnownVersion::new(67, "add-instance-updater-lock"),
         KnownVersion::new(66, "blueprint-crdb-preserve-downgrade"),
         KnownVersion::new(65, "region-replacement"),
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index d254c00138..cf6bc2bf53 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -3841,6 +3841,7 @@ WITH VmV2pMappings AS (
   JOIN omicron.public.sled s ON vmm.sled_id = s.id
   WHERE n.time_deleted IS NULL
   AND n.kind = 'instance'
+  AND (vmm.state = 'running' OR vmm.state = 'starting')
   AND s.sled_policy = 'in_service'
   AND s.sled_state = 'active'
 ),
@@ -4019,7 +4020,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '67.0.0', NULL)
+    (TRUE, NOW(), NOW(), '68.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql b/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql
new file mode 100644
index 0000000000..aebe0119f5
--- /dev/null
+++ b/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql
@@ -0,0 +1 @@
+DROP VIEW IF EXISTS omicron.public.v2p_mapping_view;
diff --git a/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql b/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql
new file mode 100644
index 0000000000..c92ac4ae43
--- /dev/null
+++ b/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql
@@ -0,0 +1,43 @@
+-- view for v2p mapping rpw
+CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view
+AS
+WITH VmV2pMappings AS (
+  SELECT
+    n.id as nic_id,
+    s.id as sled_id,
+    s.ip as sled_ip,
+    v.vni,
+    n.mac,
+    n.ip
+  FROM omicron.public.network_interface n
+  JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id
+  JOIN omicron.public.vpc v ON v.id = n.vpc_id
+  JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id
+  JOIN omicron.public.sled s ON vmm.sled_id = s.id
+  WHERE n.time_deleted IS NULL
+  AND n.kind = 'instance'
+  AND (vmm.state = 'running' OR vmm.state = 'starting')
+  AND s.sled_policy = 'in_service'
+  AND s.sled_state = 'active'
+),
+ProbeV2pMapping AS (
+  SELECT
+    n.id as nic_id,
+    s.id as sled_id,
+    s.ip as sled_ip,
+    v.vni,
+    n.mac,
+    n.ip
+  FROM omicron.public.network_interface n
+  JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id
+  JOIN omicron.public.vpc v ON v.id = n.vpc_id
+  JOIN omicron.public.probe p ON n.parent_id = p.id
+  JOIN omicron.public.sled s ON p.sled = s.id
+  WHERE n.time_deleted IS NULL
+  AND n.kind = 'probe'
+  AND s.sled_policy = 'in_service'
+  AND s.sled_state = 'active'
+)
+SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings
+UNION
+SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping;

From 152f61c175ce94eaf4e942ac7bef243850d458dd Mon Sep 17 00:00:00 2001
From: Kyle Simpson <kyle@oxide.computer>
Date: Sat, 1 Jun 2024 00:12:46 +0100
Subject: [PATCH 26/28] Clear XDE underlay when destroying virtual hardware
 (#5602)

OPTE now prevents itself from being unloaded if its underlay state is
set. Currently, underlay setup is performed only once, and it seems
that XDE can be unloaded in some scenarios (e.g., `a4x2` setup).

However, a consequence is that removing the driver requires an extra
operation to explicitly clear the underlay state. This PR adds this
operation to the `cargo xtask virtual-hardware destroy` command.

Closes #5314.
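
For reference, a condensed sketch (plain std Rust, not the exact xtask
code) of the two-step teardown this change implements: clear the OPTE
underlay first, treating failure as non-fatal, then unload the xde
module. The paths match the constants in the xtask; the `modinfo`-based
module-id lookup the real code performs is elided here.

    use std::io;
    use std::process::Command;

    fn unload_xde(module_id: &str) -> io::Result<()> {
        // a) Clear the underlay. Non-fatal: the underlay is only set
        //    when sled-agent is running, so it may already be unset.
        let cleared = Command::new("/usr/bin/pfexec")
            .args(["/opt/oxide/opte/bin/opteadm", "clear-xde-underlay"])
            .status()?;
        if !cleared.success() {
            println!("\tFailed or already unset");
        }

        // b) Unload the driver module itself.
        let unloaded = Command::new("/usr/bin/pfexec")
            .args(["/usr/sbin/modunload", "-i", module_id])
            .status()?;
        if !unloaded.success() {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "modunload failed",
            ));
        }
        Ok(())
    }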
---
 .github/buildomat/jobs/a4x2-deploy.sh   |  2 +-
 .github/buildomat/jobs/deploy.sh        |  3 ++-
 Cargo.lock                              | 32 ++++++++-----------------
 Cargo.toml                              |  8 +++----
 dev-tools/xtask/src/virtual_hardware.rs | 12 +++++++++-
 package-manifest.toml                   | 16 ++++++-------
 tools/maghemite_ddm_openapi_version     |  2 +-
 tools/maghemite_mg_openapi_version      |  2 +-
 tools/maghemite_mgd_checksums           |  4 ++--
 tools/opte_version                      |  2 +-
 workspace-hack/Cargo.toml               |  4 ++--
 11 files changed, 43 insertions(+), 44 deletions(-)

diff --git a/.github/buildomat/jobs/a4x2-deploy.sh b/.github/buildomat/jobs/a4x2-deploy.sh
index c8eb998b35..53153beafb 100755
--- a/.github/buildomat/jobs/a4x2-deploy.sh
+++ b/.github/buildomat/jobs/a4x2-deploy.sh
@@ -2,7 +2,7 @@
 #:
 #: name = "a4x2-deploy"
 #: variety = "basic"
-#: target = "lab-2.0-opte-0.29"
+#: target = "lab-2.0-opte-0.31"
 #: output_rules = [
 #:	"/out/falcon/*.log",
 #:	"/out/falcon/*.err",
diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh
index 31733f0dc0..a2aac86aec 100755
--- a/.github/buildomat/jobs/deploy.sh
+++ b/.github/buildomat/jobs/deploy.sh
@@ -2,7 +2,7 @@
 #:
 #: name = "helios / deploy"
 #: variety = "basic"
-#: target = "lab-2.0-opte-0.29"
+#: target = "lab-2.0-opte-0.31"
 #: output_rules = [
 #:  "%/var/svc/log/oxide-sled-agent:default.log*",
 #:  "%/zone/oxz_*/root/var/svc/log/oxide-*.log*",
@@ -50,6 +50,7 @@ _exit_trap() {
 		dump-state
 	pfexec /opt/oxide/opte/bin/opteadm list-ports
 	pfexec /opt/oxide/opte/bin/opteadm dump-v2b
+	pfexec /opt/oxide/opte/bin/opteadm dump-v2p
 	z_swadm link ls
 	z_swadm addr list
 	z_swadm route list
diff --git a/Cargo.lock b/Cargo.lock
index 15ebba75ae..9072aff98c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -152,9 +152,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.83"
+version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
 dependencies = [
  "backtrace",
 ]
@@ -1591,7 +1591,7 @@ dependencies = [
 [[package]]
 name = "ddm-admin-client"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/maghemite?rev=025389ff39d594bf2b815377e2c1dc4dd23b1f96#025389ff39d594bf2b815377e2c1dc4dd23b1f96"
+source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3"
 dependencies = [
  "percent-encoding",
  "progenitor",
@@ -1729,17 +1729,6 @@ dependencies = [
  "syn 1.0.109",
 ]
 
-[[package]]
-name = "derror-macro"
-version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
-dependencies = [
- "darling",
- "proc-macro2",
- "quote",
- "syn 2.0.64",
-]
-
 [[package]]
 name = "dhcproto"
 version = "0.11.0"
@@ -3481,7 +3470,7 @@ dependencies = [
 [[package]]
 name = "illumos-sys-hdrs"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 
 [[package]]
 name = "illumos-utils"
@@ -3895,7 +3884,7 @@ dependencies = [
 [[package]]
 name = "kstat-macro"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 dependencies = [
  "quote",
  "syn 2.0.64",
@@ -4307,7 +4296,7 @@ dependencies = [
 [[package]]
 name = "mg-admin-client"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/maghemite?rev=025389ff39d594bf2b815377e2c1dc4dd23b1f96#025389ff39d594bf2b815377e2c1dc4dd23b1f96"
+source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3"
 dependencies = [
  "anyhow",
  "chrono",
@@ -6060,10 +6049,9 @@ dependencies = [
 [[package]]
 name = "opte"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 dependencies = [
  "cfg-if",
- "derror-macro",
  "dyn-clone",
  "illumos-sys-hdrs",
  "kstat-macro",
@@ -6078,7 +6066,7 @@ dependencies = [
 [[package]]
 name = "opte-api"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 dependencies = [
  "illumos-sys-hdrs",
  "ipnetwork",
@@ -6090,7 +6078,7 @@ dependencies = [
 [[package]]
 name = "opte-ioctl"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 dependencies = [
  "libc",
  "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)",
@@ -6164,7 +6152,7 @@ dependencies = [
 [[package]]
 name = "oxide-vpc"
 version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732"
+source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2"
 dependencies = [
  "cfg-if",
  "illumos-sys-hdrs",
diff --git a/Cargo.toml b/Cargo.toml
index fc8811e9b5..5c0433a662 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -314,8 +314,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] }
 maplit = "1.0.2"
 mockall = "0.12"
 newtype_derive = "0.1.6"
-mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" }
-ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" }
+mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" }
+ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" }
 multimap = "0.10.0"
 nexus-client = { path = "clients/nexus-client" }
 nexus-config = { path = "nexus-config" }
@@ -351,14 +351,14 @@ omicron-sled-agent = { path = "sled-agent" }
 omicron-test-utils = { path = "test-utils" }
 omicron-zone-package = "0.11.0"
 oxide-client = { path = "clients/oxide-client" }
-oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "194a8d1d6443f78d59702a25849607dba33db732", features = [ "api", "std" ] }
+oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2", features = [ "api", "std" ] }
 once_cell = "1.19.0"
 openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" }
 openapiv3 = "2.0.0"
 # must match samael's crate!
 openssl = "0.10"
 openssl-sys = "0.9"
-opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "194a8d1d6443f78d59702a25849607dba33db732" }
+opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2" }
 oso = "0.27"
 owo-colors = "4.0.0"
 oximeter = { path = "oximeter/oximeter" }
diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs
index d013ff6505..0ec9f91492 100644
--- a/dev-tools/xtask/src/virtual_hardware.rs
+++ b/dev-tools/xtask/src/virtual_hardware.rs
@@ -104,6 +104,7 @@ const IPADM: &'static str = "/usr/sbin/ipadm";
 const MODINFO: &'static str = "/usr/sbin/modinfo";
 const MODUNLOAD: &'static str = "/usr/sbin/modunload";
 const NETSTAT: &'static str = "/usr/bin/netstat";
+const OPTEADM: &'static str = "/opt/oxide/opte/bin/opteadm";
 const PFEXEC: &'static str = "/usr/bin/pfexec";
 const PING: &'static str = "/usr/sbin/ping";
 const SWAP: &'static str = "/usr/sbin/swap";
@@ -247,8 +248,17 @@ fn unload_xde_driver() -> Result<()> {
         println!("xde driver already unloaded");
         return Ok(());
     };
-    println!("unloading xde driver");
+    println!("unloading xde driver:\na) clearing underlay...");
+    let mut cmd = Command::new(PFEXEC);
+    cmd.args([OPTEADM, "clear-xde-underlay"]);
+    if let Err(e) = execute(cmd) {
+        // This is explicitly non-fatal: the underlay is only set when
+        // sled-agent is running. We still need to be able to tear
+        // down the driver if we immediately call create->destroy.
+        println!("\tFailed or already unset: {e}");
+    }
 
+    println!("b) unloading module...");
     let mut cmd = Command::new(PFEXEC);
     cmd.arg(MODUNLOAD);
     cmd.arg("-i");
diff --git a/package-manifest.toml b/package-manifest.toml
index bffd5be7dc..9d372cd4df 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -548,10 +548,10 @@ source.repo = "maghemite"
 # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when
 # building `ddm-admin-client` (which will instruct you to update
 # `tools/maghemite_openapi_version`).
-source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd"
+source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3"
 # The SHA256 digest is automatically posted to:
-# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/maghemite.sha256.txt
-source.sha256 = "1ea0e73e149a68bf91b5ce2e0db2a8a1af50dcdbbf381b672aa9ac7e36a3a181"
+# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm-gz.sha256.txt
+source.sha256 = "28965f303a64f49cf5b83322babe1e0ceb4cfe33fb2df8c8d452d8c3ec02d933"
 output.type = "tarball"
 
 [package.mg-ddm]
@@ -564,10 +564,10 @@ source.repo = "maghemite"
 # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when
 # building `ddm-admin-client` (which will instruct you to update
 # `tools/maghemite_openapi_version`).
-source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd"
+source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3"
 # The SHA256 digest is automatically posted to:
 # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt
-source.sha256 = "3334b0a9d5956e3117a6b493b9a5a31220391fab1ecbfb3a4bd8e94d7030771a"
+source.sha256 = "6fa53be6fc5ad6273e0ca5e969c882ea40c473722415b060dfea420e962d4f8e"
 output.type = "zone"
 output.intermediate_only = true
 
@@ -579,10 +579,10 @@ source.repo = "maghemite"
 # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when
 # building `ddm-admin-client` (which will instruct you to update
 # `tools/maghemite_openapi_version`).
-source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd"
+source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3"
 # The SHA256 digest is automatically posted to:
-# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt
-source.sha256 = "e0907de39ca9f8ab45d40d361a1dbeed4bd8e9b157f8d3d8fe0a4bc259d933bd"
+# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mgd.sha256.txt
+source.sha256 = "6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3"
 output.type = "zone"
 output.intermediate_only = true
 
diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version
index c39c9690bb..9a93e6b556 100644
--- a/tools/maghemite_ddm_openapi_version
+++ b/tools/maghemite_ddm_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="025389ff39d594bf2b815377e2c1dc4dd23b1f96"
+COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3"
 SHA2="004e873e4120aa26460271368485266b75b7f964e5ed4dbee8fb5db4519470d7"
diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version
index 73095bd42d..22918c581a 100644
--- a/tools/maghemite_mg_openapi_version
+++ b/tools/maghemite_mg_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="23b0cf439f9f62b9a4933e55cc72bcaddc9596cd"
+COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3"
 SHA2="fdb33ee7425923560534672264008ef8948d227afce948ab704de092ad72157c"
diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums
index eeb873a424..d2ad05383d 100644
--- a/tools/maghemite_mgd_checksums
+++ b/tools/maghemite_mgd_checksums
@@ -1,2 +1,2 @@
-CIDL_SHA256="e0907de39ca9f8ab45d40d361a1dbeed4bd8e9b157f8d3d8fe0a4bc259d933bd"
-MGD_LINUX_SHA256="903413ddaab89594ed7518cb8f2f27793e96cd17ed2d6b3fe11657ec4375cb19"
+CIDL_SHA256="6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3"
+MGD_LINUX_SHA256="7930008cf8ce535a8b31043fc3edde0e825bd54d75f73234929bd0037ecc3a41"
diff --git a/tools/opte_version b/tools/opte_version
index 2de18d2d9b..6126a52eb4 100644
--- a/tools/opte_version
+++ b/tools/opte_version
@@ -1 +1 @@
-0.29.250
+0.31.259
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index ee4dcccb70..f82fe1c833 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -19,7 +19,7 @@ workspace = true
 [dependencies]
 ahash = { version = "0.8.11" }
 aho-corasick = { version = "1.1.3" }
-anyhow = { version = "1.0.83", features = ["backtrace"] }
+anyhow = { version = "1.0.86", features = ["backtrace"] }
 base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] }
 bit-set = { version = "0.5.3" }
 bit-vec = { version = "0.6.3" }
@@ -124,7 +124,7 @@ zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] }
 [build-dependencies]
 ahash = { version = "0.8.11" }
 aho-corasick = { version = "1.1.3" }
-anyhow = { version = "1.0.83", features = ["backtrace"] }
+anyhow = { version = "1.0.86", features = ["backtrace"] }
 base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] }
 bit-set = { version = "0.5.3" }
 bit-vec = { version = "0.6.3" }

From 8df03b3934dc3c894a0484f333b15bbfd088262c Mon Sep 17 00:00:00 2001
From: David Crespo <david-crespo@users.noreply.github.com>
Date: Fri, 31 May 2024 19:49:19 -0500
Subject: [PATCH 27/28] Fix IP pool silos pagination bug (#5847)

Closes #5837

- [x] Write test reproducing the bug
- [x] Fix the bug
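
The root cause (inferred from the one-line datastore change below) is
that the silos-for-pool listing paginated by `ip_pool_id`, a column that
is identical for every row returned for a given pool, so the page marker
could never advance past the first page; paginating by `resource_id`
(the linked silo's id) gives each row a distinct key. An illustrative,
self-contained sketch of the failure mode (not the Diesel code itself):

    // Simulated keyset pagination: each page starts strictly after the
    // last key returned on the previous page.
    fn next_page(
        rows: &[(u32, &str)],
        after: Option<u32>,
        limit: usize,
    ) -> Vec<(u32, String)> {
        rows.iter()
            .filter(|row| after.map_or(true, |a| row.0 > a))
            .take(limit)
            .map(|row| (row.0, row.1.to_string()))
            .collect()
    }

    fn main() {
        // Old behaviour: keyed by ip_pool_id, identical on every row,
        // so the second page is empty and "silo-3" is never returned.
        let by_pool = [(1, "silo-1"), (1, "silo-2"), (1, "silo-3")];
        let page = next_page(&by_pool, None, 2);
        let token = page.last().map(|(k, _)| *k);
        assert!(next_page(&by_pool, token, 2).is_empty());

        // Fixed behaviour: keyed by resource_id, unique per row.
        let by_silo = [(10, "silo-1"), (11, "silo-2"), (12, "silo-3")];
        let page = next_page(&by_silo, None, 2);
        let token = page.last().map(|(k, _)| *k);
        assert_eq!(next_page(&by_silo, token, 2).len(), 1);
    }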
---
 nexus/db-queries/src/db/datastore/ip_pool.rs |  2 +-
 nexus/tests/integration_tests/ip_pools.rs    | 48 ++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index 30ea2e89b4..08db5ef38c 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -426,7 +426,7 @@ impl DataStore {
 
         paginated(
             ip_pool_resource::table,
-            ip_pool_resource::ip_pool_id,
+            ip_pool_resource::resource_id,
             pagparams,
         )
         .inner_join(ip_pool::table)
diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs
index 38cfd25844..e3ddc98029 100644
--- a/nexus/tests/integration_tests/ip_pools.rs
+++ b/nexus/tests/integration_tests/ip_pools.rs
@@ -724,6 +724,54 @@ async fn test_ip_pool_pagination(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(get_names(next_page.items), &pool_names[5..8]);
 }
 
+#[nexus_test]
+async fn test_ip_pool_silos_pagination(cptestctx: &ControlPlaneTestContext) {
+    let client = &cptestctx.external_client;
+
+    // one pool, and there should be no linked silos
+    create_pool(client, "p0").await;
+    let silos_p0 = silos_for_pool(client, "p0").await;
+    assert_eq!(silos_p0.items.len(), 0);
+
+    // create and link some silos. we need to use discoverable silos because
+    // non-discoverable silos, while linkable, are filtered out of the list of
+    // linked silos for a pool
+    let mut silo_ids = vec![];
+    for i in 1..=8 {
+        let name = format!("silo-{}", i);
+        let silo =
+            create_silo(&client, &name, true, SiloIdentityMode::SamlJit).await;
+        silo_ids.push(silo.id());
+        link_ip_pool(client, "p0", &silo.id(), false).await;
+    }
+
+    // we paginate by ID, so these should be in order to match
+    silo_ids.sort();
+
+    let base_url = "/v1/system/ip-pools/p0/silos";
+    let first_five_url = format!("{}?limit=5", base_url);
+    let first_five =
+        objects_list_page_authz::<IpPoolSiloLink>(client, &first_five_url)
+            .await;
+    assert!(first_five.next_page.is_some());
+    assert_eq!(
+        first_five.items.iter().map(|s| s.silo_id).collect::<Vec<_>>(),
+        &silo_ids[0..5]
+    );
+
+    let next_page_url = format!(
+        "{}?limit=5&page_token={}",
+        base_url,
+        first_five.next_page.unwrap()
+    );
+    let next_page =
+        objects_list_page_authz::<IpPoolSiloLink>(client, &next_page_url).await;
+    assert_eq!(
+        next_page.items.iter().map(|s| s.silo_id).collect::<Vec<_>>(),
+        &silo_ids[5..8]
+    );
+}
+
 /// helper to make tests less ugly
 fn get_names(pools: Vec<IpPool>) -> Vec<String> {
     pools.iter().map(|p| p.identity.name.to_string()).collect()

From 450f906e8f54e7b6999de8256a74730d7475f6e3 Mon Sep 17 00:00:00 2001
From: Sean Klein <sean@oxide.computer>
Date: Fri, 31 May 2024 18:53:35 -0700
Subject: [PATCH 28/28] [nexus] Split authn/authz and db-fixed-data into new
 crates (#5849)

As part of the ongoing effort to split Nexus into smaller pieces, this
PR splits out two new crates:

- `nexus-auth` takes the contents of `nexus/db-queries/src/auth{n,z}`,
as well as `nexus/db-queries/src/context.rs`, and separates this logic
into a new bespoke crate. Although this crate **does** have a dependency
on the datastore itself, it only invokes a single method, so that
dependency is abstracted behind a new trait defined in
`nexus/auth/storage` (see the sketch after this list).
- `nexus-db-fixed-data` takes the contents of
`nexus/db-queries/src/db/fixed_data` and separates this logic into a new
crate.
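
A rough sketch of the shape that storage abstraction can take. The trait
and method names below are hypothetical: the actual contents of
`nexus/auth/src/storage.rs` and its DataStore implementation in
`nexus/db-queries/src/db/datastore/auth.rs` are not reproduced here, and
the real interface is async.

    /// Hypothetical stand-in for the trait in nexus/auth/src/storage.rs:
    /// the one datastore lookup the authz machinery needs.
    pub trait AuthzStorage {
        /// Return the role names assigned to an actor on a resource.
        fn roles_for_actor_on_resource(
            &self,
            actor_id: &str,
            resource_id: &str,
        ) -> Result<Vec<String>, String>;
    }

    /// nexus-db-queries would implement the trait for its DataStore;
    /// nexus-auth only ever sees the trait, not the concrete type.
    struct FakeDatastore;

    impl AuthzStorage for FakeDatastore {
        fn roles_for_actor_on_resource(
            &self,
            _actor_id: &str,
            _resource_id: &str,
        ) -> Result<Vec<String>, String> {
            Ok(vec!["admin".to_string()])
        }
    }

    fn load_roles(storage: &dyn AuthzStorage) -> Vec<String> {
        storage
            .roles_for_actor_on_resource("actor-id", "resource-id")
            .unwrap_or_default()
    }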
---
 Cargo.lock                                    |  67 ++++-
 Cargo.toml                                    |   6 +
 nexus/Cargo.toml                              |   1 +
 nexus/auth/Cargo.toml                         |  48 ++++
 nexus/auth/build.rs                           |  10 +
 .../src/authn/external/cookies.rs             |   2 +
 .../src/authn/external/mod.rs                 |   1 +
 .../src/authn/external/session_cookie.rs      |   1 +
 .../src/authn/external/spoof.rs               |   1 +
 .../src/authn/external/token.rs               |   0
 nexus/{db-queries => auth}/src/authn/mod.rs   |  40 +--
 nexus/{db-queries => auth}/src/authn/saga.rs  |   0
 nexus/{db-queries => auth}/src/authn/silos.rs |  61 +----
 nexus/{db-queries => auth}/src/authz/actor.rs |   0
 .../src/authz/api_resources.rs                | 240 ++++++------------
 .../{db-queries => auth}/src/authz/context.rs | 108 ++++----
 nexus/{db-queries => auth}/src/authz/mod.rs   |   7 +-
 .../src/authz/omicron.polar                   |   0
 .../src/authz/oso_generic.rs                  |  25 +-
 nexus/{db-queries => auth}/src/authz/roles.rs |  11 +-
 nexus/{db-queries => auth}/src/context.rs     |  65 +++--
 nexus/auth/src/lib.rs                         |  11 +
 nexus/auth/src/storage.rs                     |  27 ++
 nexus/db-fixed-data/Cargo.toml                |  25 ++
 nexus/db-fixed-data/build.rs                  |  10 +
 .../src}/allow_list.rs                        |   0
 .../mod.rs => db-fixed-data/src/lib.rs}       |   0
 .../src}/project.rs                           |  10 +-
 .../src}/role_assignment.rs                   |   4 +-
 .../src}/role_builtin.rs                      |   2 +-
 .../fixed_data => db-fixed-data/src}/silo.rs  |  10 +-
 .../src}/silo_user.rs                         |  43 ++--
 .../src}/user_builtin.rs                      |   0
 .../fixed_data => db-fixed-data/src}/vpc.rs   |   8 +-
 .../src}/vpc_firewall_rule.rs                 |   0
 .../src}/vpc_subnet.rs                        |   2 +-
 nexus/db-queries/Cargo.toml                   |  14 +-
 .../db-queries/src/db/datastore/allow_list.rs |   2 +-
 nexus/db-queries/src/db/datastore/auth.rs     |  81 ++++++
 .../src/db/datastore/cockroachdb_settings.rs  |   6 +-
 .../src/db/datastore/identity_provider.rs     |  48 ++++
 nexus/db-queries/src/db/datastore/instance.rs |   3 +-
 nexus/db-queries/src/db/datastore/mod.rs      |  18 +-
 .../src/db/datastore/network_interface.rs     |   2 +-
 nexus/db-queries/src/db/datastore/project.rs  |   4 +-
 .../src/db/datastore/pub_test_utils.rs        |   8 +-
 nexus/db-queries/src/db/datastore/rack.rs     |   8 +-
 nexus/db-queries/src/db/datastore/role.rs     |  65 +----
 nexus/db-queries/src/db/datastore/silo.rs     |   2 +-
 .../db-queries/src/db/datastore/silo_user.rs  |   4 +-
 .../virtual_provisioning_collection.rs        |   7 +-
 nexus/db-queries/src/db/datastore/vpc.rs      |  20 +-
 nexus/db-queries/src/db/lookup.rs             |   6 +-
 nexus/db-queries/src/db/mod.rs                |   2 +-
 .../virtual_provisioning_collection_update.rs |   2 +-
 nexus/db-queries/src/db/saga_recovery.rs      |  10 +-
 nexus/db-queries/src/lib.rs                   |  12 +-
 .../src/{authz => }/policy_test/coverage.rs   |   5 +-
 .../src/{authz => }/policy_test/mod.rs        |  18 +-
 .../policy_test/resource_builder.rs           |  96 ++++---
 .../src/{authz => }/policy_test/resources.rs  |   6 +-
 nexus/db-queries/tests/output/authz-roles.out |   2 +-
 nexus/src/app/mod.rs                          |  18 +-
 nexus/src/app/test_interfaces.rs              |  12 +-
 nexus/src/external_api/console_api.rs         |  21 +-
 nexus/src/populate.rs                         |   4 +-
 nexus/tests/integration_tests/saml.rs         |  25 +-
 nexus/tests/integration_tests/silos.rs        |  27 +-
 workspace-hack/Cargo.toml                     |   1 +
 69 files changed, 800 insertions(+), 605 deletions(-)
 create mode 100644 nexus/auth/Cargo.toml
 create mode 100644 nexus/auth/build.rs
 rename nexus/{db-queries => auth}/src/authn/external/cookies.rs (98%)
 rename nexus/{db-queries => auth}/src/authn/external/mod.rs (99%)
 rename nexus/{db-queries => auth}/src/authn/external/session_cookie.rs (99%)
 rename nexus/{db-queries => auth}/src/authn/external/spoof.rs (99%)
 rename nexus/{db-queries => auth}/src/authn/external/token.rs (100%)
 rename nexus/{db-queries => auth}/src/authn/mod.rs (94%)
 rename nexus/{db-queries => auth}/src/authn/saga.rs (100%)
 rename nexus/{db-queries => auth}/src/authn/silos.rs (86%)
 rename nexus/{db-queries => auth}/src/authz/actor.rs (100%)
 rename nexus/{db-queries => auth}/src/authz/api_resources.rs (83%)
 rename nexus/{db-queries => auth}/src/authz/context.rs (80%)
 rename nexus/{db-queries => auth}/src/authz/mod.rs (98%)
 rename nexus/{db-queries => auth}/src/authz/omicron.polar (100%)
 rename nexus/{db-queries => auth}/src/authz/oso_generic.rs (96%)
 rename nexus/{db-queries => auth}/src/authz/roles.rs (96%)
 rename nexus/{db-queries => auth}/src/context.rs (92%)
 create mode 100644 nexus/auth/src/lib.rs
 create mode 100644 nexus/auth/src/storage.rs
 create mode 100644 nexus/db-fixed-data/Cargo.toml
 create mode 100644 nexus/db-fixed-data/build.rs
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/allow_list.rs (100%)
 rename nexus/{db-queries/src/db/fixed_data/mod.rs => db-fixed-data/src/lib.rs} (100%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/project.rs (79%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/role_assignment.rs (97%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/role_builtin.rs (99%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/silo.rs (91%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/silo_user.rs (66%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/user_builtin.rs (100%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc.rs (91%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc_firewall_rule.rs (100%)
 rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc_subnet.rs (98%)
 create mode 100644 nexus/db-queries/src/db/datastore/auth.rs
 rename nexus/db-queries/src/{authz => }/policy_test/coverage.rs (97%)
 rename nexus/db-queries/src/{authz => }/policy_test/mod.rs (97%)
 rename nexus/db-queries/src/{authz => }/policy_test/resource_builder.rs (74%)
 rename nexus/db-queries/src/{authz => }/policy_test/resources.rs (99%)

diff --git a/Cargo.lock b/Cargo.lock
index 9072aff98c..4f4fa019c1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4473,6 +4473,44 @@ dependencies = [
  "rustc_version 0.1.7",
 ]
 
+[[package]]
+name = "nexus-auth"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "authz-macros",
+ "base64 0.22.1",
+ "chrono",
+ "cookie 0.18.1",
+ "dropshot",
+ "futures",
+ "headers",
+ "http 0.2.12",
+ "hyper 0.14.28",
+ "newtype_derive",
+ "nexus-db-fixed-data",
+ "nexus-db-model",
+ "nexus-types",
+ "omicron-common",
+ "omicron-rpaths",
+ "omicron-test-utils",
+ "omicron-uuid-kinds",
+ "omicron-workspace-hack",
+ "once_cell",
+ "openssl",
+ "oso",
+ "pq-sys",
+ "samael",
+ "serde",
+ "serde_urlencoded",
+ "slog",
+ "strum",
+ "thiserror",
+ "tokio",
+ "uuid",
+]
+
 [[package]]
 name = "nexus-client"
 version = "0.1.0"
@@ -4515,6 +4553,21 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "nexus-db-fixed-data"
+version = "0.1.0"
+dependencies = [
+ "nexus-db-model",
+ "nexus-types",
+ "omicron-common",
+ "omicron-rpaths",
+ "omicron-workspace-hack",
+ "once_cell",
+ "pq-sys",
+ "strum",
+ "uuid",
+]
+
 [[package]]
 name = "nexus-db-model"
 version = "0.1.0"
@@ -4568,14 +4621,11 @@ dependencies = [
  "assert_matches",
  "async-bb8-diesel",
  "async-trait",
- "authz-macros",
- "base64 0.22.1",
  "bb8",
  "camino",
  "camino-tempfile",
  "chrono",
  "const_format",
- "cookie 0.18.1",
  "db-macros",
  "diesel",
  "diesel-dtrace",
@@ -4583,17 +4633,15 @@ dependencies = [
  "expectorate",
  "futures",
  "gateway-client",
- "headers",
- "http 0.2.12",
- "hyper 0.14.28",
  "hyper-rustls 0.26.0",
  "illumos-utils",
  "internal-dns",
  "ipnetwork",
  "itertools 0.12.1",
  "macaddr",
- "newtype_derive",
+ "nexus-auth",
  "nexus-config",
+ "nexus-db-fixed-data",
  "nexus-db-model",
  "nexus-inventory",
  "nexus-reconfigurator-planning",
@@ -4608,7 +4656,6 @@ dependencies = [
  "omicron-workspace-hack",
  "once_cell",
  "openapiv3",
- "openssl",
  "oso",
  "oximeter",
  "oxnet",
@@ -4623,12 +4670,10 @@ dependencies = [
  "ref-cast",
  "regex",
  "rustls 0.22.4",
- "samael",
  "schemars",
  "semver 1.0.23",
  "serde",
  "serde_json",
- "serde_urlencoded",
  "serde_with",
  "sled-agent-client",
  "slog",
@@ -5437,6 +5482,7 @@ dependencies = [
  "itertools 0.12.1",
  "macaddr",
  "mg-admin-client",
+ "nexus-auth",
  "nexus-client",
  "nexus-config",
  "nexus-db-model",
@@ -5921,6 +5967,7 @@ dependencies = [
  "trust-dns-proto",
  "unicode-bidi",
  "unicode-normalization",
+ "unicode-xid",
  "usdt 0.5.0",
  "usdt-impl 0.5.0",
  "uuid",
diff --git a/Cargo.toml b/Cargo.toml
index 5c0433a662..4eb76f5859 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,6 +39,8 @@ members = [
     "nexus",
     "nexus-config",
     "nexus/authz-macros",
+    "nexus/auth",
+    "nexus/db-fixed-data",
     "nexus/db-macros",
     "nexus/db-model",
     "nexus/db-queries",
@@ -123,9 +125,11 @@ default-members = [
     "nexus",
     "nexus-config",
     "nexus/authz-macros",
+    "nexus/auth",
     "nexus/macros-common",
     "nexus/metrics-producer-gc",
     "nexus/networking",
+    "nexus/db-fixed-data",
     "nexus/db-macros",
     "nexus/db-model",
     "nexus/db-queries",
@@ -317,8 +321,10 @@ newtype_derive = "0.1.6"
 mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" }
 ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" }
 multimap = "0.10.0"
+nexus-auth = { path = "nexus/auth" }
 nexus-client = { path = "clients/nexus-client" }
 nexus-config = { path = "nexus-config" }
+nexus-db-fixed-data = { path = "nexus/db-fixed-data" }
 nexus-db-model = { path = "nexus/db-model" }
 nexus-db-queries = { path = "nexus/db-queries" }
 nexus-defaults = { path = "nexus/defaults" }
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index 0b0bd097bc..58a1e824cb 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -86,6 +86,7 @@ tough.workspace = true
 trust-dns-resolver.workspace = true
 uuid.workspace = true
 
+nexus-auth.workspace = true
 nexus-defaults.workspace = true
 nexus-db-model.workspace = true
 nexus-db-queries.workspace = true
diff --git a/nexus/auth/Cargo.toml b/nexus/auth/Cargo.toml
new file mode 100644
index 0000000000..1a926f1789
--- /dev/null
+++ b/nexus/auth/Cargo.toml
@@ -0,0 +1,48 @@
+[package]
+name = "nexus-auth"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[build-dependencies]
+omicron-rpaths.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+async-trait.workspace = true
+base64.workspace = true
+chrono.workspace = true
+cookie.workspace = true
+dropshot.workspace = true
+futures.workspace = true
+headers.workspace = true
+http.workspace = true
+hyper.workspace = true
+newtype_derive.workspace = true
+# See omicron-rpaths for more about the "pq-sys" dependency.
+pq-sys = "*"
+once_cell.workspace = true
+openssl.workspace = true
+oso.workspace = true
+samael.workspace = true
+serde.workspace = true
+serde_urlencoded.workspace = true
+slog.workspace = true
+strum.workspace = true
+thiserror.workspace = true
+tokio = { workspace = true, features = ["full"] }
+uuid.workspace = true
+
+authz-macros.workspace = true
+nexus-db-fixed-data.workspace = true
+nexus-db-model.workspace = true
+nexus-types.workspace = true
+omicron-common.workspace = true
+omicron-uuid-kinds.workspace = true
+omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+omicron-test-utils.workspace = true
diff --git a/nexus/auth/build.rs b/nexus/auth/build.rs
new file mode 100644
index 0000000000..1ba9acd41c
--- /dev/null
+++ b/nexus/auth/build.rs
@@ -0,0 +1,10 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// See omicron-rpaths for documentation.
+// NOTE: This file MUST be kept in sync with the other build.rs files in this
+// repository.
+fn main() {
+    omicron_rpaths::configure_default_omicron_rpaths();
+}
diff --git a/nexus/db-queries/src/authn/external/cookies.rs b/nexus/auth/src/authn/external/cookies.rs
similarity index 98%
rename from nexus/db-queries/src/authn/external/cookies.rs
rename to nexus/auth/src/authn/external/cookies.rs
index e3ad2e3264..35e697475b 100644
--- a/nexus/db-queries/src/authn/external/cookies.rs
+++ b/nexus/auth/src/authn/external/cookies.rs
@@ -9,6 +9,8 @@ use dropshot::{
     ApiEndpointBodyContentType, ExtensionMode, ExtractorMetadata, HttpError,
     RequestContext, ServerContext, SharedExtractor,
 };
+use newtype_derive::NewtypeDeref;
+use newtype_derive::NewtypeFrom;
 
 pub fn parse_cookies(
     headers: &http::HeaderMap<http::HeaderValue>,
diff --git a/nexus/db-queries/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs
similarity index 99%
rename from nexus/db-queries/src/authn/external/mod.rs
rename to nexus/auth/src/authn/external/mod.rs
index 623544d38c..ccb7218285 100644
--- a/nexus/db-queries/src/authn/external/mod.rs
+++ b/nexus/auth/src/authn/external/mod.rs
@@ -9,6 +9,7 @@ use super::SiloAuthnPolicy;
 use crate::authn;
 use async_trait::async_trait;
 use authn::Reason;
+use slog::trace;
 use std::borrow::Borrow;
 use uuid::Uuid;
 
diff --git a/nexus/db-queries/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs
similarity index 99%
rename from nexus/db-queries/src/authn/external/session_cookie.rs
rename to nexus/auth/src/authn/external/session_cookie.rs
index 74faafef9b..7811bf2826 100644
--- a/nexus/db-queries/src/authn/external/session_cookie.rs
+++ b/nexus/auth/src/authn/external/session_cookie.rs
@@ -13,6 +13,7 @@ use async_trait::async_trait;
 use chrono::{DateTime, Duration, Utc};
 use dropshot::HttpError;
 use http::HeaderValue;
+use slog::debug;
 use uuid::Uuid;
 
 // many parts of the implementation will reference this OWASP guide
diff --git a/nexus/db-queries/src/authn/external/spoof.rs b/nexus/auth/src/authn/external/spoof.rs
similarity index 99%
rename from nexus/db-queries/src/authn/external/spoof.rs
rename to nexus/auth/src/authn/external/spoof.rs
index 9b5ed94bde..326d529431 100644
--- a/nexus/db-queries/src/authn/external/spoof.rs
+++ b/nexus/auth/src/authn/external/spoof.rs
@@ -17,6 +17,7 @@ use async_trait::async_trait;
 use headers::authorization::{Authorization, Bearer};
 use headers::HeaderMapExt;
 use once_cell::sync::Lazy;
+use slog::debug;
 use uuid::Uuid;
 
 // This scheme is intended for demos, development, and testing until we have a
diff --git a/nexus/db-queries/src/authn/external/token.rs b/nexus/auth/src/authn/external/token.rs
similarity index 100%
rename from nexus/db-queries/src/authn/external/token.rs
rename to nexus/auth/src/authn/external/token.rs
diff --git a/nexus/db-queries/src/authn/mod.rs b/nexus/auth/src/authn/mod.rs
similarity index 94%
rename from nexus/db-queries/src/authn/mod.rs
rename to nexus/auth/src/authn/mod.rs
index 305c359820..08b27b9773 100644
--- a/nexus/db-queries/src/authn/mod.rs
+++ b/nexus/auth/src/authn/mod.rs
@@ -28,22 +28,21 @@ pub mod external;
 pub mod saga;
 pub mod silos;
 
-pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED;
-pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED;
-pub use crate::db::fixed_data::user_builtin::USER_DB_INIT;
-pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN;
-pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API;
-pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_READ;
-pub use crate::db::fixed_data::user_builtin::USER_SAGA_RECOVERY;
-pub use crate::db::fixed_data::user_builtin::USER_SERVICE_BALANCER;
-use crate::db::model::ConsoleSession;
+pub use nexus_db_fixed_data::silo_user::USER_TEST_PRIVILEGED;
+pub use nexus_db_fixed_data::silo_user::USER_TEST_UNPRIVILEGED;
+pub use nexus_db_fixed_data::user_builtin::USER_DB_INIT;
+pub use nexus_db_fixed_data::user_builtin::USER_EXTERNAL_AUTHN;
+pub use nexus_db_fixed_data::user_builtin::USER_INTERNAL_API;
+pub use nexus_db_fixed_data::user_builtin::USER_INTERNAL_READ;
+pub use nexus_db_fixed_data::user_builtin::USER_SAGA_RECOVERY;
+pub use nexus_db_fixed_data::user_builtin::USER_SERVICE_BALANCER;
 
 use crate::authz;
-use crate::db;
-use crate::db::fixed_data::silo::DEFAULT_SILO;
-use crate::db::identity::Asset;
+use newtype_derive::NewtypeDisplay;
+use nexus_db_fixed_data::silo::DEFAULT_SILO;
 use nexus_types::external_api::shared::FleetRole;
 use nexus_types::external_api::shared::SiloRole;
+use nexus_types::identity::Asset;
 use omicron_common::api::external::LookupType;
 use serde::Deserialize;
 use serde::Serialize;
@@ -254,7 +253,6 @@ pub struct SiloAuthnPolicy {
 }
 
 impl SiloAuthnPolicy {
-    #[cfg(test)]
     pub fn new(
         mapped_fleet_roles: BTreeMap<SiloRole, BTreeSet<FleetRole>>,
     ) -> SiloAuthnPolicy {
@@ -290,8 +288,8 @@ mod test {
     use super::USER_SERVICE_BALANCER;
     use super::USER_TEST_PRIVILEGED;
     use super::USER_TEST_UNPRIVILEGED;
-    use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN;
-    use crate::db::identity::Asset;
+    use nexus_db_fixed_data::user_builtin::USER_EXTERNAL_AUTHN;
+    use nexus_types::identity::Asset;
 
     #[test]
     fn test_internal_users() {
@@ -386,11 +384,13 @@ impl Actor {
     }
 }
 
-impl From<&Actor> for db::model::IdentityType {
-    fn from(actor: &Actor) -> db::model::IdentityType {
+impl From<&Actor> for nexus_db_model::IdentityType {
+    fn from(actor: &Actor) -> nexus_db_model::IdentityType {
         match actor {
-            Actor::UserBuiltin { .. } => db::model::IdentityType::UserBuiltin,
-            Actor::SiloUser { .. } => db::model::IdentityType::SiloUser,
+            Actor::UserBuiltin { .. } => {
+                nexus_db_model::IdentityType::UserBuiltin
+            }
+            Actor::SiloUser { .. } => nexus_db_model::IdentityType::SiloUser,
         }
     }
 }
@@ -421,7 +421,7 @@ impl std::fmt::Debug for Actor {
 /// A console session with the silo id of the authenticated user
 #[derive(Clone, Debug)]
 pub struct ConsoleSessionWithSiloId {
-    pub console_session: ConsoleSession,
+    pub console_session: nexus_db_model::ConsoleSession,
     pub silo_id: Uuid,
 }
 
diff --git a/nexus/db-queries/src/authn/saga.rs b/nexus/auth/src/authn/saga.rs
similarity index 100%
rename from nexus/db-queries/src/authn/saga.rs
rename to nexus/auth/src/authn/saga.rs
diff --git a/nexus/db-queries/src/authn/silos.rs b/nexus/auth/src/authn/silos.rs
similarity index 86%
rename from nexus/db-queries/src/authn/silos.rs
rename to nexus/auth/src/authn/silos.rs
index fc5068fc3c..40b6346fa0 100644
--- a/nexus/db-queries/src/authn/silos.rs
+++ b/nexus/auth/src/authn/silos.rs
@@ -4,12 +4,6 @@
 
 //! Silo related authentication types and functions
 
-use crate::authz;
-use crate::context::OpContext;
-use crate::db::lookup::LookupPath;
-use crate::db::{model, DataStore};
-use omicron_common::api::external::LookupResult;
-
 use anyhow::{anyhow, Result};
 use base64::Engine;
 use dropshot::HttpError;
@@ -36,10 +30,10 @@ pub struct SamlIdentityProvider {
     pub group_attribute_name: Option<String>,
 }
 
-impl TryFrom<model::SamlIdentityProvider> for SamlIdentityProvider {
+impl TryFrom<nexus_db_model::SamlIdentityProvider> for SamlIdentityProvider {
     type Error = anyhow::Error;
     fn try_from(
-        model: model::SamlIdentityProvider,
+        model: nexus_db_model::SamlIdentityProvider,
     ) -> Result<Self, Self::Error> {
         let provider = SamlIdentityProvider {
             idp_metadata_document_string: model.idp_metadata_document_string,
@@ -68,57 +62,6 @@ pub enum IdentityProviderType {
     Saml(SamlIdentityProvider),
 }
 
-impl IdentityProviderType {
-    /// First, look up the provider type, then look in for the specific
-    /// provider details.
-    pub async fn lookup(
-        datastore: &DataStore,
-        opctx: &OpContext,
-        silo_name: &model::Name,
-        provider_name: &model::Name,
-    ) -> LookupResult<(authz::Silo, model::Silo, Self)> {
-        let (authz_silo, db_silo) = LookupPath::new(opctx, datastore)
-            .silo_name(silo_name)
-            .fetch()
-            .await?;
-
-        let (.., identity_provider) = LookupPath::new(opctx, datastore)
-            .silo_name(silo_name)
-            .identity_provider_name(provider_name)
-            .fetch()
-            .await?;
-
-        match identity_provider.provider_type {
-            model::IdentityProviderType::Saml => {
-                let (.., saml_identity_provider) =
-                    LookupPath::new(opctx, datastore)
-                        .silo_name(silo_name)
-                        .saml_identity_provider_name(provider_name)
-                        .fetch()
-                        .await?;
-
-                let saml_identity_provider = IdentityProviderType::Saml(
-                    saml_identity_provider.try_into()
-                        .map_err(|e: anyhow::Error|
-                            // If an error is encountered converting from the
-                            // model to the authn type here, this is a server
-                            // error: it was validated before it went into the
-                            // DB.
-                            omicron_common::api::external::Error::internal_error(
-                                &format!(
-                                    "saml_identity_provider.try_into() failed! {}",
-                                    &e.to_string()
-                                )
-                            )
-                        )?
-                    );
-
-                Ok((authz_silo, db_silo, saml_identity_provider))
-            }
-        }
-    }
-}
-
 impl SamlIdentityProvider {
     pub fn sign_in_url(&self, relay_state: Option<String>) -> Result<String> {
         let idp_metadata: EntityDescriptor =
diff --git a/nexus/db-queries/src/authz/actor.rs b/nexus/auth/src/authz/actor.rs
similarity index 100%
rename from nexus/db-queries/src/authz/actor.rs
rename to nexus/auth/src/authz/actor.rs
diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/auth/src/authz/api_resources.rs
similarity index 83%
rename from nexus/db-queries/src/authz/api_resources.rs
rename to nexus/auth/src/authz/api_resources.rs
index 69b883a8cf..98a24b68b5 100644
--- a/nexus/db-queries/src/authz/api_resources.rs
+++ b/nexus/auth/src/authz/api_resources.rs
@@ -34,13 +34,11 @@ use super::Action;
 use super::{actor::AuthenticatedActor, Authz};
 use crate::authn;
 use crate::context::OpContext;
-use crate::db;
-use crate::db::fixed_data::FLEET_ID;
-use crate::db::model::{ArtifactId, SemverVersion};
-use crate::db::DataStore;
 use authz_macros::authz_resource;
 use futures::future::BoxFuture;
 use futures::FutureExt;
+use nexus_db_fixed_data::FLEET_ID;
+use nexus_db_model::{ArtifactId, SemverVersion};
 use nexus_types::external_api::shared::{FleetRole, ProjectRole, SiloRole};
 use omicron_common::api::external::{Error, LookupType, ResourceType};
 use once_cell::sync::Lazy;
@@ -103,27 +101,21 @@ pub trait ApiResourceWithRoles: ApiResource {
 pub trait ApiResourceWithRolesType: ApiResourceWithRoles {
     type AllowedRoles: serde::Serialize
         + serde::de::DeserializeOwned
-        + db::model::DatabaseString
+        + nexus_db_model::DatabaseString
         + Clone;
 }
 
-impl<T: ApiResource + oso::PolarClass + Clone> AuthorizedResource for T {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
-        load_roles_for_resource_tree(self, opctx, datastore, authn, roleset)
-            .boxed()
+impl<T> AuthorizedResource for T
+where
+    T: ApiResource + oso::PolarClass + Clone,
+{
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> BoxFuture<'fut, Result<(), Error>> {
+        load_roles_for_resource_tree(self, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -263,26 +255,17 @@ impl oso::PolarClass for BlueprintConfig {
 }
 
 impl AuthorizedResource for BlueprintConfig {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on the BlueprintConfig, only permissions. But we
         // still need to load the Fleet-related roles to verify that the actor
         // has the "admin" role on the Fleet (possibly conferred from a Silo
         // role).
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -323,22 +306,13 @@ impl oso::PolarClass for ConsoleSessionList {
 }
 
 impl AuthorizedResource for ConsoleSessionList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -379,22 +353,13 @@ impl oso::PolarClass for DnsConfig {
 }
 
 impl AuthorizedResource for DnsConfig {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -435,25 +400,16 @@ impl oso::PolarClass for IpPoolList {
 }
 
 impl AuthorizedResource for IpPoolList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on the IpPoolList, only permissions. But we still
         // need to load the Fleet-related roles to verify that the actor has the
         // "admin" role on the Fleet (possibly conferred from a Silo role).
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -486,25 +442,16 @@ impl oso::PolarClass for DeviceAuthRequestList {
 }
 
 impl AuthorizedResource for DeviceAuthRequestList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on the DeviceAuthRequestList, only permissions. But we
         // still need to load the Fleet-related roles to verify that the actor has the
         // "admin" role on the Fleet.
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -544,22 +491,13 @@ impl oso::PolarClass for Inventory {
 }
 
 impl AuthorizedResource for Inventory {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
-        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
-            .boxed()
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
+        load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed()
     }
 
     fn on_unauthorized(
@@ -603,23 +541,15 @@ impl oso::PolarClass for SiloCertificateList {
 }
 
 impl AuthorizedResource for SiloCertificateList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on this resource, but we still need to load the
         // Silo-related roles.
-        self.silo().load_roles(opctx, datastore, authn, roleset)
+        self.silo().load_roles(opctx, authn, roleset)
     }
 
     fn on_unauthorized(
@@ -663,23 +593,15 @@ impl oso::PolarClass for SiloIdentityProviderList {
 }
 
 impl AuthorizedResource for SiloIdentityProviderList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on this resource, but we still need to load the
         // Silo-related roles.
-        self.silo().load_roles(opctx, datastore, authn, roleset)
+        self.silo().load_roles(opctx, authn, roleset)
     }
 
     fn on_unauthorized(
@@ -720,23 +642,15 @@ impl oso::PolarClass for SiloUserList {
 }
 
 impl AuthorizedResource for SiloUserList {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> futures::future::BoxFuture<'fut, Result<(), Error>> {
         // There are no roles on this resource, but we still need to load the
         // Silo-related roles.
-        self.silo().load_roles(opctx, datastore, authn, roleset)
+        self.silo().load_roles(opctx, authn, roleset)
     }
 
     fn on_unauthorized(
diff --git a/nexus/db-queries/src/authz/context.rs b/nexus/auth/src/authz/context.rs
similarity index 80%
rename from nexus/db-queries/src/authz/context.rs
rename to nexus/auth/src/authz/context.rs
index 0d6f2a73ac..bd375321e3 100644
--- a/nexus/db-queries/src/authz/context.rs
+++ b/nexus/auth/src/authz/context.rs
@@ -10,12 +10,13 @@ use crate::authn;
 use crate::authz::oso_generic;
 use crate::authz::Action;
 use crate::context::OpContext;
-use crate::db::DataStore;
+use crate::storage::Storage;
 use futures::future::BoxFuture;
 use omicron_common::api::external::Error;
 use omicron_common::bail_unless;
 use oso::Oso;
 use oso::OsoError;
+use slog::debug;
 use std::collections::BTreeSet;
 use std::sync::Arc;
 
@@ -51,7 +52,6 @@ impl Authz {
         self.oso.is_allowed(actor.clone(), action, resource.clone())
     }
 
-    #[cfg(test)]
     pub fn into_class_names(self) -> BTreeSet<String> {
         self.class_names
     }
@@ -66,18 +66,22 @@ impl Authz {
 pub struct Context {
     authn: Arc<authn::Context>,
     authz: Arc<Authz>,
-    datastore: Arc<DataStore>,
+    datastore: Arc<dyn Storage>,
 }
 
 impl Context {
     pub fn new(
         authn: Arc<authn::Context>,
         authz: Arc<Authz>,
-        datastore: Arc<DataStore>,
+        datastore: Arc<dyn Storage>,
     ) -> Context {
         Context { authn, authz, datastore }
     }
 
+    pub(crate) fn datastore(&self) -> &Arc<dyn Storage> {
+        &self.datastore
+    }
+
     /// Check whether the actor performing this request is authorized for
     /// `action` on `resource`.
     pub async fn authorize<Resource>(
@@ -111,9 +115,7 @@ impl Context {
         );
 
         let mut roles = RoleSet::new();
-        resource
-            .load_roles(opctx, &self.datastore, &self.authn, &mut roles)
-            .await?;
+        resource.load_roles(opctx, &self.authn, &mut roles).await?;
         debug!(opctx.log, "roles"; "roles" => ?roles);
         let actor = AnyActor::new(&self.authn, roles);
         let is_authn = self.authn.actor().is_some();
@@ -162,19 +164,12 @@ pub trait AuthorizedResource: oso::ToPolar + Send + Sync + 'static {
     /// That's how this works for most resources.  There are other kinds of
     /// resources (like the Database itself) that aren't stored in the database
     /// and for which a different mechanism might be used.
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        opctx: &'b OpContext,
-        datastore: &'c DataStore,
-        authn: &'d authn::Context,
-        roleset: &'e mut RoleSet,
-    ) -> BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f;
+    fn load_roles<'fut>(
+        &'fut self,
+        opctx: &'fut OpContext,
+        authn: &'fut authn::Context,
+        roleset: &'fut mut RoleSet,
+    ) -> BoxFuture<'fut, Result<(), Error>>;
 
     /// Invoked on authz failure to determine the final authz result
     ///
@@ -196,17 +191,45 @@ pub trait AuthorizedResource: oso::ToPolar + Send + Sync + 'static {
 mod test {
     use crate::authn;
     use crate::authz::Action;
+    use crate::authz::AnyActor;
     use crate::authz::Authz;
     use crate::authz::Context;
-    use crate::db::DataStore;
-    use nexus_test_utils::db::test_setup_database;
+    use crate::authz::RoleSet;
+    use crate::context::OpContext;
+    use nexus_db_model::IdentityType;
+    use nexus_db_model::RoleAssignment;
+    use omicron_common::api::external::Error;
+    use omicron_common::api::external::ResourceType;
     use omicron_test_utils::dev;
     use std::sync::Arc;
+    use uuid::Uuid;
+
+    struct FakeStorage {}
+
+    impl FakeStorage {
+        fn new() -> Arc<dyn crate::storage::Storage> {
+            Arc::new(Self {})
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl crate::storage::Storage for FakeStorage {
+        async fn role_asgn_list_for(
+            &self,
+            _opctx: &OpContext,
+            _identity_type: IdentityType,
+            _identity_id: Uuid,
+            _resource_type: ResourceType,
+            _resource_id: Uuid,
+        ) -> Result<Vec<RoleAssignment>, Error> {
+            unimplemented!("This test is not expected to access the database");
+        }
+    }
 
     fn authz_context_for_actor(
         log: &slog::Logger,
         authn: authn::Context,
-        datastore: Arc<DataStore>,
+        datastore: Arc<dyn crate::storage::Storage>,
     ) -> Context {
         let authz = Authz::new(log);
         Context::new(Arc::new(authn), Arc::new(authz), datastore)
@@ -215,34 +238,26 @@ mod test {
     #[tokio::test]
     async fn test_unregistered_resource() {
         let logctx = dev::test_setup_log("test_unregistered_resource");
-        let mut db = test_setup_database(&logctx.log).await;
-        let (opctx, datastore) =
-            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
-                .await;
+        let datastore = FakeStorage::new();
+        let opctx = OpContext::for_background(
+            logctx.log.new(o!()),
+            Arc::new(Authz::new(&logctx.log)),
+            authn::Context::internal_db_init(),
+            Arc::clone(&datastore) as Arc<dyn crate::storage::Storage>,
+        );
 
         // Define a resource that we "forget" to register with Oso.
-        use super::AuthorizedResource;
-        use crate::authz::actor::AnyActor;
-        use crate::authz::roles::RoleSet;
-        use crate::context::OpContext;
-        use omicron_common::api::external::Error;
+        use crate::authz::AuthorizedResource;
         use oso::PolarClass;
         #[derive(Clone, PolarClass)]
         struct UnregisteredResource;
         impl AuthorizedResource for UnregisteredResource {
-            fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-                &'a self,
-                _: &'b OpContext,
-                _: &'c DataStore,
-                _: &'d authn::Context,
-                _: &'e mut RoleSet,
-            ) -> futures::future::BoxFuture<'f, Result<(), Error>>
-            where
-                'a: 'f,
-                'b: 'f,
-                'c: 'f,
-                'd: 'f,
-                'e: 'f,
+            fn load_roles<'fut>(
+                &'fut self,
+                _: &'fut OpContext,
+                _: &'fut authn::Context,
+                _: &'fut mut RoleSet,
+            ) -> futures::future::BoxFuture<'fut, Result<(), Error>>
             {
                 // authorize() shouldn't get far enough to call this.
                 unimplemented!();
@@ -270,7 +285,7 @@ mod test {
         let authz_privileged = authz_context_for_actor(
             &logctx.log,
             authn::Context::privileged_test_user(),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn crate::storage::Storage>,
         );
         let error = authz_privileged
             .authorize(&opctx, Action::Read, unregistered_resource)
@@ -281,7 +296,6 @@ mod test {
         }) if internal_message == "attempted authz check \
             on unregistered resource: \"UnregisteredResource\""));
 
-        db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
 }
diff --git a/nexus/db-queries/src/authz/mod.rs b/nexus/auth/src/authz/mod.rs
similarity index 98%
rename from nexus/db-queries/src/authz/mod.rs
rename to nexus/auth/src/authz/mod.rs
index 6b7dab7208..1c666d2296 100644
--- a/nexus/db-queries/src/authz/mod.rs
+++ b/nexus/auth/src/authz/mod.rs
@@ -168,6 +168,8 @@
 //! allowed.  Otherwise, it's not.
 
 mod actor;
+pub use actor::AnyActor;
+pub use actor::AuthenticatedActor;
 
 mod api_resources;
 pub use api_resources::*;
@@ -179,9 +181,8 @@ pub use context::Context;
 
 mod oso_generic;
 pub use oso_generic::Action;
+pub use oso_generic::Database;
 pub use oso_generic::DATABASE;
 
 mod roles;
-
-#[cfg(test)]
-mod policy_test;
+pub use roles::RoleSet;
diff --git a/nexus/db-queries/src/authz/omicron.polar b/nexus/auth/src/authz/omicron.polar
similarity index 100%
rename from nexus/db-queries/src/authz/omicron.polar
rename to nexus/auth/src/authz/omicron.polar
diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/auth/src/authz/oso_generic.rs
similarity index 96%
rename from nexus/db-queries/src/authz/oso_generic.rs
rename to nexus/auth/src/authz/oso_generic.rs
index dd646a1c98..383a06e985 100644
--- a/nexus/db-queries/src/authz/oso_generic.rs
+++ b/nexus/auth/src/authz/oso_generic.rs
@@ -12,7 +12,6 @@ use super::roles::RoleSet;
 use super::Authz;
 use crate::authn;
 use crate::context::OpContext;
-use crate::db::DataStore;
 use anyhow::ensure;
 use anyhow::Context;
 use futures::future::BoxFuture;
@@ -20,6 +19,7 @@ use futures::FutureExt;
 use omicron_common::api::external::Error;
 use oso::Oso;
 use oso::PolarClass;
+use slog::info;
 use std::collections::BTreeSet;
 use std::fmt;
 
@@ -172,8 +172,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result<OsoInit, anyhow::Error> {
 ///
 /// There's currently just one enum of Actions for all of Omicron.  We expect
 /// most objects to support mostly the same set of actions.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-#[cfg_attr(test, derive(strum::EnumIter))]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, strum::EnumIter)]
 pub enum Action {
     Query, // only used for `Database`
     Read,
@@ -267,20 +266,12 @@ impl oso::PolarClass for Database {
 }
 
 impl AuthorizedResource for Database {
-    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
-        &'a self,
-        _: &'b OpContext,
-        _: &'c DataStore,
-        _: &'d authn::Context,
-        _: &'e mut RoleSet,
-    ) -> BoxFuture<'f, Result<(), Error>>
-    where
-        'a: 'f,
-        'b: 'f,
-        'c: 'f,
-        'd: 'f,
-        'e: 'f,
-    {
+    fn load_roles<'fut>(
+        &'fut self,
+        _: &'fut OpContext,
+        _: &'fut authn::Context,
+        _: &'fut mut RoleSet,
+    ) -> BoxFuture<'fut, Result<(), Error>> {
         // We don't use (database) roles to grant access to the database.  The
         // role assignment is hardcoded for all authenticated users.  See the
         // "has_role" Polar method above.
diff --git a/nexus/db-queries/src/authz/roles.rs b/nexus/auth/src/authz/roles.rs
similarity index 96%
rename from nexus/db-queries/src/authz/roles.rs
rename to nexus/auth/src/authz/roles.rs
index 11b3d482d1..0716e05bc7 100644
--- a/nexus/db-queries/src/authz/roles.rs
+++ b/nexus/auth/src/authz/roles.rs
@@ -37,9 +37,9 @@
 use super::api_resources::ApiResource;
 use crate::authn;
 use crate::context::OpContext;
-use crate::db::DataStore;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::ResourceType;
+use slog::trace;
 use std::collections::BTreeSet;
 use uuid::Uuid;
 
@@ -87,7 +87,6 @@ impl RoleSet {
 pub async fn load_roles_for_resource_tree<R>(
     resource: &R,
     opctx: &OpContext,
-    datastore: &DataStore,
     authn: &authn::Context,
     roleset: &mut RoleSet,
 ) -> Result<(), Error>
@@ -100,7 +99,6 @@ where
         let resource_id = with_roles.resource_id();
         load_directly_attached_roles(
             opctx,
-            datastore,
             authn,
             resource_type,
             resource_id,
@@ -115,7 +113,6 @@ where
         {
             load_directly_attached_roles(
                 opctx,
-                datastore,
                 authn,
                 resource_type,
                 resource_id,
@@ -135,7 +132,7 @@ where
     // it's clearer to just call this "parent" than
     // "related_resources_whose_roles_might_grant_access_to_this".)
     if let Some(parent) = resource.parent() {
-        parent.load_roles(opctx, datastore, authn, roleset).await?;
+        parent.load_roles(opctx, authn, roleset).await?;
     }
 
     Ok(())
@@ -143,7 +140,6 @@ where
 
 async fn load_directly_attached_roles(
     opctx: &OpContext,
-    datastore: &DataStore,
     authn: &authn::Context,
     resource_type: ResourceType,
     resource_id: Uuid,
@@ -159,7 +155,8 @@ async fn load_directly_attached_roles(
             "resource_id" => resource_id.to_string(),
         );
 
-        let roles = datastore
+        let roles = opctx
+            .datastore()
             .role_asgn_list_for(
                 opctx,
                 actor.into(),
diff --git a/nexus/db-queries/src/context.rs b/nexus/auth/src/context.rs
similarity index 92%
rename from nexus/db-queries/src/context.rs
rename to nexus/auth/src/context.rs
index dfd1fe4322..0aac0900c5 100644
--- a/nexus/db-queries/src/context.rs
+++ b/nexus/auth/src/context.rs
@@ -8,9 +8,12 @@ use super::authz;
 use crate::authn::external::session_cookie::Session;
 use crate::authn::ConsoleSessionWithSiloId;
 use crate::authz::AuthorizedResource;
-use crate::db::DataStore;
+use crate::storage::Storage;
 use chrono::{DateTime, Utc};
 use omicron_common::api::external::Error;
+use slog::debug;
+use slog::o;
+use slog::trace;
 use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::sync::Arc;
@@ -111,6 +114,10 @@ impl OpContext {
         })
     }
 
+    pub(crate) fn datastore(&self) -> &Arc<dyn Storage> {
+        self.authz.datastore()
+    }
+
     fn log_and_metadata_for_authn(
         log: &slog::Logger,
         authn: &authn::Context,
@@ -135,8 +142,8 @@ impl OpContext {
         (log, metadata)
     }
 
-    pub fn load_request_metadata<T: Send + Sync + 'static>(
-        rqctx: &dropshot::RequestContext<T>,
+    pub fn load_request_metadata<C: Send + Sync + 'static>(
+        rqctx: &dropshot::RequestContext<C>,
         metadata: &mut BTreeMap<String, String>,
     ) {
         let request = &rqctx.request;
@@ -151,7 +158,7 @@ impl OpContext {
         log: slog::Logger,
         authz: Arc<authz::Authz>,
         authn: authn::Context,
-        datastore: Arc<DataStore>,
+        datastore: Arc<dyn Storage>,
     ) -> OpContext {
         let created_instant = Instant::now();
         let created_walltime = SystemTime::now();
@@ -180,7 +187,7 @@ impl OpContext {
     // outside public interfaces.
     pub fn for_tests(
         log: slog::Logger,
-        datastore: Arc<DataStore>,
+        datastore: Arc<dyn Storage>,
     ) -> OpContext {
         let created_instant = Instant::now();
         let created_walltime = SystemTime::now();
@@ -207,7 +214,7 @@ impl OpContext {
     /// functionally the same as one that you already have, but where you want
     /// to provide extra debugging information (in the form of key-value pairs)
     /// in both the OpContext itself and its logger.
-    pub fn child(&self, new_metadata: BTreeMap<String, String>) -> OpContext {
+    pub fn child(&self, new_metadata: BTreeMap<String, String>) -> Self {
         let created_instant = Instant::now();
         let created_walltime = SystemTime::now();
         let mut metadata = self.metadata.clone();
@@ -346,19 +353,42 @@ mod test {
     use crate::authn;
     use crate::authz;
     use authz::Action;
-    use nexus_test_utils::db::test_setup_database;
+    use nexus_db_model::IdentityType;
+    use nexus_db_model::RoleAssignment;
     use omicron_common::api::external::Error;
+    use omicron_common::api::external::ResourceType;
     use omicron_test_utils::dev;
     use std::collections::BTreeMap;
     use std::sync::Arc;
+    use uuid::Uuid;
+
+    struct FakeStorage {}
+
+    impl FakeStorage {
+        fn new() -> Arc<dyn crate::storage::Storage> {
+            Arc::new(Self {})
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl crate::storage::Storage for FakeStorage {
+        async fn role_asgn_list_for(
+            &self,
+            _opctx: &OpContext,
+            _identity_type: IdentityType,
+            _identity_id: Uuid,
+            _resource_type: ResourceType,
+            _resource_id: Uuid,
+        ) -> Result<Vec<RoleAssignment>, Error> {
+            unimplemented!("This test is not expected to access the database");
+        }
+    }
 
     #[tokio::test]
     async fn test_background_context() {
         let logctx = dev::test_setup_log("test_background_context");
-        let mut db = test_setup_database(&logctx.log).await;
-        let (_, datastore) =
-            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
-                .await;
+
+        let datastore = FakeStorage::new();
         let opctx = OpContext::for_background(
             logctx.log.new(o!()),
             Arc::new(authz::Authz::new(&logctx.log)),
@@ -381,17 +411,13 @@ mod test {
             .await
             .expect_err("expected authorization error");
         assert!(matches!(error, Error::Unauthenticated { .. }));
-        db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
 
     #[tokio::test]
     async fn test_test_context() {
         let logctx = dev::test_setup_log("test_background_context");
-        let mut db = test_setup_database(&logctx.log).await;
-        let (_, datastore) =
-            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
-                .await;
+        let datastore = FakeStorage::new();
         let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore);
 
         // Like in test_background_context(), this is essentially a test of the
@@ -403,17 +429,13 @@ mod test {
             .authorize(Action::Query, &authz::DATABASE)
             .await
             .expect("expected authorization to succeed");
-        db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
 
     #[tokio::test]
     async fn test_child_context() {
         let logctx = dev::test_setup_log("test_child_context");
-        let mut db = test_setup_database(&logctx.log).await;
-        let (_, datastore) =
-            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
-                .await;
+        let datastore = FakeStorage::new();
         let opctx = OpContext::for_background(
             logctx.log.new(o!()),
             Arc::new(authz::Authz::new(&logctx.log)),
@@ -451,7 +473,6 @@ mod test {
         assert_eq!(grandchild_opctx.metadata["one"], "seven");
         assert_eq!(grandchild_opctx.metadata["five"], "six");
 
-        db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
 }
diff --git a/nexus/auth/src/lib.rs b/nexus/auth/src/lib.rs
new file mode 100644
index 0000000000..0f0b9064b2
--- /dev/null
+++ b/nexus/auth/src/lib.rs
@@ -0,0 +1,11 @@
+pub mod authn;
+pub mod authz;
+pub mod context;
+pub mod storage;
+
+#[macro_use]
+extern crate newtype_derive;
+
+#[allow(unused_imports)]
+#[macro_use]
+extern crate slog;
diff --git a/nexus/auth/src/storage.rs b/nexus/auth/src/storage.rs
new file mode 100644
index 0000000000..c1d2fcedd8
--- /dev/null
+++ b/nexus/auth/src/storage.rs
@@ -0,0 +1,27 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Describes the dependency from the auth system on the datastore.
+//!
+//! Auth and storage are intertwined, but by isolating the storage interface
+//! that auth needs, we avoid a circular dependency on the database crate.
+
+use crate::context::OpContext;
+use nexus_db_model::IdentityType;
+use nexus_db_model::RoleAssignment;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ResourceType;
+use uuid::Uuid;
+
+#[async_trait::async_trait]
+pub trait Storage: Send + Sync {
+    async fn role_asgn_list_for(
+        &self,
+        opctx: &OpContext,
+        identity_type: IdentityType,
+        identity_id: Uuid,
+        resource_type: ResourceType,
+        resource_id: Uuid,
+    ) -> Result<Vec<RoleAssignment>, Error>;
+}
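
The `Storage` trait above is the entire surface the auth crate needs from the
database. As a minimal sketch (not part of this patch, and assuming the public
crate paths implied by the imports above), any in-memory backend can stand in
for the real `DataStore`, which is exactly what the `FakeStorage` helpers in
the unit tests do:

    use std::sync::Arc;

    use nexus_auth::context::OpContext;
    use nexus_auth::storage::Storage;
    use nexus_db_model::{IdentityType, RoleAssignment};
    use omicron_common::api::external::{Error, ResourceType};
    use uuid::Uuid;

    // Hypothetical backend that simply reports no explicit role assignments.
    struct NoRolesStorage;

    #[async_trait::async_trait]
    impl Storage for NoRolesStorage {
        async fn role_asgn_list_for(
            &self,
            _opctx: &OpContext,
            _identity_type: IdentityType,
            _identity_id: Uuid,
            _resource_type: ResourceType,
            _resource_id: Uuid,
        ) -> Result<Vec<RoleAssignment>, Error> {
            Ok(Vec::new())
        }
    }

    // The auth crate only ever holds `Arc<dyn Storage>`, so the concrete
    // backend (the real DataStore in production, a fake in tests) is
    // interchangeable.
    fn storage_for_tests() -> Arc<dyn Storage> {
        Arc::new(NoRolesStorage)
    }
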
diff --git a/nexus/db-fixed-data/Cargo.toml b/nexus/db-fixed-data/Cargo.toml
new file mode 100644
index 0000000000..486df15686
--- /dev/null
+++ b/nexus/db-fixed-data/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "nexus-db-fixed-data"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+description = "Hard-coded database data, including defaults and built-ins"
+
+[lints]
+workspace = true
+
+[build-dependencies]
+omicron-rpaths.workspace = true
+
+[dependencies]
+once_cell.workspace = true
+# See omicron-rpaths for more about the "pq-sys" dependency.
+pq-sys = "*"
+strum.workspace = true
+uuid.workspace = true
+
+nexus-db-model.workspace = true
+nexus-types.workspace = true
+omicron-common.workspace = true
+omicron-workspace-hack.workspace = true
+
diff --git a/nexus/db-fixed-data/build.rs b/nexus/db-fixed-data/build.rs
new file mode 100644
index 0000000000..1ba9acd41c
--- /dev/null
+++ b/nexus/db-fixed-data/build.rs
@@ -0,0 +1,10 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// See omicron-rpaths for documentation.
+// NOTE: This file MUST be kept in sync with the other build.rs files in this
+// repository.
+fn main() {
+    omicron_rpaths::configure_default_omicron_rpaths();
+}
diff --git a/nexus/db-queries/src/db/fixed_data/allow_list.rs b/nexus/db-fixed-data/src/allow_list.rs
similarity index 100%
rename from nexus/db-queries/src/db/fixed_data/allow_list.rs
rename to nexus/db-fixed-data/src/allow_list.rs
diff --git a/nexus/db-queries/src/db/fixed_data/mod.rs b/nexus/db-fixed-data/src/lib.rs
similarity index 100%
rename from nexus/db-queries/src/db/fixed_data/mod.rs
rename to nexus/db-fixed-data/src/lib.rs
diff --git a/nexus/db-queries/src/db/fixed_data/project.rs b/nexus/db-fixed-data/src/project.rs
similarity index 79%
rename from nexus/db-queries/src/db/fixed_data/project.rs
rename to nexus/db-fixed-data/src/project.rs
index e240900e0c..6b9f005916 100644
--- a/nexus/db-queries/src/db/fixed_data/project.rs
+++ b/nexus/db-fixed-data/src/project.rs
@@ -2,12 +2,14 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use crate::db;
-use crate::db::datastore::SERVICES_DB_NAME;
+use nexus_db_model as model;
 use nexus_types::external_api::params;
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use once_cell::sync::Lazy;
 
+/// The name of the built-in Project and VPC for Oxide services.
+pub const SERVICES_DB_NAME: &str = "oxide-services";
+
 /// UUID of built-in project for internal services on the rack.
 pub static SERVICES_PROJECT_ID: Lazy<uuid::Uuid> = Lazy::new(|| {
     "001de000-4401-4000-8000-000000000000"
@@ -16,8 +18,8 @@ pub static SERVICES_PROJECT_ID: Lazy<uuid::Uuid> = Lazy::new(|| {
 });
 
 /// Built-in Project for internal services on the rack.
-pub static SERVICES_PROJECT: Lazy<db::model::Project> = Lazy::new(|| {
-    db::model::Project::new_with_id(
+pub static SERVICES_PROJECT: Lazy<model::Project> = Lazy::new(|| {
+    model::Project::new_with_id(
         *SERVICES_PROJECT_ID,
         *super::silo::INTERNAL_SILO_ID,
         params::ProjectCreate {
diff --git a/nexus/db-queries/src/db/fixed_data/role_assignment.rs b/nexus/db-fixed-data/src/role_assignment.rs
similarity index 97%
rename from nexus/db-queries/src/db/fixed_data/role_assignment.rs
rename to nexus/db-fixed-data/src/role_assignment.rs
index d6c95d47b6..25b26786f8 100644
--- a/nexus/db-queries/src/db/fixed_data/role_assignment.rs
+++ b/nexus/db-fixed-data/src/role_assignment.rs
@@ -6,8 +6,8 @@
 use super::role_builtin;
 use super::user_builtin;
 use super::FLEET_ID;
-use crate::db::model::IdentityType;
-use crate::db::model::RoleAssignment;
+use nexus_db_model::IdentityType;
+use nexus_db_model::RoleAssignment;
 use once_cell::sync::Lazy;
 
 pub static BUILTIN_ROLE_ASSIGNMENTS: Lazy<Vec<RoleAssignment>> =
diff --git a/nexus/db-queries/src/db/fixed_data/role_builtin.rs b/nexus/db-fixed-data/src/role_builtin.rs
similarity index 99%
rename from nexus/db-queries/src/db/fixed_data/role_builtin.rs
rename to nexus/db-fixed-data/src/role_builtin.rs
index f58077fc3f..c617874e98 100644
--- a/nexus/db-queries/src/db/fixed_data/role_builtin.rs
+++ b/nexus/db-fixed-data/src/role_builtin.rs
@@ -83,7 +83,7 @@ pub static BUILTIN_ROLES: Lazy<Vec<RoleBuiltinConfig>> = Lazy::new(|| {
 #[cfg(test)]
 mod test {
     use super::BUILTIN_ROLES;
-    use crate::db::model::DatabaseString;
+    use nexus_db_model::DatabaseString;
     use nexus_types::external_api::shared::{FleetRole, ProjectRole, SiloRole};
     use omicron_common::api::external::ResourceType;
     use strum::IntoEnumIterator;
diff --git a/nexus/db-queries/src/db/fixed_data/silo.rs b/nexus/db-fixed-data/src/silo.rs
similarity index 91%
rename from nexus/db-queries/src/db/fixed_data/silo.rs
rename to nexus/db-fixed-data/src/silo.rs
index dc5f19fc2f..ebc6776923 100644
--- a/nexus/db-queries/src/db/fixed_data/silo.rs
+++ b/nexus/db-fixed-data/src/silo.rs
@@ -2,7 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use crate::db;
+use nexus_db_model as model;
 use nexus_types::external_api::{params, shared};
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use once_cell::sync::Lazy;
@@ -17,8 +17,8 @@ pub static DEFAULT_SILO_ID: Lazy<uuid::Uuid> = Lazy::new(|| {
 ///
 /// This was historically used for demos and the unit tests.  The plan is to
 /// remove it per omicron#2305.
-pub static DEFAULT_SILO: Lazy<db::model::Silo> = Lazy::new(|| {
-    db::model::Silo::new_with_id(
+pub static DEFAULT_SILO: Lazy<model::Silo> = Lazy::new(|| {
+    model::Silo::new_with_id(
         *DEFAULT_SILO_ID,
         params::SiloCreate {
             identity: IdentityMetadataCreateParams {
@@ -47,8 +47,8 @@ pub static INTERNAL_SILO_ID: Lazy<uuid::Uuid> = Lazy::new(|| {
 
 /// Built-in Silo to house internal resources. It contains no users and
 /// can't be logged into.
-pub static INTERNAL_SILO: Lazy<db::model::Silo> = Lazy::new(|| {
-    db::model::Silo::new_with_id(
+pub static INTERNAL_SILO: Lazy<model::Silo> = Lazy::new(|| {
+    model::Silo::new_with_id(
         *INTERNAL_SILO_ID,
         params::SiloCreate {
             identity: IdentityMetadataCreateParams {
diff --git a/nexus/db-queries/src/db/fixed_data/silo_user.rs b/nexus/db-fixed-data/src/silo_user.rs
similarity index 66%
rename from nexus/db-queries/src/db/fixed_data/silo_user.rs
rename to nexus/db-fixed-data/src/silo_user.rs
index eb49093152..defaa9bd52 100644
--- a/nexus/db-queries/src/db/fixed_data/silo_user.rs
+++ b/nexus/db-fixed-data/src/silo_user.rs
@@ -4,8 +4,8 @@
 //! Built-in Silo Users
 
 use super::role_builtin;
-use crate::db;
-use crate::db::identity::Asset;
+use nexus_db_model as model;
+use nexus_types::identity::Asset;
 use once_cell::sync::Lazy;
 
 /// Test user that's granted all privileges, used for automated testing and
@@ -13,9 +13,9 @@ use once_cell::sync::Lazy;
 // TODO-security Once we have a way to bootstrap the initial Silo with the
 // initial privileged user, this user should be created in the test suite,
 // not automatically at Nexus startup.  See omicron#2305.
-pub static USER_TEST_PRIVILEGED: Lazy<db::model::SiloUser> = Lazy::new(|| {
-    db::model::SiloUser::new(
-        *db::fixed_data::silo::DEFAULT_SILO_ID,
+pub static USER_TEST_PRIVILEGED: Lazy<model::SiloUser> = Lazy::new(|| {
+    model::SiloUser::new(
+        *crate::silo::DEFAULT_SILO_ID,
         // "4007" looks a bit like "root".
         "001de000-05e4-4000-8000-000000004007".parse().unwrap(),
         "privileged".into(),
@@ -23,23 +23,23 @@ pub static USER_TEST_PRIVILEGED: Lazy<db::model::SiloUser> = Lazy::new(|| {
 });
 
 /// Role assignments needed for the privileged user
-pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy<Vec<db::model::RoleAssignment>> =
+pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy<Vec<model::RoleAssignment>> =
     Lazy::new(|| {
         vec![
             // The "test-privileged" user gets the "admin" role on the sole
             // Fleet as well as the default Silo.
-            db::model::RoleAssignment::new(
-                db::model::IdentityType::SiloUser,
+            model::RoleAssignment::new(
+                model::IdentityType::SiloUser,
                 USER_TEST_PRIVILEGED.id(),
                 role_builtin::FLEET_ADMIN.resource_type,
-                *db::fixed_data::FLEET_ID,
+                *crate::FLEET_ID,
                 role_builtin::FLEET_ADMIN.role_name,
             ),
-            db::model::RoleAssignment::new(
-                db::model::IdentityType::SiloUser,
+            model::RoleAssignment::new(
+                model::IdentityType::SiloUser,
                 USER_TEST_PRIVILEGED.id(),
                 role_builtin::SILO_ADMIN.resource_type,
-                *db::fixed_data::silo::DEFAULT_SILO_ID,
+                *crate::silo::DEFAULT_SILO_ID,
                 role_builtin::SILO_ADMIN.role_name,
             ),
         ]
@@ -49,22 +49,21 @@ pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy<Vec<db::model::RoleAssignment>> =
 // TODO-security Once we have a way to bootstrap the initial Silo with the
 // initial privileged user, this user should be created in the test suite,
 // not automatically at Nexus startup.  See omicron#2305.
-pub static USER_TEST_UNPRIVILEGED: Lazy<db::model::SiloUser> =
-    Lazy::new(|| {
-        db::model::SiloUser::new(
-            *db::fixed_data::silo::DEFAULT_SILO_ID,
-            // 60001 is the decimal uid for "nobody" on Helios.
-            "001de000-05e4-4000-8000-000000060001".parse().unwrap(),
-            "unprivileged".into(),
-        )
-    });
+pub static USER_TEST_UNPRIVILEGED: Lazy<model::SiloUser> = Lazy::new(|| {
+    model::SiloUser::new(
+        *crate::silo::DEFAULT_SILO_ID,
+        // 60001 is the decimal uid for "nobody" on Helios.
+        "001de000-05e4-4000-8000-000000060001".parse().unwrap(),
+        "unprivileged".into(),
+    )
+});
 
 #[cfg(test)]
 mod test {
     use super::super::assert_valid_uuid;
     use super::USER_TEST_PRIVILEGED;
     use super::USER_TEST_UNPRIVILEGED;
-    use crate::db::identity::Asset;
+    use nexus_types::identity::Asset;
 
     #[test]
     fn test_silo_user_ids_are_valid() {
diff --git a/nexus/db-queries/src/db/fixed_data/user_builtin.rs b/nexus/db-fixed-data/src/user_builtin.rs
similarity index 100%
rename from nexus/db-queries/src/db/fixed_data/user_builtin.rs
rename to nexus/db-fixed-data/src/user_builtin.rs
diff --git a/nexus/db-queries/src/db/fixed_data/vpc.rs b/nexus/db-fixed-data/src/vpc.rs
similarity index 91%
rename from nexus/db-queries/src/db/fixed_data/vpc.rs
rename to nexus/db-fixed-data/src/vpc.rs
index c71b655ddc..25628a83b5 100644
--- a/nexus/db-queries/src/db/fixed_data/vpc.rs
+++ b/nexus/db-fixed-data/src/vpc.rs
@@ -2,8 +2,8 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use crate::db;
-use crate::db::datastore::SERVICES_DB_NAME;
+use crate::project::SERVICES_DB_NAME;
+use nexus_db_model as model;
 use nexus_types::external_api::params;
 use omicron_common::address::SERVICE_VPC_IPV6_PREFIX;
 use omicron_common::api::external::IdentityMetadataCreateParams;
@@ -31,8 +31,8 @@ pub static SERVICES_VPC_DEFAULT_ROUTE_ID: Lazy<uuid::Uuid> = Lazy::new(|| {
 });
 
 /// Built-in VPC for internal services on the rack.
-pub static SERVICES_VPC: Lazy<db::model::IncompleteVpc> = Lazy::new(|| {
-    db::model::IncompleteVpc::new(
+pub static SERVICES_VPC: Lazy<model::IncompleteVpc> = Lazy::new(|| {
+    model::IncompleteVpc::new(
         *SERVICES_VPC_ID,
         *super::project::SERVICES_PROJECT_ID,
         *SERVICES_VPC_ROUTER_ID,
diff --git a/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs b/nexus/db-fixed-data/src/vpc_firewall_rule.rs
similarity index 100%
rename from nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs
rename to nexus/db-fixed-data/src/vpc_firewall_rule.rs
diff --git a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs b/nexus/db-fixed-data/src/vpc_subnet.rs
similarity index 98%
rename from nexus/db-queries/src/db/fixed_data/vpc_subnet.rs
rename to nexus/db-fixed-data/src/vpc_subnet.rs
index c42d4121c9..622799b000 100644
--- a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs
+++ b/nexus/db-fixed-data/src/vpc_subnet.rs
@@ -2,7 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use crate::db::model::VpcSubnet;
+use nexus_db_model::VpcSubnet;
 use omicron_common::address::{
     DNS_OPTE_IPV4_SUBNET, DNS_OPTE_IPV6_SUBNET, NEXUS_OPTE_IPV4_SUBNET,
     NEXUS_OPTE_IPV6_SUBNET, NTP_OPTE_IPV4_SUBNET, NTP_OPTE_IPV6_SUBNET,
diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml
index 135f2fcdf7..cb7061f4ce 100644
--- a/nexus/db-queries/Cargo.toml
+++ b/nexus/db-queries/Cargo.toml
@@ -14,37 +14,27 @@ omicron-rpaths.workspace = true
 anyhow.workspace = true
 async-bb8-diesel.workspace = true
 async-trait.workspace = true
-base64.workspace = true
 bb8.workspace = true
 camino.workspace = true
 chrono.workspace = true
 const_format.workspace = true
-cookie.workspace = true
 diesel.workspace = true
 diesel-dtrace.workspace = true
 dropshot.workspace = true
 futures.workspace = true
-headers.workspace = true
-http.workspace = true
-hyper.workspace = true
 ipnetwork.workspace = true
 macaddr.workspace = true
-newtype_derive.workspace = true
 once_cell.workspace = true
-openssl.workspace = true
-oso.workspace = true
 oxnet.workspace = true
 paste.workspace = true
 # See omicron-rpaths for more about the "pq-sys" dependency.
 pq-sys = "*"
 rand.workspace = true
 ref-cast.workspace = true
-samael.workspace = true
 schemars.workspace = true
 semver.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-serde_urlencoded.workspace = true
 serde_with.workspace = true
 sled-agent-client.workspace = true
 slog.workspace = true
@@ -58,9 +48,10 @@ tokio = { workspace = true, features = ["full"] }
 uuid.workspace = true
 usdt.workspace = true
 
-authz-macros.workspace = true
 db-macros.workspace = true
+nexus-auth.workspace = true
 nexus-config.workspace = true
+nexus-db-fixed-data.workspace = true
 nexus-db-model.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true
@@ -91,6 +82,7 @@ nexus-test-utils.workspace = true
 omicron-sled-agent.workspace = true
 omicron-test-utils.workspace = true
 openapiv3.workspace = true
+oso.workspace = true
 pem.workspace = true
 petgraph.workspace = true
 predicates.workspace = true
diff --git a/nexus/db-queries/src/db/datastore/allow_list.rs b/nexus/db-queries/src/db/datastore/allow_list.rs
index 111ccad08f..7c1643451f 100644
--- a/nexus/db-queries/src/db/datastore/allow_list.rs
+++ b/nexus/db-queries/src/db/datastore/allow_list.rs
@@ -8,12 +8,12 @@ use crate::authz;
 use crate::context::OpContext;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::fixed_data::allow_list::USER_FACING_SERVICES_ALLOW_LIST_ID;
 use crate::db::DbConnection;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::ExpressionMethods;
 use diesel::QueryDsl;
 use diesel::SelectableHelper;
+use nexus_db_fixed_data::allow_list::USER_FACING_SERVICES_ALLOW_LIST_ID;
 use nexus_db_model::schema::allow_list;
 use nexus_db_model::AllowList;
 use omicron_common::api::external::AllowedSourceIps;
diff --git a/nexus/db-queries/src/db/datastore/auth.rs b/nexus/db-queries/src/db/datastore/auth.rs
new file mode 100644
index 0000000000..3b1d1d18e3
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/auth.rs
@@ -0,0 +1,81 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Implements the [Storage] interface for [nexus_auth] integration.
+
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+
+use async_bb8_diesel::AsyncRunQueryDsl;
+use diesel::prelude::*;
+use nexus_auth::context::OpContext;
+use nexus_auth::storage::Storage;
+use nexus_db_model::IdentityType;
+use nexus_db_model::RoleAssignment;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ResourceType;
+use uuid::Uuid;
+
+#[async_trait::async_trait]
+impl Storage for super::DataStore {
+    /// Return the built-in roles that the given built-in user has for the given
+    /// resource
+    async fn role_asgn_list_for(
+        &self,
+        opctx: &OpContext,
+        identity_type: IdentityType,
+        identity_id: Uuid,
+        resource_type: ResourceType,
+        resource_id: Uuid,
+    ) -> Result<Vec<RoleAssignment>, Error> {
+        use db::schema::role_assignment::dsl as role_dsl;
+        use db::schema::silo_group_membership::dsl as group_dsl;
+
+        // There is no resource-specific authorization check because all
+        // authenticated users need to be able to list their own roles --
+        // otherwise we can't do any authorization checks.
+        // TODO-security rethink this -- how do we know the user is looking up
+        // their own roles?  Maybe this should use an internal authz context.
+
+        // TODO-scalability TODO-security This needs to be paginated.  It's not
+        // exposed via an external API right now but someone could still put us
+        // into some hurt by assigning loads of roles to someone and having that
+        // person attempt to access anything.
+
+        let direct_roles_query = role_dsl::role_assignment
+            .filter(role_dsl::identity_type.eq(identity_type.clone()))
+            .filter(role_dsl::identity_id.eq(identity_id))
+            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
+            .filter(role_dsl::resource_id.eq(resource_id))
+            .select(RoleAssignment::as_select());
+
+        let roles_from_groups_query = role_dsl::role_assignment
+            .filter(role_dsl::identity_type.eq(IdentityType::SiloGroup))
+            .filter(
+                role_dsl::identity_id.eq_any(
+                    group_dsl::silo_group_membership
+                        .filter(group_dsl::silo_user_id.eq(identity_id))
+                        .select(group_dsl::silo_group_id),
+                ),
+            )
+            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
+            .filter(role_dsl::resource_id.eq(resource_id))
+            .select(RoleAssignment::as_select());
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+        if identity_type == IdentityType::SiloUser {
+            direct_roles_query
+                .union(roles_from_groups_query)
+                .load_async::<RoleAssignment>(&*conn)
+                .await
+                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        } else {
+            direct_roles_query
+                .load_async::<RoleAssignment>(&*conn)
+                .await
+                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        }
+    }
+}
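
With this impl in place, the role loader in `nexus-auth` never names
`DataStore` directly; it only goes through the trait object it was handed at
construction time. A hedged usage sketch follows (the function and variable
names are illustrative, not part of the patch):

    use nexus_auth::context::OpContext;
    use nexus_auth::storage::Storage;
    use nexus_db_model::IdentityType;
    use omicron_common::api::external::{Error, ResourceType};
    use uuid::Uuid;

    // Illustrative only: count the role assignments a silo user has on the
    // Fleet, going through the Storage trait object.
    async fn fleet_roles_for_silo_user(
        storage: &dyn Storage,
        opctx: &OpContext,
        silo_user_id: Uuid,
        fleet_id: Uuid,
    ) -> Result<usize, Error> {
        // For silo users, the impl above unions direct assignments with
        // assignments granted via silo group membership.
        let assignments = storage
            .role_asgn_list_for(
                opctx,
                IdentityType::SiloUser,
                silo_user_id,
                ResourceType::Fleet,
                fleet_id,
            )
            .await?;
        Ok(assignments.len())
    }
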
diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
index 177cf673e7..e7a975fa69 100644
--- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
+++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
@@ -147,8 +147,10 @@ mod test {
         let (_, datastore) =
             crate::db::datastore::test_utils::datastore_test(&logctx, &db)
                 .await;
-        let opctx =
-            OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
+        let opctx = OpContext::for_tests(
+            logctx.log.new(o!()),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
+        );
 
         let settings = datastore.cockroachdb_settings(&opctx).await.unwrap();
         // With a fresh cluster, this is the expected state
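
This is the first of several call sites spelled `as Arc<dyn
nexus_auth::storage::Storage>`. The cast is an ordinary unsized coercion from
`Arc<DataStore>` to the trait object; a self-contained sketch (stand-in types,
not the real ones) shows the two equivalent spellings:

    use std::sync::Arc;

    // Stand-ins for nexus_auth::storage::Storage and db::DataStore.
    trait Storage: Send + Sync {}
    struct FakeDataStore;
    impl Storage for FakeDataStore {}

    fn main() {
        let datastore = Arc::new(FakeDataStore);
        // Explicit cast, matching the style used throughout this patch:
        let _a = Arc::clone(&datastore) as Arc<dyn Storage>;
        // Equivalent coercion driven by a type annotation:
        let _b: Arc<dyn Storage> = datastore;
    }
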
diff --git a/nexus/db-queries/src/db/datastore/identity_provider.rs b/nexus/db-queries/src/db/datastore/identity_provider.rs
index cee577acd6..e7ab9bde16 100644
--- a/nexus/db-queries/src/db/datastore/identity_provider.rs
+++ b/nexus/db-queries/src/db/datastore/identity_provider.rs
@@ -11,18 +11,66 @@ use crate::db;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::identity::Resource;
+use crate::db::lookup::LookupPath;
+use crate::db::model;
 use crate::db::model::IdentityProvider;
 use crate::db::model::Name;
 use crate::db::pagination::paginated;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::prelude::*;
+use nexus_auth::authn::silos::IdentityProviderType;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
 use omicron_common::api::external::ResourceType;
 use ref_cast::RefCast;
 
 impl DataStore {
+    pub async fn identity_provider_lookup(
+        &self,
+        opctx: &OpContext,
+        silo_name: &model::Name,
+        provider_name: &model::Name,
+    ) -> LookupResult<(authz::Silo, model::Silo, IdentityProviderType)> {
+        let (authz_silo, db_silo) =
+            LookupPath::new(opctx, self).silo_name(silo_name).fetch().await?;
+
+        let (.., identity_provider) = LookupPath::new(opctx, self)
+            .silo_name(silo_name)
+            .identity_provider_name(provider_name)
+            .fetch()
+            .await?;
+
+        match identity_provider.provider_type {
+            model::IdentityProviderType::Saml => {
+                let (.., saml_identity_provider) = LookupPath::new(opctx, self)
+                    .silo_name(silo_name)
+                    .saml_identity_provider_name(provider_name)
+                    .fetch()
+                    .await?;
+
+                let saml_identity_provider = IdentityProviderType::Saml(
+                    saml_identity_provider.try_into()
+                        .map_err(|e: anyhow::Error|
+                            // If an error is encountered converting from the
+                            // model to the authn type here, this is a server
+                            // error: it was validated before it went into the
+                            // DB.
+                            omicron_common::api::external::Error::internal_error(
+                                &format!(
+                                    "saml_identity_provider.try_into() failed! {}",
+                                    &e.to_string()
+                                )
+                            )
+                        )?
+                    );
+
+                Ok((authz_silo, db_silo, saml_identity_provider))
+            }
+        }
+    }
+
     pub async fn identity_provider_list(
         &self,
         opctx: &OpContext,
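
The new `identity_provider_lookup` centralizes the silo-plus-provider
resolution in the datastore. A hedged caller sketch follows (crate paths for
`DataStore`, `Name`, and `OpContext` are assumed; the helper function itself
is illustrative only):

    use nexus_auth::authn::silos::IdentityProviderType;
    use nexus_auth::context::OpContext;
    use nexus_db_queries::db::model::Name;
    use nexus_db_queries::db::DataStore;
    use omicron_common::api::external::LookupResult;

    // Illustrative only: resolve a SAML provider by silo and provider name.
    async fn lookup_saml_provider(
        datastore: &DataStore,
        opctx: &OpContext,
        silo_name: &Name,
        provider_name: &Name,
    ) -> LookupResult<()> {
        let (_authz_silo, _db_silo, provider) = datastore
            .identity_provider_lookup(opctx, silo_name, provider_name)
            .await?;
        match provider {
            IdentityProviderType::Saml(_saml) => {
                // Conversion from the DB model was validated on write;
                // failures here surface as internal server errors (see above).
            }
        }
        Ok(())
    }
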
diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs
index 60fd5c9dc3..3b655e5bb9 100644
--- a/nexus/db-queries/src/db/datastore/instance.rs
+++ b/nexus/db-queries/src/db/datastore/instance.rs
@@ -783,7 +783,6 @@ impl DataStore {
 mod tests {
     use super::*;
     use crate::db::datastore::test_utils::datastore_test;
-    use crate::db::fixed_data;
     use crate::db::lookup::LookupPath;
     use nexus_db_model::Project;
     use nexus_test_utils::db::test_setup_database;
@@ -796,7 +795,7 @@ mod tests {
         datastore: &DataStore,
         opctx: &OpContext,
     ) -> authz::Instance {
-        let silo_id = *fixed_data::silo::DEFAULT_SILO_ID;
+        let silo_id = *nexus_db_fixed_data::silo::DEFAULT_SILO_ID;
         let project_id = Uuid::new_v4();
         let instance_id = Uuid::new_v4();
 
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index b90f81affb..9ec3575860 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -49,6 +49,7 @@ use uuid::Uuid;
 
 mod address_lot;
 mod allow_list;
+mod auth;
 mod bfd;
 mod bgp;
 mod bootstore;
@@ -130,9 +131,6 @@ pub const REGION_REDUNDANCY_THRESHOLD: usize = 3;
 /// The name of the built-in IP pool for Oxide services.
 pub const SERVICE_IP_POOL_NAME: &str = "oxide-service-pool";
 
-/// The name of the built-in Project and VPC for Oxide services.
-pub const SERVICES_DB_NAME: &str = "oxide-services";
-
 /// "limit" to be used in SQL queries that paginate through large result sets
 ///
 /// This value is chosen to be small enough to avoid any queries being too
@@ -385,8 +383,6 @@ mod test {
         IneligibleSledKind, IneligibleSleds,
     };
     use crate::db::explain::ExplainableAsync;
-    use crate::db::fixed_data::silo::DEFAULT_SILO;
-    use crate::db::fixed_data::silo::DEFAULT_SILO_ID;
     use crate::db::identity::Asset;
     use crate::db::lookup::LookupPath;
     use crate::db::model::{
@@ -400,6 +396,8 @@ mod test {
     use futures::stream;
     use futures::StreamExt;
     use nexus_config::RegionAllocationStrategy;
+    use nexus_db_fixed_data::silo::DEFAULT_SILO;
+    use nexus_db_fixed_data::silo::DEFAULT_SILO_ID;
     use nexus_db_model::IpAttachState;
     use nexus_db_model::{to_db_typed_uuid, Generation};
     use nexus_test_utils::db::test_setup_database;
@@ -485,7 +483,7 @@ mod test {
             logctx.log.new(o!("component" => "TestExternalAuthn")),
             Arc::new(authz::Authz::new(&logctx.log)),
             authn::Context::external_authn(),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         let token = "a_token".to_string();
@@ -587,7 +585,7 @@ mod test {
                 *DEFAULT_SILO_ID,
                 SiloAuthnPolicy::try_from(&*DEFAULT_SILO).unwrap(),
             ),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
         let delete = datastore
             .session_hard_delete(&silo_user_opctx, &authz_session)
@@ -1624,8 +1622,10 @@ mod test {
         let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
         let datastore =
             Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
-        let opctx =
-            OpContext::for_tests(logctx.log.new(o!()), datastore.clone());
+        let opctx = OpContext::for_tests(
+            logctx.log.new(o!()),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
+        );
 
         let rack_id = Uuid::new_v4();
         let addr1 = "[fd00:1de::1]:12345".parse().unwrap();
diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs
index af3f832e35..3ea2945b2f 100644
--- a/nexus/db-queries/src/db/datastore/network_interface.rs
+++ b/nexus/db-queries/src/db/datastore/network_interface.rs
@@ -854,8 +854,8 @@ impl DataStore {
 mod tests {
     use super::*;
     use crate::db::datastore::test_utils::datastore_test;
-    use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
     use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
+    use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
     use nexus_test_utils::db::test_setup_database;
     use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET;
     use omicron_test_utils::dev;
diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs
index 08647b421e..42ccca4ed6 100644
--- a/nexus/db-queries/src/db/datastore/project.rs
+++ b/nexus/db-queries/src/db/datastore/project.rs
@@ -13,8 +13,6 @@ use crate::db::collection_insert::AsyncInsertError;
 use crate::db::collection_insert::DatastoreCollection;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::fixed_data::project::SERVICES_PROJECT;
-use crate::db::fixed_data::silo::INTERNAL_SILO_ID;
 use crate::db::identity::Resource;
 use crate::db::model::CollectionTypeProvisioned;
 use crate::db::model::Name;
@@ -27,6 +25,8 @@ use crate::transaction_retry::OptionalError;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
+use nexus_db_fixed_data::project::SERVICES_PROJECT;
+use nexus_db_fixed_data::silo::INTERNAL_SILO_ID;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::DeleteResult;
diff --git a/nexus/db-queries/src/db/datastore/pub_test_utils.rs b/nexus/db-queries/src/db/datastore/pub_test_utils.rs
index 5259a03656..93a172bd15 100644
--- a/nexus/db-queries/src/db/datastore/pub_test_utils.rs
+++ b/nexus/db-queries/src/db/datastore/pub_test_utils.rs
@@ -39,7 +39,7 @@ pub async fn datastore_test(
         logctx.log.new(o!()),
         Arc::new(authz::Authz::new(&logctx.log)),
         authn::Context::internal_db_init(),
-        Arc::clone(&datastore),
+        Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
     );
 
     // TODO: Can we just call "Populate" instead of doing this?
@@ -59,8 +59,10 @@ pub async fn datastore_test(
 
     // Create an OpContext with the credentials of "test-privileged" for general
     // testing.
-    let opctx =
-        OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
+    let opctx = OpContext::for_tests(
+        logctx.log.new(o!()),
+        Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
+    );
 
     (opctx, datastore)
 }
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index d836185d87..4af6bf7263 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -16,10 +16,6 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::retryable;
 use crate::db::error::ErrorHandler;
 use crate::db::error::MaybeRetryable::*;
-use crate::db::fixed_data::silo::INTERNAL_SILO_ID;
-use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET;
-use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
-use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET;
 use crate::db::identity::Asset;
 use crate::db::lookup::LookupPath;
 use crate::db::model::Dataset;
@@ -37,6 +33,10 @@ use diesel::prelude::*;
 use diesel::result::Error as DieselError;
 use diesel::upsert::excluded;
 use ipnetwork::IpNetwork;
+use nexus_db_fixed_data::silo::INTERNAL_SILO_ID;
+use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET;
+use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
+use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET;
 use nexus_db_model::IncompleteNetworkInterface;
 use nexus_db_model::InitialDnsGroup;
 use nexus_db_model::PasswordHashString;
diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs
index 3a57ffc44c..b91597ad1d 100644
--- a/nexus/db-queries/src/db/datastore/role.rs
+++ b/nexus/db-queries/src/db/datastore/role.rs
@@ -14,8 +14,6 @@ use crate::db::datastore::RunnableQueryNoReturn;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::error::TransactionError;
-use crate::db::fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS;
-use crate::db::fixed_data::role_builtin::BUILTIN_ROLES;
 use crate::db::model::DatabaseString;
 use crate::db::model::IdentityType;
 use crate::db::model::RoleAssignment;
@@ -25,13 +23,13 @@ use crate::db::pool::DbConnection;
 use async_bb8_diesel::AsyncConnection;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::prelude::*;
+use nexus_db_fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS;
+use nexus_db_fixed_data::role_builtin::BUILTIN_ROLES;
 use nexus_types::external_api::shared;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::ListResultVec;
-use omicron_common::api::external::ResourceType;
 use omicron_common::bail_unless;
-use uuid::Uuid;
 
 impl DataStore {
     /// List built-in roles
@@ -117,65 +115,6 @@ impl DataStore {
         Ok(())
     }
 
-    /// Return the built-in roles that the given built-in user has for the given
-    /// resource
-    pub async fn role_asgn_list_for(
-        &self,
-        opctx: &OpContext,
-        identity_type: IdentityType,
-        identity_id: Uuid,
-        resource_type: ResourceType,
-        resource_id: Uuid,
-    ) -> Result<Vec<RoleAssignment>, Error> {
-        use db::schema::role_assignment::dsl as role_dsl;
-        use db::schema::silo_group_membership::dsl as group_dsl;
-
-        // There is no resource-specific authorization check because all
-        // authenticated users need to be able to list their own roles --
-        // otherwise we can't do any authorization checks.
-        // TODO-security rethink this -- how do we know the user is looking up
-        // their own roles?  Maybe this should use an internal authz context.
-
-        // TODO-scalability TODO-security This needs to be paginated.  It's not
-        // exposed via an external API right now but someone could still put us
-        // into some hurt by assigning loads of roles to someone and having that
-        // person attempt to access anything.
-
-        let direct_roles_query = role_dsl::role_assignment
-            .filter(role_dsl::identity_type.eq(identity_type.clone()))
-            .filter(role_dsl::identity_id.eq(identity_id))
-            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
-            .filter(role_dsl::resource_id.eq(resource_id))
-            .select(RoleAssignment::as_select());
-
-        let roles_from_groups_query = role_dsl::role_assignment
-            .filter(role_dsl::identity_type.eq(IdentityType::SiloGroup))
-            .filter(
-                role_dsl::identity_id.eq_any(
-                    group_dsl::silo_group_membership
-                        .filter(group_dsl::silo_user_id.eq(identity_id))
-                        .select(group_dsl::silo_group_id),
-                ),
-            )
-            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
-            .filter(role_dsl::resource_id.eq(resource_id))
-            .select(RoleAssignment::as_select());
-
-        let conn = self.pool_connection_authorized(opctx).await?;
-        if identity_type == IdentityType::SiloUser {
-            direct_roles_query
-                .union(roles_from_groups_query)
-                .load_async::<RoleAssignment>(&*conn)
-                .await
-                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
-        } else {
-            direct_roles_query
-                .load_async::<RoleAssignment>(&*conn)
-                .await
-                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
-        }
-    }
-
     /// Fetches all of the externally-visible role assignments for the specified
     /// resource
     ///
diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs
index 0fd858b900..2b7afa3270 100644
--- a/nexus/db-queries/src/db/datastore/silo.rs
+++ b/nexus/db-queries/src/db/datastore/silo.rs
@@ -15,7 +15,6 @@ use crate::db::error::public_error_from_diesel;
 use crate::db::error::retryable;
 use crate::db::error::ErrorHandler;
 use crate::db::error::TransactionError;
-use crate::db::fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO};
 use crate::db::identity::Resource;
 use crate::db::model::CollectionTypeProvisioned;
 use crate::db::model::IpPoolResourceType;
@@ -29,6 +28,7 @@ use async_bb8_diesel::AsyncConnection;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
+use nexus_db_fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO};
 use nexus_db_model::Certificate;
 use nexus_db_model::ServiceKind;
 use nexus_db_model::SiloQuotas;
diff --git a/nexus/db-queries/src/db/datastore/silo_user.rs b/nexus/db-queries/src/db/datastore/silo_user.rs
index 59cb19a609..2825e2a310 100644
--- a/nexus/db-queries/src/db/datastore/silo_user.rs
+++ b/nexus/db-queries/src/db/datastore/silo_user.rs
@@ -429,7 +429,9 @@ impl DataStore {
         use db::schema::role_assignment::dsl;
         debug!(opctx.log, "attempting to create silo user role assignments");
         let count = diesel::insert_into(dsl::role_assignment)
-            .values(&*db::fixed_data::silo_user::ROLE_ASSIGNMENTS_PRIVILEGED)
+            .values(
+                &*nexus_db_fixed_data::silo_user::ROLE_ASSIGNMENTS_PRIVILEGED,
+            )
             .on_conflict((
                 dsl::identity_type,
                 dsl::identity_id,
diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
index 3630231b63..9738f05ff6 100644
--- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
+++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs
@@ -312,7 +312,7 @@ impl DataStore {
         &self,
         opctx: &OpContext,
     ) -> Result<(), Error> {
-        let id = *db::fixed_data::FLEET_ID;
+        let id = *nexus_db_fixed_data::FLEET_ID;
         self.virtual_provisioning_collection_create(
             opctx,
             db::model::VirtualProvisioningCollection::new(
@@ -331,7 +331,6 @@ mod test {
     use super::*;
 
     use crate::db::datastore::test_utils::datastore_test;
-    use crate::db::fixed_data;
     use crate::db::lookup::LookupPath;
     use nexus_db_model::Instance;
     use nexus_db_model::Project;
@@ -384,8 +383,8 @@ mod test {
         datastore: &DataStore,
         opctx: &OpContext,
     ) -> TestData {
-        let fleet_id = *fixed_data::FLEET_ID;
-        let silo_id = *fixed_data::silo::DEFAULT_SILO_ID;
+        let fleet_id = *nexus_db_fixed_data::FLEET_ID;
+        let silo_id = *nexus_db_fixed_data::silo::DEFAULT_SILO_ID;
         let project_id = Uuid::new_v4();
 
         let (authz_project, _project) = datastore
diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs
index 98af47f0e2..5322e20dbf 100644
--- a/nexus/db-queries/src/db/datastore/vpc.rs
+++ b/nexus/db-queries/src/db/datastore/vpc.rs
@@ -12,7 +12,6 @@ use crate::db::collection_insert::AsyncInsertError;
 use crate::db::collection_insert::DatastoreCollection;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
-use crate::db::fixed_data::vpc::SERVICES_VPC_ID;
 use crate::db::identity::Resource;
 use crate::db::model::ApplyBlueprintZoneFilterExt;
 use crate::db::model::ApplySledFilterExt;
@@ -45,6 +44,7 @@ use diesel::prelude::*;
 use diesel::result::DatabaseErrorKind;
 use diesel::result::Error as DieselError;
 use ipnetwork::IpNetwork;
+use nexus_db_fixed_data::vpc::SERVICES_VPC_ID;
 use nexus_types::deployment::BlueprintZoneFilter;
 use nexus_types::deployment::SledFilter;
 use omicron_common::api::external::http_pagination::PaginatedBy;
@@ -72,9 +72,9 @@ impl DataStore {
         &self,
         opctx: &OpContext,
     ) -> Result<(), Error> {
-        use crate::db::fixed_data::project::SERVICES_PROJECT_ID;
-        use crate::db::fixed_data::vpc::SERVICES_VPC;
-        use crate::db::fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID;
+        use nexus_db_fixed_data::project::SERVICES_PROJECT_ID;
+        use nexus_db_fixed_data::vpc::SERVICES_VPC;
+        use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID;
 
         opctx.authorize(authz::Action::Modify, &authz::DATABASE).await?;
 
@@ -175,8 +175,8 @@ impl DataStore {
         &self,
         opctx: &OpContext,
     ) -> Result<(), Error> {
-        use db::fixed_data::vpc_firewall_rule::DNS_VPC_FW_RULE;
-        use db::fixed_data::vpc_firewall_rule::NEXUS_VPC_FW_RULE;
+        use nexus_db_fixed_data::vpc_firewall_rule::DNS_VPC_FW_RULE;
+        use nexus_db_fixed_data::vpc_firewall_rule::NEXUS_VPC_FW_RULE;
 
         debug!(opctx.log, "attempting to create built-in VPC firewall rules");
 
@@ -229,9 +229,9 @@ impl DataStore {
         &self,
         opctx: &OpContext,
     ) -> Result<(), Error> {
-        use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET;
-        use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
-        use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET;
+        use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET;
+        use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
+        use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET;
 
         debug!(opctx.log, "attempting to create built-in VPC Subnets");
 
@@ -1230,9 +1230,9 @@ mod tests {
     use crate::db::datastore::test::sled_system_hardware_for_test;
     use crate::db::datastore::test_utils::datastore_test;
     use crate::db::datastore::test_utils::IneligibleSleds;
-    use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
     use crate::db::model::Project;
     use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE;
+    use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
     use nexus_db_model::IncompleteNetworkInterface;
     use nexus_db_model::SledUpdate;
     use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs
index 487a68b517..0999694c54 100644
--- a/nexus/db-queries/src/db/lookup.rs
+++ b/nexus/db-queries/src/db/lookup.rs
@@ -924,8 +924,10 @@ mod test {
         let (_, datastore) =
             crate::db::datastore::test_utils::datastore_test(&logctx, &db)
                 .await;
-        let opctx =
-            OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
+        let opctx = OpContext::for_tests(
+            logctx.log.new(o!()),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
+        );
         let project_name: Name = Name("my-project".parse().unwrap());
         let instance_name: Name = Name("my-instance".parse().unwrap());
 
diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs
index 7ce6890a4d..7bd1bbec61 100644
--- a/nexus/db-queries/src/db/mod.rs
+++ b/nexus/db-queries/src/db/mod.rs
@@ -17,7 +17,6 @@ mod cte_utils;
 pub mod datastore;
 pub(crate) mod error;
 mod explain;
-pub mod fixed_data;
 pub mod lookup;
 mod on_conflict_ext;
 // Public for doctests.
@@ -42,6 +41,7 @@ pub use pool_connection::DISALLOW_FULL_TABLE_SCAN_SQL;
 #[cfg(test)]
 mod test_utils;
 
+pub use nexus_db_fixed_data as fixed_data;
 pub use nexus_db_model as model;
 use nexus_db_model::saga_types;
 pub use nexus_db_model::schema;
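
The `pub use nexus_db_fixed_data as fixed_data;` line keeps the old `db::fixed_data::...` paths resolving even though the data now lives in its own crate, so call sites can be migrated gradually. A small sketch of the same facade pattern, using modules as stand-ins for the two crates (names are illustrative, not the actual omicron layout):

```rust
// `extracted` stands in for the new fixed-data crate.
mod extracted {
    pub mod silo {
        pub const DEFAULT_SILO_NAME: &str = "default-silo";
    }
}

// `db` stands in for the db-queries module that re-exports the extracted
// crate under its old name, so existing `db::fixed_data::...` paths keep
// compiling.
mod db {
    pub use super::extracted as fixed_data;
}

fn main() {
    // Callers can keep the old path...
    println!("{}", db::fixed_data::silo::DEFAULT_SILO_NAME);
    // ...or import from the new location directly.
    println!("{}", extracted::silo::DEFAULT_SILO_NAME);
}
```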
diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
index 895fee2092..b3c1a569b0 100644
--- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
+++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs
@@ -122,7 +122,7 @@ WITH
       UNION (SELECT ").param().sql(" AS id)
     ),")
             .bind::<sql_types::Uuid, _>(project_id)
-            .bind::<sql_types::Uuid, _>(*crate::db::fixed_data::FLEET_ID)
+            .bind::<sql_types::Uuid, _>(*nexus_db_fixed_data::FLEET_ID)
             .sql("
   quotas
     AS (
diff --git a/nexus/db-queries/src/db/saga_recovery.rs b/nexus/db-queries/src/db/saga_recovery.rs
index 55cda03c3c..25f8ff788d 100644
--- a/nexus/db-queries/src/db/saga_recovery.rs
+++ b/nexus/db-queries/src/db/saga_recovery.rs
@@ -447,7 +447,10 @@ mod test {
         let (storage, sec_client, uctx) =
             create_storage_sec_and_context(&log, db_datastore.clone(), sec_id);
         let sec_log = log.new(o!("component" => "SEC"));
-        let opctx = OpContext::for_tests(log, Arc::clone(&db_datastore));
+        let opctx = OpContext::for_tests(
+            log,
+            Arc::clone(&db_datastore) as Arc<dyn nexus_auth::storage::Storage>,
+        );
 
         // Create and start a saga.
         //
@@ -520,7 +523,10 @@ mod test {
         let (storage, sec_client, uctx) =
             create_storage_sec_and_context(&log, db_datastore.clone(), sec_id);
         let sec_log = log.new(o!("component" => "SEC"));
-        let opctx = OpContext::for_tests(log, Arc::clone(&db_datastore));
+        let opctx = OpContext::for_tests(
+            log,
+            Arc::clone(&db_datastore) as Arc<dyn nexus_auth::storage::Storage>,
+        );
 
         // Create and start a saga, which we expect to complete successfully.
         let saga_id = SagaId(Uuid::new_v4());
diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs
index 60177990e8..003310f920 100644
--- a/nexus/db-queries/src/lib.rs
+++ b/nexus/db-queries/src/lib.rs
@@ -4,17 +4,19 @@
 
 //! Facilities for working with the Omicron database
 
-pub mod authn;
-pub mod authz;
-pub mod context;
+pub use nexus_auth::authn;
+pub use nexus_auth::authz;
+pub use nexus_auth::context;
+
 pub mod db;
 pub mod provisioning;
 pub mod transaction_retry;
 
+#[cfg(test)]
+mod policy_test;
+
 #[macro_use]
 extern crate slog;
-#[macro_use]
-extern crate newtype_derive;
 #[cfg(test)]
 #[macro_use]
 extern crate diesel;
diff --git a/nexus/db-queries/src/authz/policy_test/coverage.rs b/nexus/db-queries/src/policy_test/coverage.rs
similarity index 97%
rename from nexus/db-queries/src/authz/policy_test/coverage.rs
rename to nexus/db-queries/src/policy_test/coverage.rs
index 021c9ef119..08235332ff 100644
--- a/nexus/db-queries/src/authz/policy_test/coverage.rs
+++ b/nexus/db-queries/src/policy_test/coverage.rs
@@ -2,8 +2,9 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use crate::authz;
-use crate::authz::AuthorizedResource;
+use nexus_auth::authz;
+use nexus_auth::authz::AuthorizedResource;
+use slog::{debug, error, o, warn};
 use std::collections::BTreeSet;
 
 /// Helper for identifying authz resources not covered by the IAM role policy
diff --git a/nexus/db-queries/src/authz/policy_test/mod.rs b/nexus/db-queries/src/policy_test/mod.rs
similarity index 97%
rename from nexus/db-queries/src/authz/policy_test/mod.rs
rename to nexus/db-queries/src/policy_test/mod.rs
index b6961bcc30..395a480c47 100644
--- a/nexus/db-queries/src/authz/policy_test/mod.rs
+++ b/nexus/db-queries/src/policy_test/mod.rs
@@ -14,14 +14,14 @@ mod coverage;
 mod resource_builder;
 mod resources;
 
-use crate::authn;
-use crate::authn::SiloAuthnPolicy;
-use crate::authz;
-use crate::context::OpContext;
 use crate::db;
-use authn::USER_TEST_PRIVILEGED;
 use coverage::Coverage;
 use futures::StreamExt;
+use nexus_auth::authn;
+use nexus_auth::authn::SiloAuthnPolicy;
+use nexus_auth::authn::USER_TEST_PRIVILEGED;
+use nexus_auth::authz;
+use nexus_auth::context::OpContext;
 use nexus_test_utils::db::test_setup_database;
 use nexus_types::external_api::shared;
 use nexus_types::external_api::shared::FleetRole;
@@ -33,6 +33,7 @@ use omicron_test_utils::dev;
 use resource_builder::DynAuthorizedResource;
 use resource_builder::ResourceBuilder;
 use resource_builder::ResourceSet;
+use slog::{o, trace};
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::io::Cursor;
@@ -117,7 +118,7 @@ async fn test_iam_roles_behavior() {
                     main_silo_id,
                     SiloAuthnPolicy::default(),
                 ),
-                Arc::clone(&datastore),
+                Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
             );
 
             Arc::new((username.clone(), opctx))
@@ -140,7 +141,7 @@ async fn test_iam_roles_behavior() {
             user_log,
             Arc::clone(&authz),
             authn::Context::internal_unauthenticated(),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
         ),
     )));
 
@@ -439,7 +440,8 @@ async fn test_conferred_roles() {
                             main_silo_id,
                             policy.clone(),
                         ),
-                        Arc::clone(&datastore),
+                        Arc::clone(&datastore)
+                            as Arc<dyn nexus_auth::storage::Storage>,
                     );
                     Arc::new((username.clone(), opctx))
                 })
diff --git a/nexus/db-queries/src/authz/policy_test/resource_builder.rs b/nexus/db-queries/src/policy_test/resource_builder.rs
similarity index 74%
rename from nexus/db-queries/src/authz/policy_test/resource_builder.rs
rename to nexus/db-queries/src/policy_test/resource_builder.rs
index 59cb283a95..3d09b2ab2d 100644
--- a/nexus/db-queries/src/authz/policy_test/resource_builder.rs
+++ b/nexus/db-queries/src/policy_test/resource_builder.rs
@@ -6,14 +6,14 @@
 //! IAM policy test
 
 use super::coverage::Coverage;
-use crate::authz;
-use crate::authz::ApiResourceWithRolesType;
-use crate::authz::AuthorizedResource;
-use crate::context::OpContext;
 use crate::db;
 use authz::ApiResource;
 use futures::future::BoxFuture;
 use futures::FutureExt;
+use nexus_auth::authz;
+use nexus_auth::authz::ApiResourceWithRolesType;
+use nexus_auth::authz::AuthorizedResource;
+use nexus_auth::context::OpContext;
 use nexus_db_model::DatabaseString;
 use nexus_types::external_api::shared;
 use omicron_common::api::external::Error;
@@ -192,40 +192,40 @@ pub trait DynAuthorizedResource: AuthorizedResource + std::fmt::Debug {
     fn resource_name(&self) -> String;
 }
 
-impl<T> DynAuthorizedResource for T
-where
-    T: ApiResource + AuthorizedResource + oso::PolarClass + Clone,
-{
-    fn do_authorize<'a, 'b>(
-        &'a self,
-        opctx: &'b OpContext,
-        action: authz::Action,
-    ) -> BoxFuture<'a, Result<(), Error>>
-    where
-        'b: 'a,
-    {
-        opctx.authorize(action, self).boxed()
-    }
-
-    fn resource_name(&self) -> String {
-        let my_ident = match self.lookup_type() {
-            LookupType::ByName(name) => format!("{:?}", name),
-            LookupType::ById(id) => format!("id {:?}", id.to_string()),
-            LookupType::ByCompositeId(id) => format!("id {:?}", id),
-            LookupType::ByOther(_) => {
-                unimplemented!()
+macro_rules! impl_dyn_authorized_resource_for_global {
+    ($t:ty) => {
+        impl DynAuthorizedResource for $t {
+            fn resource_name(&self) -> String {
+                String::from(stringify!($t))
             }
-        };
 
-        format!("{:?} {}", self.resource_type(), my_ident)
-    }
+            fn do_authorize<'a, 'b>(
+                &'a self,
+                opctx: &'b OpContext,
+                action: authz::Action,
+            ) -> BoxFuture<'a, Result<(), Error>>
+            where
+                'b: 'a,
+            {
+                opctx.authorize(action, self).boxed()
+            }
+        }
+    };
 }
 
-macro_rules! impl_dyn_authorized_resource_for_global {
+macro_rules! impl_dyn_authorized_resource_for_resource {
     ($t:ty) => {
         impl DynAuthorizedResource for $t {
             fn resource_name(&self) -> String {
-                String::from(stringify!($t))
+                let my_ident = match self.lookup_type() {
+                    LookupType::ByName(name) => format!("{:?}", name),
+                    LookupType::ById(id) => format!("id {:?}", id.to_string()),
+                    LookupType::ByCompositeId(id) => format!("id {:?}", id),
+                    LookupType::ByOther(_) => {
+                        unimplemented!()
+                    }
+                };
+                format!("{:?} {}", self.resource_type(), my_ident)
             }
 
             fn do_authorize<'a, 'b>(
@@ -242,7 +242,39 @@ macro_rules! impl_dyn_authorized_resource_for_global {
     };
 }
 
-impl_dyn_authorized_resource_for_global!(authz::oso_generic::Database);
+impl_dyn_authorized_resource_for_resource!(authz::AddressLot);
+impl_dyn_authorized_resource_for_resource!(authz::Blueprint);
+impl_dyn_authorized_resource_for_resource!(authz::Certificate);
+impl_dyn_authorized_resource_for_resource!(authz::DeviceAccessToken);
+impl_dyn_authorized_resource_for_resource!(authz::DeviceAuthRequest);
+impl_dyn_authorized_resource_for_resource!(authz::Disk);
+impl_dyn_authorized_resource_for_resource!(authz::Fleet);
+impl_dyn_authorized_resource_for_resource!(authz::FloatingIp);
+impl_dyn_authorized_resource_for_resource!(authz::IdentityProvider);
+impl_dyn_authorized_resource_for_resource!(authz::Image);
+impl_dyn_authorized_resource_for_resource!(authz::Instance);
+impl_dyn_authorized_resource_for_resource!(authz::InstanceNetworkInterface);
+impl_dyn_authorized_resource_for_resource!(authz::LoopbackAddress);
+impl_dyn_authorized_resource_for_resource!(authz::Rack);
+impl_dyn_authorized_resource_for_resource!(authz::PhysicalDisk);
+impl_dyn_authorized_resource_for_resource!(authz::Project);
+impl_dyn_authorized_resource_for_resource!(authz::ProjectImage);
+impl_dyn_authorized_resource_for_resource!(authz::SamlIdentityProvider);
+impl_dyn_authorized_resource_for_resource!(authz::Service);
+impl_dyn_authorized_resource_for_resource!(authz::Silo);
+impl_dyn_authorized_resource_for_resource!(authz::SiloGroup);
+impl_dyn_authorized_resource_for_resource!(authz::SiloImage);
+impl_dyn_authorized_resource_for_resource!(authz::SiloUser);
+impl_dyn_authorized_resource_for_resource!(authz::Sled);
+impl_dyn_authorized_resource_for_resource!(authz::Snapshot);
+impl_dyn_authorized_resource_for_resource!(authz::SshKey);
+impl_dyn_authorized_resource_for_resource!(authz::TufArtifact);
+impl_dyn_authorized_resource_for_resource!(authz::TufRepo);
+impl_dyn_authorized_resource_for_resource!(authz::Vpc);
+impl_dyn_authorized_resource_for_resource!(authz::VpcSubnet);
+impl_dyn_authorized_resource_for_resource!(authz::Zpool);
+
+impl_dyn_authorized_resource_for_global!(authz::Database);
 impl_dyn_authorized_resource_for_global!(authz::BlueprintConfig);
 impl_dyn_authorized_resource_for_global!(authz::ConsoleSessionList);
 impl_dyn_authorized_resource_for_global!(authz::DeviceAuthRequestList);
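
The hunk above replaces the single blanket `impl<T> DynAuthorizedResource for T` with two declarative macros plus one invocation per authz type. A minimal sketch of that pattern, using a stand-in trait and stand-in types rather than the real authz resources:

```rust
// A declarative macro stamps out one trait impl per listed type,
// replacing a blanket impl that required an extra trait bound.
trait Describe {
    fn describe(&self) -> String;
}

struct Project;
struct Instance;

macro_rules! impl_describe_for {
    ($t:ty) => {
        impl Describe for $t {
            fn describe(&self) -> String {
                // stringify! turns the type name into the description,
                // much like the `resource_name` body generated above.
                String::from(stringify!($t))
            }
        }
    };
}

impl_describe_for!(Project);
impl_describe_for!(Instance);

fn main() {
    println!("{}", Project.describe());
    println!("{}", Instance.describe());
}
```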
diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/policy_test/resources.rs
similarity index 99%
rename from nexus/db-queries/src/authz/policy_test/resources.rs
rename to nexus/db-queries/src/policy_test/resources.rs
index bc30e77fac..478fa169ff 100644
--- a/nexus/db-queries/src/authz/policy_test/resources.rs
+++ b/nexus/db-queries/src/policy_test/resources.rs
@@ -6,8 +6,8 @@
 
 use super::resource_builder::ResourceBuilder;
 use super::resource_builder::ResourceSet;
-use crate::authz;
 use crate::db::model::ArtifactId;
+use nexus_auth::authz;
 use nexus_db_model::SemverVersion;
 use omicron_common::api::external::LookupType;
 use omicron_uuid_kinds::GenericUuid;
@@ -367,8 +367,8 @@ pub fn exempted_authz_classes() -> BTreeSet<String> {
     [
         // Non-resources:
         authz::Action::get_polar_class(),
-        authz::actor::AnyActor::get_polar_class(),
-        authz::actor::AuthenticatedActor::get_polar_class(),
+        authz::AnyActor::get_polar_class(),
+        authz::AuthenticatedActor::get_polar_class(),
         // Resources whose behavior should be identical to an existing type
         // and we don't want to do the test twice for performance reasons:
         // none yet.
diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out
index 0482cdfd2a..41a1ded3b4 100644
--- a/nexus/db-queries/tests/output/authz-roles.out
+++ b/nexus/db-queries/tests/output/authz-roles.out
@@ -1,4 +1,4 @@
-resource: authz::oso_generic::Database
+resource: authz::Database
 
   USER                             Q  R LC RP  M MP CC  D
   fleet-admin                      ✔  ✘  ✘  ✘  ✘  ✘  ✘  ✘
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index 263ab24c70..f9bcc2cf80 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -377,7 +377,7 @@ impl Nexus {
             log.new(o!("component" => "DataLoader")),
             Arc::clone(&authz),
             authn::Context::internal_db_init(),
-            Arc::clone(&db_datastore),
+            Arc::clone(&db_datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         let populate_args = PopulateArgs::new(rack_id);
@@ -391,7 +391,7 @@ impl Nexus {
             log.new(o!("component" => "BackgroundTasks")),
             Arc::clone(&authz),
             authn::Context::internal_api(),
-            Arc::clone(&db_datastore),
+            Arc::clone(&db_datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         let v2p_watcher_channel = tokio::sync::watch::channel(());
@@ -440,13 +440,15 @@ impl Nexus {
                 log.new(o!("component" => "InstanceAllocator")),
                 Arc::clone(&authz),
                 authn::Context::internal_read(),
-                Arc::clone(&db_datastore),
+                Arc::clone(&db_datastore)
+                    as Arc<dyn nexus_auth::storage::Storage>,
             ),
             opctx_external_authn: OpContext::for_background(
                 log.new(o!("component" => "ExternalAuthn")),
                 Arc::clone(&authz),
                 authn::Context::external_authn(),
-                Arc::clone(&db_datastore),
+                Arc::clone(&db_datastore)
+                    as Arc<dyn nexus_auth::storage::Storage>,
             ),
             samael_max_issue_delay: std::sync::Mutex::new(None),
             internal_resolver: resolver,
@@ -469,7 +471,7 @@ impl Nexus {
             log.new(o!("component" => "SagaRecoverer")),
             Arc::clone(&authz),
             authn::Context::internal_saga_recovery(),
-            Arc::clone(&db_datastore),
+            Arc::clone(&db_datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
         let saga_logger = nexus.log.new(o!("saga_type" => "recovery"));
         let recovery_task = db::recover(
@@ -701,7 +703,8 @@ impl Nexus {
             self.log.new(o!("component" => "ServiceBalancer")),
             Arc::clone(&self.authz),
             authn::Context::internal_service_balancer(),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         )
     }
 
@@ -711,7 +714,8 @@ impl Nexus {
             self.log.new(o!("component" => "InternalApi")),
             Arc::clone(&self.authz),
             authn::Context::internal_api(),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         )
     }
 
diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs
index 581b9a89bb..9e7bd1582f 100644
--- a/nexus/src/app/test_interfaces.rs
+++ b/nexus/src/app/test_interfaces.rs
@@ -73,7 +73,8 @@ impl TestInterfaces for super::Nexus {
     ) -> Result<Option<Arc<SledAgentClient>>, Error> {
         let opctx = OpContext::for_tests(
             self.log.new(o!()),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         self.instance_sled_by_id_with_opctx(id, &opctx).await
@@ -98,7 +99,8 @@ impl TestInterfaces for super::Nexus {
     ) -> Result<Option<Arc<SledAgentClient>>, Error> {
         let opctx = OpContext::for_tests(
             self.log.new(o!()),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         );
         let (.., db_disk) = LookupPath::new(&opctx, &self.db_datastore)
             .disk_id(*id)
@@ -112,7 +114,8 @@ impl TestInterfaces for super::Nexus {
     async fn instance_sled_id(&self, id: &Uuid) -> Result<Option<Uuid>, Error> {
         let opctx = OpContext::for_tests(
             self.log.new(o!()),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         self.instance_sled_id_with_opctx(id, &opctx).await
@@ -138,7 +141,8 @@ impl TestInterfaces for super::Nexus {
     async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result<bool, Error> {
         let opctx = OpContext::for_tests(
             self.log.new(o!()),
-            Arc::clone(&self.db_datastore),
+            Arc::clone(&self.db_datastore)
+                as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         let (.., authz_disk, db_disk) =
diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs
index caff195047..fb0a47bbea 100644
--- a/nexus/src/external_api/console_api.rs
+++ b/nexus/src/external_api/console_api.rs
@@ -270,13 +270,14 @@ pub(crate) async fn login_saml_redirect(
         // unauthenticated.
         let opctx = nexus.opctx_external_authn();
 
-        let (.., identity_provider) = IdentityProviderType::lookup(
-            &nexus.datastore(),
-            &opctx,
-            &path_params.silo_name,
-            &path_params.provider_name,
-        )
-        .await?;
+        let (.., identity_provider) = nexus
+            .datastore()
+            .identity_provider_lookup(
+                &opctx,
+                &path_params.silo_name,
+                &path_params.provider_name,
+            )
+            .await?;
 
         match identity_provider {
             IdentityProviderType::Saml(saml_identity_provider) => {
@@ -330,9 +331,9 @@ pub(crate) async fn login_saml(
         // keep specifically for this purpose.
         let opctx = nexus.opctx_external_authn();
 
-        let (authz_silo, db_silo, identity_provider) =
-            IdentityProviderType::lookup(
-                &nexus.datastore(),
+        let (authz_silo, db_silo, identity_provider) = nexus
+            .datastore()
+            .identity_provider_lookup(
                 &opctx,
                 &path_params.silo_name,
                 &path_params.provider_name,
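
These call sites change shape: a lookup that used to be an associated function taking the datastore as an argument becomes a method on the datastore itself. A sketch of that call-site change only, with `Datastore`, `OpCtx`, and `IdpType` as stand-ins and the method signature modeled loosely on the arguments visible in this diff, not the real one:

```rust
struct OpCtx;

enum IdpType {
    Saml(String),
}

struct Datastore;

impl Datastore {
    // Assumed shape for illustration: op context plus silo and provider
    // names in, the resolved provider out.
    fn identity_provider_lookup(
        &self,
        _opctx: &OpCtx,
        silo_name: &str,
        provider_name: &str,
    ) -> Result<IdpType, String> {
        Ok(IdpType::Saml(format!("{silo_name}/{provider_name}")))
    }
}

fn main() {
    let datastore = Datastore;
    let opctx = OpCtx;
    // Old style (removed): IdentityProviderType::lookup(&datastore, &opctx, ...)
    // New style (what the call sites are converted to):
    match datastore.identity_provider_lookup(&opctx, "my-silo", "my-provider") {
        Ok(IdpType::Saml(descriptor)) => println!("found SAML provider: {descriptor}"),
        Err(e) => eprintln!("lookup failed: {e}"),
    }
}
```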
diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs
index ffe67baeae..724b25162d 100644
--- a/nexus/src/populate.rs
+++ b/nexus/src/populate.rs
@@ -388,7 +388,7 @@ mod test {
             logctx.log.clone(),
             Arc::new(authz::Authz::new(&logctx.log)),
             authn::Context::internal_db_init(),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
         let log = &logctx.log;
 
@@ -444,7 +444,7 @@ mod test {
             logctx.log.clone(),
             Arc::new(authz::Authz::new(&logctx.log)),
             authn::Context::internal_db_init(),
-            Arc::clone(&datastore),
+            Arc::clone(&datastore) as Arc<dyn nexus_auth::storage::Storage>,
         );
 
         info!(&log, "cleaning up database");
diff --git a/nexus/tests/integration_tests/saml.rs b/nexus/tests/integration_tests/saml.rs
index 80816f2ea2..e075f3e4da 100644
--- a/nexus/tests/integration_tests/saml.rs
+++ b/nexus/tests/integration_tests/saml.rs
@@ -106,20 +106,23 @@ async fn test_create_a_saml_idp(cptestctx: &ControlPlaneTestContext) {
         .await
         .unwrap();
 
-    let (.., retrieved_silo_idp_from_nexus) = IdentityProviderType::lookup(
-        &nexus.datastore(),
-        &nexus.opctx_external_authn(),
-        &omicron_common::api::external::Name::try_from(SILO_NAME.to_string())
+    let (.., retrieved_silo_idp_from_nexus) = nexus
+        .datastore()
+        .identity_provider_lookup(
+            &nexus.opctx_external_authn(),
+            &omicron_common::api::external::Name::try_from(
+                SILO_NAME.to_string(),
+            )
+            .unwrap()
+            .into(),
+            &omicron_common::api::external::Name::try_from(
+                "some-totally-real-saml-provider".to_string(),
+            )
             .unwrap()
             .into(),
-        &omicron_common::api::external::Name::try_from(
-            "some-totally-real-saml-provider".to_string(),
         )
-        .unwrap()
-        .into(),
-    )
-    .await
-    .unwrap();
+        .await
+        .unwrap();
 
     match retrieved_silo_idp_from_nexus {
         IdentityProviderType::Saml(_) => {
diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs
index e95b2870ca..2e6c21bb79 100644
--- a/nexus/tests/integration_tests/silos.rs
+++ b/nexus/tests/integration_tests/silos.rs
@@ -4,9 +4,7 @@
 
 use crate::integration_tests::saml::SAML_IDP_DESCRIPTOR;
 use dropshot::ResultsPage;
-use nexus_db_queries::authn::silos::{
-    AuthenticatedSubject, IdentityProviderType,
-};
+use nexus_db_queries::authn::silos::AuthenticatedSubject;
 use nexus_db_queries::authn::{USER_TEST_PRIVILEGED, USER_TEST_UNPRIVILEGED};
 use nexus_db_queries::authz::{self};
 use nexus_db_queries::context::OpContext;
@@ -525,19 +523,22 @@ async fn test_deleting_a_silo_deletes_the_idp(
     // Expect that the silo is gone
     let nexus = &cptestctx.server.server_context().nexus;
 
-    let response = IdentityProviderType::lookup(
-        &nexus.datastore(),
-        &nexus.opctx_external_authn(),
-        &omicron_common::api::external::Name::try_from(SILO_NAME.to_string())
+    let response = nexus
+        .datastore()
+        .identity_provider_lookup(
+            &nexus.opctx_external_authn(),
+            &omicron_common::api::external::Name::try_from(
+                SILO_NAME.to_string(),
+            )
+            .unwrap()
+            .into(),
+            &omicron_common::api::external::Name::try_from(
+                "some-totally-real-saml-provider".to_string(),
+            )
             .unwrap()
             .into(),
-        &omicron_common::api::external::Name::try_from(
-            "some-totally-real-saml-provider".to_string(),
         )
-        .unwrap()
-        .into(),
-    )
-    .await;
+        .await;
 
     assert!(response.is_err());
     match response.err().unwrap() {
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index f82fe1c833..7880422c47 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -220,6 +220,7 @@ tracing = { version = "0.1.40", features = ["log"] }
 trust-dns-proto = { version = "0.22.0" }
 unicode-bidi = { version = "0.3.15" }
 unicode-normalization = { version = "0.1.23" }
+unicode-xid = { version = "0.2.4" }
 usdt = { version = "0.5.0" }
 usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] }
 uuid = { version = "1.8.0", features = ["serde", "v4"] }