From 55ef8daa573669c69e9730ac6b18139f9a648698 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 Feb 2024 17:13:36 -0500 Subject: [PATCH 01/18] Reconfigurator: Record external networking allocations when realizing a blueprint (#5045) This PR expands blueprint execution to record external IPs and created NICs for Nexus, Boundary NTP, and External DNS in crdb _before_ sending new zone requests to sled-agents. The implementation has a very obvious TOCTOU race, but I think it's okay (as explained in the comments inline). If two Nexuses try to realize the same blueprint simultaneously and both see no records present, only one will succeed to insert, and the other will spuriously fail. Assuming that failure causes a retry, subsequent attempts to realize the same blueprint will succeed, as the required records will be present. If this seems wrong, please holler! I'd like to give this a spin on either madrid or one of the software testbeds before merging, but I think this is ready for review. --- nexus/blueprint-execution/src/lib.rs | 13 +- .../src/resource_allocation.rs | 992 ++++++++++++++++++ nexus/db-model/src/external_ip.rs | 14 +- nexus/db-model/src/network_interface.rs | 2 +- .../src/db/datastore/external_ip.rs | 23 +- .../src/db/datastore/network_interface.rs | 65 +- .../db-queries/src/db/queries/external_ip.rs | 44 + 7 files changed, 1140 insertions(+), 13 deletions(-) create mode 100644 nexus/blueprint-execution/src/resource_allocation.rs diff --git a/nexus/blueprint-execution/src/lib.rs b/nexus/blueprint-execution/src/lib.rs index a13acdf265..d6f5f8fc31 100644 --- a/nexus/blueprint-execution/src/lib.rs +++ b/nexus/blueprint-execution/src/lib.rs @@ -21,6 +21,7 @@ use uuid::Uuid; mod dns; mod omicron_zones; +mod resource_allocation; struct Sled { id: Uuid, @@ -69,6 +70,14 @@ where "blueprint_id" => ?blueprint.id ); + resource_allocation::ensure_zone_resources_allocated( + &opctx, + datastore, + &blueprint.omicron_zones, + ) + .await + .map_err(|err| vec![err])?; + let sleds_by_id: BTreeMap = datastore .sled_list_all_batched(&opctx) .await @@ -82,9 +91,9 @@ where dns::deploy_dns( &opctx, - &datastore, + datastore, String::from(nexus_label), - &blueprint, + blueprint, &sleds_by_id, ) .await diff --git a/nexus/blueprint-execution/src/resource_allocation.rs b/nexus/blueprint-execution/src/resource_allocation.rs new file mode 100644 index 0000000000..7f3ebb9876 --- /dev/null +++ b/nexus/blueprint-execution/src/resource_allocation.rs @@ -0,0 +1,992 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Manages allocation of resources required for blueprint realization
+
+use anyhow::bail;
+use anyhow::Context;
+use nexus_db_model::IncompleteNetworkInterface;
+use nexus_db_model::Name;
+use nexus_db_model::SqlU16;
+use nexus_db_model::VpcSubnet;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET;
+use nexus_db_queries::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET;
+use nexus_db_queries::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::NetworkInterface;
+use nexus_types::deployment::NetworkInterfaceKind;
+use nexus_types::deployment::OmicronZoneType;
+use nexus_types::deployment::OmicronZonesConfig;
+use nexus_types::deployment::SourceNatConfig;
+use omicron_common::api::external::IdentityMetadataCreateParams;
+use slog::info;
+use slog::warn;
+use std::collections::BTreeMap;
+use std::net::IpAddr;
+use std::net::SocketAddr;
+use uuid::Uuid;
+
+pub(crate) async fn ensure_zone_resources_allocated(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    zones: &BTreeMap<Uuid, OmicronZonesConfig>,
+) -> anyhow::Result<()> {
+    let allocator = ResourceAllocator { opctx, datastore };
+
+    for config in zones.values() {
+        for z in &config.zones {
+            match &z.zone_type {
+                OmicronZoneType::Nexus { external_ip, nic, .. } => {
+                    allocator
+                        .ensure_nexus_external_networking_allocated(
+                            z.id,
+                            *external_ip,
+                            nic,
+                        )
+                        .await?;
+                }
+                OmicronZoneType::ExternalDns { dns_address, nic, .. } => {
+                    allocator
+                        .ensure_external_dns_external_networking_allocated(
+                            z.id,
+                            dns_address,
+                            nic,
+                        )
+                        .await?;
+                }
+                OmicronZoneType::BoundaryNtp { snat_cfg, nic, .. } => {
+                    allocator
+                        .ensure_boundary_ntp_external_networking_allocated(
+                            z.id, snat_cfg, nic,
+                        )
+                        .await?;
+                }
+                OmicronZoneType::InternalNtp { .. }
+                | OmicronZoneType::Clickhouse { .. }
+                | OmicronZoneType::ClickhouseKeeper { .. }
+                | OmicronZoneType::CockroachDb { .. }
+                | OmicronZoneType::Crucible { .. }
+                | OmicronZoneType::CruciblePantry { .. }
+                | OmicronZoneType::InternalDns { .. }
+                | OmicronZoneType::Oximeter { .. } => (),
+            }
+        }
+    }
+
+    Ok(())
+}
+
+struct ResourceAllocator<'a> {
+    opctx: &'a OpContext,
+    datastore: &'a DataStore,
+}
+
+impl<'a> ResourceAllocator<'a> {
+    // Helper function to determine whether a given external IP address is
+    // already allocated to a specific service zone.
+    async fn is_external_ip_already_allocated(
+        &self,
+        zone_type: &'static str,
+        zone_id: Uuid,
+        external_ip: IpAddr,
+        port_range: Option<(u16, u16)>,
+    ) -> anyhow::Result<bool> {
+        let allocated_ips = self
+            .datastore
+            .service_lookup_external_ips(self.opctx, zone_id)
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to look up external IPs for {zone_type} {zone_id}"
+                )
+            })?;
+
+        if !allocated_ips.is_empty() {
+            // All the service zones that want external IP addresses only expect
+            // to have a single IP. This service already has (at least) one:
+            // make sure this list includes the one we want, or return an error.
+ for allocated_ip in &allocated_ips { + if allocated_ip.ip.ip() == external_ip + && port_range + .map(|(first, last)| { + allocated_ip.first_port == SqlU16(first) + && allocated_ip.last_port == SqlU16(last) + }) + .unwrap_or(true) + { + info!( + self.opctx.log, "found already-allocated external IP"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "ip" => %external_ip, + ); + return Ok(true); + } + } + + warn!( + self.opctx.log, "zone has unexpected IPs allocated"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "want_ip" => %external_ip, + "allocated_ips" => ?allocated_ips, + ); + bail!( + "zone {zone_id} already has {} non-matching IP(s) allocated", + allocated_ips.len() + ); + } + + info!( + self.opctx.log, "external IP allocation required for zone"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "ip" => %external_ip, + ); + + Ok(false) + } + + // Helper function to determine whether a given NIC is already allocated to + // a specific service zone. + async fn is_nic_already_allocated( + &self, + zone_type: &'static str, + zone_id: Uuid, + nic: &NetworkInterface, + ) -> anyhow::Result { + let allocated_nics = self + .datastore + .service_list_network_interfaces(self.opctx, zone_id) + .await + .with_context(|| { + format!("failed to look up NICs for {zone_type} {zone_id}") + })?; + + if !allocated_nics.is_empty() { + // All the service zones that want NICs only expect to have a single + // one. Bail out here if this zone already has one or more allocated + // NICs but not the one we think it needs. + // + // This doesn't check the allocated NIC's subnet against our NICs, + // because that would require an extra DB lookup. We'll assume if + // these main properties are correct, the subnet is too. + for allocated_nic in &allocated_nics { + if allocated_nic.ip.ip() == nic.ip + && *allocated_nic.mac == nic.mac + && allocated_nic.slot == i16::from(nic.slot) + && allocated_nic.primary == nic.primary + { + info!( + self.opctx.log, "found already-allocated NIC"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "nic" => ?allocated_nic, + ); + return Ok(true); + } + } + + warn!( + self.opctx.log, "zone has unexpected NICs allocated"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "want_nic" => ?nic, + "allocated_nics" => ?allocated_nics, + ); + + bail!( + "zone {zone_id} already has {} non-matching NIC(s) allocated", + allocated_nics.len() + ); + } + + info!( + self.opctx.log, "NIC allocation required for zone"; + "zone_type" => zone_type, + "zone_id" => %zone_id, + "nid" => ?nic, + ); + + Ok(false) + } + + // Nexus and ExternalDns both use non-SNAT service IPs; this method is used + // to allocate external networking for both of them. + async fn ensure_external_service_ip( + &self, + zone_type: &'static str, + service_id: Uuid, + external_ip: IpAddr, + ip_name: &Name, + ) -> anyhow::Result<()> { + // Only attempt to allocate `external_ip` if it isn't already assigned + // to this zone. + // + // Checking for the existing of the external IP and then creating it + // if not found inserts a classic TOCTOU race: what if another Nexus + // is running concurrently, we both check and see that the IP is not + // allocated, then both attempt to create it? We believe this is + // okay: the loser of the race (i.e., the one whose create tries to + // commit second) will fail to allocate the IP, which will bubble + // out and prevent realization of the current blueprint. That's + // exactly what we want if two Nexuses try to realize the same + // blueprint at the same time. 
+ if self + .is_external_ip_already_allocated( + zone_type, + service_id, + external_ip, + None, + ) + .await? + { + return Ok(()); + } + let ip_id = Uuid::new_v4(); + let description = zone_type; + self.datastore + .allocate_explicit_service_ip( + self.opctx, + ip_id, + ip_name, + description, + service_id, + external_ip, + ) + .await + .with_context(|| { + format!( + "failed to allocate IP to {zone_type} {service_id}: \ + {external_ip}" + ) + })?; + + info!( + self.opctx.log, "successfully allocated external IP"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "ip" => %external_ip, + "ip_id" => %ip_id, + ); + + Ok(()) + } + + // BoundaryNtp uses a SNAT service IPs; this method is similar to + // `ensure_external_service_ip` but accounts for that. + async fn ensure_external_service_snat_ip( + &self, + zone_type: &'static str, + service_id: Uuid, + snat: &SourceNatConfig, + ) -> anyhow::Result<()> { + // Only attempt to allocate `external_ip` if it isn't already assigned + // to this zone. + // + // This is subject to the same kind of TOCTOU race as described for IP + // allocation in `ensure_external_service_ip`, and we believe it's okay + // for the same reasons as described there. + if self + .is_external_ip_already_allocated( + zone_type, + service_id, + snat.ip, + Some((snat.first_port, snat.last_port)), + ) + .await? + { + return Ok(()); + } + + let ip_id = Uuid::new_v4(); + self.datastore + .allocate_explicit_service_snat_ip( + self.opctx, + ip_id, + service_id, + snat.ip, + (snat.first_port, snat.last_port), + ) + .await + .with_context(|| { + format!( + "failed to allocate snat IP to {zone_type} {service_id}: \ + {snat:?}" + ) + })?; + + info!( + self.opctx.log, "successfully allocated external SNAT IP"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "snat" => ?snat, + "ip_id" => %ip_id, + ); + + Ok(()) + } + + // All service zones with external connectivity get service vNICs. + async fn ensure_service_nic( + &self, + zone_type: &'static str, + service_id: Uuid, + nic: &NetworkInterface, + nic_subnet: &VpcSubnet, + ) -> anyhow::Result<()> { + // We don't pass `nic.kind` into the database below, but instead + // explicitly call `service_create_network_interface`. Ensure this is + // indeed a service NIC. + match &nic.kind { + NetworkInterfaceKind::Instance { .. } => { + bail!("invalid NIC kind (expected service, got instance)") + } + NetworkInterfaceKind::Service { .. } => (), + } + + // Only attempt to allocate `nic` if it isn't already assigned to this + // zone. + // + // This is subject to the same kind of TOCTOU race as described for IP + // allocation in `ensure_external_service_ip`, and we believe it's okay + // for the same reasons as described there. + if self.is_nic_already_allocated(zone_type, service_id, nic).await? { + return Ok(()); + } + let nic_arg = IncompleteNetworkInterface::new_service( + nic.id, + service_id, + nic_subnet.clone(), + IdentityMetadataCreateParams { + name: nic.name.clone(), + description: format!("{zone_type} service vNIC"), + }, + nic.ip, + nic.mac, + nic.slot, + ) + .with_context(|| { + format!( + "failed to convert NIC into IncompleteNetworkInterface: {nic:?}" + ) + })?; + let created_nic = self + .datastore + .service_create_network_interface(self.opctx, nic_arg) + .await + .map_err(|err| err.into_external()) + .with_context(|| { + format!( + "failed to allocate NIC to {zone_type} {service_id}: \ + {nic:?}" + ) + })?; + + // We don't pass all the properties of `nic` into the create request + // above. 
Double-check that the properties the DB assigned match + // what we expect. + // + // We do not check `nic.vni`, because it's not stored in the + // database. (All services are given the constant vni + // `Vni::SERVICES_VNI`.) + if created_nic.primary != nic.primary + || created_nic.slot != i16::from(nic.slot) + { + warn!( + self.opctx.log, "unexpected property on allocated NIC"; + "db_primary" => created_nic.primary, + "expected_primary" => nic.primary, + "db_slot" => created_nic.slot, + "expected_slot" => nic.slot, + ); + + // Now what? We've allocated a NIC in the database but it's + // incorrect. Should we try to delete it? That would be best + // effort (we could fail to delete, or we could crash between + // creation and deletion). + // + // We only expect services to have one NIC, so the only way it + // should be possible to get a different primary/slot value is + // if somehow this same service got a _different_ NIC allocated + // to it in the TOCTOU race window above. That should be + // impossible with the way we generate blueprints, so we'll just + // return a scary error here and expect to never see it. + bail!( + "database cleanup required: \ + unexpected NIC ({created_nic:?}) \ + allocated for {zone_type} {service_id}" + ); + } + + info!( + self.opctx.log, "successfully allocated service vNIC"; + "zone_type" => zone_type, + "zone_id" => %service_id, + "nic" => ?nic, + ); + + Ok(()) + } + + async fn ensure_nexus_external_networking_allocated( + &self, + zone_id: Uuid, + external_ip: IpAddr, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + self.ensure_external_service_ip( + "nexus", + zone_id, + external_ip, + &Name(nic.name.clone()), + ) + .await?; + self.ensure_service_nic("nexus", zone_id, nic, &NEXUS_VPC_SUBNET) + .await?; + Ok(()) + } + + async fn ensure_external_dns_external_networking_allocated( + &self, + zone_id: Uuid, + dns_address: &str, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + let dns_address = + dns_address.parse::().with_context(|| { + format!("failed to parse ExternalDns address {dns_address}") + })?; + self.ensure_external_service_ip( + "external_dns", + zone_id, + dns_address.ip(), + &Name(nic.name.clone()), + ) + .await?; + self.ensure_service_nic("external_dns", zone_id, nic, &DNS_VPC_SUBNET) + .await?; + Ok(()) + } + + async fn ensure_boundary_ntp_external_networking_allocated( + &self, + zone_id: Uuid, + snat: &SourceNatConfig, + nic: &NetworkInterface, + ) -> anyhow::Result<()> { + self.ensure_external_service_snat_ip("ntp", zone_id, snat).await?; + self.ensure_service_nic("ntp", zone_id, nic, &NTP_VPC_SUBNET).await?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::OmicronZoneConfig; + use nexus_types::deployment::OmicronZoneDataset; + use nexus_types::identity::Resource; + use omicron_common::address::IpRange; + use omicron_common::address::DNS_OPTE_IPV4_SUBNET; + use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; + use omicron_common::address::NTP_OPTE_IPV4_SUBNET; + use omicron_common::address::NUM_SOURCE_NAT_PORTS; + use omicron_common::api::external::Generation; + use omicron_common::api::external::IpNet; + use omicron_common::api::external::MacAddr; + use omicron_common::api::external::Vni; + use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use std::net::IpAddr; + use std::net::Ipv6Addr; + use std::net::SocketAddrV6; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test] + 
async fn test_allocate_external_networking( + cptestctx: &ControlPlaneTestContext, + ) { + // Set up. + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Create an external IP range we can use for our services. + let external_ip_range = IpRange::try_from(( + "192.0.2.1".parse::().unwrap(), + "192.0.2.100".parse::().unwrap(), + )) + .expect("bad IP range"); + let mut external_ips = external_ip_range.iter(); + + // Add the external IP range to the services IP pool. + let (ip_pool, _) = datastore + .ip_pools_service_lookup(&opctx) + .await + .expect("failed to find service IP pool"); + datastore + .ip_pool_add_range(&opctx, &ip_pool, &external_ip_range) + .await + .expect("failed to expand service IP pool"); + + // Generate the values we care about. (Other required zone config params + // that we don't care about will be filled in below arbitrarily.) + + // Nexus: + let nexus_id = Uuid::new_v4(); + let nexus_external_ip = + external_ips.next().expect("exhausted external_ips"); + let nexus_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(nexus_id), + name: "test-nexus".parse().expect("bad name"), + ip: NEXUS_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // External DNS: + let dns_id = Uuid::new_v4(); + let dns_external_ip = + external_ips.next().expect("exhausted external_ips"); + let dns_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(dns_id), + name: "test-external-dns".parse().expect("bad name"), + ip: DNS_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // Boundary NTP: + let ntp_id = Uuid::new_v4(); + let ntp_snat = SourceNatConfig { + ip: external_ips.next().expect("exhausted external_ips"), + first_port: NUM_SOURCE_NAT_PORTS, + last_port: 2 * NUM_SOURCE_NAT_PORTS - 1, + }; + let ntp_nic = NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service(ntp_id), + name: "test-external-ntp".parse().expect("bad name"), + ip: NTP_OPTE_IPV4_SUBNET + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .unwrap() + .into(), + mac: MacAddr::random_system(), + subnet: IpNet::from(*NTP_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }; + + // Build the `zones` map needed by `ensure_zone_resources_allocated`, + // with an arbitrary sled_id. 
+ let mut zones = BTreeMap::new(); + let sled_id = Uuid::new_v4(); + zones.insert( + sled_id, + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + OmicronZoneConfig { + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: Ipv6Addr::LOCALHOST.to_string(), + external_ip: nexus_external_ip, + nic: nexus_nic.clone(), + external_tls: false, + external_dns_servers: Vec::new(), + }, + }, + OmicronZoneConfig { + id: dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .expect("bad name"), + }, + http_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddr::new(dns_external_ip, 0) + .to_string(), + nic: dns_nic.clone(), + }, + }, + OmicronZoneConfig { + id: ntp_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::BoundaryNtp { + address: SocketAddr::new(dns_external_ip, 0) + .to_string(), + ntp_servers: Vec::new(), + dns_servers: Vec::new(), + domain: None, + nic: ntp_nic.clone(), + snat_cfg: ntp_snat, + }, + }, + ], + }, + ); + + // Initialize resource allocation: this should succeed and create all + // the relevant db records. + ensure_zone_resources_allocated(&opctx, datastore, &zones) + .await + .with_context(|| format!("{zones:#?}")) + .unwrap(); + + // Check that the external IP records were created. + let db_nexus_ips = datastore + .service_lookup_external_ips(&opctx, nexus_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_nexus_ips.len(), 1); + assert!(db_nexus_ips[0].is_service); + assert_eq!(db_nexus_ips[0].parent_id, Some(nexus_id)); + assert_eq!(db_nexus_ips[0].ip, nexus_external_ip.into()); + assert_eq!(db_nexus_ips[0].first_port, SqlU16(0)); + assert_eq!(db_nexus_ips[0].last_port, SqlU16(65535)); + + let db_dns_ips = datastore + .service_lookup_external_ips(&opctx, dns_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_dns_ips.len(), 1); + assert!(db_dns_ips[0].is_service); + assert_eq!(db_dns_ips[0].parent_id, Some(dns_id)); + assert_eq!(db_dns_ips[0].ip, dns_external_ip.into()); + assert_eq!(db_dns_ips[0].first_port, SqlU16(0)); + assert_eq!(db_dns_ips[0].last_port, SqlU16(65535)); + + let db_ntp_ips = datastore + .service_lookup_external_ips(&opctx, ntp_id) + .await + .expect("failed to get external IPs"); + assert_eq!(db_ntp_ips.len(), 1); + assert!(db_ntp_ips[0].is_service); + assert_eq!(db_ntp_ips[0].parent_id, Some(ntp_id)); + assert_eq!(db_ntp_ips[0].ip, ntp_snat.ip.into()); + assert_eq!(db_ntp_ips[0].first_port, SqlU16(ntp_snat.first_port)); + assert_eq!(db_ntp_ips[0].last_port, SqlU16(ntp_snat.last_port)); + + // Check that the NIC records were created. 
+ let db_nexus_nics = datastore + .service_list_network_interfaces(&opctx, nexus_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_nexus_nics.len(), 1); + assert_eq!(db_nexus_nics[0].id(), nexus_nic.id); + assert_eq!(db_nexus_nics[0].service_id, nexus_id); + assert_eq!(db_nexus_nics[0].vpc_id, NEXUS_VPC_SUBNET.vpc_id); + assert_eq!(db_nexus_nics[0].subnet_id, NEXUS_VPC_SUBNET.id()); + assert_eq!(*db_nexus_nics[0].mac, nexus_nic.mac); + assert_eq!(db_nexus_nics[0].ip, nexus_nic.ip.into()); + assert_eq!(db_nexus_nics[0].slot, i16::from(nexus_nic.slot)); + assert_eq!(db_nexus_nics[0].primary, nexus_nic.primary); + + let db_dns_nics = datastore + .service_list_network_interfaces(&opctx, dns_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_dns_nics.len(), 1); + assert_eq!(db_dns_nics[0].id(), dns_nic.id); + assert_eq!(db_dns_nics[0].service_id, dns_id); + assert_eq!(db_dns_nics[0].vpc_id, DNS_VPC_SUBNET.vpc_id); + assert_eq!(db_dns_nics[0].subnet_id, DNS_VPC_SUBNET.id()); + assert_eq!(*db_dns_nics[0].mac, dns_nic.mac); + assert_eq!(db_dns_nics[0].ip, dns_nic.ip.into()); + assert_eq!(db_dns_nics[0].slot, i16::from(dns_nic.slot)); + assert_eq!(db_dns_nics[0].primary, dns_nic.primary); + + let db_ntp_nics = datastore + .service_list_network_interfaces(&opctx, ntp_id) + .await + .expect("failed to get NICs"); + assert_eq!(db_ntp_nics.len(), 1); + assert_eq!(db_ntp_nics[0].id(), ntp_nic.id); + assert_eq!(db_ntp_nics[0].service_id, ntp_id); + assert_eq!(db_ntp_nics[0].vpc_id, NTP_VPC_SUBNET.vpc_id); + assert_eq!(db_ntp_nics[0].subnet_id, NTP_VPC_SUBNET.id()); + assert_eq!(*db_ntp_nics[0].mac, ntp_nic.mac); + assert_eq!(db_ntp_nics[0].ip, ntp_nic.ip.into()); + assert_eq!(db_ntp_nics[0].slot, i16::from(ntp_nic.slot)); + assert_eq!(db_ntp_nics[0].primary, ntp_nic.primary); + + // We should be able to run the function again with the same inputs, and + // it should succeed without inserting any new records. + ensure_zone_resources_allocated(&opctx, datastore, &zones) + .await + .with_context(|| format!("{zones:#?}")) + .unwrap(); + assert_eq!( + db_nexus_ips, + datastore + .service_lookup_external_ips(&opctx, nexus_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_dns_ips, + datastore + .service_lookup_external_ips(&opctx, dns_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_ntp_ips, + datastore + .service_lookup_external_ips(&opctx, ntp_id) + .await + .expect("failed to get external IPs") + ); + assert_eq!( + db_nexus_nics, + datastore + .service_list_network_interfaces(&opctx, nexus_id) + .await + .expect("failed to get NICs") + ); + assert_eq!( + db_dns_nics, + datastore + .service_list_network_interfaces(&opctx, dns_id) + .await + .expect("failed to get NICs") + ); + assert_eq!( + db_ntp_nics, + datastore + .service_list_network_interfaces(&opctx, ntp_id) + .await + .expect("failed to get NICs") + ); + + // Now that we've tested the happy path, try some requests that ought to + // fail because the request includes an external IP that doesn't match + // the already-allocated external IPs from above. + let bogus_ip = external_ips.next().expect("exhausted external_ips"); + for mutate_zones_fn in [ + // non-matching IP on Nexus + (&|config: &mut OmicronZonesConfig| { + for zone in &mut config.zones { + if let OmicronZoneType::Nexus { + ref mut external_ip, .. 
+ } = &mut zone.zone_type + { + *external_ip = bogus_ip; + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }) as &dyn Fn(&mut OmicronZonesConfig) -> String, + // non-matching IP on External DNS + &|config| { + for zone in &mut config.zones { + if let OmicronZoneType::ExternalDns { + ref mut dns_address, + .. + } = &mut zone.zone_type + { + *dns_address = SocketAddr::new(bogus_ip, 0).to_string(); + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }, + // non-matching SNAT port range on Boundary NTP + &|config| { + for zone in &mut config.zones { + if let OmicronZoneType::BoundaryNtp { + ref mut snat_cfg, + .. + } = &mut zone.zone_type + { + snat_cfg.first_port += NUM_SOURCE_NAT_PORTS; + snat_cfg.last_port += NUM_SOURCE_NAT_PORTS; + return format!( + "zone {} already has 1 non-matching IP", + zone.id + ); + } + } + panic!("didn't find expected zone"); + }, + ] { + // Run `mutate_zones_fn` on our config... + let mut config = + zones.remove(&sled_id).expect("missing zone config"); + let orig_config = config.clone(); + let expected_error = mutate_zones_fn(&mut config); + zones.insert(sled_id, config); + + // ... check that we get the error we expect + let err = + ensure_zone_resources_allocated(&opctx, datastore, &zones) + .await + .expect_err("unexpected success"); + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + // ... and restore the original, valid config before iterating. + zones.insert(sled_id, orig_config); + } + + // Also try some requests that ought to fail because the request + // includes a NIC that doesn't match the already-allocated NICs from + // above. + // + // All three zone types have a `nic` property, so here our mutating + // function only modifies that, and the body of our loop tries it on all + // three to ensure we get the errors we expect no matter the zone type. + for mutate_nic_fn in [ + // switch kind from Service to Instance + (&|_: Uuid, nic: &mut NetworkInterface| { + match &nic.kind { + NetworkInterfaceKind::Instance { .. } => { + panic!( + "invalid NIC kind (expected service, got instance)" + ) + } + NetworkInterfaceKind::Service(id) => { + let id = *id; + nic.kind = NetworkInterfaceKind::Instance(id); + } + } + "invalid NIC kind".to_string() + }) as &dyn Fn(Uuid, &mut NetworkInterface) -> String, + // non-matching IP + &|zone_id, nic| { + nic.ip = bogus_ip; + format!("zone {zone_id} already has 1 non-matching NIC") + }, + ] { + // Try this NIC mutation on Nexus... + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::Nexus { ref mut nic, .. } = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + + // ... and again on ExternalDns + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::ExternalDns { ref mut nic, .. 
} = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + + // ... and again on BoundaryNtp + let mut mutated_zones = zones.clone(); + for zone in &mut mutated_zones + .get_mut(&sled_id) + .expect("missing sled") + .zones + { + if let OmicronZoneType::BoundaryNtp { ref mut nic, .. } = + &mut zone.zone_type + { + let expected_error = mutate_nic_fn(zone.id, nic); + + let err = ensure_zone_resources_allocated( + &opctx, + datastore, + &mutated_zones, + ) + .await + .expect_err("unexpected success"); + + assert!( + err.to_string().contains(&expected_error), + "expected {expected_error:?}, got {err:#}" + ); + + break; + } + } + } + } +} diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 1e9def4182..b30f91c7c0 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -33,7 +33,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_kind", schema = "public"))] pub struct IpKindEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Eq, Deserialize, Serialize)] #[diesel(sql_type = IpKindEnum)] pub enum IpKind; @@ -47,7 +47,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_attach_state"))] pub struct IpAttachStateEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Eq, Deserialize, Serialize)] #[diesel(sql_type = IpAttachStateEnum)] pub enum IpAttachState; @@ -89,7 +89,15 @@ impl std::fmt::Display for IpKind { /// API at all, and only provide outbound connectivity to instances, not /// inbound. #[derive( - Debug, Clone, Selectable, Queryable, Insertable, Deserialize, Serialize, + Debug, + Clone, + Selectable, + Queryable, + Insertable, + Deserialize, + Serialize, + PartialEq, + Eq, )] #[diesel(table_name = external_ip)] pub struct ExternalIp { diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index 01317fc160..72752ae3f8 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -91,7 +91,7 @@ pub struct InstanceNetworkInterface { /// The underlying "table" (`service_network_interface`) is actually a view /// over the `network_interface` table, that contains only rows with /// `kind = 'service'`. -#[derive(Selectable, Queryable, Clone, Debug, Resource)] +#[derive(Selectable, Queryable, Clone, Debug, PartialEq, Eq, Resource)] #[diesel(table_name = service_network_interface)] pub struct ServiceNetworkInterface { #[diesel(embed)] diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 9d4d947476..24439aa3a0 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -167,6 +167,23 @@ impl DataStore { } } + /// Fetch all external IP addresses of any kind for the provided service. 
+ pub async fn service_lookup_external_ips( + &self, + opctx: &OpContext, + service_id: Uuid, + ) -> LookupResult> { + use db::schema::external_ip::dsl; + dsl::external_ip + .filter(dsl::is_service.eq(true)) + .filter(dsl::parent_id.eq(service_id)) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Allocates an IP address for internal service usage. pub async fn allocate_service_ip( &self, @@ -337,7 +354,8 @@ impl DataStore { service_id: Uuid, ip: IpAddr, ) -> CreateResult { - let (.., pool) = self.ip_pools_service_lookup(opctx).await?; + let (authz_pool, pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::CreateChild, &authz_pool).await?; let data = IncompleteExternalIp::for_service_explicit( ip_id, name, @@ -361,7 +379,8 @@ impl DataStore { ip: IpAddr, port_range: (u16, u16), ) -> CreateResult { - let (.., pool) = self.ip_pools_service_lookup(opctx).await?; + let (authz_pool, pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::CreateChild, &authz_pool).await?; let data = IncompleteExternalIp::for_service_explicit_snat( ip_id, service_id, diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index d715bf3889..f2782e8f67 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -29,6 +29,7 @@ use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use diesel::result::Error as DieselError; +use nexus_db_model::ServiceNetworkInterface; use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::DeleteResult; @@ -126,15 +127,69 @@ impl DataStore { .map(NetworkInterface::as_instance) } - #[cfg(test)] + /// List network interfaces associated with a given service. + pub async fn service_list_network_interfaces( + &self, + opctx: &OpContext, + service_id: Uuid, + ) -> ListResultVec { + // See the comment in `service_create_network_interface`. There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require ListChildren on the + // instance to list). As a logical proxy, we check for listing children + // of the service IP pool. + let (authz_service_ip_pool, _) = + self.ip_pools_service_lookup(opctx).await?; + opctx + .authorize(authz::Action::ListChildren, &authz_service_ip_pool) + .await?; + + use db::schema::service_network_interface::dsl; + dsl::service_network_interface + .filter(dsl::time_deleted.is_null()) + .filter(dsl::service_id.eq(service_id)) + .select(ServiceNetworkInterface::as_select()) + .get_results_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Create a network interface attached to the provided service zone. + pub async fn service_create_network_interface( + &self, + opctx: &OpContext, + interface: IncompleteNetworkInterface, + ) -> Result { + // In `instance_create_network_interface`, the authz checks are for + // creating children of the VpcSubnet and the instance. We don't have an + // instance. We do have a VpcSubet, but for services these are all + // fixed data subnets. 
+ // + // As a proxy auth check that isn't really guarding the right resource + // but should logically be equivalent, we can insert a authz check for + // creating children of the service IP pool. For any service zone with + // external networking, we create an external IP (in the service IP + // pool) and a network interface (in the relevant VpcSubnet). Putting + // this check here ensures that the caller can't proceed if they also + // couldn't proceed with creating the corresponding external IP. + let (authz_service_ip_pool, _) = self + .ip_pools_service_lookup(opctx) + .await + .map_err(network_interface::InsertError::External)?; + opctx + .authorize(authz::Action::CreateChild, &authz_service_ip_pool) + .await + .map_err(network_interface::InsertError::External)?; + self.service_create_network_interface_raw(opctx, interface).await + } + pub(crate) async fn service_create_network_interface_raw( &self, opctx: &OpContext, interface: IncompleteNetworkInterface, - ) -> Result< - db::model::ServiceNetworkInterface, - network_interface::InsertError, - > { + ) -> Result { if interface.kind != NetworkInterfaceKind::Service { return Err(network_interface::InsertError::External( Error::invalid_request( diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 54595a8444..7456e6e6bb 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -1331,6 +1331,18 @@ mod tests { // Allocate an IP address as we would for an external, rack-associated // service. let service1_id = Uuid::new_v4(); + + // Check that `service_lookup_external_ips` returns an empty vector for + // a service with no external IPs. + assert_eq!( + context + .db_datastore + .service_lookup_external_ips(&context.opctx, service1_id) + .await + .expect("Failed to look up service external IPs"), + Vec::new(), + ); + let id1 = Uuid::new_v4(); let ip1 = context .db_datastore @@ -1349,6 +1361,14 @@ mod tests { assert_eq!(ip1.first_port.0, 0); assert_eq!(ip1.last_port.0, u16::MAX); assert_eq!(ip1.parent_id, Some(service1_id)); + assert_eq!( + context + .db_datastore + .service_lookup_external_ips(&context.opctx, service1_id) + .await + .expect("Failed to look up service external IPs"), + vec![ip1], + ); // Allocate an SNat IP let service2_id = Uuid::new_v4(); @@ -1364,6 +1384,14 @@ mod tests { assert_eq!(ip2.first_port.0, 0); assert_eq!(ip2.last_port.0, 16383); assert_eq!(ip2.parent_id, Some(service2_id)); + assert_eq!( + context + .db_datastore + .service_lookup_external_ips(&context.opctx, service2_id) + .await + .expect("Failed to look up service external IPs"), + vec![ip2], + ); // Allocate the next IP address let service3_id = Uuid::new_v4(); @@ -1385,6 +1413,14 @@ mod tests { assert_eq!(ip3.first_port.0, 0); assert_eq!(ip3.last_port.0, u16::MAX); assert_eq!(ip3.parent_id, Some(service3_id)); + assert_eq!( + context + .db_datastore + .service_lookup_external_ips(&context.opctx, service3_id) + .await + .expect("Failed to look up service external IPs"), + vec![ip3], + ); // Once we're out of IP addresses, test that we see the right error. 
let service3_id = Uuid::new_v4(); @@ -1422,6 +1458,14 @@ mod tests { assert_eq!(ip4.first_port.0, 16384); assert_eq!(ip4.last_port.0, 32767); assert_eq!(ip4.parent_id, Some(service4_id)); + assert_eq!( + context + .db_datastore + .service_lookup_external_ips(&context.opctx, service4_id) + .await + .expect("Failed to look up service external IPs"), + vec![ip4], + ); context.success().await; } From 4de45c8e8138ab50394b8fe0290791d8c707f48e Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 21 Feb 2024 17:26:10 -0800 Subject: [PATCH 02/18] "expensive operation" check was too aggressive (#5113) --- nexus/db-queries/src/context.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/context.rs b/nexus/db-queries/src/context.rs index 72da01a5f1..aea4a60e66 100644 --- a/nexus/db-queries/src/context.rs +++ b/nexus/db-queries/src/context.rs @@ -303,10 +303,17 @@ impl OpContext { /// either. Clients and proxies often don't expect long requests and /// apply aggressive timeouts. Depending on the HTTP version, a /// long-running request can tie up the TCP connection. + /// + /// We shouldn't allow these in either internal or external API handlers, + /// but we currently have some internal APIs for exercising some expensive + /// blueprint operations and so we allow these cases here. pub fn check_complex_operations_allowed(&self) -> Result<(), Error> { let api_handler = match self.kind { - OpKind::ExternalApiRequest | OpKind::InternalApiRequest => true, - OpKind::Saga | OpKind::Background | OpKind::Test => false, + OpKind::ExternalApiRequest => true, + OpKind::InternalApiRequest + | OpKind::Saga + | OpKind::Background + | OpKind::Test => false, }; if api_handler { Err(Error::internal_error( From 828c79c10e75d7529d13836b6fcc08a155b6fe59 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 22 Feb 2024 05:11:33 +0000 Subject: [PATCH 03/18] chore(deps): update taiki-e/install-action digest to 875aa7b (#5114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`12af778` -> `875aa7b`](https://togithub.com/taiki-e/install-action/compare/12af778...875aa7b) | --- ### Configuration πŸ“… **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. β™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. πŸ”• **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 68bc323a07..dc907e93e7 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@12af778b97addf4c562c75a0564dc7e7dc5339a5 # v2 + uses: taiki-e/install-action@875aa7bb88c61a90732e8d159a915df57f27e475 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 42844311eb688e63ea23361dfb4279b44e65f792 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 22 Feb 2024 07:28:45 +0000 Subject: [PATCH 04/18] chore(deps): update taiki-e/install-action digest to 19e9b54 (#5117) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`875aa7b` -> `19e9b54`](https://togithub.com/taiki-e/install-action/compare/875aa7b...19e9b54) | --- ### Configuration πŸ“… **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. β™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. πŸ”• **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index dc907e93e7..1f55f2f255 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@875aa7bb88c61a90732e8d159a915df57f27e475 # v2 + uses: taiki-e/install-action@19e9b549a48620cc50fcf6e6e866b8fb4eca1b01 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From dea0ea5a1e26464dc9780ae908f028d210f5f637 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 22 Feb 2024 09:25:23 -0500 Subject: [PATCH 05/18] RoT staging/dev and prod/rel 1.0.6 and SP 1.0.8 (#5057) --- .github/buildomat/jobs/tuf-repo.sh | 4 ++-- tools/dvt_dock_version | 2 +- tools/hubris_checksums | 16 ++++++++-------- tools/hubris_version | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 59fc0fd7a7..14c2293f5b 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -278,8 +278,8 @@ EOF done } # usage: SERIES ROT_DIR ROT_VERSION BOARDS... 
-add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.5 "${ALL_BOARDS[@]}" -add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.5 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.6 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.6 "${ALL_BOARDS[@]}" for series in "${SERIES_LIST[@]}"; do /work/tufaceous assemble --no-generate-key /work/manifest-"$series".toml /work/repo-"$series".zip diff --git a/tools/dvt_dock_version b/tools/dvt_dock_version index 047065135b..fd988dc876 100644 --- a/tools/dvt_dock_version +++ b/tools/dvt_dock_version @@ -1 +1 @@ -COMMIT=e384836415e05ae0ba648810ab1c87e9093cdabb +COMMIT=9e8e16f508bb41f02455aeddcece0a7edae2f672 diff --git a/tools/hubris_checksums b/tools/hubris_checksums index d451f7a86c..dca8ea0ab6 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,8 +1,8 @@ -e1b3dc5c7da643b27c0dd5bf8e915d13661446e711bfdeb1d8274eed63fa5843 build-gimlet-c-image-default-v1.0.6.zip -3002444307047429531ef862435a034c64b89a698921bf19794ac97b777a2f95 build-gimlet-d-image-default-v1.0.6.zip -9e783bc92fb1c8a91f4b117241ed4c0ff2818f32f46c5193cdcdbbe02d56af9a build-gimlet-e-image-default-v1.0.6.zip -458c4f02310fe79f27841ce87b2a7c163494f0196890e6420fac17dc4803b51c build-gimlet-f-image-default-v1.0.6.zip -dece7d39f7fcd2f15dc62d91e94046b1f438a3e0fd2c804efd5f67e12ce0dd58 build-psc-b-image-default-v1.0.6.zip -7e94035b52f1dcb137b477750bf9e215d4fcd07fe95b2cfdbbc0d7fada79eb28 build-psc-c-image-default-v1.0.6.zip -ccf09dc7c9c2a946b89bcfafb391100504880fa395c9079dfb7a3b28635a4abb build-sidecar-b-image-default-v1.0.6.zip -b5d91c212f813dbdba06c1f5b098fd37fe6cb93fe33fd3c58325cb6504dc6d05 build-sidecar-c-image-default-v1.0.6.zip +28f432735a15f40101c133e4e9974d1681a33bb7b3386ccbe15b465b613f4826 build-gimlet-c-image-default-v1.0.8.zip +db927999398f0723d5d614db78a5abb4a1d515c711ffba944477bdac10c48907 build-gimlet-d-image-default-v1.0.8.zip +629a53b5d9d4bf3d410687d0ecedf4837a54233ce62b6223d209494777cc7ebc build-gimlet-e-image-default-v1.0.8.zip +e3c2a243257929a65de638f3be425370f084aeeefafbd1773d01ee71cf0b8ea7 build-gimlet-f-image-default-v1.0.8.zip +556595b42d05508ebfdac9dd71b38fe9b72e0cb30f6aa4be626c02141e375a71 build-psc-b-image-default-v1.0.8.zip +39fbf92cbc935b4eaecb81a9768357828cf3e79b5c74d36c9a655ae9023cc50c build-psc-c-image-default-v1.0.8.zip +4225dff721b034fe7cf1dc26277557e1f15f2710014dd46dfa7c92ff04c7e054 build-sidecar-b-image-default-v1.0.8.zip +ae8f12d7b66d0bcc372dd79abf515255f6ca97bb0339c570a058684e04e12cf8 build-sidecar-c-image-default-v1.0.8.zip diff --git a/tools/hubris_version b/tools/hubris_version index f2c1e74f2b..4ee8ac61fe 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.6 psc-v1.0.6 sidecar-v1.0.6) +TAGS=(gimlet-v1.0.8 psc-v1.0.8 sidecar-v1.0.8) From a5be09fe0f20466ff66ae546df4895b785129fb0 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 22 Feb 2024 11:06:14 -0800 Subject: [PATCH 06/18] "add sled" needs a longer timeout (#5116) In #5111: - the "add sled" Nexus external API call invokes `PUT /sleds` to some sled agent - `PUT /sleds` itself blocks until the new sled's sled agent has started - sled agent startup blocks on setting the reservoir - on production hardware, setting the reservoir took 115s - the default Progenitor (reqwest) timeout is only 15s So as a result, the "add sled" request failed, even though the operation ultimately succeeded. In this PR, I bump the timeout to 5 minutes. 
I do wonder if we should remove it altogether, or if we should consider the other changes mentioned in #5111 (like not blocking sled agent startup on this, or not blocking these API calls in this way). But for now, this seems like a low-risk way to improve this situation. --- nexus/src/app/rack.rs | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 7a1ad0e6a9..a137f19434 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -61,6 +61,7 @@ use sled_agent_client::types::{ BgpConfig, BgpPeerConfig as SledBgpPeerConfig, EarlyNetworkConfig, PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, }; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -647,7 +648,7 @@ impl super::Nexus { if rack.rack_subnet.is_some() { return Ok(()); } - let sa = self.get_any_sled_agent(opctx).await?; + let sa = self.get_any_sled_agent_client(opctx).await?; let result = sa .read_network_bootstore_config_cache() .await @@ -883,7 +884,27 @@ impl super::Nexus { }, }, }; - let sa = self.get_any_sled_agent(opctx).await?; + + // This timeout value is fairly arbitrary (as they usually are). As of + // this writing, this operation is known to take close to two minutes on + // production hardware. + let dur = std::time::Duration::from_secs(300); + let sa_url = self.get_any_sled_agent_url(opctx).await?; + let reqwest_client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(|e| { + Error::internal_error(&format!( + "failed to create reqwest client for sled agent: {}", + InlineErrorChain::new(&e) + )) + })?; + let sa = sled_agent_client::Client::new_with_client( + &sa_url, + reqwest_client, + self.log.new(o!("sled_agent_url" => sa_url.clone())), + ); sa.sled_add(&req).await.map_err(|e| Error::InternalError { internal_message: format!( "failed to add sled with baseboard {:?} to rack {}: {e}", @@ -899,10 +920,10 @@ impl super::Nexus { Ok(()) } - async fn get_any_sled_agent( + async fn get_any_sled_agent_url( &self, opctx: &OpContext, - ) -> Result { + ) -> Result { let addr = self .sled_list(opctx, &DataPageParams::max_page()) .await? @@ -911,11 +932,15 @@ impl super::Nexus { internal_message: "no sled agents available".into(), })? .address(); + Ok(format!("http://{}", addr)) + } - Ok(sled_agent_client::Client::new( - &format!("http://{}", addr), - self.log.clone(), - )) + async fn get_any_sled_agent_client( + &self, + opctx: &OpContext, + ) -> Result { + let url = self.get_any_sled_agent_url(opctx).await?; + Ok(sled_agent_client::Client::new(&url, self.log.clone())) } } From 119bcd1f3d376dabc90a398b4c8ce9f384bbd17d Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Thu, 22 Feb 2024 22:53:12 +0000 Subject: [PATCH 07/18] Update maghemite to 4b0e584 (#5123) Updated maghemite to commit 4b0e584. 
Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index d7f42794ee..1a749c5b61 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -463,10 +463,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "41a69a11db6cfa8fc0c8686dc2d725708e0586ce" +source.commit = "4b0e584eec455a43c36af08ae207086965cef833" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "19d5eaa744257c32ccdca52af79d718aeb88a0af188345d33a4564a69b259632" +source.sha256 = "f1407cb9aac188d6493d2b0f948c75aad2c36668ddf4ae2a1ed80e9dd395b35d" output.type = "tarball" [package.mg-ddm] @@ -479,10 +479,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "41a69a11db6cfa8fc0c8686dc2d725708e0586ce" +source.commit = "4b0e584eec455a43c36af08ae207086965cef833" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "ffb647b3297ec616d3d9ea93396ad9edd16ed146048a660b34e9b86e85d466b7" +source.sha256 = "fae53cb39536dc92d97cb9610de65b0acbce285e685d7167b719ea6311844fec" output.type = "zone" output.intermediate_only = true @@ -494,10 +494,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "41a69a11db6cfa8fc0c8686dc2d725708e0586ce" +source.commit = "4b0e584eec455a43c36af08ae207086965cef833" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "26d34f61589f63be64eaa77a6e9e2db4c95d6675798386a1d61721c1ccc59d4d" +source.sha256 = "22996a6f3353296b848be729f14e78a42e7d3d6e62a4a918a5c2358ae011c8eb" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 6c58d83ea3..d161091fa8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="41a69a11db6cfa8fc0c8686dc2d725708e0586ce" +COMMIT="4b0e584eec455a43c36af08ae207086965cef833" SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 896be8d38c..475d273f4a 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="41a69a11db6cfa8fc0c8686dc2d725708e0586ce" +COMMIT="4b0e584eec455a43c36af08ae207086965cef833" SHA2="0ac038bbaa54d0ae0ac5ccaeff48f03070618372cca26c9d09b716b909bf9355" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 8fc4d083f8..ab84fafc01 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="26d34f61589f63be64eaa77a6e9e2db4c95d6675798386a1d61721c1ccc59d4d" -MGD_LINUX_SHA256="b2c823dd714fad67546a0e0c0d4ae56f2fe2e7c43434469b38e13b78de9f6968" \ No newline at end of file +CIDL_SHA256="22996a6f3353296b848be729f14e78a42e7d3d6e62a4a918a5c2358ae011c8eb" +MGD_LINUX_SHA256="943b0a52d279bde55a419e2cdb24873acc32703bc97bd599376117ee0edc1511" \ No newline at end of file From 2088693046c7a666a50a5e6a8b0a14e99f7d01a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Fri, 23 Feb 2024 19:39:35 +1300 Subject: [PATCH 08/18] [sled-agent] Self assembling external DNS zone (#5059) ### Overview In addition to implementing the external DNS self assembling zone, this PR contains a new SMF service called `opte-interface-setup`. Closes: https://github.com/oxidecomputer/omicron/issues/2881 Related: https://github.com/oxidecomputer/omicron/issues/1898 ### Implementation This service makes use of the zone-network CLI tool to avoid having too many CLIs doing things. The CLI is now shipped independently so it can be called by two different services. The [`zone-networking opte-interface-set-up`]( https://github.com/oxidecomputer/omicron/pull/5059/files#diff-5fb7b70dc87176e02517181b0887ce250b6a4e4079e495990551deeca741dc8bR181-R202) command sets up what the `ensure_address_for_port()` method used to set up. ### Justification The reasoning behind this new service is to avoid setting up too many things via the method_script.sh file, and to avoid code duplication. The Nexus zone will also be using this service to set up the OPTE interface. 
--- illumos-utils/src/ipadm.rs | 25 +++ illumos-utils/src/opte/port.rs | 8 +- illumos-utils/src/route.rs | 67 +++++++- illumos-utils/src/running_zone.rs | 7 +- package-manifest.toml | 56 +++++-- sled-agent/src/services.rs | 140 ++++++++++++----- smf/external-dns/manifest.xml | 12 +- smf/opte-interface-setup/manifest.xml | 46 ++++++ smf/zone-network-setup/manifest.xml | 2 +- zone-network-setup/src/bin/zone-networking.rs | 145 ++++++++++++++---- 10 files changed, 417 insertions(+), 91 deletions(-) create mode 100644 smf/opte-interface-setup/manifest.xml diff --git a/illumos-utils/src/ipadm.rs b/illumos-utils/src/ipadm.rs index f4d884d452..1c9e1e234e 100644 --- a/illumos-utils/src/ipadm.rs +++ b/illumos-utils/src/ipadm.rs @@ -107,4 +107,29 @@ impl Ipadm { }; Ok(()) } + + // Create gateway on the IP interface if it doesn't already exist + pub fn create_opte_gateway( + opte_iface: &String, + ) -> Result<(), ExecutionError> { + let addrobj = format!("{}/public", opte_iface); + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[IPADM, "show-addr", &addrobj]); + match execute(cmd) { + Err(_) => { + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[ + IPADM, + "create-addr", + "-t", + "-T", + "dhcp", + &addrobj, + ]); + execute(cmd)?; + } + Ok(_) => (), + }; + Ok(()) + } } diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index d06cdfe1ec..6fbb89c450 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -15,7 +15,7 @@ struct PortInner { // Name of the port as identified by OPTE name: String, // IP address within the VPC Subnet - _ip: IpAddr, + ip: IpAddr, // VPC-private MAC address mac: MacAddr6, // Emulated PCI slot for the guest NIC, passed to Propolis @@ -95,7 +95,7 @@ impl Port { Self { inner: Arc::new(PortInner { name, - _ip: ip, + ip, mac, slot, vni, @@ -105,6 +105,10 @@ impl Port { } } + pub fn ip(&self) -> &IpAddr { + &self.inner.ip + } + pub fn name(&self) -> &str { &self.inner.name } diff --git a/illumos-utils/src/route.rs b/illumos-utils/src/route.rs index 2b6af9a9fd..ceff2b3d9e 100644 --- a/illumos-utils/src/route.rs +++ b/illumos-utils/src/route.rs @@ -7,27 +7,76 @@ use crate::zone::ROUTE; use crate::{execute, inner, output_to_exec_error, ExecutionError, PFEXEC}; use libc::ESRCH; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; /// Wraps commands for interacting with routing tables. pub struct Route {} +pub enum Gateway { + Ipv4(Ipv4Addr), + Ipv6(Ipv6Addr), +} + #[cfg_attr(any(test, feature = "testing"), mockall::automock)] impl Route { pub fn ensure_default_route_with_gateway( - gateway: &Ipv6Addr, + gateway: Gateway, ) -> Result<(), ExecutionError> { + let inet; + let gw; + match gateway { + Gateway::Ipv4(addr) => { + inet = "-inet"; + gw = addr.to_string(); + } + Gateway::Ipv6(addr) => { + inet = "-inet6"; + gw = addr.to_string(); + } + } // Add the desired route if it doesn't already exist let destination = "default"; let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[ROUTE, "-n", "get", inet, destination, inet, &gw]); + + let out = + cmd.output().map_err(|err| ExecutionError::ExecutionStart { + command: inner::to_string(cmd), + err, + })?; + match out.status.code() { + Some(0) => (), + // If the entry is not found in the table, + // the exit status of the command will be 3 (ESRCH). + // When that is the case, we'll add the route. 
+ Some(ESRCH) => { + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = + cmd.args(&[ROUTE, "add", inet, destination, inet, &gw]); + execute(cmd)?; + } + Some(_) | None => return Err(output_to_exec_error(cmd, &out)), + }; + Ok(()) + } + + pub fn ensure_opte_route( + gateway: &Ipv4Addr, + iface: &String, + opte_ip: &IpAddr, + ) -> Result<(), ExecutionError> { + // Add the desired route if it doesn't already exist + let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[ ROUTE, "-n", "get", - "-inet6", - destination, - "-inet6", + "-host", &gateway.to_string(), + &opte_ip.to_string(), + "-interface", + "-ifp", + &iface.to_string(), ]); let out = @@ -45,10 +94,12 @@ impl Route { let cmd = cmd.args(&[ ROUTE, "add", - "-inet6", - destination, - "-inet6", + "-host", &gateway.to_string(), + &opte_ip.to_string(), + "-interface", + "-ifp", + &iface.to_string(), ]); execute(cmd)?; } diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 4b4107f529..1c1df01980 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -888,7 +888,7 @@ impl RunningZone { /// Return references to the OPTE ports for this zone. pub fn opte_ports(&self) -> impl Iterator { - self.inner.opte_ports.iter().map(|(port, _)| port) + self.inner.opte_ports() } /// Remove the OPTE ports on this zone from the port manager. @@ -1130,6 +1130,11 @@ impl InstalledZone { path.push("root/var/svc/profile/site.xml"); path } + + /// Returns references to the OPTE ports for this zone. + pub fn opte_ports(&self) -> impl Iterator { + self.opte_ports.iter().map(|(port, _)| port) + } } #[derive(Clone)] diff --git a/package-manifest.toml b/package-manifest.toml index 1a749c5b61..c474a52736 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -120,7 +120,7 @@ setup_hint = """ service_name = "oximeter" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "oximeter-collector.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ "oximeter-collector.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" [package.oximeter-collector] @@ -140,7 +140,12 @@ output.intermediate_only = true service_name = "clickhouse" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ + "clickhouse_svc.tar.gz", + "internal-dns-cli.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz" +] output.type = "zone" [package.clickhouse_svc] @@ -161,7 +166,12 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary bin service_name = "clickhouse_keeper" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ + "clickhouse_keeper_svc.tar.gz", + "internal-dns-cli.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz" +] output.type = "zone" [package.clickhouse_keeper_svc] @@ -182,7 +192,12 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary bin service_name = "cockroachdb" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ + "cockroachdb-service.tar.gz", + "internal-dns-cli.tar.gz", + "zone-network-setup.tar.gz", + 
"zone-network-install.tar.gz" +] output.type = "zone" [package.cockroachdb-service] @@ -220,7 +235,13 @@ output.type = "zone" service_name = "external_dns" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "dns-server.tar.gz", "external-dns-customizations.tar.gz" ] +source.packages = [ + "dns-server.tar.gz", + "external-dns-customizations.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz", + "opte-interface-setup.tar.gz" +] output.type = "zone" [package.dns-server] @@ -393,7 +414,7 @@ output.intermediate_only = true service_name = "crucible" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "crucible.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ "crucible.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" @@ -401,7 +422,7 @@ output.type = "zone" service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "crucible-pantry.tar.gz", "zone-network-setup.tar.gz" ] +source.packages = [ "crucible-pantry.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" # Packages not built within Omicron, but which must be imported. @@ -643,14 +664,31 @@ source.packages = [ ] output.type = "zone" -[package.zone-network-setup] +[package.zone-network-install] service_name = "zone-network-setup" only_for_targets.image = "standard" source.type = "local" +source.paths = [ + { from = "smf/zone-network-setup/manifest.xml", to = "/var/svc/manifest/site/zone-network-setup/manifest.xml" }, +] +output.type = "zone" +output.intermediate_only = true + +[package.zone-network-setup] +service_name = "zone-network-cli" +only_for_targets.image = "standard" +source.type = "local" source.rust.binary_names = ["zone-networking"] source.rust.release = true +output.type = "zone" +output.intermediate_only = true + +[package.opte-interface-setup] +service_name = "opte-interface-setup" +only_for_targets.image = "standard" +source.type = "local" source.paths = [ - { from = "smf/zone-network-setup/manifest.xml", to = "/var/svc/manifest/site/zone-network-setup/manifest.xml" }, + { from = "smf/opte-interface-setup/manifest.xml", to = "/var/svc/manifest/site/opte-interface-setup/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index bcd648cd2d..f3ddfdbf89 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -53,7 +53,8 @@ use illumos_utils::dladm::{ use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; use illumos_utils::running_zone::{ - InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, + EnsureAddressError, InstalledZone, RunCommandError, RunningZone, + ZoneBuilderFactory, }; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; @@ -68,6 +69,8 @@ use omicron_common::address::COCKROACH_PORT; use omicron_common::address::CRUCIBLE_PANTRY_PORT; use omicron_common::address::CRUCIBLE_PORT; use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::DNS_HTTP_PORT; +use omicron_common::address::DNS_PORT; use omicron_common::address::MGS_PORT; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; @@ -1444,6 +1447,32 @@ impl ServiceManager { .add_instance(ServiceInstanceBuilder::new("default"))) } + fn opte_interface_set_up_install( + zone: 
&InstalledZone, + ) -> Result { + let port_idx = 0; + let port = zone.opte_ports().nth(port_idx).ok_or_else(|| { + Error::ZoneEnsureAddress(EnsureAddressError::MissingOptePort { + zone: String::from(zone.name()), + port_idx, + }) + })?; + + let opte_interface = port.vnic_name(); + let opte_gateway = port.gateway().ip().to_string(); + let opte_ip = port.ip().to_string(); + + let mut config_builder = PropertyGroupBuilder::new("config"); + config_builder = config_builder + .add_property("interface", "astring", opte_interface) + .add_property("gateway", "astring", &opte_gateway) + .add_property("ip", "astring", &opte_ip); + + Ok(ServiceBuilder::new("oxide/opte-interface-setup") + .add_property_group(config_builder) + .add_instance(ServiceInstanceBuilder::new("default"))) + } + async fn initialize_zone( &self, request: ZoneArgs<'_>, @@ -1841,6 +1870,73 @@ impl ServiceManager { })?; return Ok(RunningZone::boot(installed_zone).await?); } + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::ExternalDns { .. }, + underlay_address, + .. + }, + .. + }) => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + let static_addr = underlay_address.to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + info, + &installed_zone, + &static_addr.clone(), + )?; + + // Like Nexus, we need to be reachable externally via + // `dns_address` but we don't listen on that address + // directly but instead on a VPC private IP. OPTE will + // en/decapsulate as appropriate. + let opte_interface_setup = + Self::opte_interface_set_up_install(&installed_zone)?; + + let port_idx = 0; + let port = installed_zone + .opte_ports() + .nth(port_idx) + .ok_or_else(|| { + Error::ZoneEnsureAddress( + EnsureAddressError::MissingOptePort { + zone: String::from(installed_zone.name()), + port_idx, + }, + ) + })?; + let opte_ip = port.ip(); + + let http_addr = format!("[{}]:{}", static_addr, DNS_HTTP_PORT); + let dns_addr = format!("{}:{}", opte_ip, DNS_PORT); + + let external_dns_config = PropertyGroupBuilder::new("config") + .add_property("http_address", "astring", &http_addr) + .add_property("dns_address", "astring", &dns_addr); + let external_dns_service = + ServiceBuilder::new("oxide/external_dns").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(external_dns_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(opte_interface_setup) + .add_service(disabled_ssh_service) + .add_service(external_dns_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup External DNS profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } _ => {} } @@ -2081,47 +2177,6 @@ impl ServiceManager { .await .map_err(|err| Error::io_path(&config_path, err))?; } - - OmicronZoneType::ExternalDns { - http_address, - dns_address, - .. - } => { - info!( - self.inner.log, - "Setting up external-dns service" - ); - - // Like Nexus, we need to be reachable externally via - // `dns_address` but we don't listen on that address - // directly but instead on a VPC private IP. OPTE will - // en/decapsulate as appropriate. - let port_ip = running_zone - .ensure_address_for_port("public", 0) - .await? 
- .ip(); - let dns_address = - SocketAddr::new(port_ip, dns_address.port()); - - smfh.setprop( - "config/http_address", - format!( - "[{}]:{}", - http_address.ip(), - http_address.port(), - ), - )?; - smfh.setprop( - "config/dns_address", - dns_address.to_string(), - )?; - - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the - // service is enabled. - smfh.refresh()?; - } - OmicronZoneType::InternalDns { http_address, dns_address, @@ -2262,6 +2317,7 @@ impl ServiceManager { | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } | OmicronZoneType::CruciblePantry { .. } + | OmicronZoneType::ExternalDns { .. } | OmicronZoneType::Oximeter { .. } => { panic!( "{} is a service which exists as part of a \ diff --git a/smf/external-dns/manifest.xml b/smf/external-dns/manifest.xml index 05c3c02b4e..1b6ee2cba0 100644 --- a/smf/external-dns/manifest.xml +++ b/smf/external-dns/manifest.xml @@ -4,13 +4,23 @@ - + + + + + + + + + diff --git a/smf/opte-interface-setup/manifest.xml b/smf/opte-interface-setup/manifest.xml new file mode 100644 index 0000000000..5b886c8a71 --- /dev/null +++ b/smf/opte-interface-setup/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/zone-network-setup/manifest.xml b/smf/zone-network-setup/manifest.xml index 0776329749..a20ff949a4 100644 --- a/smf/zone-network-setup/manifest.xml +++ b/smf/zone-network-setup/manifest.xml @@ -18,7 +18,7 @@ diff --git a/zone-network-setup/src/bin/zone-networking.rs b/zone-network-setup/src/bin/zone-networking.rs index f3d18832c5..e36afd6b03 100644 --- a/zone-network-setup/src/bin/zone-networking.rs +++ b/zone-network-setup/src/bin/zone-networking.rs @@ -5,17 +5,31 @@ //! 
CLI to set up zone networking use anyhow::anyhow; -use clap::{arg, command}; +use clap::{arg, command, ArgMatches, Command}; use illumos_utils::ipadm::Ipadm; -use illumos_utils::route::Route; +use illumos_utils::route::{Gateway, Route}; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use slog::info; +use slog::{info, Logger}; use std::fs; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; pub const HOSTS_FILE: &str = "/etc/inet/hosts"; +fn parse_ip(s: &str) -> anyhow::Result { + if s == "unknown" { + return Err(anyhow!("ERROR: Missing input value")); + }; + s.parse().map_err(|_| anyhow!("ERROR: Invalid IP address")) +} + +fn parse_ipv4(s: &str) -> anyhow::Result { + if s == "unknown" { + return Err(anyhow!("ERROR: Missing input value")); + }; + s.parse().map_err(|_| anyhow!("ERROR: Invalid IPv4 address")) +} + fn parse_ipv6(s: &str) -> anyhow::Result { if s == "unknown" { return Err(anyhow!("ERROR: Missing input value")); @@ -30,6 +44,13 @@ fn parse_datalink(s: &str) -> anyhow::Result { s.parse().map_err(|_| anyhow!("ERROR: Invalid data link")) } +fn parse_opte_iface(s: &str) -> anyhow::Result { + if s == "unknown" { + return Err(anyhow!("ERROR: Missing OPTE interface")); + }; + s.parse().map_err(|_| anyhow!("ERROR: Invalid OPTE interface")) +} + #[tokio::main] async fn main() { if let Err(message) = do_run().await { @@ -47,34 +68,81 @@ async fn do_run() -> Result<(), CmdError> { .map_err(|err| CmdError::Failure(anyhow!(err)))?; let matches = command!() - .arg( - arg!( - -d --datalink "datalink" - ) - .required(true) - .value_parser(parse_datalink), - ) - .arg( - arg!( - -g --gateway "gateway" - ) - .required(true) - .value_parser(parse_ipv6), + .subcommand( + Command::new("set-up") + .about( + "Sets up common networking configuration across all zones", + ) + .arg( + arg!( + -d --datalink "datalink" + ) + .required(true) + .value_parser(parse_datalink), + ) + .arg( + arg!( + -g --gateway "gateway" + ) + .required(true) + .value_parser(parse_ipv6), + ) + .arg( + arg!( + -s --static_addr "static_addr" + ) + .required(true) + .value_parser(parse_ipv6), + ), ) - .arg( - arg!( - -s --static_addr "static_addr" - ) - .required(true) - .value_parser(parse_ipv6), + .subcommand( + Command::new("opte-interface-set-up") + .about("Sets up OPTE interface") + .arg( + arg!( + -i --opte_interface "opte_interface" + ) + .required(true) + .value_parser(parse_opte_iface), + ) + .arg( + arg!( + -g --opte_gateway "opte_gateway" + ) + .required(true) + .value_parser(parse_ipv4), + ) + .arg( + arg!( + -p --opte_ip "opte_ip" + ) + .required(true) + .value_parser(parse_ip), + ), ) .get_matches(); - let zonename = - zone::current().await.expect("Could not determine local zone name"); + if let Some(matches) = matches.subcommand_matches("set-up") { + set_up(matches, log.clone()).await?; + } + + if let Some(matches) = matches.subcommand_matches("opte-interface-set-up") { + opte_interface_set_up(matches, log.clone()).await?; + } + + Ok(()) +} + +async fn set_up(matches: &ArgMatches, log: Logger) -> Result<(), CmdError> { let datalink: &String = matches.get_one("datalink").unwrap(); let static_addr: &Ipv6Addr = matches.get_one("static_addr").unwrap(); - let gateway: &Ipv6Addr = matches.get_one("gateway").unwrap(); + let gateway: Ipv6Addr = *matches.get_one("gateway").unwrap(); + let zonename = zone::current().await.map_err(|err| { + CmdError::Failure(anyhow!( + "Could not determine local zone name: {}", + err + )) + })?; // TODO: remove when 
https://github.com/oxidecomputer/stlouis/issues/435 is // addressed @@ -91,7 +159,7 @@ async fn do_run() -> Result<(), CmdError> { .map_err(|err| CmdError::Failure(anyhow!(err)))?; info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway); - Route::ensure_default_route_with_gateway(gateway) + Route::ensure_default_route_with_gateway(Gateway::Ipv6(gateway)) .map_err(|err| CmdError::Failure(anyhow!(err)))?; info!(&log, "Populating hosts file for zone"; "zonename" => ?zonename); @@ -109,3 +177,26 @@ async fn do_run() -> Result<(), CmdError> { Ok(()) } + +async fn opte_interface_set_up( + matches: &ArgMatches, + log: Logger, +) -> Result<(), CmdError> { + let interface: &String = matches.get_one("opte_interface").unwrap(); + let gateway: Ipv4Addr = *matches.get_one("opte_gateway").unwrap(); + let opte_ip: &IpAddr = matches.get_one("opte_ip").unwrap(); + + info!(&log, "Creating gateway on the OPTE IP interface if it doesn't already exist"; "OPTE interface" => ?interface); + Ipadm::create_opte_gateway(interface) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + + info!(&log, "Ensuring there is a gateway route"; "OPTE gateway" => ?gateway, "OPTE interface" => ?interface, "OPTE IP" => ?opte_ip); + Route::ensure_opte_route(&gateway, interface, &opte_ip) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + + info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway); + Route::ensure_default_route_with_gateway(Gateway::Ipv4(gateway)) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + + Ok(()) +} From 65ebf72288b16467dc6457bb4c3bfa90830000db Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 23 Feb 2024 01:59:41 -0800 Subject: [PATCH 09/18] [test-utils] Give a little hint about running CockroachDB on test failures (#5125) This is a pattern I use a lot, but I want to help make it more visible to folks during test failures. Digging into the state of Cockroach after a test failure is a useful debugging tool, so I want to make it obvious "how to do that". --- test-utils/src/dev/db.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index da6cc90b03..3c1b46b4c2 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -640,8 +640,13 @@ impl Drop for CockroachInstance { // Do NOT clean up the temporary directory in this case. let path = temp_dir.into_path(); eprintln!( - "WARN: temporary directory leaked: {}", - path.display() + "WARN: temporary directory leaked: {path:?}\n\ + \tIf you would like to access the database for debugging, run the following:\n\n\ + \t# Run the database\n\ + \tcargo run --bin omicron-dev db-run --no-populate --store-dir {data_path:?}\n\ + \t# Access the database. 
Note the port may change if you run multiple databases.\n\ + \tcockroach sql --host=localhost:32221 --insecure", + data_path = path.join("data"), ); } } From a07cae6485c3f7babbd4824b021c47601b99f92a Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 23 Feb 2024 14:55:47 -0500 Subject: [PATCH 10/18] Fix backwards counts in bad partition layout log message (#5129) --- sled-hardware/src/illumos/partitions.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index de62e25cfe..29b2466ad9 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -46,9 +46,8 @@ fn parse_partition_types( return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( - "Expected {} partitions, only saw {}", + "Expected {N} partitions, only saw {}", partitions.len(), - N ), }); } From a6ef7f9ed791893d1333f080b15ae4109e3b858d Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 23 Feb 2024 15:52:27 -0800 Subject: [PATCH 11/18] [nexus] add basic support for expunged sled policy and decommissioned sled state (#5032) This PR does a few things: * Migrates our current `sled_provision_state` to a new `sled_policy` enum, which has more information (per [RFD 457](https://rfd.shared.oxide.computer/rfd/0457)). This PR implements the expunged state, not the graceful removal state. * Adds a `sled_state` enum, which describes Nexus's view of the sled. This PR adds the `active` and `decommissioned` states. * Adds **internal** code to move around between valid states. * Makes the blueprint execution code aware of sleds eligible for discretionary services. * Adds tests for all of this new stuff, as well as valid and invalid state transitions -- and also makes sure that if we _do_ end up in an invalid state, things don't break down. Not done here, but in future PRs (to try and keep this PR a manageable size): * We'll add the endpoint to mark the sled as expunged (this is an irreversible operation and will need the appropriate warnings): https://github.com/oxidecomputer/omicron/issues/5134 * We'll add blueprint code to start removing sleds. 
* We'll also remove the sled `time_deleted` because it has a lifecycle too complicated to be described that way -- instead, we'll add a `time_decommissioned` field: https://github.com/oxidecomputer/omicron/issues/5131 --- Cargo.lock | 1 + nexus/blueprint-execution/src/dns.rs | 6 +- nexus/db-model/src/lib.rs | 6 +- nexus/db-model/src/schema.rs | 5 +- nexus/db-model/src/sled.rs | 31 +- nexus/db-model/src/sled_policy.rs | 68 ++ nexus/db-model/src/sled_provision_state.rs | 53 -- nexus/db-model/src/sled_state.rs | 59 ++ nexus/db-queries/Cargo.toml | 3 +- nexus/db-queries/src/authz/context.rs | 3 +- nexus/db-queries/src/authz/policy_test/mod.rs | 6 +- nexus/db-queries/src/context.rs | 9 +- .../db-queries/src/db/datastore/deployment.rs | 8 +- nexus/db-queries/src/db/datastore/disk.rs | 2 +- nexus/db-queries/src/db/datastore/dns.rs | 2 +- .../db-queries/src/db/datastore/inventory.rs | 2 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 2 +- .../src/db/datastore/ipv4_nat_entry.rs | 2 +- nexus/db-queries/src/db/datastore/mod.rs | 434 +++++----- .../src/db/datastore/physical_disk.rs | 3 +- nexus/db-queries/src/db/datastore/rack.rs | 3 +- nexus/db-queries/src/db/datastore/sled.rs | 799 ++++++++++++++++-- .../src/db/datastore/switch_port.rs | 3 +- .../db-queries/src/db/datastore/test_utils.rs | 343 ++++++++ nexus/db-queries/src/db/datastore/volume.rs | 2 +- nexus/db-queries/src/db/datastore/vpc.rs | 2 +- nexus/db-queries/src/db/lookup.rs | 3 +- nexus/db-queries/src/db/pool_connection.rs | 3 +- .../db-queries/src/db/queries/external_ip.rs | 3 +- .../src/db/queries/network_interface.rs | 3 +- .../src/db/queries/region_allocation.rs | 9 +- nexus/db-queries/src/transaction_retry.rs | 2 +- nexus/deployment/src/blueprint_builder.rs | 38 +- nexus/deployment/src/planner.rs | 80 +- .../src/app/background/blueprint_execution.rs | 3 +- .../app/background/inventory_collection.rs | 2 +- nexus/src/app/deployment.rs | 3 +- nexus/src/app/sled.rs | 26 +- nexus/src/external_api/http_entrypoints.rs | 24 +- nexus/tests/integration_tests/endpoints.rs | 17 +- nexus/tests/integration_tests/schema.rs | 48 +- nexus/tests/output/nexus_tags.txt | 2 +- nexus/types/src/deployment.rs | 22 +- nexus/types/src/external_api/params.rs | 14 +- nexus/types/src/external_api/views.rs | 200 ++++- openapi/nexus.json | 110 ++- schema/crdb/37.0.0/up01.sql | 13 + schema/crdb/37.0.0/up02.sql | 22 + schema/crdb/37.0.0/up03.sql | 7 + schema/crdb/37.0.0/up04.sql | 14 + schema/crdb/37.0.1/up01.sql | 8 + schema/crdb/37.0.1/up02.sql | 1 + schema/crdb/dbinit.sql | 49 +- 53 files changed, 2115 insertions(+), 468 deletions(-) create mode 100644 nexus/db-model/src/sled_policy.rs delete mode 100644 nexus/db-model/src/sled_provision_state.rs create mode 100644 nexus/db-model/src/sled_state.rs create mode 100644 nexus/db-queries/src/db/datastore/test_utils.rs create mode 100644 schema/crdb/37.0.0/up01.sql create mode 100644 schema/crdb/37.0.0/up02.sql create mode 100644 schema/crdb/37.0.0/up03.sql create mode 100644 schema/crdb/37.0.0/up04.sql create mode 100644 schema/crdb/37.0.1/up01.sql create mode 100644 schema/crdb/37.0.1/up02.sql diff --git a/Cargo.lock b/Cargo.lock index e15afdfbab..85b7e5a186 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4405,6 +4405,7 @@ dependencies = [ "pem", "petgraph", "pq-sys", + "predicates", "pretty_assertions", "rand 0.8.5", "rcgen", diff --git a/nexus/blueprint-execution/src/dns.rs b/nexus/blueprint-execution/src/dns.rs index 6dd9266f32..54611e9f66 100644 --- a/nexus/blueprint-execution/src/dns.rs +++ 
b/nexus/blueprint-execution/src/dns.rs @@ -328,7 +328,8 @@ mod test { use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; - use nexus_types::external_api::views::SledProvisionState; + use nexus_types::external_api::views::SledPolicy; + use nexus_types::external_api::views::SledState; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; use nexus_types::internal_api::params::DnsRecord; @@ -409,7 +410,8 @@ mod test { .zip(possible_sled_subnets) .map(|(sled_id, subnet)| { let sled_resources = SledResources { - provision_state: SledProvisionState::Provisionable, + policy: SledPolicy::provisionable(), + state: SledState::Active, zpools: BTreeSet::from([ZpoolName::from_str(&format!( "oxp_{}", Uuid::new_v4() diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index ecbb8365fe..07c9f5eec6 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -73,9 +73,10 @@ mod silo_user; mod silo_user_password_hash; mod sled; mod sled_instance; -mod sled_provision_state; +mod sled_policy; mod sled_resource; mod sled_resource_kind; +mod sled_state; mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; @@ -161,9 +162,10 @@ pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; pub use sled_instance::*; -pub use sled_provision_state::*; +pub use sled_policy::to_db_sled_policy; // Do not expose DbSledPolicy pub use sled_resource::*; pub use sled_resource_kind::*; +pub use sled_state::*; pub use sled_underlay_subnet_allocation::*; pub use snapshot::*; pub use ssh_key::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 54755486e5..d8344c2258 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(36, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(37, 0, 1); table! { disk (id) { @@ -824,7 +824,8 @@ table! { ip -> Inet, port -> Int4, last_used_address -> Inet, - provision_state -> crate::SledProvisionStateEnum, + sled_policy -> crate::sled_policy::SledPolicyEnum, + sled_state -> crate::SledStateEnum, } } diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 52968c27d5..47912f89cc 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -2,10 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::{ByteCount, Generation, SqlU16, SqlU32}; +use super::{ByteCount, Generation, SledState, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; +use crate::ipv6; use crate::schema::{physical_disk, service, sled, zpool}; -use crate::{ipv6, SledProvisionState}; +use crate::sled_policy::DbSledPolicy; use chrono::{DateTime, Utc}; use db_macros::Asset; use nexus_types::{external_api::shared, external_api::views, identity::Asset}; @@ -60,7 +61,11 @@ pub struct Sled { /// The last IP address provided to a propolis instance on this sled pub last_used_address: ipv6::Ipv6Addr, - provision_state: SledProvisionState, + #[diesel(column_name = sled_policy)] + policy: DbSledPolicy, + + #[diesel(column_name = sled_state)] + state: SledState, } impl Sled { @@ -84,8 +89,15 @@ impl Sled { &self.serial_number } - pub fn provision_state(&self) -> SledProvisionState { - self.provision_state + /// The policy here is the `views::SledPolicy` because we expect external + /// users to always use that. + pub fn policy(&self) -> views::SledPolicy { + self.policy.into() + } + + /// Returns the sled's state. + pub fn state(&self) -> SledState { + self.state } } @@ -99,7 +111,8 @@ impl From for views::Sled { part: sled.part_number, revision: sled.revision, }, - provision_state: sled.provision_state.into(), + policy: sled.policy.into(), + state: sled.state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, } @@ -197,8 +210,10 @@ impl SledUpdate { serial_number: self.serial_number, part_number: self.part_number, revision: self.revision, - // By default, sleds start as provisionable. - provision_state: SledProvisionState::Provisionable, + // By default, sleds start in-service. + policy: DbSledPolicy::InService, + // Currently, new sleds start in the "active" state. + state: SledState::Active, usable_hardware_threads: self.usable_hardware_threads, usable_physical_ram: self.usable_physical_ram, reservoir_size: self.reservoir_size, diff --git a/nexus/db-model/src/sled_policy.rs b/nexus/db-model/src/sled_policy.rs new file mode 100644 index 0000000000..13ecf10296 --- /dev/null +++ b/nexus/db-model/src/sled_policy.rs @@ -0,0 +1,68 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of a sled's operator-defined policy. +//! +//! This is related to, but different from `SledState`: a sled's **policy** is +//! its disposition as specified by the operator, while its **state** refers to +//! what's currently on it, as determined by Nexus. +//! +//! For example, a sled might be in the `Active` state, but have a policy of +//! `Expunged` -- this would mean that Nexus knows about resources currently +//! provisioned on the sled, but the operator has said that it should be marked +//! as gone. + +use super::impl_enum_type; +use nexus_types::external_api::views::{SledPolicy, SledProvisionPolicy}; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_policy", schema = "public"))] + pub struct SledPolicyEnum; + + /// This type is not actually public, because [`SledPolicy`] has a somewhat + /// different, friendlier shape while being equivalent -- external code + /// should always use [`SledPolicy`]. 
+ /// + /// However, it must be marked `pub` to avoid errors like `crate-private + /// type `DbSledPolicy` in public interface`. Marking this type `pub`, + /// without actually making it public, tricks rustc in a desirable way. + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = SledPolicyEnum)] + pub enum DbSledPolicy; + + // Enum values + InService => b"in_service" + NoProvision => b"no_provision" + Expunged => b"expunged" +); + +/// Converts a [`SledPolicy`] to a version that can be inserted into a +/// database. +pub fn to_db_sled_policy(policy: SledPolicy) -> DbSledPolicy { + match policy { + SledPolicy::InService { + provision_policy: SledProvisionPolicy::Provisionable, + } => DbSledPolicy::InService, + SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + } => DbSledPolicy::NoProvision, + SledPolicy::Expunged => DbSledPolicy::Expunged, + } +} + +impl From for SledPolicy { + fn from(policy: DbSledPolicy) -> Self { + match policy { + DbSledPolicy::InService => SledPolicy::InService { + provision_policy: SledProvisionPolicy::Provisionable, + }, + DbSledPolicy::NoProvision => SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }, + DbSledPolicy::Expunged => SledPolicy::Expunged, + } + } +} diff --git a/nexus/db-model/src/sled_provision_state.rs b/nexus/db-model/src/sled_provision_state.rs deleted file mode 100644 index ada842a32f..0000000000 --- a/nexus/db-model/src/sled_provision_state.rs +++ /dev/null @@ -1,53 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use super::impl_enum_type; -use nexus_types::external_api::views; -use serde::{Deserialize, Serialize}; -use thiserror::Error; - -impl_enum_type!( - #[derive(Clone, SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "sled_provision_state", schema = "public"))] - pub struct SledProvisionStateEnum; - - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] - #[diesel(sql_type = SledProvisionStateEnum)] - pub enum SledProvisionState; - - // Enum values - Provisionable => b"provisionable" - NonProvisionable => b"non_provisionable" -); - -impl From for views::SledProvisionState { - fn from(state: SledProvisionState) -> Self { - match state { - SledProvisionState::Provisionable => { - views::SledProvisionState::Provisionable - } - SledProvisionState::NonProvisionable => { - views::SledProvisionState::NonProvisionable - } - } - } -} - -impl From for SledProvisionState { - fn from(state: views::SledProvisionState) -> Self { - match state { - views::SledProvisionState::Provisionable => { - SledProvisionState::Provisionable - } - views::SledProvisionState::NonProvisionable => { - SledProvisionState::NonProvisionable - } - } - } -} - -/// An unknown [`views::SledProvisionState`] was encountered. -#[derive(Clone, Debug, Error)] -#[error("Unknown SledProvisionState")] -pub struct UnknownSledProvisionState; diff --git a/nexus/db-model/src/sled_state.rs b/nexus/db-model/src/sled_state.rs new file mode 100644 index 0000000000..31029f3f4f --- /dev/null +++ b/nexus/db-model/src/sled_state.rs @@ -0,0 +1,59 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Database representation of a sled's state as understood by Nexus. +//! +//! This is related to, but different from `SledState`: a sled's **policy** is +//! its disposition as specified by the operator, while its **state** refers to +//! what's currently on it, as determined by Nexus. +//! +//! For example, a sled might be in the `Active` state, but have a policy of +//! `Expunged` -- this would mean that Nexus knows about resources currently +//! provisioned on the sled, but the operator has said that it should be marked +//! as gone. + +use super::impl_enum_type; +use nexus_types::external_api::views; +use serde::{Deserialize, Serialize}; +use std::fmt; +use strum::EnumIter; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "sled_state", schema = "public"))] + pub struct SledStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, Eq, EnumIter)] + #[diesel(sql_type = SledStateEnum)] + pub enum SledState; + + // Enum values + Active => b"active" + Decommissioned => b"decommissioned" +); + +impl fmt::Display for SledState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Forward to the canonical implementation in nexus-types. + views::SledState::from(*self).fmt(f) + } +} + +impl From for views::SledState { + fn from(state: SledState) -> Self { + match state { + SledState::Active => views::SledState::Active, + SledState::Decommissioned => views::SledState::Decommissioned, + } + } +} + +impl From for SledState { + fn from(state: views::SledState) -> Self { + match state { + views::SledState::Active => SledState::Active, + views::SledState::Decommissioned => SledState::Decommissioned, + } + } +} diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 9c9a30799e..539a913476 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -43,6 +43,7 @@ sled-agent-client.workspace = true slog.workspace = true static_assertions.workspace = true steno.workspace = true +strum.workspace = true swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } @@ -76,10 +77,10 @@ omicron-test-utils.workspace = true openapiv3.workspace = true pem.workspace = true petgraph.workspace = true +predicates.workspace = true pretty_assertions.workspace = true rcgen.workspace = true regex.workspace = true rustls.workspace = true -strum.workspace = true subprocess.workspace = true term.workspace = true diff --git a/nexus/db-queries/src/authz/context.rs b/nexus/db-queries/src/authz/context.rs index 3510da2735..0d6f2a73ac 100644 --- a/nexus/db-queries/src/authz/context.rs +++ b/nexus/db-queries/src/authz/context.rs @@ -217,7 +217,8 @@ mod test { let logctx = dev::test_setup_log("test_unregistered_resource"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; // Define a resource that we "forget" to register with Oso. 
use super::AuthorizedResource; diff --git a/nexus/db-queries/src/authz/policy_test/mod.rs b/nexus/db-queries/src/authz/policy_test/mod.rs index 00a9904499..b6961bcc30 100644 --- a/nexus/db-queries/src/authz/policy_test/mod.rs +++ b/nexus/db-queries/src/authz/policy_test/mod.rs @@ -62,7 +62,8 @@ use uuid::Uuid; async fn test_iam_roles_behavior() { let logctx = dev::test_setup_log("test_iam_roles"); let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await; + let (opctx, datastore) = + db::datastore::test_utils::datastore_test(&logctx, &db).await; // Before we can create the resources, users, and role assignments that we // need, we must grant the "test-privileged" user privileges to fetch and @@ -328,7 +329,8 @@ async fn test_conferred_roles() { // To start, this test looks a lot like the test above. let logctx = dev::test_setup_log("test_conferred_roles"); let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await; + let (opctx, datastore) = + db::datastore::test_utils::datastore_test(&logctx, &db).await; // Before we can create the resources, users, and role assignments that we // need, we must grant the "test-privileged" user privileges to fetch and diff --git a/nexus/db-queries/src/context.rs b/nexus/db-queries/src/context.rs index aea4a60e66..dfd1fe4322 100644 --- a/nexus/db-queries/src/context.rs +++ b/nexus/db-queries/src/context.rs @@ -357,7 +357,8 @@ mod test { let logctx = dev::test_setup_log("test_background_context"); let mut db = test_setup_database(&logctx.log).await; let (_, datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let opctx = OpContext::for_background( logctx.log.new(o!()), Arc::new(authz::Authz::new(&logctx.log)), @@ -389,7 +390,8 @@ mod test { let logctx = dev::test_setup_log("test_background_context"); let mut db = test_setup_database(&logctx.log).await; let (_, datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore); // Like in test_background_context(), this is essentially a test of the @@ -410,7 +412,8 @@ mod test { let logctx = dev::test_setup_log("test_child_context"); let mut db = test_setup_database(&logctx.log).await; let (_, datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let opctx = OpContext::for_background( logctx.log.new(o!()), Arc::new(authz::Authz::new(&logctx.log)), diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index d9df143022..00a75a21da 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1054,14 +1054,15 @@ impl RunQueryDsl for InsertTargetQuery {} #[cfg(test)] mod tests { use super::*; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::blueprint_builder::Ensure; use nexus_inventory::now_db_precision; use nexus_test_utils::db::test_setup_database; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; - use nexus_types::external_api::views::SledProvisionState; + use 
nexus_types::external_api::views::SledPolicy; + use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Generation; @@ -1126,7 +1127,8 @@ mod tests { .collect(); let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); SledResources { - provision_state: SledProvisionState::Provisionable, + policy: SledPolicy::provisionable(), + state: SledState::Active, zpools, subnet: Ipv6Subnet::new(ip), } diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 390376e627..2916573322 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -817,7 +817,7 @@ impl DataStore { mod tests { use super::*; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external; diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 180764d38c..b12df1875f 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -688,7 +688,7 @@ impl DnsVersionUpdateBuilder { #[cfg(test)] mod test { - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::DnsVersionUpdateBuilder; use crate::db::DataStore; use crate::db::TransactionError; diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 6b737f21ac..3f2f4bd127 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1914,8 +1914,8 @@ impl DataStoreInventoryTest for DataStore { #[cfg(test)] mod test { - use crate::db::datastore::datastore_test; use crate::db::datastore::inventory::DataStoreInventoryTest; + use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::DataStoreConnection; use crate::db::schema; use anyhow::Context; diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index 4634fda9ee..4a37efd612 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -816,7 +816,7 @@ mod test { use std::num::NonZeroU32; use crate::authz; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use crate::db::model::{IpPool, IpPoolResource, IpPoolResourceType}; use assert_matches::assert_matches; use nexus_test_utils::db::test_setup_database; diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index 27a6bad32f..670ca08960 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -379,7 +379,7 @@ fn ipv4_nat_next_version() -> diesel::expression::SqlLiteral { mod test { use std::{net::Ipv4Addr, str::FromStr}; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use chrono::Utc; use nexus_db_model::{Ipv4NatEntry, Ipv4NatValues, MacAddr, Vni}; use nexus_test_utils::db::test_setup_database; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 5f05aa1760..b5eff6cb85 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ 
b/nexus/db-queries/src/db/datastore/mod.rs @@ -44,6 +44,7 @@ use omicron_common::backoff::{ use omicron_common::nexus_config::SchemaConfig; use slog::Logger; use std::net::Ipv6Addr; +use std::num::NonZeroU32; use std::sync::Arc; use uuid::Uuid; @@ -87,6 +88,8 @@ mod ssh_key; mod switch; mod switch_interface; mod switch_port; +#[cfg(test)] +pub(crate) mod test_utils; mod update; mod utilization; mod virtual_provisioning_collection; @@ -105,7 +108,7 @@ pub use instance::InstanceAndActiveVmm; pub use inventory::DataStoreInventoryTest; pub use rack::RackInit; pub use silo::Discoverability; -use std::num::NonZeroU32; +pub use sled::SledUpsertOutput; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use volume::read_only_resources_associated_with_volume; @@ -352,52 +355,16 @@ pub enum UpdatePrecondition { Value(T), } -/// Constructs a DataStore for use in test suites that has preloaded the -/// built-in users, roles, and role assignments that are needed for basic -/// operation -#[cfg(test)] -pub async fn datastore_test( - logctx: &dropshot::test_util::LogContext, - db: &omicron_test_utils::dev::db::CockroachInstance, -) -> (OpContext, Arc) { - use crate::authn; - - let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); - let datastore = - Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); - - // Create an OpContext with the credentials of "db-init" just for the - // purpose of loading the built-in users, roles, and assignments. - let opctx = OpContext::for_background( - logctx.log.new(o!()), - Arc::new(authz::Authz::new(&logctx.log)), - authn::Context::internal_db_init(), - Arc::clone(&datastore), - ); - - // TODO: Can we just call "Populate" instead of doing this? - let rack_id = Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap(); - datastore.load_builtin_users(&opctx).await.unwrap(); - datastore.load_builtin_roles(&opctx).await.unwrap(); - datastore.load_builtin_role_asgns(&opctx).await.unwrap(); - datastore.load_builtin_silos(&opctx).await.unwrap(); - datastore.load_builtin_projects(&opctx).await.unwrap(); - datastore.load_builtin_vpcs(&opctx).await.unwrap(); - datastore.load_silo_users(&opctx).await.unwrap(); - datastore.load_silo_user_role_assignments(&opctx).await.unwrap(); - datastore - .load_builtin_fleet_virtual_provisioning_collection(&opctx) - .await - .unwrap(); - datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap(); - - // Create an OpContext with the credentials of "test-privileged" for general - // testing. - let opctx = - OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); - - (opctx, datastore) +/// Whether state transitions should be validated. "No" is only accessible in +/// test-only code. +/// +/// Intended only for testing around illegal states. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[must_use] +enum ValidateTransition { + Yes, + #[cfg(test)] + No, } #[cfg(test)] @@ -406,6 +373,10 @@ mod test { use crate::authn; use crate::authn::SiloAuthnPolicy; use crate::authz; + use crate::db::datastore::test_utils::datastore_test; + use crate::db::datastore::test_utils::{ + IneligibleSledKind, IneligibleSleds, + }; use crate::db::explain::ExplainableAsync; use crate::db::fixed_data::silo::DEFAULT_SILO; use crate::db::fixed_data::silo::SILO_ID; @@ -414,8 +385,8 @@ mod test { use crate::db::model::{ BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, - ServiceKind, SiloUser, SledBaseboard, SledProvisionState, - SledSystemHardware, SledUpdate, SshKey, VpcSubnet, Zpool, + ServiceKind, SiloUser, SledBaseboard, SledSystemHardware, SledUpdate, + SshKey, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use chrono::{Duration, Utc}; @@ -435,6 +406,7 @@ mod test { use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::num::NonZeroU32; use std::sync::Arc; + use strum::EnumCount; use uuid::Uuid; // Creates a "fake" Sled Baseboard. @@ -648,39 +620,10 @@ mod test { sled_system_hardware_for_test(), rack_id, ); - datastore.sled_upsert(sled_update).await.unwrap(); + datastore.sled_upsert(sled_update).await.unwrap().unwrap(); sled_id } - // Marks a sled as non-provisionable. - async fn mark_sled_non_provisionable( - datastore: &DataStore, - opctx: &OpContext, - sled_id: Uuid, - ) { - let (authz_sled, sled) = LookupPath::new(opctx, datastore) - .sled_id(sled_id) - .fetch_for(authz::Action::Modify) - .await - .unwrap(); - println!("sled: {:?}", sled); - let old_state = datastore - .sled_set_provision_state( - &opctx, - &authz_sled, - SledProvisionState::NonProvisionable, - ) - .await - .unwrap_or_else(|error| { - panic!( - "error marking sled {sled_id} as non-provisionable: {error}" - ) - }); - // The old state should always be provisionable since that's where we - // start. - assert_eq!(old_state, SledProvisionState::Provisionable); - } - fn test_zpool_size() -> ByteCount { ByteCount::from_gibibytes_u32(100) } @@ -742,95 +685,184 @@ mod test { } } - struct TestDataset { - sled_id: Uuid, - dataset_id: Uuid, + #[derive(Debug)] + struct TestDatasets { + // eligible and ineligible aren't currently used, but are probably handy + // for the future. + #[allow(dead_code)] + eligible: SledToDatasetMap, + #[allow(dead_code)] + ineligible: SledToDatasetMap, + + // A map from eligible dataset IDs to their corresponding sled IDs. + eligible_dataset_ids: HashMap, + ineligible_dataset_ids: HashMap, } - async fn create_test_datasets_for_region_allocation( - opctx: &OpContext, - datastore: Arc, - number_of_sleds: usize, - ) -> Vec { - // Create sleds... - let sled_ids: Vec = stream::iter(0..number_of_sleds) - .then(|_| create_test_sled(&datastore)) - .collect() + // Map of sled IDs to dataset IDs. 
+ type SledToDatasetMap = HashMap>; + + impl TestDatasets { + async fn create( + opctx: &OpContext, + datastore: Arc, + num_eligible_sleds: usize, + ) -> Self { + let eligible = + Self::create_impl(opctx, datastore.clone(), num_eligible_sleds) + .await; + + let eligible_dataset_ids = eligible + .iter() + .flat_map(|(sled_id, dataset_ids)| { + dataset_ids + .iter() + .map(move |dataset_id| (*dataset_id, *sled_id)) + }) + .collect(); + + let ineligible = Self::create_impl( + opctx, + datastore.clone(), + IneligibleSledKind::COUNT, + ) .await; - struct PhysicalDisk { - sled_id: Uuid, - disk_id: Uuid, - } + let mut ineligible_sled_ids = ineligible.keys(); - // create 9 disks on each sled - let physical_disks: Vec = stream::iter(sled_ids) - .map(|sled_id| { - let sled_id_iter: Vec = (0..9).map(|_| sled_id).collect(); - stream::iter(sled_id_iter).then(|sled_id| { - let disk_id_future = create_test_physical_disk( - &datastore, - opctx, - sled_id, - PhysicalDiskKind::U2, - ); - async move { - let disk_id = disk_id_future.await; - PhysicalDisk { sled_id, disk_id } - } - }) - }) - .flatten() - .collect() - .await; + // Set up the ineligible sleds. (We're guaranteed that + // IneligibleSledKind::COUNT is the same as the number of next() + // calls below.) + let ineligible_sleds = IneligibleSleds { + non_provisionable: *ineligible_sled_ids.next().unwrap(), + expunged: *ineligible_sled_ids.next().unwrap(), + decommissioned: *ineligible_sled_ids.next().unwrap(), + illegal_decommissioned: *ineligible_sled_ids.next().unwrap(), + }; - #[derive(Copy, Clone)] - struct Zpool { - sled_id: Uuid, - pool_id: Uuid, - } + eprintln!("Setting up ineligible sleds: {:?}", ineligible_sleds); - // 1 pool per disk - let zpools: Vec = stream::iter(physical_disks) - .then(|disk| { - let pool_id_future = - create_test_zpool(&datastore, disk.sled_id, disk.disk_id); - async move { - let pool_id = pool_id_future.await; - Zpool { sled_id: disk.sled_id, pool_id } + ineligible_sleds + .setup(opctx, &datastore) + .await + .expect("error setting up ineligible sleds"); + + // Build a map of dataset IDs to their ineligible kind. + let mut ineligible_dataset_ids = HashMap::new(); + for (kind, sled_id) in ineligible_sleds.iter() { + for dataset_id in ineligible.get(&sled_id).unwrap() { + ineligible_dataset_ids.insert(*dataset_id, kind); } - }) - .collect() - .await; + } - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + Self { + eligible, + eligible_dataset_ids, + ineligible, + ineligible_dataset_ids, + } + } - let datasets: Vec = stream::iter(zpools) - .map(|zpool| { - // 3 datasets per zpool, to test that pools are distinct - let zpool_iter: Vec = (0..3).map(|_| zpool).collect(); - stream::iter(zpool_iter).then(|zpool| { - let id = Uuid::new_v4(); - let dataset = Dataset::new( - id, - zpool.pool_id, - bogus_addr, - DatasetKind::Crucible, - ); + // Returns a map of sled ID to dataset IDs. + async fn create_impl( + opctx: &OpContext, + datastore: Arc, + number_of_sleds: usize, + ) -> SledToDatasetMap { + // Create sleds... 
+ let sled_ids: Vec = stream::iter(0..number_of_sleds) + .then(|_| create_test_sled(&datastore)) + .collect() + .await; - let datastore = datastore.clone(); - async move { - datastore.dataset_upsert(dataset).await.unwrap(); + struct PhysicalDisk { + sled_id: Uuid, + disk_id: Uuid, + } - TestDataset { sled_id: zpool.sled_id, dataset_id: id } + // create 9 disks on each sled + let physical_disks: Vec = stream::iter(sled_ids) + .map(|sled_id| { + let sled_id_iter: Vec = + (0..9).map(|_| sled_id).collect(); + stream::iter(sled_id_iter).then(|sled_id| { + let disk_id_future = create_test_physical_disk( + &datastore, + opctx, + sled_id, + PhysicalDiskKind::U2, + ); + async move { + let disk_id = disk_id_future.await; + PhysicalDisk { sled_id, disk_id } + } + }) + }) + .flatten() + .collect() + .await; + + #[derive(Copy, Clone)] + struct Zpool { + sled_id: Uuid, + pool_id: Uuid, + } + + // 1 pool per disk + let zpools: Vec = stream::iter(physical_disks) + .then(|disk| { + let pool_id_future = create_test_zpool( + &datastore, + disk.sled_id, + disk.disk_id, + ); + async move { + let pool_id = pool_id_future.await; + Zpool { sled_id: disk.sled_id, pool_id } } }) - }) - .flatten() - .collect() - .await; + .collect() + .await; + + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + + let datasets = stream::iter(zpools) + .map(|zpool| { + // 3 datasets per zpool, to test that pools are distinct + let zpool_iter: Vec = + (0..3).map(|_| zpool).collect(); + stream::iter(zpool_iter).then(|zpool| { + let dataset_id = Uuid::new_v4(); + let dataset = Dataset::new( + dataset_id, + zpool.pool_id, + bogus_addr, + DatasetKind::Crucible, + ); + + let datastore = datastore.clone(); + async move { + datastore.dataset_upsert(dataset).await.unwrap(); + + (zpool.sled_id, dataset_id) + } + }) + }) + .flatten() + .fold( + SledToDatasetMap::new(), + |mut map, (sled_id, dataset_id)| { + // Build a map of sled ID to dataset IDs. + map.entry(sled_id) + .or_insert_with(Vec::new) + .push(dataset_id); + async move { map } + }, + ) + .await; - datasets + datasets + } } #[tokio::test] @@ -841,21 +873,12 @@ mod test { let logctx = dev::test_setup_log("test_region_allocation_strat_random"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let test_datasets = create_test_datasets_for_region_allocation( + let test_datasets = TestDatasets::create( &opctx, datastore.clone(), - // Even though we're going to mark one sled as non-provisionable to - // test that logic, we aren't forcing the datasets to be on - // distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough. - REGION_REDUNDANCY_THRESHOLD, - ) - .await; - - let non_provisionable_dataset_id = test_datasets[0].dataset_id; - mark_sled_non_provisionable( - &datastore, - &opctx, - test_datasets[0].sled_id, + // We aren't forcing the datasets to be on distinct sleds, so we + // just need one eligible sled. + 1, ) .await; @@ -891,8 +914,16 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); - // Dataset must not be non-provisionable. - assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Dataset must not be eligible for provisioning. 
+ if let Some(kind) = + test_datasets.ineligible_dataset_ids.get(&dataset.id()) + { + panic!( + "Dataset {} was ineligible for provisioning: {:?}", + dataset.id(), + kind + ); + } // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); @@ -923,31 +954,16 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a rack with enough sleds for a successful allocation when we - // require 3 distinct provisionable sleds. - let test_datasets = create_test_datasets_for_region_allocation( + // require 3 distinct eligible sleds. + let test_datasets = TestDatasets::create( &opctx, datastore.clone(), - // We're going to mark one sled as non-provisionable to test that - // logic, and we *are* forcing the datasets to be on distinct - // sleds: hence threshold + 1. - REGION_REDUNDANCY_THRESHOLD + 1, - ) - .await; - - let non_provisionable_dataset_id = test_datasets[0].dataset_id; - mark_sled_non_provisionable( - &datastore, - &opctx, - test_datasets[0].sled_id, + // We're forcing the datasets to be on distinct sleds, hence the + // full REGION_REDUNDANCY_THRESHOLD. + REGION_REDUNDANCY_THRESHOLD, ) .await; - // We need to check that our datasets end up on 3 distinct sleds, but the query doesn't return the sled ID, so we need to reverse map from dataset ID to sled ID - let sled_id_map: HashMap = test_datasets - .into_iter() - .map(|test_dataset| (test_dataset.dataset_id, test_dataset.sled_id)) - .collect(); - // Allocate regions from the datasets for this disk. Do it a few times // for good measure. for alloc_seed in 0..10 { @@ -980,14 +996,25 @@ mod test { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); - // Dataset must not be non-provisionable. - assert_ne!(dataset.id(), non_provisionable_dataset_id); + // Dataset must not be eligible for provisioning. + if let Some(kind) = + test_datasets.ineligible_dataset_ids.get(&dataset.id()) + { + panic!( + "Dataset {} was ineligible for provisioning: {:?}", + dataset.id(), + kind + ); + } // Must be 3 unique zpools assert!(disk_zpools.insert(dataset.pool_id)); // Must be 3 unique sleds - let sled_id = sled_id_map.get(&dataset.id()).unwrap(); + let sled_id = test_datasets + .eligible_dataset_ids + .get(&dataset.id()) + .unwrap(); assert!(disk_sleds.insert(*sled_id)); assert_eq!(volume_id, region.volume_id()); @@ -1016,21 +1043,12 @@ mod test { // Create a rack without enough sleds for a successful allocation when // we require 3 distinct provisionable sleds. - let test_datasets = create_test_datasets_for_region_allocation( + TestDatasets::create( &opctx, datastore.clone(), - // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1 - // provisionable sleds to test this failure condition. We're going - // to mark one sled as non-provisionable to test that logic, so we - // need to add 1 to that number. - REGION_REDUNDANCY_THRESHOLD, - ) - .await; - - mark_sled_non_provisionable( - &datastore, - &opctx, - test_datasets[0].sled_id, + // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1 eligible + // sleds to test this failure condition. 
+ REGION_REDUNDANCY_THRESHOLD - 1, ) .await; @@ -1075,7 +1093,7 @@ mod test { dev::test_setup_log("test_region_allocation_is_idempotent"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation( + TestDatasets::create( &opctx, datastore.clone(), REGION_REDUNDANCY_THRESHOLD, @@ -1220,7 +1238,7 @@ mod test { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - create_test_datasets_for_region_allocation( + TestDatasets::create( &opctx, datastore.clone(), REGION_REDUNDANCY_THRESHOLD, @@ -1321,7 +1339,7 @@ mod test { sled_system_hardware_for_test(), rack_id, ); - datastore.sled_upsert(sled1).await.unwrap(); + datastore.sled_upsert(sled1).await.unwrap().unwrap(); let addr2 = "[fd00:1df::1]:12345".parse().unwrap(); let sled2_id = "66285c18-0c79-43e0-e54f-95271f271314".parse().unwrap(); @@ -1332,7 +1350,7 @@ mod test { sled_system_hardware_for_test(), rack_id, ); - datastore.sled_upsert(sled2).await.unwrap(); + datastore.sled_upsert(sled2).await.unwrap().unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); let expected_ip = Ipv6Addr::new(0xfd00, 0x1de, 0, 0, 0, 0, 1, 0); diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index ecb583ee29..d4e94745aa 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -137,10 +137,10 @@ impl DataStore { #[cfg(test)] mod test { use super::*; - use crate::db::datastore::datastore_test; use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::datastore::test_utils::datastore_test; use crate::db::model::{PhysicalDiskKind, Sled, SledUpdate}; use dropshot::PaginationOrder; use nexus_test_utils::db::test_setup_database; @@ -163,6 +163,7 @@ mod test { db.sled_upsert(sled_update) .await .expect("Could not upsert sled during test prep") + .unwrap() } fn list_disk_params() -> DataPageParams<'static, Uuid> { diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 99ed71a073..46a3e0af2d 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -853,10 +853,10 @@ impl DataStore { #[cfg(test)] mod test { use super::*; - use crate::db::datastore::datastore_test; use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::Discoverability; use crate::db::lookup::LookupPath; use crate::db::model::ExternalIp; @@ -1069,6 +1069,7 @@ mod test { db.sled_upsert(sled_update) .await .expect("Could not upsert sled during test prep") + .unwrap() } // Hacky macro helper to: diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index eb50061272..8809f4d60d 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -9,18 +9,23 @@ use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::datastore::ValidateTransition; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; +use crate::db::model::to_db_sled_policy; use crate::db::model::Sled; use crate::db::model::SledResource; +use crate::db::model::SledState; use crate::db::model::SledUpdate; 
use crate::db::pagination::paginated; use crate::db::pagination::Paginator; -use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::{UpdateAndCheck, UpdateStatus}; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_types::external_api::views::SledPolicy; +use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_common::api::external::CreateResult; @@ -28,16 +33,29 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::ResourceType; +use std::fmt; +use strum::IntoEnumIterator; +use thiserror::Error; use uuid::Uuid; impl DataStore { /// Stores a new sled in the database. + /// + /// Produces `SledUpsertOutput::Decommissioned` if the sled is + /// decommissioned. This is not an error, because `sled_upsert`'s only + /// caller (sled-agent) is not expected to receive this error. pub async fn sled_upsert( &self, sled_update: SledUpdate, - ) -> CreateResult { + ) -> CreateResult { use db::schema::sled::dsl; - diesel::insert_into(dsl::sled) + // required for conditional upsert + use diesel::query_dsl::methods::FilterDsl; + + // TODO: figure out what to do with time_deleted. We want to replace it + // with a time_decommissioned, most probably. + + let query = diesel::insert_into(dsl::sled) .values(sled_update.clone().into_insertable()) .on_conflict(dsl::id) .do_update() @@ -52,9 +70,13 @@ impl DataStore { dsl::usable_physical_ram.eq(sled_update.usable_physical_ram), dsl::reservoir_size.eq(sled_update.reservoir_size), )) - .returning(Sled::as_returning()) + .filter(dsl::sled_state.ne(SledState::Decommissioned)) + .returning(Sled::as_returning()); + + let sled: Option = query .get_result_async(&*self.pool_connection_unauthorized().await?) .await + .optional() .map_err(|e| { public_error_from_diesel( e, @@ -63,7 +85,19 @@ impl DataStore { &sled_update.id().to_string(), ), ) - }) + })?; + + // The only situation in which a sled is not returned is if the + // `.filter(dsl::sled_state.ne(SledState::Decommissioned))` is not + // satisfied. + // + // If we want to return a sled even if it's decommissioned here, we may + // have to do something more complex. See + // https://stackoverflow.com/q/34708509. + match sled { + Some(sled) => Ok(SledUpsertOutput::Updated(sled)), + None => Ok(SledUpsertOutput::Decommissioned), + } } pub async fn sled_list( @@ -194,10 +228,11 @@ impl DataStore { .and(sled_has_space_in_reservoir), ) .filter(sled_dsl::time_deleted.is_null()) - // Filter out sleds that are not provisionable. - .filter(sled_dsl::provision_state.eq( - db::model::SledProvisionState::Provisionable, + // Ensure that the sled is in-service and active. + .filter(sled_dsl::sled_policy.eq( + to_db_sled_policy(SledPolicy::provisionable()), )) + .filter(sled_dsl::sled_state.eq(SledState::Active)) .select(sled_dsl::id) .into_boxed(); @@ -269,15 +304,61 @@ impl DataStore { Ok(()) } - /// Sets the provision state for this sled. + /// Sets the provision policy for this sled. + /// + /// Errors if the sled is not in service. + /// + /// Returns the previous policy. 
+ pub async fn sled_set_provision_policy( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + policy: SledProvisionPolicy, + ) -> Result { + match self + .sled_set_policy_impl( + opctx, + authz_sled, + SledPolicy::InService { provision_policy: policy }, + ValidateTransition::Yes, + ) + .await + { + Ok(old_policy) => Ok(old_policy + .provision_policy() + .expect("only valid policy was in-service")), + Err(error) => Err(error.into_external_error()), + } + } + + /// Marks a sled as expunged, as directed by the operator. /// - /// Returns the previous state. - pub async fn sled_set_provision_state( + /// This is an irreversible process! It should only be called after + /// sufficient warning to the operator. + /// + /// This is idempotent, and it returns the old policy of the sled. + pub async fn sled_set_policy_to_expunged( &self, opctx: &OpContext, authz_sled: &authz::Sled, - state: db::model::SledProvisionState, - ) -> Result { + ) -> Result { + self.sled_set_policy_impl( + opctx, + authz_sled, + SledPolicy::Expunged, + ValidateTransition::Yes, + ) + .await + .map_err(|error| error.into_external_error()) + } + + pub(super) async fn sled_set_policy_impl( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + new_policy: SledPolicy, + check: ValidateTransition, + ) -> Result { use db::schema::sled::dsl; opctx.authorize(authz::Action::Modify, authz_sled).await?; @@ -285,38 +366,350 @@ impl DataStore { let sled_id = authz_sled.id(); let query = diesel::update(dsl::sled) .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(sled_id)) - .filter(dsl::provision_state.ne(state)) + .filter(dsl::id.eq(sled_id)); + + let t = SledTransition::Policy(new_policy); + let valid_old_policies = t.valid_old_policies(); + let valid_old_states = t.valid_old_states(); + + let query = match check { + ValidateTransition::Yes => query + .filter(dsl::sled_policy.eq_any( + valid_old_policies.into_iter().map(to_db_sled_policy), + )) + .filter( + dsl::sled_state.eq_any(valid_old_states.iter().copied()), + ) + .into_boxed(), + #[cfg(test)] + ValidateTransition::No => query.into_boxed(), + }; + + let query = query .set(( - dsl::provision_state.eq(state), + dsl::sled_policy.eq(to_db_sled_policy(new_policy)), dsl::time_modified.eq(Utc::now()), )) .check_if_exists::(sled_id); + let result = query .execute_and_check(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - Ok(result.found.provision_state()) + match (check, result.status) { + (ValidateTransition::Yes, UpdateStatus::Updated) => { + Ok(result.found.policy()) + } + (ValidateTransition::Yes, UpdateStatus::NotUpdatedButExists) => { + // Two reasons this can happen: + // 1. An idempotent update: this is treated as a success. + // 2. Invalid state transition: a failure. + // + // To differentiate between the two, check that the new policy + // is the same as the old policy, and that the old state is + // valid. + if result.found.policy() == new_policy + && valid_old_states.contains(&result.found.state()) + { + Ok(result.found.policy()) + } else { + Err(TransitionError::InvalidTransition { + current: result.found, + transition: SledTransition::Policy(new_policy), + }) + } + } + #[cfg(test)] + (ValidateTransition::No, _) => Ok(result.found.policy()), + } + } + + /// Marks the state of the sled as decommissioned, as believed by Nexus. + /// + /// This is an irreversible process! It should only be called after all + /// resources previously on the sled have been migrated over. 
+ /// + /// This is idempotent, and it returns the old state of the sled. + /// + /// # Errors + /// + /// This method returns an error if the sled policy is not a state that is + /// valid to decommission from (i.e. if, for the current sled policy, + /// [`SledPolicy::is_decommissionable`] returns `false`). + pub async fn sled_set_state_to_decommissioned( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + ) -> Result { + self.sled_set_state_impl( + opctx, + authz_sled, + SledState::Decommissioned, + ValidateTransition::Yes, + ) + .await + .map_err(|error| error.into_external_error()) + } + + pub(super) async fn sled_set_state_impl( + &self, + opctx: &OpContext, + authz_sled: &authz::Sled, + new_state: SledState, + check: ValidateTransition, + ) -> Result { + use db::schema::sled::dsl; + + opctx.authorize(authz::Action::Modify, authz_sled).await?; + + let sled_id = authz_sled.id(); + let query = diesel::update(dsl::sled) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(sled_id)); + + let t = SledTransition::State(new_state); + let valid_old_policies = t.valid_old_policies(); + let valid_old_states = t.valid_old_states(); + + let query = match check { + ValidateTransition::Yes => query + .filter(dsl::sled_policy.eq_any( + valid_old_policies.iter().copied().map(to_db_sled_policy), + )) + .filter(dsl::sled_state.eq_any(valid_old_states)) + .into_boxed(), + #[cfg(test)] + ValidateTransition::No => query.into_boxed(), + }; + + let query = query + .set(( + dsl::sled_state.eq(new_state), + dsl::time_modified.eq(Utc::now()), + )) + .check_if_exists::(sled_id); + + let result = query + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + match (check, result.status) { + (ValidateTransition::Yes, UpdateStatus::Updated) => { + Ok(result.found.state()) + } + (ValidateTransition::Yes, UpdateStatus::NotUpdatedButExists) => { + // Two reasons this can happen: + // 1. An idempotent update: this is treated as a success. + // 2. Invalid state transition: a failure. + // + // To differentiate between the two, check that the new state + // is the same as the old state, and the found policy is valid. + if result.found.state() == new_state + && valid_old_policies.contains(&result.found.policy()) + { + Ok(result.found.state()) + } else { + Err(TransitionError::InvalidTransition { + current: result.found, + transition: SledTransition::State(new_state), + }) + } + } + #[cfg(test)] + (ValidateTransition::No, _) => Ok(result.found.state()), + } + } +} + +/// The result of [`DataStore::sled_upsert`]. +#[derive(Clone, Debug)] +#[must_use] +pub enum SledUpsertOutput { + /// The sled was updated. + Updated(Sled), + /// The sled was not updated because it is decommissioned. + Decommissioned, +} + +impl SledUpsertOutput { + /// Returns the sled if it was updated, or panics if it was not. + pub fn unwrap(self) -> Sled { + match self { + SledUpsertOutput::Updated(sled) => sled, + SledUpsertOutput::Decommissioned => { + panic!("sled was decommissioned, not updated") + } + } + } +} + +// --- +// State transition validators +// --- + +// The functions in this section return the old policies or states that are +// valid for a new policy or state, except idempotent transitions. 
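+//
+// As a rough summary of the functions below: policy changes are only
+// accepted while a sled's state is `Active`, and a sled can only move to
+// `Decommissioned` from a policy that permits it (see
+// `SledPolicy::all_decommissionable`). As an illustrative sketch (assuming
+// an authorized `opctx` and `authz_sled` are already in hand), retiring a
+// sled would look something like:
+//
+//     let _old_policy = datastore
+//         .sled_set_policy_to_expunged(&opctx, &authz_sled)
+//         .await?;
+//     let _old_state = datastore
+//         .sled_set_state_to_decommissioned(&opctx, &authz_sled)
+//         .await?;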
+ +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum SledTransition { + Policy(SledPolicy), + State(SledState), +} + +impl SledTransition { + /// Returns the list of valid old policies, other than the provided one + /// (which is always considered valid). + /// + /// For a more descriptive listing of valid transitions, see + /// [`test_sled_transitions`]. + fn valid_old_policies(&self) -> Vec { + use SledPolicy::*; + use SledProvisionPolicy::*; + use SledState::*; + + match self { + SledTransition::Policy(new_policy) => match new_policy { + InService { provision_policy: Provisionable } => { + vec![InService { provision_policy: NonProvisionable }] + } + InService { provision_policy: NonProvisionable } => { + vec![InService { provision_policy: Provisionable }] + } + Expunged => SledProvisionPolicy::iter() + .map(|provision_policy| InService { provision_policy }) + .collect(), + }, + SledTransition::State(state) => { + match state { + Active => { + // Any policy is valid for the active state. + SledPolicy::iter().collect() + } + Decommissioned => { + SledPolicy::all_decommissionable().to_vec() + } + } + } + } + } + + /// Returns the list of valid old states, other than the provided one + /// (which is always considered valid). + /// + /// For a more descriptive listing of valid transitions, see + /// [`test_sled_transitions`]. + fn valid_old_states(&self) -> Vec { + use SledState::*; + + match self { + SledTransition::Policy(_) => { + // Policies can only be transitioned in the active state. (In + // the future, this will include other non-decommissioned + // states.) + vec![Active] + } + SledTransition::State(state) => match state { + Active => vec![], + Decommissioned => vec![Active], + }, + } + } +} + +impl fmt::Display for SledTransition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledTransition::Policy(policy) => { + write!(f, "policy \"{}\"", policy) + } + SledTransition::State(state) => write!(f, "state \"{}\"", state), + } + } +} + +impl IntoEnumIterator for SledTransition { + type Iterator = std::vec::IntoIter; + + fn iter() -> Self::Iterator { + let v: Vec<_> = SledPolicy::iter() + .map(SledTransition::Policy) + .chain(SledState::iter().map(SledTransition::State)) + .collect(); + v.into_iter() + } +} + +/// An error that occurred while setting a policy or state. +#[derive(Debug, Error)] +#[must_use] +pub(super) enum TransitionError { + /// The state transition check failed. + /// + /// The sled is returned. + #[error( + "sled id {} has current policy \"{}\" and state \"{}\" \ + and the transition to {} is not permitted", + .current.id(), + .current.policy(), + .current.state(), + .transition, + )] + InvalidTransition { + /// The current sled as fetched from the database. + current: Sled, + + /// The new policy or state that was attempted. + transition: SledTransition, + }, + + /// Some other kind of error occurred. + #[error("database error")] + External(#[from] external::Error), +} + +impl TransitionError { + fn into_external_error(self) -> external::Error { + match self { + TransitionError::InvalidTransition { .. } => { + external::Error::conflict(self.to_string()) + } + TransitionError::External(e) => e.clone(), + } + } + + #[cfg(test)] + pub(super) fn ensure_invalid_transition(self) -> anyhow::Result<()> { + match self { + TransitionError::InvalidTransition { .. 
} => Ok(()), + TransitionError::External(e) => Err(anyhow::anyhow!(e) + .context("expected invalid transition, got other error")), + } } } #[cfg(test)] mod test { use super::*; - use crate::db::datastore::datastore_test; use crate::db::datastore::test::{ sled_baseboard_for_test, sled_system_hardware_for_test, }; + use crate::db::datastore::test_utils::{ + datastore_test, sled_set_policy, sled_set_state, Expected, + IneligibleSleds, + }; use crate::db::lookup::LookupPath; use crate::db::model::ByteCount; use crate::db::model::SqlU32; + use anyhow::{Context, Result}; + use itertools::Itertools; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_test_utils::dev; + use predicates::{prelude::*, BoxPredicate}; use std::net::{Ipv6Addr, SocketAddrV6}; - use std::num::NonZeroU32; fn rack_id() -> Uuid { Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap() @@ -324,13 +717,13 @@ mod test { #[tokio::test] async fn upsert_sled_updates_hardware() { - let logctx = dev::test_setup_log("upsert_sled"); + let logctx = dev::test_setup_log("upsert_sled_updates_hardware"); let mut db = test_setup_database(&logctx.log).await; let (_opctx, datastore) = datastore_test(&logctx, &db).await; let mut sled_update = test_new_sled_update(); let observed_sled = - datastore.sled_upsert(sled_update.clone()).await.unwrap(); + datastore.sled_upsert(sled_update.clone()).await.unwrap().unwrap(); assert_eq!( observed_sled.usable_hardware_threads, sled_update.usable_hardware_threads @@ -362,7 +755,8 @@ mod test { let observed_sled = datastore .sled_upsert(sled_update.clone()) .await - .expect("Could not upsert sled during test prep"); + .expect("Could not upsert sled during test prep") + .unwrap(); assert_eq!( observed_sled.usable_hardware_threads, sled_update.usable_hardware_threads @@ -377,37 +771,128 @@ mod test { logctx.cleanup_successful(); } - /// Test that new reservations aren't created on non-provisionable sleds. #[tokio::test] - async fn sled_reservation_create_non_provisionable() { + async fn upsert_sled_doesnt_update_decommissioned() { let logctx = - dev::test_setup_log("sled_reservation_create_non_provisionable"); + dev::test_setup_log("upsert_sled_doesnt_update_decommissioned"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let sled_update = test_new_sled_update(); - let non_provisionable_sled = - datastore.sled_upsert(sled_update.clone()).await.unwrap(); + let mut sled_update = test_new_sled_update(); + let observed_sled = + datastore.sled_upsert(sled_update.clone()).await.unwrap().unwrap(); + assert_eq!( + observed_sled.usable_hardware_threads, + sled_update.usable_hardware_threads + ); + assert_eq!( + observed_sled.usable_physical_ram, + sled_update.usable_physical_ram + ); + assert_eq!(observed_sled.reservoir_size, sled_update.reservoir_size); - let (authz_sled, _) = LookupPath::new(&opctx, &datastore) - .sled_id(non_provisionable_sled.id()) - .fetch_for(authz::Action::Modify) - .await - .unwrap(); + // Set the sled to decommissioned (this is not a legal transition, but + // we don't care about sled policy in sled_upsert, just the state.) 
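+        // `ValidateTransition::No` skips the transition checks, which is
+        // what lets the test force the state directly here.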
+ sled_set_state( + &opctx, + &datastore, + observed_sled.id(), + SledState::Decommissioned, + ValidateTransition::No, + Expected::Ok(SledState::Active), + ) + .await + .unwrap(); - let old_state = datastore - .sled_set_provision_state( - &opctx, - &authz_sled, - db::model::SledProvisionState::NonProvisionable, + // Modify the sizes of hardware + sled_update.usable_hardware_threads = + SqlU32::new(sled_update.usable_hardware_threads.0 + 1); + const MIB: u64 = 1024 * 1024; + sled_update.usable_physical_ram = ByteCount::from( + external::ByteCount::try_from( + sled_update.usable_physical_ram.0.to_bytes() + MIB, ) + .unwrap(), + ); + sled_update.reservoir_size = ByteCount::from( + external::ByteCount::try_from( + sled_update.reservoir_size.0.to_bytes() + MIB, + ) + .unwrap(), + ); + + // Upserting the sled should produce the `Decommisioned` variant. + let sled = datastore + .sled_upsert(sled_update.clone()) + .await + .expect("updating a decommissioned sled should succeed"); + assert!( + matches!(sled, SledUpsertOutput::Decommissioned), + "sled should be decommissioned" + ); + + // The sled should not have been updated. + let (_, observed_sled_2) = LookupPath::new(&opctx, &datastore) + .sled_id(observed_sled.id()) + .fetch_for(authz::Action::Modify) .await .unwrap(); assert_eq!( - old_state, - db::model::SledProvisionState::Provisionable, - "a newly created sled starts as provisionable" + observed_sled_2.usable_hardware_threads, + observed_sled.usable_hardware_threads, + "usable_hardware_threads should not have changed" ); + assert_eq!( + observed_sled_2.usable_physical_ram, + observed_sled.usable_physical_ram, + "usable_physical_ram should not have changed" + ); + assert_eq!( + observed_sled_2.reservoir_size, observed_sled.reservoir_size, + "reservoir_size should not have changed" + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + /// Test that new reservations aren't created on non-provisionable sleds. + #[tokio::test] + async fn sled_reservation_create_non_provisionable() { + let logctx = + dev::test_setup_log("sled_reservation_create_non_provisionable"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Define some sleds that resources cannot be provisioned on. + let non_provisionable_sled = datastore + .sled_upsert(test_new_sled_update()) + .await + .unwrap() + .unwrap(); + let expunged_sled = datastore + .sled_upsert(test_new_sled_update()) + .await + .unwrap() + .unwrap(); + let decommissioned_sled = datastore + .sled_upsert(test_new_sled_update()) + .await + .unwrap() + .unwrap(); + let illegal_decommissioned_sled = datastore + .sled_upsert(test_new_sled_update()) + .await + .unwrap() + .unwrap(); + + let ineligible_sleds = IneligibleSleds { + non_provisionable: non_provisionable_sled.id(), + expunged: expunged_sled.id(), + decommissioned: decommissioned_sled.id(), + illegal_decommissioned: illegal_decommissioned_sled.id(), + }; + ineligible_sleds.setup(&opctx, &datastore).await.unwrap(); // This should be an error since there are no provisionable sleds. let resources = db::model::Resources::new( @@ -432,13 +917,7 @@ mod test { // Now add a provisionable sled and try again. 
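+        // Every other sled in this test is ineligible, so reservations made
+        // below can only land on this new sled.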
let sled_update = test_new_sled_update(); let provisionable_sled = - datastore.sled_upsert(sled_update.clone()).await.unwrap(); - - let sleds = datastore - .sled_list(&opctx, &first_page(NonZeroU32::new(10).unwrap())) - .await - .unwrap(); - println!("sleds: {:?}", sleds); + datastore.sled_upsert(sled_update.clone()).await.unwrap().unwrap(); // Try a few times to ensure that resources never get allocated to the // non-provisionable sled. @@ -470,6 +949,212 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_sled_transitions() { + // Test valid and invalid state and policy transitions. + let logctx = dev::test_setup_log("test_sled_transitions"); + let mut db = test_setup_database(&logctx.log).await; + + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // This test generates all possible sets of transitions. Below, we list + // the before and after predicates for valid transitions. + // + // While it's possible to derive the list of valid transitions from the + // [`Transition::valid_old_policies`] and + // [`Transition::valid_old_states`] methods, we list them here + // explicitly since tests are really about writing things down twice. + let valid_transitions = [ + ( + // In-service and active sleds can be marked as expunged. + Before::new( + predicate::in_iter(SledPolicy::all_in_service()), + predicate::eq(SledState::Active), + ), + SledTransition::Policy(SledPolicy::Expunged), + ), + ( + // The provision policy of in-service sleds can be changed, or + // kept the same (1 of 2). + Before::new( + predicate::in_iter(SledPolicy::all_in_service()), + predicate::eq(SledState::Active), + ), + SledTransition::Policy(SledPolicy::InService { + provision_policy: SledProvisionPolicy::Provisionable, + }), + ), + ( + // (2 of 2) + Before::new( + predicate::in_iter(SledPolicy::all_in_service()), + predicate::eq(SledState::Active), + ), + SledTransition::Policy(SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }), + ), + ( + // Active sleds can be marked as active, regardless of their + // policy. + Before::new( + predicate::always(), + predicate::eq(SledState::Active), + ), + SledTransition::State(SledState::Active), + ), + ( + // Expunged sleds can be marked as decommissioned. + Before::new( + predicate::eq(SledPolicy::Expunged), + predicate::eq(SledState::Active), + ), + SledTransition::State(SledState::Decommissioned), + ), + ( + // Expunged sleds can always be marked as expunged again, as + // long as they aren't already decommissioned (we do not allow + // any transitions once a sled is decommissioned). + Before::new( + predicate::eq(SledPolicy::Expunged), + predicate::ne(SledState::Decommissioned), + ), + SledTransition::Policy(SledPolicy::Expunged), + ), + ( + // Decommissioned sleds can always be marked as decommissioned + // again, as long as their policy is decommissionable. + Before::new( + predicate::in_iter(SledPolicy::all_decommissionable()), + predicate::eq(SledState::Decommissioned), + ), + SledTransition::State(SledState::Decommissioned), + ), + ]; + + // Generate all possible transitions. + let all_transitions = SledPolicy::iter() + .cartesian_product(SledState::iter()) + .cartesian_product(SledTransition::iter()) + .enumerate(); + + // Set up a sled to test against. 
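+        // A single sled is enough: each case below first forces the starting
+        // policy and state with `ValidateTransition::No` before exercising
+        // the transition under test.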
+ let sled = datastore + .sled_upsert(test_new_sled_update()) + .await + .unwrap() + .unwrap(); + let sled_id = sled.id(); + + for (i, ((policy, state), after)) in all_transitions { + test_sled_state_transitions_once( + &opctx, + &datastore, + sled_id, + policy, + state, + after, + &valid_transitions, + ) + .await + .with_context(|| { + format!( + "failed on transition {i} (policy: {policy}, \ + state: {state:?}, after: {after:?})", + ) + }) + .unwrap(); + } + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + async fn test_sled_state_transitions_once( + opctx: &OpContext, + datastore: &DataStore, + sled_id: Uuid, + before_policy: SledPolicy, + before_state: SledState, + after: SledTransition, + valid_transitions: &[(Before, SledTransition)], + ) -> Result<()> { + // Is this a valid transition? + let is_valid = valid_transitions.iter().any( + |(Before(valid_policy, valid_state), valid_after)| { + valid_policy.eval(&before_policy) + && valid_state.eval(&before_state) + && valid_after == &after + }, + ); + + // Set the sled to the initial policy and state, ignoring state + // transition errors (this is just to set up the initial state). + sled_set_policy( + opctx, + datastore, + sled_id, + before_policy, + ValidateTransition::No, + Expected::Ignore, + ) + .await?; + + sled_set_state( + opctx, + datastore, + sled_id, + before_state, + ValidateTransition::No, + Expected::Ignore, + ) + .await?; + + // Now perform the transition to the new policy or state. + match after { + SledTransition::Policy(new_policy) => { + let expected = if is_valid { + Expected::Ok(before_policy) + } else { + Expected::Invalid + }; + + sled_set_policy( + opctx, + datastore, + sled_id, + new_policy, + ValidateTransition::Yes, + expected, + ) + .await?; + } + SledTransition::State(new_state) => { + let expected = if is_valid { + Expected::Ok(before_state) + } else { + Expected::Invalid + }; + + sled_set_state( + opctx, + datastore, + sled_id, + new_state, + ValidateTransition::Yes, + expected, + ) + .await?; + } + } + + Ok(()) + } + + // --- + // Helper methods + // --- + fn test_new_sled_update() -> SledUpdate { let sled_id = Uuid::new_v4(); let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0); @@ -482,13 +1167,17 @@ mod test { ) } - /// Returns pagination parameters to fetch the first page of results for a - /// paginated endpoint - fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { - DataPageParams { - marker: None, - direction: dropshot::PaginationOrder::Ascending, - limit, + /// Initial state for state transitions. 
+ #[derive(Debug)] + struct Before(BoxPredicate, BoxPredicate); + + impl Before { + fn new(policy: P, state: S) -> Self + where + P: Predicate + Send + Sync + 'static, + S: Predicate + Send + Sync + 'static, + { + Before(policy.boxed(), state.boxed()) } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 4771768e43..842cd4bf11 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -1192,7 +1192,8 @@ impl DataStore { #[cfg(test)] mod test { - use crate::db::datastore::{datastore_test, UpdatePrecondition}; + use crate::db::datastore::test_utils::datastore_test; + use crate::db::datastore::UpdatePrecondition; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params::{ BgpAnnounceSetCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig, diff --git a/nexus/db-queries/src/db/datastore/test_utils.rs b/nexus/db-queries/src/db/datastore/test_utils.rs new file mode 100644 index 0000000000..6d26ad044b --- /dev/null +++ b/nexus/db-queries/src/db/datastore/test_utils.rs @@ -0,0 +1,343 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Shared test-only code for the `datastore` module. + +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::datastore::ValidateTransition; +use crate::db::lookup::LookupPath; +use crate::db::DataStore; +use anyhow::bail; +use anyhow::ensure; +use anyhow::Context; +use anyhow::Result; +use dropshot::test_util::LogContext; +use nexus_db_model::SledState; +use nexus_types::external_api::views::SledPolicy; +use nexus_types::external_api::views::SledProvisionPolicy; +use omicron_test_utils::dev::db::CockroachInstance; +use std::sync::Arc; +use strum::EnumCount; +use uuid::Uuid; + +/// Constructs a DataStore for use in test suites that has preloaded the +/// built-in users, roles, and role assignments that are needed for basic +/// operation +#[cfg(test)] +pub async fn datastore_test( + logctx: &LogContext, + db: &CockroachInstance, +) -> (OpContext, Arc) { + use crate::authn; + + let cfg = db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let datastore = + Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); + + // Create an OpContext with the credentials of "db-init" just for the + // purpose of loading the built-in users, roles, and assignments. + let opctx = OpContext::for_background( + logctx.log.new(o!()), + Arc::new(authz::Authz::new(&logctx.log)), + authn::Context::internal_db_init(), + Arc::clone(&datastore), + ); + + // TODO: Can we just call "Populate" instead of doing this? 
+ let rack_id = Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap(); + datastore.load_builtin_users(&opctx).await.unwrap(); + datastore.load_builtin_roles(&opctx).await.unwrap(); + datastore.load_builtin_role_asgns(&opctx).await.unwrap(); + datastore.load_builtin_silos(&opctx).await.unwrap(); + datastore.load_builtin_projects(&opctx).await.unwrap(); + datastore.load_builtin_vpcs(&opctx).await.unwrap(); + datastore.load_silo_users(&opctx).await.unwrap(); + datastore.load_silo_user_role_assignments(&opctx).await.unwrap(); + datastore + .load_builtin_fleet_virtual_provisioning_collection(&opctx) + .await + .unwrap(); + datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap(); + + // Create an OpContext with the credentials of "test-privileged" for general + // testing. + let opctx = + OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); + + (opctx, datastore) +} + +/// Denotes a specific way in which a sled is ineligible. +/// +/// This should match the list of sleds in `IneligibleSleds`. +#[derive( + Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, EnumCount, +)] +pub(super) enum IneligibleSledKind { + NonProvisionable, + Expunged, + Decommissioned, + IllegalDecommissioned, +} + +/// Specifies sleds to be marked as ineligible for provisioning. +/// +/// This is less error-prone than several places duplicating this logic. +#[derive(Debug)] +pub(super) struct IneligibleSleds { + pub(super) non_provisionable: Uuid, + pub(super) expunged: Uuid, + pub(super) decommissioned: Uuid, + pub(super) illegal_decommissioned: Uuid, +} + +impl IneligibleSleds { + pub(super) fn iter( + &self, + ) -> impl Iterator { + [ + (IneligibleSledKind::NonProvisionable, self.non_provisionable), + (IneligibleSledKind::Expunged, self.expunged), + (IneligibleSledKind::Decommissioned, self.decommissioned), + ( + IneligibleSledKind::IllegalDecommissioned, + self.illegal_decommissioned, + ), + ] + .into_iter() + } + + /// Marks the provided sleds as ineligible for provisioning. + /// + /// Assumes that: + /// + /// * the sleds have just been set up and are in the default state. + /// * the UUIDs are all distinct. + pub(super) async fn setup( + &self, + opctx: &OpContext, + datastore: &DataStore, + ) -> Result<()> { + let non_provisionable_fut = async { + sled_set_policy( + &opctx, + &datastore, + self.non_provisionable, + SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }, + ValidateTransition::Yes, + Expected::Ok(SledPolicy::provisionable()), + ) + .await + .with_context(|| { + format!( + "failed to set non-provisionable policy for sled {}", + self.non_provisionable + ) + }) + }; + + let expunged_fut = async { + sled_set_policy( + &opctx, + &datastore, + self.expunged, + SledPolicy::Expunged, + ValidateTransition::Yes, + Expected::Ok(SledPolicy::provisionable()), + ) + .await + .with_context(|| { + format!( + "failed to set expunged policy for sled {}", + self.non_provisionable + ) + }) + }; + + // Legally, we must set the policy to expunged before setting the state + // to decommissioned. (In the future, we'll want to test graceful + // removal as well.) 
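+        // So this future expunges the sled first, then decommissions it.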
+ let decommissioned_fut = async { + sled_set_policy( + &opctx, + &datastore, + self.decommissioned, + SledPolicy::Expunged, + ValidateTransition::Yes, + Expected::Ok(SledPolicy::provisionable()), + ) + .await + .with_context(|| { + format!( + "failed to set expunged policy for sled {}, \ + as prerequisite for decommissioning", + self.decommissioned + ) + })?; + + sled_set_state( + &opctx, + &datastore, + self.decommissioned, + SledState::Decommissioned, + ValidateTransition::Yes, + Expected::Ok(SledState::Active), + ) + .await + .with_context(|| { + format!( + "failed to set decommissioned state for sled {}", + self.decommissioned + ) + }) + }; + + // This is _not_ a legal state, BUT we test it out to ensure that if + // the system somehow enters this state anyway, we don't try and + // provision resources on it. + let illegal_decommissioned_fut = async { + sled_set_state( + &opctx, + &datastore, + self.illegal_decommissioned, + SledState::Decommissioned, + ValidateTransition::No, + Expected::Ok(SledState::Active), + ) + .await + .with_context(|| { + format!( + "failed to illegally set decommissioned state for sled {}", + self.illegal_decommissioned + ) + }) + }; + + // We're okay cancelling the rest of the futures if one of them fails, + // since the overall test is going to fail anyway. Hence try_join + // rather than join_then_try. + futures::try_join!( + non_provisionable_fut, + expunged_fut, + decommissioned_fut, + illegal_decommissioned_fut + )?; + + Ok(()) + } +} + +pub(super) async fn sled_set_policy( + opctx: &OpContext, + datastore: &DataStore, + sled_id: Uuid, + new_policy: SledPolicy, + check: ValidateTransition, + expected_old_policy: Expected, +) -> Result<()> { + let (authz_sled, _) = LookupPath::new(&opctx, &datastore) + .sled_id(sled_id) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + let res = datastore + .sled_set_policy_impl(opctx, &authz_sled, new_policy, check) + .await; + match expected_old_policy { + Expected::Ok(expected) => { + let actual = res.context( + "failed transition that was expected to be successful", + )?; + ensure!( + actual == expected, + "actual old policy ({actual}) is not \ + the same as expected ({expected})" + ); + } + Expected::Invalid => match res { + Ok(old_policy) => { + bail!( + "expected an invalid state transition error, \ + but transition was accepted with old policy: \ + {old_policy}" + ) + } + Err(error) => { + error.ensure_invalid_transition()?; + } + }, + Expected::Ignore => { + // The return value is ignored. 
+ } + } + + Ok(()) +} + +pub(super) async fn sled_set_state( + opctx: &OpContext, + datastore: &DataStore, + sled_id: Uuid, + new_state: SledState, + check: ValidateTransition, + expected_old_state: Expected, +) -> Result<()> { + let (authz_sled, _) = LookupPath::new(&opctx, &datastore) + .sled_id(sled_id) + .fetch_for(authz::Action::Modify) + .await + .unwrap(); + + let res = datastore + .sled_set_state_impl(&opctx, &authz_sled, new_state, check) + .await; + match expected_old_state { + Expected::Ok(expected) => { + let actual = res.context( + "failed transition that was expected to be successful", + )?; + ensure!( + actual == expected, + "actual old state ({actual:?}) \ + is not the same as expected ({expected:?})" + ); + } + Expected::Invalid => match res { + Ok(old_state) => { + bail!( + "expected an invalid state transition error, \ + but transition was accepted with old state: \ + {old_state:?}" + ) + } + Err(error) => { + error.ensure_invalid_transition()?; + } + }, + Expected::Ignore => { + // The return value is ignored. + } + } + + Ok(()) +} + +/// For a transition, describes the expected value of the old state. +pub(super) enum Expected { + /// The transition is expected to successful, with the provided old + /// value. + Ok(T), + + /// The transition is expected to be invalid. + Invalid, + + /// The return value is ignored. + Ignore, +} diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index d0b093ff45..374ef2cf73 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -1064,7 +1064,7 @@ pub fn read_only_resources_associated_with_volume( mod tests { use super::*; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 4f0245e283..4626301f76 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1171,7 +1171,7 @@ impl DataStore { #[cfg(test)] mod tests { use super::*; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; use nexus_test_utils::db::test_setup_database; diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 18ea369685..050c1dcfe9 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -950,7 +950,8 @@ mod test { let logctx = dev::test_setup_log("test_lookup"); let mut db = test_setup_database(&logctx.log).await; let (_, datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let opctx = OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); let project_name: Name = Name("my-project".parse().unwrap()); diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 66fb125a7c..f419ba6852 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -59,9 +59,10 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "router_route_kind", "saga_state", "service_kind", - "sled_provision_state", + "sled_policy", "sled_resource_kind", "sled_role", + "sled_state", "snapshot_state", "sp_type", 
"switch_interface_kind", diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 7456e6e6bb..739c0b0809 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -906,7 +906,8 @@ mod tests { let logctx = dev::test_setup_log(test_name); let log = logctx.log.new(o!()); let db = test_setup_database(&log).await; - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let cfg = crate::db::Config { url: db.pg_config().clone() }; let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); let db_datastore = Arc::new( diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index a22c80b232..d96b26c9b3 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -1953,7 +1953,8 @@ mod tests { let log = logctx.log.new(o!()); let db = test_setup_database(&log).await; let (opctx, db_datastore) = - crate::db::datastore::datastore_test(&logctx, &db).await; + crate::db::datastore::test_utils::datastore_test(&logctx, &db) + .await; let authz_silo = opctx.authn.silo_required().unwrap(); diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 3c37bf6b2e..43e1750812 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -27,6 +27,9 @@ use nexus_db_model::queries::region_allocation::{ proposed_dataset_changes, shuffled_candidate_datasets, updated_datasets, }; use nexus_db_model::schema; +use nexus_db_model::to_db_sled_policy; +use nexus_db_model::SledState; +use nexus_types::external_api::views::SledPolicy; use omicron_common::api::external; use omicron_common::nexus_config::RegionAllocationStrategy; @@ -321,14 +324,16 @@ impl CandidateZpools { .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) .inner_join(with_sled); - let sled_is_provisionable = sled_dsl::provision_state - .eq(crate::db::model::SledProvisionState::Provisionable); + let sled_is_provisionable = sled_dsl::sled_policy + .eq(to_db_sled_policy(SledPolicy::provisionable())); + let sled_is_active = sled_dsl::sled_state.eq(SledState::Active); let base_query = old_zpool_usage .query_source() .inner_join(with_zpool) .filter(it_will_fit) .filter(sled_is_provisionable) + .filter(sled_is_active) .select((old_zpool_usage::dsl::pool_id,)); let query = if distinct_sleds { diff --git a/nexus/db-queries/src/transaction_retry.rs b/nexus/db-queries/src/transaction_retry.rs index 6b5098158b..74a94a0b8f 100644 --- a/nexus/db-queries/src/transaction_retry.rs +++ b/nexus/db-queries/src/transaction_retry.rs @@ -270,7 +270,7 @@ impl OptionalError { mod test { use super::*; - use crate::db::datastore::datastore_test; + use crate::db::datastore::test_utils::datastore_test; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; use oximeter::types::FieldValue; diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 86a9b8da6e..2263a99ce0 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -724,7 +724,8 @@ impl<'a> BlueprintZones<'a> { #[cfg(test)] pub mod test { use super::*; - use nexus_types::external_api::views::SledProvisionState; + use 
nexus_types::external_api::views::SledPolicy; + use nexus_types::external_api::views::SledState; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::address::Ipv6Subnet; @@ -736,14 +737,26 @@ pub mod test { }; use std::str::FromStr; - /// Returns a collection and policy describing a pretty simple system - pub fn example() -> (Collection, Policy) { + pub const DEFAULT_N_SLEDS: usize = 3; + + /// Returns a collection and policy describing a pretty simple system. + /// + /// `n_sleds` is the number of sleds supported. Currently, this value can + /// be anywhere between 0 and 5. (More can be added in the future if + /// necessary.) + pub fn example(n_sleds: usize) -> (Collection, Policy) { let mut builder = nexus_inventory::CollectionBuilder::new("test-suite"); + if n_sleds > 5 { + panic!("example() only supports up to 5 sleds, but got {n_sleds}"); + } + let sled_ids = [ "72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b", "a5f3db3a-61aa-4f90-ad3e-02833c253bf5", "0d168386-2551-44e8-98dd-ae7a7570f8a0", + "aaaaa1a1-0c3f-4928-aba7-6ec5c1db05f7", + "85e88acb-7b86-45ff-9c88-734e1da71c3d", ]; let mut policy = Policy { sleds: BTreeMap::new(), @@ -771,7 +784,7 @@ pub mod test { }) }; - for sled_id_str in sled_ids.iter() { + for sled_id_str in sled_ids.iter().take(n_sleds) { let sled_id: Uuid = sled_id_str.parse().unwrap(); let sled_ip = policy_add_sled(&mut policy, sled_id); let serial_number = format!("s{}", policy.sleds.len()); @@ -900,7 +913,8 @@ pub mod test { policy.sleds.insert( sled_id, SledResources { - provision_state: SledProvisionState::Provisionable, + policy: SledPolicy::provisionable(), + state: SledState::Active, zpools, subnet, }, @@ -931,7 +945,7 @@ pub mod test { fn test_initial() { // Test creating a blueprint from a collection and verifying that it // describes no changes. - let (collection, policy) = example(); + let (collection, policy) = example(DEFAULT_N_SLEDS); let blueprint_initial = BlueprintBuilder::build_initial_from_collection( &collection, @@ -977,7 +991,7 @@ pub mod test { #[test] fn test_basic() { - let (collection, mut policy) = example(); + let (collection, mut policy) = example(DEFAULT_N_SLEDS); let blueprint1 = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), @@ -1096,7 +1110,7 @@ pub mod test { #[test] fn test_add_nexus_with_no_existing_nexus_zones() { - let (mut collection, policy) = example(); + let (mut collection, policy) = example(DEFAULT_N_SLEDS); // We don't care about the internal DNS version here. let internal_dns_version = Generation::new(); @@ -1147,7 +1161,7 @@ pub mod test { #[test] fn test_add_nexus_error_cases() { - let (mut collection, policy) = example(); + let (mut collection, policy) = example(DEFAULT_N_SLEDS); // We don't care about the internal DNS version here. let internal_dns_version = Generation::new(); @@ -1259,7 +1273,7 @@ pub mod test { #[test] fn test_invalid_parent_blueprint_two_zones_with_same_external_ip() { - let (mut collection, policy) = example(); + let (mut collection, policy) = example(DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two // zones with the same external IP. Skim through the zones, copy the @@ -1310,7 +1324,7 @@ pub mod test { #[test] fn test_invalid_parent_blueprint_two_nexus_zones_with_same_nic_ip() { - let (mut collection, policy) = example(); + let (mut collection, policy) = example(DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two // Nexus zones with the same NIC IP. 
Skim through the zones, copy @@ -1359,7 +1373,7 @@ pub mod test { #[test] fn test_invalid_parent_blueprint_two_zones_with_same_vnic_mac() { - let (mut collection, policy) = example(); + let (mut collection, policy) = example(DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two // zones with the same service vNIC MAC address. Skim through the diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index 7973157068..0773fec2bf 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -12,7 +12,7 @@ use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; -use nexus_types::external_api::views::SledProvisionState; +use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use slog::{info, warn, Logger}; @@ -84,6 +84,17 @@ impl<'a> Planner<'a> { let mut sleds_ineligible_for_services = BTreeSet::new(); for (sled_id, sled_info) in &self.policy.sleds { + // Decommissioned sleds don't get any services. (This is an + // explicit match so that when more states are added, this fails to + // compile.) + match sled_info.state { + SledState::Decommissioned => { + sleds_ineligible_for_services.insert(*sled_id); + continue; + } + SledState::Active => {} + } + // Check for an NTP zone. Every sled should have one. If it's not // there, all we can do is provision that one zone. We have to wait // for that to succeed and synchronize the clock before we can @@ -175,10 +186,8 @@ impl<'a> Planner<'a> { // assumption that there is something amiss with them. sleds_ineligible_for_services.extend( self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| { - match sled_info.provision_state { - SledProvisionState::Provisionable => None, - SledProvisionState::NonProvisionable => Some(*sled_id), - } + (!sled_info.is_eligible_for_discretionary_services()) + .then_some(*sled_id) }), ); @@ -311,9 +320,12 @@ mod test { use crate::blueprint_builder::test::example; use crate::blueprint_builder::test::policy_add_sled; use crate::blueprint_builder::test::verify_blueprint; + use crate::blueprint_builder::test::DEFAULT_N_SLEDS; use crate::blueprint_builder::BlueprintBuilder; use nexus_inventory::now_db_precision; - use nexus_types::external_api::views::SledProvisionState; + use nexus_types::external_api::views::SledPolicy; + use nexus_types::external_api::views::SledProvisionPolicy; + use nexus_types::external_api::views::SledState; use nexus_types::inventory::OmicronZoneType; use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; @@ -328,7 +340,7 @@ mod test { let internal_dns_version = Generation::new(); // Use our example inventory collection. - let (mut collection, mut policy) = example(); + let (mut collection, mut policy) = example(DEFAULT_N_SLEDS); // Build the initial blueprint. We don't bother verifying it here // because there's a separate test for that. @@ -509,7 +521,7 @@ mod test { // Use our example inventory collection as a starting point, but strip // it down to just one sled. let (sled_id, collection, mut policy) = { - let (mut collection, mut policy) = example(); + let (mut collection, mut policy) = example(DEFAULT_N_SLEDS); // Pick one sled ID to keep and remove the rest. let keep_sled_id = @@ -593,7 +605,7 @@ mod test { ); // Use our example inventory collection as a starting point. 
- let (collection, mut policy) = example(); + let (collection, mut policy) = example(DEFAULT_N_SLEDS); // Build the initial blueprint. let blueprint1 = BlueprintBuilder::build_initial_from_collection( @@ -674,7 +686,12 @@ mod test { ); // Use our example inventory collection as a starting point. - let (collection, mut policy) = example(); + // + // Request two extra sleds here so we test non-provisionable, expunged, + // and decommissioned sleds. (When we add more kinds of + // non-provisionable states in the future, we'll have to add more + // sleds.) + let (collection, mut policy) = example(5); // Build the initial blueprint. let blueprint1 = BlueprintBuilder::build_initial_from_collection( @@ -685,8 +702,8 @@ mod test { ) .expect("failed to create initial blueprint"); - // This blueprint should only have 3 Nexus zones: one on each sled. - assert_eq!(blueprint1.omicron_zones.len(), 3); + // This blueprint should only have 5 Nexus zones: one on each sled. + assert_eq!(blueprint1.omicron_zones.len(), 5); for sled_config in blueprint1.omicron_zones.values() { assert_eq!( sled_config @@ -698,16 +715,39 @@ mod test { ); } - // Arbitrarily choose one of the sleds and mark it non-provisionable. + // Arbitrarily choose some of the sleds and mark them non-provisionable + // in various ways. + let mut sleds_iter = policy.sleds.iter_mut(); + let nonprovisionable_sled_id = { - let (sled_id, resources) = - policy.sleds.iter_mut().next().expect("no sleds"); - resources.provision_state = SledProvisionState::NonProvisionable; + let (sled_id, resources) = sleds_iter.next().expect("no sleds"); + resources.policy = SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }; + *sled_id + }; + let expunged_sled_id = { + let (sled_id, resources) = sleds_iter.next().expect("no sleds"); + resources.policy = SledPolicy::Expunged; + *sled_id + }; + let decommissioned_sled_id = { + let (sled_id, resources) = sleds_iter.next().expect("no sleds"); + resources.state = SledState::Decommissioned; *sled_id }; - // Now run the planner with a high number of target Nexus zones. - policy.target_nexus_zone_count = 14; + // Now run the planner with a high number of target Nexus zones. The + // number (16) is chosen such that: + // + // * we start with 5 sleds + // * we need to add 11 Nexus zones + // * there are two sleds eligible for provisioning + // * => 5 or 6 new Nexus zones per sled + // + // When the planner gets smarter about removing zones from expunged + // and/or removed sleds, we'll have to adjust this number. + policy.target_nexus_zone_count = 16; let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, @@ -727,13 +767,15 @@ mod test { let sleds = diff.sleds_changed().collect::>(); // Only 2 of the 3 sleds should get additional Nexus zones. We expect a - // total of 11 new Nexus zones, which should be spread evenly across the + // total of 12 new Nexus zones, which should be spread evenly across the // two sleds (one gets 6 and the other gets 5), while the // non-provisionable sled should be unchanged. 
assert_eq!(sleds.len(), 2); let mut total_new_nexus_zones = 0; for (sled_id, sled_changes) in sleds { assert!(sled_id != nonprovisionable_sled_id); + assert!(sled_id != expunged_sled_id); + assert!(sled_id != decommissioned_sled_id); assert_eq!(sled_changes.zones_removed().count(), 0); assert_eq!(sled_changes.zones_changed().count(), 0); let zones = sled_changes.zones_added().collect::>(); diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 32797facbf..373f023288 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -186,7 +186,8 @@ mod test { datastore .sled_upsert(update) .await - .expect("Failed to insert sled to db"); + .expect("Failed to insert sled to db") + .unwrap(); } let (blueprint_tx, blueprint_rx) = watch::channel(None); diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 044e5a2234..c0d64d554a 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -338,7 +338,7 @@ mod test { }, rack_id, ); - sleds.push(datastore.sled_upsert(sled).await.unwrap()); + sleds.push(datastore.sled_upsert(sled).await.unwrap().unwrap()); } // The same enumerator should immediately find all the new sleds. diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 61ce803d13..adf6119c5c 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -160,7 +160,8 @@ impl super::Nexus { .remove(&sled_id) .unwrap_or_else(BTreeSet::new); let sled_info = SledResources { - provision_state: sled_row.provision_state().into(), + policy: sled_row.policy(), + state: sled_row.state().into(), subnet, zpools, }; diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index ec3f11dc6f..88955d78e9 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -11,9 +11,11 @@ use crate::internal_api::params::{ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::datastore::SledUpsertOutput; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; +use nexus_types::external_api::views::SledProvisionPolicy; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -68,7 +70,19 @@ impl super::Nexus { }, self.rack_id, ); - self.db_datastore.sled_upsert(sled).await?; + match self.db_datastore.sled_upsert(sled).await? { + SledUpsertOutput::Updated(_) => {} + SledUpsertOutput::Decommissioned => { + // We currently don't bubble up errors for decommissioned sleds + // -- if it ever happens, a decommissioned sled-agent doesn't + // know about that. + warn!( + self.log, + "decommissioned sled-agent reached out for upserts"; + "sled_uuid" => id.to_string() + ); + } + } Ok(()) } @@ -146,17 +160,17 @@ impl super::Nexus { .await } - /// Returns the old state. - pub(crate) async fn sled_set_provision_state( + /// Returns the old provision policy. 
+ pub(crate) async fn sled_set_provision_policy( &self, opctx: &OpContext, sled_lookup: &lookup::Sled<'_>, - state: db::model::SledProvisionState, - ) -> Result { + new_policy: SledProvisionPolicy, + ) -> Result { let (authz_sled,) = sled_lookup.lookup_for(authz::Action::Modify).await?; self.db_datastore - .sled_set_provision_state(opctx, &authz_sled, state) + .sled_set_provision_policy(opctx, &authz_sled, new_policy) .await } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index fd18cb2dab..b007cc6217 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -225,7 +225,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(rack_view)?; api.register(sled_list)?; api.register(sled_view)?; - api.register(sled_set_provision_state)?; + api.register(sled_set_provision_policy)?; api.register(sled_instance_list)?; api.register(sled_physical_disk_list)?; api.register(physical_disk_list)?; @@ -5166,38 +5166,34 @@ async fn sled_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Set sled provision state +/// Set sled provision policy #[endpoint { method = PUT, - path = "/v1/system/hardware/sleds/{sled_id}/provision-state", + path = "/v1/system/hardware/sleds/{sled_id}/provision-policy", tags = ["system/hardware"], }] -async fn sled_set_provision_state( +async fn sled_set_provision_policy( rqctx: RequestContext>, path_params: Path, - new_provision_state: TypedBody, -) -> Result, HttpError> { + new_provision_state: TypedBody, +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; let path = path_params.into_inner(); - let provision_state = new_provision_state.into_inner().state; + let new_state = new_provision_state.into_inner().state; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - // Convert the external `SledProvisionState` into our internal data model. 
- let new_state = db::model::SledProvisionState::from(provision_state); let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; let old_state = nexus - .sled_set_provision_state(&opctx, &sled_lookup, new_state) + .sled_set_provision_policy(&opctx, &sled_lookup, new_state) .await?; - let response = params::SledProvisionStateResponse { - old_state: old_state.into(), - new_state: new_state.into(), - }; + let response = + params::SledProvisionPolicyResponse { old_state, new_state }; Ok(HttpResponseOk(response)) }; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index cd04bb6018..81f2a02b31 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -22,6 +22,7 @@ use nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::Ipv4Range; +use nexus_types::external_api::views::SledProvisionPolicy; use omicron_common::api::external::AddressLotKind; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -45,14 +46,12 @@ pub const HARDWARE_UNINITIALIZED_SLEDS: &'static str = "/v1/system/hardware/sleds-uninitialized"; pub static HARDWARE_SLED_URL: Lazy = Lazy::new(|| format!("/v1/system/hardware/sleds/{}", SLED_AGENT_UUID)); -pub static HARDWARE_SLED_PROVISION_STATE_URL: Lazy = Lazy::new(|| { - format!("/v1/system/hardware/sleds/{}/provision-state", SLED_AGENT_UUID) +pub static HARDWARE_SLED_PROVISION_POLICY_URL: Lazy = Lazy::new(|| { + format!("/v1/system/hardware/sleds/{}/provision-policy", SLED_AGENT_UUID) }); -pub static DEMO_SLED_PROVISION_STATE: Lazy = - Lazy::new(|| { - params::SledProvisionStateParams { - state: nexus_types::external_api::views::SledProvisionState::NonProvisionable, - } +pub static DEMO_SLED_PROVISION_POLICY: Lazy = + Lazy::new(|| params::SledProvisionPolicyParams { + state: SledProvisionPolicy::NonProvisionable, }); pub static HARDWARE_SWITCH_URL: Lazy = @@ -1911,11 +1910,11 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &HARDWARE_SLED_PROVISION_STATE_URL, + url: &HARDWARE_SLED_PROVISION_POLICY_URL, visibility: Visibility::Protected, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![AllowedMethod::Put( - serde_json::to_value(&*DEMO_SLED_PROVISION_STATE).unwrap() + serde_json::to_value(&*DEMO_SLED_PROVISION_POLICY).unwrap() )], }, diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 1c23f1b842..380ec1c975 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1048,7 +1048,8 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> { fn before_24_0_0(client: &Client) -> BoxFuture<'_, ()> { // IP addresses were pulled off dogfood sled 16 Box::pin(async move { - // Create two sleds + // Create two sleds. (SLED2 is marked non_provisionable for + // after_37_0_1.) 
client .batch_execute(&format!( "INSERT INTO sled @@ -1062,7 +1063,7 @@ fn before_24_0_0(client: &Client) -> BoxFuture<'_, ()> { 'fd00:1122:3344:104::1ac', 'provisionable'), ('{SLED2}', now(), now(), NULL, 1, '{RACK1}', false, 'zzzz', 'xxxx', '2', 64, 12345678, 77,'fd00:1122:3344:107::1', 12345, - 'fd00:1122:3344:107::d4', 'provisionable'); + 'fd00:1122:3344:107::d4', 'non_provisionable'); " )) .await @@ -1095,6 +1096,45 @@ fn after_24_0_0(client: &Client) -> BoxFuture<'_, ()> { }) } +// This reuses the sleds created in before_24_0_0. +fn after_37_0_1(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + // Confirm that the IP Addresses have the last 2 bytes changed to `0xFFFF` + let rows = client + .query("SELECT sled_policy, sled_state FROM sled ORDER BY id", &[]) + .await + .expect("Failed to select sled policy and state"); + let policy_and_state = process_rows(&rows); + + assert_eq!( + policy_and_state[0].values, + vec![ + ColumnValue::new( + "sled_policy", + SqlEnum::from(("sled_policy", "in_service")) + ), + ColumnValue::new( + "sled_state", + SqlEnum::from(("sled_state", "active")) + ), + ] + ); + assert_eq!( + policy_and_state[1].values, + vec![ + ColumnValue::new( + "sled_policy", + SqlEnum::from(("sled_policy", "no_provision")) + ), + ColumnValue::new( + "sled_state", + SqlEnum::from(("sled_state", "active")) + ), + ] + ); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. @@ -1112,6 +1152,10 @@ fn get_migration_checks() -> BTreeMap { SemverVersion(semver::Version::parse("24.0.0").unwrap()), DataMigrationFns { before: Some(before_24_0_0), after: after_24_0_0 }, ); + map.insert( + SemverVersion(semver::Version::parse("37.0.1").unwrap()), + DataMigrationFns { before: None, after: after_37_0_1 }, + ); map } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 7ed73fd30a..adb36a24af 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -133,7 +133,7 @@ sled_instance_list GET /v1/system/hardware/sleds/{sle sled_list GET /v1/system/hardware/sleds sled_list_uninitialized GET /v1/system/hardware/sleds-uninitialized sled_physical_disk_list GET /v1/system/hardware/sleds/{sled_id}/disks -sled_set_provision_state PUT /v1/system/hardware/sleds/{sled_id}/provision-state +sled_set_provision_policy PUT /v1/system/hardware/sleds/{sled_id}/provision-policy sled_view GET /v1/system/hardware/sleds/{sled_id} switch_list GET /v1/system/hardware/switches switch_view GET /v1/system/hardware/switches/{switch_id} diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 2e683878be..0f601f3db5 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -11,7 +11,8 @@ //! nexus/deployment does not currently know about nexus/db-model and it's //! convenient to separate these concerns.) 
-use crate::external_api::views::SledProvisionState; +use crate::external_api::views::SledPolicy; +use crate::external_api::views::SledState; use crate::inventory::Collection; pub use crate::inventory::NetworkInterface; pub use crate::inventory::NetworkInterfaceKind; @@ -64,8 +65,11 @@ pub struct Policy { /// Describes the resources available on each sled for the planner #[derive(Debug, Clone)] pub struct SledResources { - /// provision state of this sled - pub provision_state: SledProvisionState, + /// current sled policy + pub policy: SledPolicy, + + /// current sled state + pub state: SledState, /// zpools on this sled /// @@ -80,6 +84,17 @@ pub struct SledResources { pub subnet: Ipv6Subnet, } +impl SledResources { + /// Returns true if the sled can have services provisioned on it that + /// aren't required to be on every sled. + /// + /// For example, NTP must exist on every sled, but Nexus does not have to. + pub fn is_eligible_for_discretionary_services(&self) -> bool { + self.policy.is_provisionable() + && self.state.is_eligible_for_discretionary_services() + } +} + /// Describes a complete set of software and configuration for the system // Blueprints are a fundamental part of how the system modifies itself. Each // blueprint completely describes all of the software and configuration @@ -266,6 +281,7 @@ pub struct OmicronZonesDiff<'a> { } /// Describes a sled that appeared on both sides of a diff (possibly changed) +#[derive(Debug)] pub struct DiffSledCommon<'a> { /// id of the sled pub sled_id: Uuid, diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 6cb878084d..07eeb9b679 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -96,21 +96,21 @@ pub struct SledSelector { pub sled: Uuid, } -/// Parameters for `sled_set_provision_state`. +/// Parameters for `sled_set_provision_policy`. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct SledProvisionStateParams { +pub struct SledProvisionPolicyParams { /// The provision state. - pub state: super::views::SledProvisionState, + pub state: super::views::SledProvisionPolicy, } -/// Response to `sled_set_provision_state`. +/// Response to `sled_set_provision_policy`. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -pub struct SledProvisionStateResponse { +pub struct SledProvisionPolicyResponse { /// The old provision state. - pub old_state: super::views::SledProvisionState, + pub old_state: super::views::SledProvisionPolicy, /// The new provision state. - pub new_state: super::views::SledProvisionState, + pub new_state: super::views::SledProvisionPolicy, } pub struct SwitchSelector { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 84648f109f..a3e87b162e 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -19,7 +19,9 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::fmt; use std::net::IpAddr; +use strum::{EnumIter, IntoEnumIterator}; use uuid::Uuid; use super::params::PhysicalDiskKind; @@ -418,32 +420,214 @@ pub struct Sled { pub baseboard: Baseboard, /// The rack to which this Sled is currently attached pub rack_id: Uuid, - /// The provision state of the sled. - pub provision_state: SledProvisionState, + /// The operator-defined policy of a sled. 
+ pub policy: SledPolicy, + /// The current state Nexus believes the sled to be in. + pub state: SledState, /// The number of hardware threads which can execute on this sled pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, } -/// The provision state of a sled. +/// The operator-defined provision policy of a sled. /// /// This controls whether new resources are going to be provisioned on this /// sled. #[derive( - Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, + Copy, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + EnumIter, )] #[serde(rename_all = "snake_case")] -pub enum SledProvisionState { +pub enum SledProvisionPolicy { /// New resources will be provisioned on this sled. Provisionable, - /// New resources will not be provisioned on this sled. However, existing - /// resources will continue to be on this sled unless manually migrated - /// off. + /// New resources will not be provisioned on this sled. However, if the + /// sled is currently in service, existing resources will continue to be on + /// this sled unless manually migrated off. NonProvisionable, } +impl SledProvisionPolicy { + /// Returns the opposite of the current provision state. + pub const fn invert(self) -> Self { + match self { + Self::Provisionable => Self::NonProvisionable, + Self::NonProvisionable => Self::Provisionable, + } + } +} + +/// The operator-defined policy of a sled. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(rename_all = "snake_case", tag = "kind")] +pub enum SledPolicy { + /// The operator has indicated that the sled is in-service. + InService { + /// Determines whether new resources can be provisioned onto the sled. + provision_policy: SledProvisionPolicy, + }, + + /// The operator has indicated that the sled has been permanently removed + /// from service. + /// + /// This is a terminal state: once a particular sled ID is expunged, it + /// will never return to service. (The actual hardware may be reused, but + /// it will be treated as a brand-new sled.) + /// + /// An expunged sled is always non-provisionable. + Expunged, + // NOTE: if you add a new value here, be sure to add it to + // the `IntoEnumIterator` impl below! +} + +// Can't automatically derive strum::EnumIter because that doesn't provide a +// way to iterate over nested enums. +impl IntoEnumIterator for SledPolicy { + type Iterator = std::array::IntoIter; + + fn iter() -> Self::Iterator { + [ + Self::InService { + provision_policy: SledProvisionPolicy::Provisionable, + }, + Self::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }, + Self::Expunged, + ] + .into_iter() + } +} + +impl SledPolicy { + /// Creates a new `SledPolicy` that is in-service and provisionable. + pub fn provisionable() -> Self { + Self::InService { provision_policy: SledProvisionPolicy::Provisionable } + } + + /// Returns the list of all in-service policies. + pub fn all_in_service() -> &'static [Self] { + &[ + Self::InService { + provision_policy: SledProvisionPolicy::Provisionable, + }, + Self::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + }, + ] + } + + /// Returns true if the sled can have services provisioned on it. 
+ pub fn is_provisionable(&self) -> bool { + match self { + Self::InService { + provision_policy: SledProvisionPolicy::Provisionable, + } => true, + Self::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + } + | Self::Expunged => false, + } + } + + /// Returns the provision policy, if the sled is in service. + pub fn provision_policy(&self) -> Option { + match self { + Self::InService { provision_policy } => Some(*provision_policy), + Self::Expunged => None, + } + } + + /// Returns true if the sled can be decommissioned in this state. + pub fn is_decommissionable(&self) -> bool { + // This should be kept in sync with decommissionable_states below. + match self { + Self::InService { .. } => false, + Self::Expunged => true, + } + } + + /// Returns all the possible policies a sled can have for it to be + /// decommissioned. + pub fn all_decommissionable() -> &'static [Self] { + &[Self::Expunged] + } +} + +impl fmt::Display for SledPolicy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledPolicy::InService { + provision_policy: SledProvisionPolicy::Provisionable, + } => write!(f, "in service"), + SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + } => write!(f, "in service (not provisionable)"), + SledPolicy::Expunged => write!(f, "expunged"), + } + } +} + +/// The current state of the sled, as determined by Nexus. +#[derive( + Copy, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + EnumIter, +)] +#[serde(rename_all = "snake_case")] +pub enum SledState { + /// The sled is currently active, and has resources allocated on it. + Active, + + /// The sled has been permanently removed from service. + /// + /// This is a terminal state: once a particular sled ID is decommissioned, + /// it will never return to service. (The actual hardware may be reused, + /// but it will be treated as a brand-new sled.) + Decommissioned, +} + +impl SledState { + /// Returns true if the sled state makes it eligible for services that + /// aren't required to be on every sled. + /// + /// For example, NTP must exist on every sled, but Nexus does not have to. + pub fn is_eligible_for_discretionary_services(&self) -> bool { + // (Explicit match, so that this fails to compile if a new state is + // added.) 
+ match self { + SledState::Active => true, + SledState::Decommissioned => false, + } + } +} + +impl fmt::Display for SledState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledState::Active => write!(f, "active"), + SledState::Decommissioned => write!(f, "decommissioned"), + } + } +} + /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/openapi/nexus.json b/openapi/nexus.json index f42841dcf6..cc8edec51d 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -4221,13 +4221,13 @@ } } }, - "/v1/system/hardware/sleds/{sled_id}/provision-state": { + "/v1/system/hardware/sleds/{sled_id}/provision-policy": { "put": { "tags": [ "system/hardware" ], - "summary": "Set sled provision state", - "operationId": "sled_set_provision_state", + "summary": "Set sled provision policy", + "operationId": "sled_set_provision_policy", "parameters": [ { "in": "path", @@ -4244,7 +4244,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledProvisionStateParams" + "$ref": "#/components/schemas/SledProvisionPolicyParams" } } }, @@ -4256,7 +4256,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledProvisionStateResponse" + "$ref": "#/components/schemas/SledProvisionPolicyResponse" } } } @@ -14839,11 +14839,11 @@ "type": "string", "format": "uuid" }, - "provision_state": { - "description": "The provision state of the sled.", + "policy": { + "description": "The operator-defined policy of a sled.", "allOf": [ { - "$ref": "#/components/schemas/SledProvisionState" + "$ref": "#/components/schemas/SledPolicy" } ] }, @@ -14852,6 +14852,14 @@ "type": "string", "format": "uuid" }, + "state": { + "description": "The current state Nexus believes the sled to be in.", + "allOf": [ + { + "$ref": "#/components/schemas/SledState" + } + ] + }, "time_created": { "description": "timestamp when this resource was created", "type": "string", @@ -14880,8 +14888,9 @@ "required": [ "baseboard", "id", - "provision_state", + "policy", "rack_id", + "state", "time_created", "time_modified", "usable_hardware_threads", @@ -14971,8 +14980,52 @@ "items" ] }, - "SledProvisionState": { - "description": "The provision state of a sled.\n\nThis controls whether new resources are going to be provisioned on this sled.", + "SledPolicy": { + "description": "The operator-defined policy of a sled.", + "oneOf": [ + { + "description": "The operator has indicated that the sled is in-service.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "in_service" + ] + }, + "provision_policy": { + "description": "Determines whether new resources can be provisioned onto the sled.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionPolicy" + } + ] + } + }, + "required": [ + "kind", + "provision_policy" + ] + }, + { + "description": "The operator has indicated that the sled has been permanently removed from service.\n\nThis is a terminal state: once a particular sled ID is expunged, it will never return to service. 
(The actual hardware may be reused, but it will be treated as a brand-new sled.)\n\nAn expunged sled is always non-provisionable.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "expunged" + ] + } + }, + "required": [ + "kind" + ] + } + ] + }, + "SledProvisionPolicy": { + "description": "The operator-defined provision policy of a sled.\n\nThis controls whether new resources are going to be provisioned on this sled.", "oneOf": [ { "description": "New resources will be provisioned on this sled.", @@ -14982,7 +15035,7 @@ ] }, { - "description": "New resources will not be provisioned on this sled. However, existing resources will continue to be on this sled unless manually migrated off.", + "description": "New resources will not be provisioned on this sled. However, if the sled is currently in service, existing resources will continue to be on this sled unless manually migrated off.", "type": "string", "enum": [ "non_provisionable" @@ -14990,15 +15043,15 @@ } ] }, - "SledProvisionStateParams": { - "description": "Parameters for `sled_set_provision_state`.", + "SledProvisionPolicyParams": { + "description": "Parameters for `sled_set_provision_policy`.", "type": "object", "properties": { "state": { "description": "The provision state.", "allOf": [ { - "$ref": "#/components/schemas/SledProvisionState" + "$ref": "#/components/schemas/SledProvisionPolicy" } ] } @@ -15007,15 +15060,15 @@ "state" ] }, - "SledProvisionStateResponse": { - "description": "Response to `sled_set_provision_state`.", + "SledProvisionPolicyResponse": { + "description": "Response to `sled_set_provision_policy`.", "type": "object", "properties": { "new_state": { "description": "The new provision state.", "allOf": [ { - "$ref": "#/components/schemas/SledProvisionState" + "$ref": "#/components/schemas/SledProvisionPolicy" } ] }, @@ -15023,7 +15076,7 @@ "description": "The old provision state.", "allOf": [ { - "$ref": "#/components/schemas/SledProvisionState" + "$ref": "#/components/schemas/SledProvisionPolicy" } ] } @@ -15054,6 +15107,25 @@ "items" ] }, + "SledState": { + "description": "The current state of the sled, as determined by Nexus.", + "oneOf": [ + { + "description": "The sled is currently active, and has resources allocated on it.", + "type": "string", + "enum": [ + "active" + ] + }, + { + "description": "The sled has been permanently removed from service.\n\nThis is a terminal state: once a particular sled ID is decommissioned, it will never return to service. (The actual hardware may be reused, but it will be treated as a brand-new sled.)", + "type": "string", + "enum": [ + "decommissioned" + ] + } + ] + }, "Snapshot": { "description": "View of a Snapshot", "type": "object", diff --git a/schema/crdb/37.0.0/up01.sql b/schema/crdb/37.0.0/up01.sql new file mode 100644 index 0000000000..bb66ff283e --- /dev/null +++ b/schema/crdb/37.0.0/up01.sql @@ -0,0 +1,13 @@ +-- The disposition for a particular sled. This is updated solely by the +-- operator, and not by Nexus. +CREATE TYPE IF NOT EXISTS omicron.public.sled_policy AS ENUM ( + -- The sled is in service, and new resources can be provisioned onto it. + 'in_service', + -- The sled is in service, but the operator has indicated that new + -- resources should not be provisioned onto it. + 'no_provision', + -- The operator has marked that the sled has, or will be, removed from the + -- rack, and it should be assumed that any resources currently on it are + -- now permanently missing. 
+ 'expunged' +); diff --git a/schema/crdb/37.0.0/up02.sql b/schema/crdb/37.0.0/up02.sql new file mode 100644 index 0000000000..01cba8f7fe --- /dev/null +++ b/schema/crdb/37.0.0/up02.sql @@ -0,0 +1,22 @@ +-- The actual state of the sled. This is updated exclusively by Nexus. +-- +-- Nexus's goal is to match the sled's state with the operator-indicated +-- policy. For example, if the sled_policy is "expunged" and the sled_state is +-- "active", Nexus will assume that the sled is gone. Based on that, Nexus will +-- reallocate resources currently on the expunged sled to other sleds, etc. +-- Once the expunged sled no longer has any resources attached to it, Nexus +-- will mark it as decommissioned. +CREATE TYPE IF NOT EXISTS omicron.public.sled_state AS ENUM ( + -- The sled has resources of any kind allocated on it, or, is available for + -- new resources. + -- + -- The sled can be in this state and have a different sled policy, e.g. + -- "expunged". + 'active', + + -- The sled no longer has resources allocated on it, now or in the future. + -- + -- This is a terminal state. This state is only valid if the sled policy is + -- 'expunged'. + 'decommissioned' +); diff --git a/schema/crdb/37.0.0/up03.sql b/schema/crdb/37.0.0/up03.sql new file mode 100644 index 0000000000..7e51cf9546 --- /dev/null +++ b/schema/crdb/37.0.0/up03.sql @@ -0,0 +1,7 @@ +-- Modify the existing sled table to add the columns as required. +ALTER TABLE omicron.public.sled + -- Nullable for now -- we're going to set the data in sled_policy in the + -- next migration statement. + ADD COLUMN IF NOT EXISTS sled_policy omicron.public.sled_policy, + ADD COLUMN IF NOT EXISTS sled_state omicron.public.sled_state + NOT NULL DEFAULT 'active'; diff --git a/schema/crdb/37.0.0/up04.sql b/schema/crdb/37.0.0/up04.sql new file mode 100644 index 0000000000..a85367ec10 --- /dev/null +++ b/schema/crdb/37.0.0/up04.sql @@ -0,0 +1,14 @@ +-- Mass-update the sled_policy column to match the sled_provision_state column. + +-- This is a full table scan, but is unavoidable. +SET + LOCAL disallow_full_table_scans = OFF; + +UPDATE omicron.public.sled + SET sled_policy = + (CASE provision_state + WHEN 'provisionable' THEN 'in_service' + WHEN 'non_provisionable' THEN 'no_provision' + -- No need to specify the ELSE case because the enum has been + -- exhaustively matched (sled_provision_state already bans NULL). + END); diff --git a/schema/crdb/37.0.1/up01.sql b/schema/crdb/37.0.1/up01.sql new file mode 100644 index 0000000000..c23e3e5a11 --- /dev/null +++ b/schema/crdb/37.0.1/up01.sql @@ -0,0 +1,8 @@ +-- This is a follow-up to the previous migration, done separately to ensure +-- that the updated values for sled_policy are committed before the +-- provision_state column is dropped. 
+ +ALTER TABLE omicron.public.sled + DROP COLUMN IF EXISTS provision_state, + ALTER COLUMN sled_policy SET NOT NULL, + ALTER COLUMN sled_state DROP DEFAULT; diff --git a/schema/crdb/37.0.1/up02.sql b/schema/crdb/37.0.1/up02.sql new file mode 100644 index 0000000000..342f794c82 --- /dev/null +++ b/schema/crdb/37.0.1/up02.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.sled_provision_state; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 87a22d1adc..837be42c35 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -73,11 +73,41 @@ CREATE TABLE IF NOT EXISTS omicron.public.rack ( * Sleds */ -CREATE TYPE IF NOT EXISTS omicron.public.sled_provision_state AS ENUM ( - -- New resources can be provisioned onto the sled - 'provisionable', - -- New resources must not be provisioned onto the sled - 'non_provisionable' +-- The disposition for a particular sled. This is updated solely by the +-- operator, and not by Nexus. +CREATE TYPE IF NOT EXISTS omicron.public.sled_policy AS ENUM ( + -- The sled is in service, and new resources can be provisioned onto it. + 'in_service', + -- The sled is in service, but the operator has indicated that new + -- resources should not be provisioned onto it. + 'no_provision', + -- The operator has marked that the sled has, or will be, removed from the + -- rack, and it should be assumed that any resources currently on it are + -- now permanently missing. + 'expunged' +); + +-- The actual state of the sled. This is updated exclusively by Nexus. +-- +-- Nexus's goal is to match the sled's state with the operator-indicated +-- policy. For example, if the sled_policy is "expunged" and the sled_state is +-- "active", Nexus will assume that the sled is gone. Based on that, Nexus will +-- reallocate resources currently on the expunged sled to other sleds, etc. +-- Once the expunged sled no longer has any resources attached to it, Nexus +-- will mark it as decommissioned. +CREATE TYPE IF NOT EXISTS omicron.public.sled_state AS ENUM ( + -- The sled has resources of any kind allocated on it, or, is available for + -- new resources. + -- + -- The sled can be in this state and have a different sled policy, e.g. + -- "expunged". + 'active', + + -- The sled no longer has resources allocated on it, now or in the future. + -- + -- This is a terminal state. This state is only valid if the sled policy is + -- 'expunged'. + 'decommissioned' ); CREATE TABLE IF NOT EXISTS omicron.public.sled ( @@ -111,8 +141,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* The last address allocated to a propolis instance on this sled. */ last_used_address INET NOT NULL, - /* The state of whether resources should be provisioned onto the sled */ - provision_state omicron.public.sled_provision_state NOT NULL, + /* The policy for the sled, updated exclusively by the operator */ + sled_policy omicron.public.sled_policy NOT NULL, + + /* The actual state of the sled, updated exclusively by Nexus */ + sled_state omicron.public.sled_state NOT NULL, -- This constraint should be upheld, even for deleted disks -- in the fleet. 
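(Illustrative aside, not part of the schema change: the comments on `sled_policy` and `sled_state` above describe how the two columns are meant to be read together. A minimal Rust sketch of that pairing, using the `SledPolicy`/`SledState` types added in `nexus/types` earlier in this patch; the `describe` helper and the standalone `main` are hypothetical.)

```rust
use nexus_types::external_api::views::{
    SledPolicy, SledProvisionPolicy, SledState,
};

// Hypothetical helper: summarizes what Nexus is expected to do for each
// (policy, state) pair, per the comments on the SQL enums above.
fn describe(policy: SledPolicy, state: SledState) -> &'static str {
    match (policy, state) {
        (
            SledPolicy::InService {
                provision_policy: SledProvisionPolicy::Provisionable,
            },
            SledState::Active,
        ) => "in service; eligible for new discretionary zones (e.g. Nexus)",
        (
            SledPolicy::InService {
                provision_policy: SledProvisionPolicy::NonProvisionable,
            },
            SledState::Active,
        ) => "in service; keeps existing zones but gets no new ones",
        // Expunged but still active: Nexus works to move resources off it.
        (SledPolicy::Expunged, SledState::Active) => {
            "being drained; will eventually be decommissioned"
        }
        // Decommissioned is terminal and only valid for expunged sleds.
        (_, SledState::Decommissioned) => {
            "no resources now or in the future"
        }
    }
}

fn main() {
    println!("{}", describe(SledPolicy::provisionable(), SledState::Active));
}
```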
@@ -3518,7 +3551,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '36.0.0', NULL) + ( TRUE, NOW(), NOW(), '37.0.1', NULL) ON CONFLICT DO NOTHING; COMMIT; From c2ff7be9ce54fc4af61176c94ddc5f357ae33626 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 24 Feb 2024 05:08:24 +0000 Subject: [PATCH 12/18] chore(deps): update taiki-e/install-action digest to ffdab02 (#5137) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`19e9b54` -> `ffdab02`](https://togithub.com/taiki-e/install-action/compare/19e9b54...ffdab02) | --- ### Configuration πŸ“… **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. β™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. πŸ”• **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 1f55f2f255..a55c05ba7a 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@19e9b549a48620cc50fcf6e6e866b8fb4eca1b01 # v2 + uses: taiki-e/install-action@ffdab026038b43b56c3c9540cdadb98181c6155c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From b6d39a0aab1c950f40e26ec7f5e021aed9c22448 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 25 Feb 2024 05:10:08 +0000 Subject: [PATCH 13/18] chore(deps): update taiki-e/install-action digest to b7add58 (#5139) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`ffdab02` -> `b7add58`](https://togithub.com/taiki-e/install-action/compare/ffdab02...b7add58) | --- ### Configuration πŸ“… **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. β™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. πŸ”• **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index a55c05ba7a..2463ac4e2e 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@ffdab026038b43b56c3c9540cdadb98181c6155c # v2 + uses: taiki-e/install-action@b7add58e53e52e624966da65007ce24524f3dcf3 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 767c7fe2650040d7b8097a995bb3e9bde00a7512 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:25:48 +0000 Subject: [PATCH 14/18] chore(deps): update taiki-e/install-action digest to 4ce8785 (#5142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`b7add58` -> `4ce8785`](https://togithub.com/taiki-e/install-action/compare/b7add58...4ce8785) | --- ### Configuration πŸ“… **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. β™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. πŸ”• **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 2463ac4e2e..10c1c04003 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@b7add58e53e52e624966da65007ce24524f3dcf3 # v2 + uses: taiki-e/install-action@4ce8785db2a8a56c9ede16f705c2c49c5c61669c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 7fae994cbe954d576731638d800e2c0caf17a37e Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 26 Feb 2024 11:24:20 -0600 Subject: [PATCH 15/18] Bump web console (floating IPs) (#5126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### User-facing changes * [80f11673](https://github.com/oxidecomputer/console/commit/80f11673) oxidecomputer/console#1957 * [f31d5331](https://github.com/oxidecomputer/console/commit/f31d5331) oxidecomputer/console#1944 * [b454cefd](https://github.com/oxidecomputer/console/commit/b454cefd) oxidecomputer/console#1955 * [1936e0d8](https://github.com/oxidecomputer/console/commit/1936e0d8) oxidecomputer/console#1948 --- ### All changes https://github.com/oxidecomputer/console/compare/e5a1f804...80f11673 * [80f11673](https://github.com/oxidecomputer/console/commit/80f11673) oxidecomputer/console#1957 * [5d989a70](https://github.com/oxidecomputer/console/commit/5d989a70) oxidecomputer/console#1959 * [d4ec1927](https://github.com/oxidecomputer/console/commit/d4ec1927) turn off license-eye comments, I hate them. CI failure is sufficient * [f8d2f36e](https://github.com/oxidecomputer/console/commit/f8d2f36e) oxidecomputer/console#1960 * [e0b676dc](https://github.com/oxidecomputer/console/commit/e0b676dc) upgrade husky commands for v9 https://github.com/typicode/husky/releases/tag/v9.0.1 * [f31d5331](https://github.com/oxidecomputer/console/commit/f31d5331) oxidecomputer/console#1944 * [b4552eea](https://github.com/oxidecomputer/console/commit/b4552eea) Revert "Revert all app changes since v6 except two small fixes (oxidecomputer/console#1958)" * [1f8ebf28](https://github.com/oxidecomputer/console/commit/1f8ebf28) oxidecomputer/console#1958 * [b454cefd](https://github.com/oxidecomputer/console/commit/b454cefd) oxidecomputer/console#1955 * [1ce32d32](https://github.com/oxidecomputer/console/commit/1ce32d32) oxidecomputer/console#1956 * [794ae11d](https://github.com/oxidecomputer/console/commit/794ae11d) oxidecomputer/console#1952 * [903b8f6a](https://github.com/oxidecomputer/console/commit/903b8f6a) tweak api-diff: fix type error on first arg, use dax's new built-in pipe * [a4e15cdd](https://github.com/oxidecomputer/console/commit/a4e15cdd) oxidecomputer/console#1950 * [32037d40](https://github.com/oxidecomputer/console/commit/32037d40) oxidecomputer/console#1949 * [f02678b0](https://github.com/oxidecomputer/console/commit/f02678b0) vite 5.1 * [1936e0d8](https://github.com/oxidecomputer/console/commit/1936e0d8) oxidecomputer/console#1948 * [cfda1636](https://github.com/oxidecomputer/console/commit/cfda1636) forgot to bump mockServiceWorker.js (no actual changes) * [4792105e](https://github.com/oxidecomputer/console/commit/4792105e) oxidecomputer/console#1943 * [a26db9ea](https://github.com/oxidecomputer/console/commit/a26db9ea) upgrade date-fns thru major version, update 
calls accordingly * [3dd635a6](https://github.com/oxidecomputer/console/commit/3dd635a6) oxidecomputer/console#1933 * [6c8f7a9c](https://github.com/oxidecomputer/console/commit/6c8f7a9c) upgrade filesize through a major version * [8f641b97](https://github.com/oxidecomputer/console/commit/8f641b97) remove blathering in readme about node 16, which is EOL * [e9d157a1](https://github.com/oxidecomputer/console/commit/e9d157a1) do ladle tooβ€”oh my god why does it have 3000 lines in the lockfile * [e1cdcc13](https://github.com/oxidecomputer/console/commit/e1cdcc13) oxidecomputer/console#1941 * [76877ffb](https://github.com/oxidecomputer/console/commit/76877ffb) oxidecomputer/console#1938 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 7f80546553..1b2cc62547 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="e5a1f804faa913de3be5b4cddac2011247a99774" -SHA2="54ff1026062fc1a3f0de86aa558d051b8ad6248d458c1767b9e926f2606e75f5" +COMMIT="80f116734fdf8ef4f3b5e49ed39e49339460407c" +SHA2="458727fe747797860d02c9f75e62d308586c235d4708c549d5b70a40cce4d2d1" From d7db26d9c6f09fec3fec6991e4a9cd7f90128465 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 26 Feb 2024 11:54:31 -0800 Subject: [PATCH 16/18] trigger inventory collection after blueprint execution (#5130) --- .../src/app/background/blueprint_execution.rs | 11 +++- nexus/src/app/background/init.rs | 56 +++++++++++-------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 373f023288..4ba60ab566 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -20,6 +20,7 @@ pub struct BlueprintExecutor { datastore: Arc, rx_blueprint: watch::Receiver>>, nexus_label: String, + tx: watch::Sender, } impl BlueprintExecutor { @@ -30,7 +31,12 @@ impl BlueprintExecutor { >, nexus_label: String, ) -> BlueprintExecutor { - BlueprintExecutor { datastore, rx_blueprint, nexus_label } + let (tx, _) = watch::channel(0); + BlueprintExecutor { datastore, rx_blueprint, nexus_label, tx } + } + + pub fn watcher(&self) -> watch::Receiver { + self.tx.subscribe() } } @@ -71,6 +77,9 @@ impl BackgroundTask for BlueprintExecutor { ) .await; + // Trigger anybody waiting for this to finish. 
+ self.tx.send_modify(|count| *count = *count + 1); + // Return the result as a `serde_json::Value` match result { Ok(()) => json!({}), diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 846051a068..9ba30bab64 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -170,30 +170,6 @@ impl BackgroundTasks { ) }; - // Background task: inventory collector - let task_inventory_collection = { - let collector = inventory_collection::InventoryCollector::new( - datastore.clone(), - resolver, - &nexus_id.to_string(), - config.inventory.nkeep, - config.inventory.disable, - ); - let task = driver.register( - String::from("inventory_collection"), - String::from( - "collects hardware and software inventory data from the \ - whole system", - ), - config.inventory.period_secs, - Box::new(collector), - opctx.child(BTreeMap::new()), - vec![], - ); - - task - }; - // Background task: phantom disk detection let task_phantom_disks = { let detector = @@ -230,6 +206,7 @@ impl BackgroundTasks { rx_blueprint.clone(), nexus_id.to_string(), ); + let rx_blueprint_exec = blueprint_executor.watcher(); let task_blueprint_executor = driver.register( String::from("blueprint_executor"), String::from("Executes the target blueprint"), @@ -239,6 +216,37 @@ impl BackgroundTasks { vec![Box::new(rx_blueprint)], ); + // Background task: inventory collector + // + // This currently depends on the "output" of the blueprint executor in + // order to automatically trigger inventory collection whenever the + // blueprint executor runs. In the limit, this could become a problem + // because the blueprint executor might also depend indirectly on the + // inventory collector. In that case, we may need to do something more + // complicated. But for now, this works. + let task_inventory_collection = { + let collector = inventory_collection::InventoryCollector::new( + datastore.clone(), + resolver, + &nexus_id.to_string(), + config.inventory.nkeep, + config.inventory.disable, + ); + let task = driver.register( + String::from("inventory_collection"), + String::from( + "collects hardware and software inventory data from the \ + whole system", + ), + config.inventory.period_secs, + Box::new(collector), + opctx.child(BTreeMap::new()), + vec![Box::new(rx_blueprint_exec)], + ); + + task + }; + let task_service_zone_nat_tracker = { driver.register( "service_zone_nat_tracker".to_string(), From d514878f417a94247791bd5564fbaafa9b4170a0 Mon Sep 17 00:00:00 2001 From: Charlie Park Date: Mon, 26 Feb 2024 14:32:16 -0800 Subject: [PATCH 17/18] Update Floating IP field name from address to ip (#5144) Fixes #5046 Our API for Floating IPs currently has an `address` field, though in a few other places we use `ip`. This PR just makes the API more consistent across the API (and DB). 
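For illustration, constructing the create parameters after this rename looks roughly like the following. This is a sketch based only on the fields visible in the diff below (`identity`, `ip`, `pool`); the helper name and any fields of `FloatingIpCreate` beyond those shown are assumptions.

```rust
use nexus_types::external_api::params;
use omicron_common::api::external::IdentityMetadataCreateParams;

// Hypothetical helper showing the renamed field in use.
fn demo_fip_create() -> params::FloatingIpCreate {
    params::FloatingIpCreate {
        identity: IdentityMetadataCreateParams {
            name: "my-fip".parse().unwrap(),
            description: String::from("a floating ip"),
        },
        // Renamed from `address` to `ip`; `None` would let Nexus choose an
        // address from the resolved pool.
        ip: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()),
        // `None` selects the default IP pool.
        pool: None,
    }
}
```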
--- nexus/db-queries/src/db/datastore/external_ip.rs | 2 +- nexus/test-utils/src/resource_helpers.rs | 4 ++-- nexus/tests/integration_tests/endpoints.rs | 2 +- nexus/tests/integration_tests/external_ips.rs | 8 ++++---- nexus/types/src/external_api/params.rs | 2 +- openapi/nexus.json | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 24439aa3a0..d15e1b7ca8 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -256,7 +256,7 @@ impl DataStore { let pool_id = pool.id(); - let data = if let Some(ip) = params.address { + let data = if let Some(ip) = params.ip { IncompleteExternalIp::for_floating_explicit( ip_id, &Name(params.identity.name), diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 764332c5bc..25e58d093d 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -267,7 +267,7 @@ pub async fn create_floating_ip( client: &ClientTestContext, fip_name: &str, project: &str, - address: Option, + ip: Option, parent_pool_name: Option<&str>, ) -> FloatingIp { object_create( @@ -278,7 +278,7 @@ pub async fn create_floating_ip( name: fip_name.parse().unwrap(), description: String::from("a floating ip"), }, - address, + ip, pool: parent_pool_name.map(|v| NameOrId::Name(v.parse().unwrap())), }, ) diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 81f2a02b31..c5c69df232 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -768,7 +768,7 @@ pub static DEMO_FLOAT_IP_CREATE: Lazy = name: DEMO_FLOAT_IP_NAME.clone(), description: String::from("a new IP pool"), }, - address: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()), + ip: Some(std::net::Ipv4Addr::new(10, 0, 0, 141).into()), pool: None, }); diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index 57f813d505..ee59c6a034 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -189,7 +189,7 @@ async fn test_floating_ip_create(cptestctx: &ControlPlaneTestContext) { name: fip_name.parse().unwrap(), description: String::from("a floating ip"), }, - address: None, + ip: None, pool: Some(NameOrId::Name("other-pool".parse().unwrap())), }; let url = format!("/v1/floating-ips?project={}", project.identity.name); @@ -259,7 +259,7 @@ async fn test_floating_ip_create_fails_in_other_silo_pool( name: fip_name.parse().unwrap(), description: String::from("a floating ip"), }, - address: None, + ip: None, pool: Some(NameOrId::Name("external-silo-pool".parse().unwrap())), }; @@ -316,7 +316,7 @@ async fn test_floating_ip_create_ip_in_use( name: FIP_NAMES[1].parse().unwrap(), description: "another fip".into(), }, - address: Some(contested_ip), + ip: Some(contested_ip), pool: None, })) .expect_status(Some(StatusCode::BAD_REQUEST)), @@ -364,7 +364,7 @@ async fn test_floating_ip_create_name_in_use( name: contested_name.parse().unwrap(), description: "another fip".into(), }, - address: None, + ip: None, pool: None, })) .expect_status(Some(StatusCode::BAD_REQUEST)), diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 07eeb9b679..567b1ff4ad 100644 --- a/nexus/types/src/external_api/params.rs +++ 
b/nexus/types/src/external_api/params.rs @@ -883,7 +883,7 @@ pub struct FloatingIpCreate { /// An IP address to reserve for use as a floating IP. This field is /// optional: when not set, an address will be automatically chosen from /// `pool`. If set, then the IP must be available in the resolved `pool`. - pub address: Option, + pub ip: Option, /// The parent IP pool that a floating IP is pulled from. If unset, the /// default pool is selected. diff --git a/openapi/nexus.json b/openapi/nexus.json index cc8edec51d..b0aa84d67a 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -11864,15 +11864,15 @@ "description": "Parameters for creating a new floating IP address for instances.", "type": "object", "properties": { - "address": { + "description": { + "type": "string" + }, + "ip": { "nullable": true, "description": "An IP address to reserve for use as a floating IP. This field is optional: when not set, an address will be automatically chosen from `pool`. If set, then the IP must be available in the resolved `pool`.", "type": "string", "format": "ip" }, - "description": { - "type": "string" - }, "name": { "$ref": "#/components/schemas/Name" }, From 58f7129d09ba0bc99c213806aa6fa8c8965f2ea7 Mon Sep 17 00:00:00 2001 From: Charlie Park Date: Tue, 27 Feb 2024 11:24:01 -0800 Subject: [PATCH 18/18] Bump web console (#5148) Updating console for #5144 https://github.com/oxidecomputer/console/compare/80f11673...e991ec5c --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 1b2cc62547..ee7f619b53 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="80f116734fdf8ef4f3b5e49ed39e49339460407c" -SHA2="458727fe747797860d02c9f75e62d308586c235d4708c549d5b70a40cce4d2d1" +COMMIT="e991ec5cf148d83f135b6a692dca9b8df05acef0" +SHA2="18d53e4485e63d9e2273a889e70785648052e8a231df89f3bcbc2ed01a0eeeb1"
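Regarding the background-task wiring added in "trigger inventory collection after blueprint execution" (#5130) above: the pattern is that the upstream task owns a `tokio::sync::watch` sender, bumps a generation counter whenever it finishes a pass, and the downstream task is registered with the corresponding receiver as an activation dependency. A self-contained sketch of that pattern outside the Nexus `Driver`/`BackgroundTask` machinery follows; everything here is illustrative and assumes a plain tokio runtime rather than the real task driver.

```rust
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    // Upstream task ("blueprint executor"): owns the sender and bumps a
    // generation counter each time it completes a pass.
    let (tx, mut rx) = watch::channel(0usize);

    // Downstream task ("inventory collector"): wakes up whenever the value
    // changes. Watch channels coalesce rapid updates, so the receiver only
    // sees the latest generation, which is fine for a pure activation trigger.
    let downstream = tokio::spawn(async move {
        while rx.changed().await.is_ok() {
            println!("upstream finished pass {}; re-collecting", *rx.borrow());
        }
    });

    for _ in 0..3 {
        // ... one "blueprint execution" pass would happen here ...
        tx.send_modify(|gen| *gen += 1);
        tokio::task::yield_now().await;
    }

    // Dropping the sender ends the downstream loop (`changed()` errors out).
    drop(tx);
    downstream.await.unwrap();
}
```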