diff --git a/.config/nextest.toml b/.config/nextest.toml index b1699e6717..cc21a442b3 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.77", recommended = "0.9.78" } +nextest-version = { required = "0.9.77", recommended = "0.9.86" } experimental = ["setup-scripts"] diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 28c1dbc0e6..369274eb7a 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -18,7 +18,7 @@ target_os=$1 # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) -NEXTEST_VERSION='0.9.78' +NEXTEST_VERSION='0.9.86' cargo --version rustc --version @@ -69,7 +69,8 @@ banner ls-apis source ./tools/include/force-git-over-https.sh; ptime -m cargo xtask ls-apis apis && ptime -m cargo xtask ls-apis deployment-units && - ptime -m cargo xtask ls-apis servers + ptime -m cargo xtask ls-apis servers && + ptime -m cargo xtask ls-apis check ) # diff --git a/Cargo.lock b/Cargo.lock index 386149690d..6e10d4c12f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,7 +718,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -2321,9 +2321,9 @@ dependencies = [ [[package]] name = "diesel-dtrace" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5130181059723aae1cfdb678d3698052a225aaadb18000f77fec4200047acc" +checksum = "2750c8bd7a42381620b57f370ca5f757a71d554814e02a43c1bf54ae2656e01d" dependencies = [ "diesel", "serde", @@ -3692,9 +3692,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" 
+version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -4162,7 +4162,7 @@ dependencies = [ "http", "hyper", "hyper-util", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -4464,6 +4464,7 @@ dependencies = [ "futures", "http", "ipnetwork", + "itertools 0.13.0", "libc", "macaddr", "mockall", @@ -4524,7 +4525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "serde", ] @@ -5103,7 +5104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -5339,7 +5340,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -6070,6 +6071,7 @@ dependencies = [ "indexmap 2.6.0", "internal-dns-resolver", "ipnet", + "itertools 0.13.0", "maplit", "nexus-config", "nexus-inventory", @@ -7268,6 +7270,7 @@ dependencies = [ "sled-agent-api", "sled-agent-client", "sled-agent-types", + "sled-diagnostics", "sled-hardware", "sled-hardware-types", "sled-storage", @@ -7398,7 +7401,8 @@ dependencies = [ "generic-array", "getrandom", "group", - "hashbrown 0.15.0", + "hashbrown 0.15.1", + "heck 0.4.1", "hex", "hickory-proto", "hmac", @@ -7409,7 +7413,6 @@ dependencies = [ "indicatif", "inout", "itertools 0.10.5", - "itertools 0.12.1", "lalrpop-util", "lazy_static", "libc", @@ 
-7444,7 +7447,7 @@ dependencies = [ "reqwest", "rsa", "rustix", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-webpki 0.102.8", "schemars", "scopeguard", @@ -9154,7 +9157,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustls 0.23.19", "socket2", "thiserror 1.0.69", "tokio", @@ -9171,7 +9174,7 @@ dependencies = [ "rand", "ring 0.17.8", "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustls 0.23.19", "slab", "thiserror 1.0.69", "tinyvec", @@ -9585,7 +9588,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", @@ -9972,9 +9975,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "aws-lc-rs", "log", @@ -10019,9 +10022,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -10738,6 +10741,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-diagnostics" +version = "0.1.0" +dependencies = [ + "futures", + "omicron-workspace-hack", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "sled-hardware" version = "0.1.0" @@ -10809,6 +10822,7 @@ dependencies = [ "slog", "thiserror 1.0.69", "tokio", + "tokio-stream", "uuid", ] @@ -11011,7 +11025,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + 
"heck 0.4.1", "proc-macro2", "quote", "syn 2.0.87", @@ -11088,7 +11102,7 @@ dependencies = [ "ed25519-dalek", "libipcc", "pem-rfc7468", - "rustls 0.23.14", + "rustls 0.23.19", "secrecy", "serde", "sha2", @@ -11941,7 +11955,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pki-types", "tokio", ] @@ -12106,7 +12120,7 @@ dependencies = [ "pem", "percent-encoding", "reqwest", - "rustls 0.23.14", + "rustls 0.23.19", "serde", "serde_json", "serde_plain", diff --git a/Cargo.toml b/Cargo.toml index ec862fb722..bb9ff0e80f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -240,6 +241,7 @@ default-members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -361,7 +363,7 @@ derive_more = "0.99.18" derive-where = "1.2.7" # Having the i-implement-... 
feature here makes diesel go away from the workspace-hack diesel = { version = "2.2.4", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } -diesel-dtrace = "0.4.0" +diesel-dtrace = "0.4.2" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } @@ -596,6 +598,7 @@ sled = "=0.34.7" sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } +sled-diagnostics = { path = "sled-diagnostics" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } diff --git a/clippy.toml b/clippy.toml index 31e28d5911..677fb67ade 100644 --- a/clippy.toml +++ b/clippy.toml @@ -15,5 +15,5 @@ disallowed-methods = [ # and can fail spuriously. # Instead, the "transaction_retry_wrapper" should be preferred, as it # automatically retries transactions experiencing contention. - { path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. Feel free to override this for tests and nested transactions." }, + { path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. 
For tests and nested transactions, use transaction_non_retry_wrapper to at least get dtrace probes" }, ] diff --git a/dev-tools/ls-apis/README.adoc b/dev-tools/ls-apis/README.adoc index 00c229bff2..6b1aa1cdea 100644 --- a/dev-tools/ls-apis/README.adoc +++ b/dev-tools/ls-apis/README.adoc @@ -104,6 +104,30 @@ You can generate a PNG image of the graph like this: $ dot -T png -o deployment-units.png deployment-units.dot ``` +=== Checking the upgrade DAG + +As of this writing: + +* All Dropshot/Progenitor APIs in the system are identified with this tool (as far as we know). +* Every API is annotated with metadata in `api-manifest.toml`. +* The API metadata specifies whether versioning for the API is managed on the server side exclusively or using client-side versioning as well. We've made this choice for every existing API. +* There are no cycles in the server-side-managed-API dependency graph. + +This tool verifies these properties. If you add a new API or a new API dependency without adding valid metadata, or if that metadata breaks these constraints, you will have to fix this up before you can land the change to Omicron. + +You can verify the versioning metadata using: + +``` +$ cargo xtask ls-apis check +``` + +You might see any of the following errors: + +* `+missing field `versioned_how`+`: this probably means that you added an API to the metadata but neglected to specify the `versioned_how` field. This must be either `client` or `server`. In general, prefer `server`. The tool should tell you if you try to use `server` but that can't work. If in doubt, ask the update team. +* `at least one API has unknown version strategy (see above)`: this probably means you added an API with `versioned_how = "unknown"`. (The specific API(s) with this problem are listed immediately above this error.) You need to instead decide if it should be client-side or server-side versioned. If in doubt, ask the update team. 
+* `+missing field `versioned_how_reason`+`: this probably means that you added an API with `versioned_how = "client"`, but did not add a `versioned_how_reason`. This field is required. It's a free-text field that explains why we couldn't use server-side versioning for this API. +* `graph of server-managed components has a cycle (includes node: "omicron-nexus")`: this probably means that you added an API with `versioned_how = "server"`, but that created a circular dependency with other server-side-versioned components. You'll need to break the cycle by changing one of these components (probably your new one) to be client-managed instead. +* `API identified by client package "nexus-client" (Nexus Internal API) is the "client" in a "non-dag" dependency rule, but its "versioned_how" is not "client"` (the specific APIs may be different): this (unlikely) condition means just what it says: the API manifest file contains a dependency rule saying that some API is *not* part of the update DAG, but that API is not marked accordingly. See the documentation below on filter rules. == Details diff --git a/dev-tools/ls-apis/api-manifest.toml b/dev-tools/ls-apis/api-manifest.toml index 766708f478..75f4777a48 100644 --- a/dev-tools/ls-apis/api-manifest.toml +++ b/dev-tools/ls-apis/api-manifest.toml @@ -132,6 +132,28 @@ packages = [ "oximeter-collector" ] # # `server_package_name`: the package that contains the Dropshot API definition. # +# `versioned_how`: one of: +# +# "server": this component uses server-side versioning only, meaning that the +# update system can ensure that servers are always updated before +# clients. +# +# "client": this component uses client-side versioning, meaning that the +# update system cannot ensure that servers are always updated +# before clients, so clients need to deal with multiple different +# server versions. +# +# You must also specify `versioned_how_reason` if you use "client". +# +# "unknown": this has not been determined. 
This will break tests. But the +# tooling supports this value so that during development you can +# relax these constraints. +# +# [`versioned_how_reason`]: free text string explaining why `versioned_how` must +# be "client" for this API. This is printed in documentation and command output +# and serves as documentation for developers to understand why we made this +# choice. This should be present only if `versioned_how = "client"`. +# # [`notes`]: optional free-form human-readable summary documentation about this # API ################################################################################ @@ -140,11 +162,14 @@ packages = [ "oximeter-collector" ] client_package_name = "bootstrap-agent-client" label = "Bootstrap Agent" server_package_name = "bootstrap-agent-api" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" [[apis]] client_package_name = "clickhouse-admin-keeper-client" label = "Clickhouse Cluster Admin for Keepers" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside multi-node Clickhouse keeper zones that's \ responsible for local configuration and monitoring. @@ -154,6 +179,7 @@ responsible for local configuration and monitoring. client_package_name = "clickhouse-admin-server-client" label = "Clickhouse Cluster Admin for Servers" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside multi-node Clickhouse server zones that's \ responsible for local configuration and monitoring. @@ -163,6 +189,7 @@ responsible for local configuration and monitoring. client_package_name = "clickhouse-admin-single-client" label = "Clickhouse Single-Node Cluster Admin" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside single-node Clickhouse server zones that's \ responsible for local configuration and monitoring. 
@@ -172,6 +199,7 @@ responsible for local configuration and monitoring. client_package_name = "cockroach-admin-client" label = "CockroachDB Cluster Admin" server_package_name = "cockroach-admin-api" +versioned_how = "server" notes = """ This is the server running inside CockroachDB zones that performs \ configuration and monitoring that requires the `cockroach` CLI. @@ -181,11 +209,14 @@ configuration and monitoring that requires the `cockroach` CLI. client_package_name = "crucible-agent-client" label = "Crucible Agent" server_package_name = "crucible-agent" +versioned_how = "server" [[apis]] client_package_name = "repair-client" label = "Crucible Repair" server_package_name = "crucible-downstairs" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" notes = """ The repair service offered by a crucible-downstairs supports both repairing \ one downstairs from another, and making a clone of a read-only downstairs \ @@ -196,11 +227,13 @@ when creating a new region in the crucible agent. client_package_name = "crucible-pantry-client" label = "Crucible Pantry" server_package_name = "crucible-pantry" +versioned_how = "server" [[apis]] client_package_name = "ddm-admin-client" label = "Maghemite DDM Admin" server_package_name = "ddmd" +versioned_how = "server" notes = """ The `ddmd` server runs in each sled GZ and each switch zone. These daemons \ provide an interface for advertising network prefixes, and observing what \ @@ -216,11 +249,17 @@ observability APIs in the future. client_package_name = "dns-service-client" label = "DNS Server" server_package_name = "dns-server-api" +versioned_how = "server" [[apis]] client_package_name = "dpd-client" label = "Dendrite DPD" server_package_name = "dpd" +# TODO We might need to pick this apart more carefully. DPD invokes and is +# invoked by several different services. It's possible that some of them can +# treat it as server-managed. 
+versioned_how = "client" +versioned_how_reason = "Sled Agent calling DPD creates a circular dependency" notes = """ Dendrite's data plane daemon (`dpd`) is the interface to configure and manage \ the rack switches. It's consumed by sled-agent to get the rack off the \ @@ -235,17 +274,29 @@ repo is not currently open source. client_package_name = "gateway-client" label = "Management Gateway Service" server_package_name = "gateway-api" +versioned_how = "server" notes = "Wicketd is deployed in a unit with MGS so we can ignore that one." [[apis]] client_package_name = "installinator-client" label = "Wicketd Installinator" server_package_name = "installinator-api" +# The installinator-client is used only by Installinator. This is part of the +# recovery OS image. Today, this is not really "shipped" in the traditional +# sense. The only way it gets used today is by an operator uploading it to +# Wicket and then performing a MUPdate. In this sense, we do not control the +# client, so we mark this client-managed -- it's up to the operator to make sure +# they're using a TUF repo whose recovery image contains an Installinator that's +# compatible with the API served by Wicket on the deployed system. In practice, +# we're almost certainly just going to avoid changing this API. +versioned_how = "client" +versioned_how_reason = "client is provided implicitly by the operator" [[apis]] client_package_name = "mg-admin-client" label = "Maghemite MG Admin" server_package_name = "mgd" +versioned_how = "server" notes = """ The `mgd` daemon runs in each switch zone. This daemon is responsible for all \ external route management for a switch. It provides interfaces for static \ @@ -258,17 +309,25 @@ bring the rack up. client_package_name = "nexus-client" label = "Nexus Internal API" server_package_name = "nexus-internal-api" +# nexus-client has to be client-versioned today because it's got a cyclic +# dependency with sled-agent-client, which is server-versioned. 
+versioned_how = "client" +versioned_how_reason = "Circular dependencies between Nexus and other services" [[apis]] client_package_name = "oxide-client" label = "External API" server_package_name = "nexus-external-api" +# The versioning strategy for the external API is outside the scope of this +# tool. It doesn't matter what we put here. +versioned_how = "server" notes = "Special case, since we don't fully control all clients" [[apis]] client_package_name = "oximeter-client" label = "Oximeter" server_package_name = "oximeter-api" +versioned_how = "server" notes = """ Shared types for this interface are in `omicron-common`. The producer makes \ requests to Nexus, and receives them from `oximeter-collector`. \ @@ -281,6 +340,7 @@ and makes the periodic renewal requests to `oximeter-collector`. client_package_name = "propolis-client" label = "Propolis" server_package_name = "propolis-server" +versioned_how = "server" notes = """ Sled Agent is deployed in a unit with Propolis so we can ignore that one. """ @@ -289,16 +349,20 @@ Sled Agent is deployed in a unit with Propolis so we can ignore that one. client_package_name = "sled-agent-client" label = "Sled Agent" server_package_name = "sled-agent-api" +versioned_how = "server" [[apis]] client_package_name = "repo-depot-client" label = "Repo Depot API" server_package_name = "repo-depot-api" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" [[apis]] client_package_name = "wicketd-client" label = "Wicketd" server_package_name = "wicketd-api" +versioned_how = "server" notes = """ wicketd-client is only used by wicket, which is deployed in a unit with wicketd. """ @@ -307,6 +371,7 @@ wicketd-client is only used by wicket, which is deployed in a unit with wicketd. 
client_package_name = "crucible-control-client" label = "Crucible Control (for testing only)" server_package_name = "crucible" +versioned_how = "server" notes = """ Exposed by Crucible upstairs for debugging via the `cmon` debugging tool. """ @@ -315,6 +380,7 @@ Exposed by Crucible upstairs for debugging via the `cmon` debugging tool. client_package_name = "dsc-client" label = "Downstairs Controller (debugging only)" server_package_name = "dsc" +versioned_how = "server" dev_only = true notes = """ `dsc` is a control program for spinning up and controlling instances of Crucible @@ -400,6 +466,16 @@ DNS server depends on itself only to provide TransientServer, which is not a deployed component. """ +[[dependency_filter_rules]] +ancestor = "omicron-sled-agent" +client = "sled-agent-client" +evaluation = "not-deployed" +note = """ +Sled Agent uses a Sled Agent client in two ways: RSS, which we don't care about +for the purpose of upgrade, and in the `zone-bundle` binary, which is not +deployed. +""" + # # "Bogus" dependencies. See above for details. # diff --git a/dev-tools/ls-apis/src/api_metadata.rs b/dev-tools/ls-apis/src/api_metadata.rs index 311e95549f..01f133debe 100644 --- a/dev-tools/ls-apis/src/api_metadata.rs +++ b/dev-tools/ls-apis/src/api_metadata.rs @@ -108,6 +108,20 @@ impl AllApiMetadata { Ok(which_rules[0].evaluation) } + + /// Returns the list of APIs that have non-DAG dependency rules + pub(crate) fn non_dag_apis(&self) -> impl Iterator { + self.dependency_rules.iter().filter_map(|(client_pkgname, rules)| { + rules.iter().any(|r| r.evaluation == Evaluation::NonDag).then( + || { + // unwrap(): we previously verified that the "client" for + // all dependency rules corresponds to an API that we have + // metadata for. 
+ self.apis.get(client_pkgname).unwrap() + }, + ) + }) + } } /// Format of the `api-manifest.toml` file @@ -186,6 +200,23 @@ impl TryFrom for AllApiMetadata { } } +/// Describes how an API in the system manages drift between client and server +#[derive(Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "versioned_how", content = "versioned_how_reason")] +pub enum VersionedHow { + /// We have not yet determined how this API will be versioned. + Unknown, + + /// This API will be versioned solely on the server. (The update system + /// will ensure that servers are always updated before clients.) + Server, + + /// This API will be versioned on the client. (The update system cannot + /// ensure that servers are always updated before clients.) + Client(String), +} + /// Describes one API in the system #[derive(Deserialize)] pub struct ApiMetadata { @@ -199,6 +230,9 @@ pub struct ApiMetadata { pub server_package_name: ServerPackageName, /// human-readable notes about this API pub notes: Option, + /// describes how we've decided this API will be versioned + #[serde(default, flatten)] + pub versioned_how: VersionedHow, /// If `dev_only` is true, then this API's server is not deployed in a /// production system. It's only used in development environments. The /// default is that APIs *are* deployed. diff --git a/dev-tools/ls-apis/src/bin/ls-apis.rs b/dev-tools/ls-apis/src/bin/ls-apis.rs index 9d2a5b5349..ea0ae48cd4 100644 --- a/dev-tools/ls-apis/src/bin/ls-apis.rs +++ b/dev-tools/ls-apis/src/bin/ls-apis.rs @@ -4,12 +4,12 @@ //! 
Show information about Progenitor-based APIs -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; use clap::{Args, Parser, Subcommand}; use omicron_ls_apis::{ AllApiMetadata, ApiDependencyFilter, LoadArgs, ServerComponentName, - SystemApis, + SystemApis, VersionedHow, }; use parse_display::{Display, FromStr}; @@ -34,6 +34,8 @@ enum Cmds { Adoc, /// print out each API, what exports it, and what consumes it Apis(ShowDepsArgs), + /// check the update DAG and propose changes + Check, /// print out APIs exported and consumed by each deployment unit DeploymentUnits(DotArgs), /// print out APIs exported and consumed, by server component @@ -81,6 +83,7 @@ fn main() -> Result<()> { match cli_args.cmd { Cmds::Adoc => run_adoc(&apis), Cmds::Apis(args) => run_apis(&apis, args), + Cmds::Check => run_check(&apis), Cmds::DeploymentUnits(args) => run_deployment_units(&apis, args), Cmds::Servers(args) => run_servers(&apis, args), } @@ -93,12 +96,13 @@ fn run_adoc(apis: &SystemApis) -> Result<()> { println!( ".List of OpenAPI/Progenitor-based interfaces for online upgrade." 
); - println!(r#"[cols="1h,2,2,2a,2", options="header"]"#); + println!(r#"[cols="1h,2,2,2a,2,2", options="header"]"#); println!("|==="); println!("|API"); println!("|Server location (`repo:path`)"); println!("|Client packages (`repo:path`)"); println!("|Consumers (`repo:path`; excluding omdb and tests)"); + println!("|Versioning"); println!("|Notes"); println!(""); @@ -122,6 +126,14 @@ fn run_adoc(apis: &SystemApis) -> Result<()> { println!("* {}", apis.adoc_label(c)?); } + match &api.versioned_how { + VersionedHow::Unknown => println!("|TBD"), + VersionedHow::Server => println!("|Server-side only"), + VersionedHow::Client(reason) => { + println!("|Client-side ({})", reason); + } + }; + print!("|{}", api.notes.as_deref().unwrap_or("-\n")); println!(""); } @@ -261,3 +273,84 @@ impl TryFrom<&LsApis> for LoadArgs { Ok(LoadArgs { api_manifest_path }) } } + +fn run_check(apis: &SystemApis) -> Result<()> { + let dag_check = apis.dag_check()?; + + for (pkg, reasons) in dag_check.proposed_server_managed() { + println!( + "proposal: make {:?} server-managed: {}", + pkg, + reasons.join(", ") + ); + } + + for (pkg, reasons) in dag_check.proposed_client_managed() { + println!( + "proposal: make {:?} client-managed: {}", + pkg, + reasons.join(", ") + ); + } + + for (pkg1, pkg2) in dag_check.proposed_upick() { + println!( + "proposal: choose either {:?} or {:?} to be client-managed \ + (they directly depend on each other)", + pkg1, pkg2, + ); + } + + println!("\n"); + println!("Server-managed APIs:\n"); + for api in apis + .api_metadata() + .apis() + .filter(|f| f.deployed() && f.versioned_how == VersionedHow::Server) + { + println!( + " {} ({}, exposed by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + } + + println!("\n"); + println!("Client-managed API:\n"); + for api in apis.api_metadata().apis().filter(|f| f.deployed()) { + if let VersionedHow::Client(reason) = &api.versioned_how { + println!( + " {} ({}, exposed 
by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + println!(" reason: {}", reason); + } + } + + println!("\n"); + print!("APIs with unknown version management:"); + let unknown: Vec<_> = apis + .api_metadata() + .apis() + .filter(|f| f.versioned_how == VersionedHow::Unknown) + .collect(); + if unknown.is_empty() { + println!(" none"); + } else { + println!("\n"); + for api in unknown { + println!( + " {} ({}, exposed by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + } + bail!("at least one API has unknown version strategy (see above)"); + } + + Ok(()) +} diff --git a/dev-tools/ls-apis/src/lib.rs b/dev-tools/ls-apis/src/lib.rs index 8d4f760c8b..c27606c25f 100644 --- a/dev-tools/ls-apis/src/lib.rs +++ b/dev-tools/ls-apis/src/lib.rs @@ -10,6 +10,7 @@ mod system_apis; mod workspaces; pub use api_metadata::AllApiMetadata; +pub use api_metadata::VersionedHow; pub use system_apis::ApiDependencyFilter; pub use system_apis::SystemApis; diff --git a/dev-tools/ls-apis/src/system_apis.rs b/dev-tools/ls-apis/src/system_apis.rs index 5d4a84160a..ef6dae0e43 100644 --- a/dev-tools/ls-apis/src/system_apis.rs +++ b/dev-tools/ls-apis/src/system_apis.rs @@ -8,6 +8,7 @@ use crate::api_metadata::AllApiMetadata; use crate::api_metadata::ApiMetadata; use crate::api_metadata::Evaluation; +use crate::api_metadata::VersionedHow; use crate::cargo::DepPath; use crate::parse_toml_file; use crate::workspaces::Workspaces; @@ -16,11 +17,13 @@ use crate::DeploymentUnitName; use crate::LoadArgs; use crate::ServerComponentName; use crate::ServerPackageName; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::Result; +use anyhow::{anyhow, bail, Context}; use camino::Utf8PathBuf; use cargo_metadata::Package; use parse_display::{Display, FromStr}; use petgraph::dot::Dot; +use petgraph::graph::NodeIndex; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -369,12 
+372,27 @@ impl SystemApis { &self, filter: ApiDependencyFilter, ) -> Result { + let (graph, _nodes) = self.make_component_graph(filter, false)?; + Ok(Dot::new(&graph).to_string()) + } + + // The complex type below is only used in this one place: the return value + // of an internal helper function. A type alias doesn't seem better. + #[allow(clippy::type_complexity)] + fn make_component_graph( + &self, + dependency_filter: ApiDependencyFilter, + versioned_on_server_only: bool, + ) -> Result<( + petgraph::graph::Graph<&ServerComponentName, &ClientPackageName>, + BTreeMap<&ServerComponentName, NodeIndex>, + )> { let mut graph = petgraph::graph::Graph::new(); let nodes: BTreeMap<_, _> = self .server_component_units .keys() .map(|server_component| { - (server_component.clone(), graph.add_node(server_component)) + (server_component, graph.add_node(server_component)) }) .collect(); @@ -383,16 +401,350 @@ impl SystemApis { for server_component in self.apis_consumed.keys() { // unwrap(): we created a node for each server component above. let my_node = nodes.get(server_component).unwrap(); - let consumed_apis = - self.component_apis_consumed(server_component, filter)?; + let consumed_apis = self + .component_apis_consumed(server_component, dependency_filter)?; for (client_pkg, _) in consumed_apis { + if versioned_on_server_only { + let api = self + .api_metadata + .client_pkgname_lookup(client_pkg) + .unwrap(); + if api.versioned_how != VersionedHow::Server { + continue; + } + } + let other_component = self.api_producer(client_pkg).unwrap(); let other_node = nodes.get(other_component).unwrap(); - graph.add_edge(*my_node, *other_node, client_pkg.clone()); + graph.add_edge(*my_node, *other_node, client_pkg); } } - Ok(Dot::new(&graph).to_string()) + Ok((graph, nodes)) + } + + /// Verifies various important properties about the assignment of which APIs + /// are server-managed vs. client-managed. 
+ /// + /// Returns a structure with proposals for how to assign APIs that are + /// currently unassigned. + pub fn dag_check(&self) -> Result> { + // In this function, we'll use the following ApiDependencyFilter a bunch + // when walking the component dependency graph. "Default" is the + // correct filter to use here. This excludes relationships that are + // totally bogus, only affect components that are never actually + // deployed, or are part of an edge that we've already determined will + // be "non-DAG". + // + // This last case might be a little confusing. The whole point of this + // function is to help developers figure out which edges should be part + // of the DAG or not. Why would we ignore edges based on whether + // they're already in the DAG or not? + // + // Recall that there are two ways that the metadata can specify that a + // particular API is "not part of the update DAG" (which is equivalent + // to client-side-managed): + // + // - a specific class of Cargo dependencies can be marked "non-DAG" via + // a dependency filter rule. The only case of this today is where we + // say that Cargo dependencies from "oximeter-producer" to + // "nexus-client" are "non-DAG". This means we promise to make the + // Nexus internal API client-managed (i.e., not part of the update + // DAG). We verify this promise below. + // - a specific API can be marked as server-managed (meaning it's part + // of the update DAG) or not. That's most of what this function deals + // with and proposes changes to. + // + // In the long term, it might be nice to combine these. But that's more + // work than it sounds like: we'd probably want to convert everything + // to filter rules, but that requires (tediously) writing out every + // single edge that we care about. An alternative would be to eliminate + // the non-DAG dependency filter rules and only use the property at the + // API level. 
However right now it seems quite possible that we do want + // this on a per-edge basis, rather than a per-API basis (i.e., there + // are some client-side-versioned APIs that have consumers that could + // treat them as server-side-versioned). + // + // Anyway, what we're talking about here is ignoring the first category + // of information and looking only at the second. This is *safe* (i.e., + // correct) because we verify below that the second category (the + // API-level `versioned_for` property) contains the same information + // provided by the first category (the non-DAG dependency filter rules). + // We *choose* to do this because it makes the heuristics below more + // useful. For example, excluding the non-DAG edges makes it easy for + // the heuristic below to tell that crucible-pantry ought to be + // server-side-managed because it has no (other) dependencies and so + // can't be part of a cycle. + let filter = ApiDependencyFilter::Default; + + // Construct a graph where: + // + // - nodes are all the API producer and consumer components + // - we only include edges *to* components that produce server-managed + // APIs + // + // Check if this DAG is cyclic. This can't be made to work. + let (graph, nodes) = self.make_component_graph(filter, true)?; + let reverse_nodes: BTreeMap<_, _> = + nodes.iter().map(|(s_c, node)| (node, s_c)).collect(); + if let Err(error) = petgraph::algo::toposort(&graph, None) { + bail!( + "graph of server-managed components has a cycle (includes \ + node: {:?})", + reverse_nodes.get(&error.node_id()).unwrap() + ); + } + + // Verify that the targets of any "non-dag" dependency filter rules are + // indeed not part of the server-side-versioned DAG. 
+ for api in self.api_metadata.non_dag_apis() { + if !matches!(api.versioned_how, VersionedHow::Client(..)) { + bail!( + "API identified by client package {:?} ({}) is the \ + \"client\" in a \"non-dag\" dependency rule, but its \ + \"versioned_how\" is not \"client\"", + api.client_package_name, + api.label, + ); + } + } + + // Use some heuristics to propose next steps. + // + // We're only looking for possible next steps here -- we don't have to + // programmatically figure out the whole graph. + + let mut dag_check = DagCheck::new(); + + for api in self.api_metadata.apis() { + if !api.deployed() { + if api.versioned_how == VersionedHow::Unknown { + dag_check.propose_server( + &api.client_package_name, + String::from("not produced by a deployed component"), + ); + } + continue; + } + let producer = self.api_producer(&api.client_package_name).unwrap(); + let apis_consumed: BTreeSet<_> = self + .component_apis_consumed(producer, filter)? + .map(|(client_pkgname, _dep_path)| client_pkgname) + .collect(); + let dependents: BTreeSet<_> = self + .api_consumers(&api.client_package_name, filter) + .unwrap() + .map(|(dependent, _dep_path)| dependent) + .collect(); + + if api.versioned_how == VersionedHow::Unknown { + // If we haven't determined how to manage versioning on this + // API, and it has no dependencies on "unknown" or + // client-managed APIs, then it can be made server-managed. + if !apis_consumed.iter().any(|client_pkgname| { + let api = self + .api_metadata + .client_pkgname_lookup(*client_pkgname) + .unwrap(); + api.versioned_how != VersionedHow::Server + }) { + dag_check.propose_server( + &api.client_package_name, + String::from( + "has no unknown or client-managed dependencies", + ), + ); + } else if apis_consumed.contains(&api.client_package_name) { + // If this thing depends on itself, it must be + // client-managed. 
+ dag_check.propose_client( + &api.client_package_name, + String::from("depends on itself"), + ); + } else if dependents.is_empty() { + // If something has no consumers in deployed components, it + // can be server-managed. (These are generally debug APIs.) + dag_check.propose_server( + &api.client_package_name, + String::from( + "has no consumers among deployed components", + ), + ); + } + + continue; + } + + let dependencies: BTreeMap<_, _> = apis_consumed + .iter() + .map(|dependency_clientpkg| { + ( + self.api_producer(dependency_clientpkg).unwrap(), + *dependency_clientpkg, + ) + }) + .collect(); + + // Look for one-step circular dependencies (i.e., API API A1 is + // produced by component C1, which uses API A2 produced by C2, which + // also uses A1). In such cases, either A1 or A2 must be + // client-managed (or both). + for other_pkgname in dependents { + if let Some(dependency_clientpkg) = + dependencies.get(other_pkgname) + { + let dependency_api = self + .api_metadata + .client_pkgname_lookup(*dependency_clientpkg) + .unwrap(); + + // If we're looking at a server-managed dependency and the + // other is unknown, then that one should be client-managed. + // + // Without loss of generality, we can ignore the reverse + // case (because we will catch that case when we're + // iterating over the dependency API). + if api.versioned_how == VersionedHow::Server + && dependency_api.versioned_how == VersionedHow::Unknown + { + dag_check.propose_client( + dependency_clientpkg, + format!( + "has cyclic dependency on {:?}, which is \ + server-managed", + api.client_package_name, + ), + ) + } + + // If both are Unknown, tell the user to pick one. 
+ if api.versioned_how == VersionedHow::Unknown + && dependency_api.versioned_how == VersionedHow::Unknown + { + dag_check.propose_upick( + &api.client_package_name, + dependency_clientpkg, + ); + } + } + } + } + + Ok(dag_check) + } +} + +/// Describes proposals for assigning how APIs should be versioned, based on +/// heuristics applied while checking the DAG +pub struct DagCheck<'a> { + /// set of APIs (identified by client package name) that we propose should + /// be server-managed, along with a list of reasons why we think so + proposed_server_managed: BTreeMap<&'a ClientPackageName, Vec>, + /// set of APIs (identified by client package name) that we propose should + /// be client-managed, along with a list of reasons why we think so + proposed_client_managed: BTreeMap<&'a ClientPackageName, Vec>, + /// set of pairs of APIs where we propose that the user must pick one + /// package in each pair to be client-managed (because the two packages have + /// a mutual dependency) + /// + /// The ordering in these pairs is not semantically significant. The + /// implementation will ensure that each pair of packages is represented at + /// most once in this structure. 
+ proposed_upick: + BTreeMap<&'a ClientPackageName, BTreeSet<&'a ClientPackageName>>, +} + +impl<'a> DagCheck<'a> { + fn new() -> DagCheck<'a> { + DagCheck { + proposed_server_managed: BTreeMap::new(), + proposed_client_managed: BTreeMap::new(), + proposed_upick: BTreeMap::new(), + } + } + + fn propose_client( + &mut self, + client_pkgname: &'a ClientPackageName, + reason: String, + ) { + self.proposed_client_managed + .entry(client_pkgname) + .or_insert_with(Vec::new) + .push(reason); + } + + fn propose_server( + &mut self, + client_pkgname: &'a ClientPackageName, + reason: String, + ) { + self.proposed_server_managed + .entry(client_pkgname) + .or_insert_with(Vec::new) + .push(reason); + } + + /// Propose that one of these two packages should be client-managed (because + /// they depend on each other, so they can't both be server-managed). + fn propose_upick( + &mut self, + client_pkgname1: &'a ClientPackageName, + client_pkgname2: &'a ClientPackageName, + ) { + // A "upick" is a situation where you (the person running the tool) + // should choose either of `pkg1` or `pkg2` to be client-managed. The + // caller will identify this situation twice: once when looking at + // `pkg1` and once when looking at `pkg2`. But we only want to report + // it once. So we'll ignore duplicates here because it's easier here + // than in the caller. + // + // To do that, first check whether the caller has already proposed this + // "upick" with the packages in the other order. If so, do nothing. + if let Some(other_pkg_upicks) = self.proposed_upick.get(client_pkgname2) + { + if other_pkg_upicks.contains(client_pkgname1) { + return; + } + } + + // Now go ahead and insert the pair in this order. This construction + // will also do nothing if this same pair has already been inserted in + // this order. 
+ self.proposed_upick + .entry(client_pkgname1) + .or_insert_with(BTreeSet::new) + .insert(client_pkgname2); + } + + /// Returns a list of APIs (identified by client package name) that look + /// like they could use server-side versioning, along with reasons + pub fn proposed_server_managed( + &self, + ) -> impl Iterator)> { + self.proposed_server_managed.iter().map(|(c, r)| (*c, r)) + } + + /// Returns a list of APIs (identified by client package name) that look + /// like they should use client-side versioning, along with reasons + pub fn proposed_client_managed( + &self, + ) -> impl Iterator)> { + self.proposed_client_managed.iter().map(|(c, r)| (*c, r)) + } + + /// Returns a list of pairs of APIs (identified by client package names) for + /// which we have not yet picked client-side or server-side versioning and + /// where there is a direct mutual dependency + /// + /// At least one of these APIs will need to be marked client-managed. + pub fn proposed_upick( + &self, + ) -> impl Iterator + { + self.proposed_upick + .iter() + .flat_map(|(c, others)| others.iter().map(|o| (*c, *o))) } } diff --git a/dev-tools/ls-apis/tests/api_dependencies.out b/dev-tools/ls-apis/tests/api_dependencies.out index 8cd2f0a085..8e061f2906 100644 --- a/dev-tools/ls-apis/tests/api_dependencies.out +++ b/dev-tools/ls-apis/tests/api_dependencies.out @@ -79,7 +79,6 @@ Repo Depot API (client: repo-depot-client) Sled Agent (client: sled-agent-client) consumed by: dpd (dendrite/dpd) via 1 path consumed by: omicron-nexus (omicron/nexus) via 7 paths - consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path Wicketd (client: wicketd-client) diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index e1421bd3ab..69276713bb 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -21,6 +21,7 @@ dropshot.workspace = true futures.workspace = true http.workspace = true ipnetwork.workspace = true +itertools.workspace = true libc.workspace = true macaddr.workspace = 
true omicron-common.workspace = true diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index fa09fb22c5..ee1ac58be9 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -5,14 +5,18 @@ //! Utilities for poking at ZFS. use crate::{execute, PFEXEC}; +use anyhow::anyhow; +use anyhow::bail; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; +use itertools::Itertools; use omicron_common::api::external::ByteCount; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DiskIdentity; +use omicron_common::disk::SharedDatasetConfig; use omicron_uuid_kinds::DatasetUuid; +use std::collections::BTreeMap; use std::fmt; -use std::str::FromStr; // These locations in the ramdisk must only be used by the switch zone. // @@ -80,25 +84,19 @@ enum EnsureFilesystemErrorRaw { /// Error returned by [`Zfs::ensure_filesystem`]. #[derive(thiserror::Error, Debug)] -#[error( - "Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}" -)] +#[error("Failed to ensure filesystem '{name}': {err}")] pub struct EnsureFilesystemError { name: String, - mountpoint: Mountpoint, #[source] err: EnsureFilesystemErrorRaw, } /// Error returned by [`Zfs::set_oxide_value`] #[derive(thiserror::Error, Debug)] -#[error( - "Failed to set value '{name}={value}' on filesystem {filesystem}: {err}" -)] +#[error("Failed to set values '{values}' on filesystem {filesystem}: {err}")] pub struct SetValueError { filesystem: String, - name: String, - value: String, + values: String, err: crate::ExecutionError, } @@ -236,56 +234,131 @@ pub struct DatasetProperties { } impl DatasetProperties { - // care about. - const ZFS_LIST_STR: &'static str = + const ZFS_GET_PROPS: &'static str = "oxide:uuid,name,avail,used,quota,reservation,compression"; } -// An inner parsing function, so that the FromStr implementation can always emit -// the string 's' that failed to parse in the error message. 
-fn dataset_properties_parse( - s: &str, -) -> Result { - let mut iter = s.split_whitespace(); - - let id = match iter.next().context("Missing UUID")? { - "-" => None, - anything_else => Some(anything_else.parse::()?), - }; +impl TryFrom<&DatasetProperties> for SharedDatasetConfig { + type Error = anyhow::Error; - let name = iter.next().context("Missing 'name'")?.to_string(); - let avail = - iter.next().context("Missing 'avail'")?.parse::()?.try_into()?; - let used = - iter.next().context("Missing 'used'")?.parse::()?.try_into()?; - let quota = match iter.next().context("Missing 'quota'")?.parse::()? { - 0 => None, - q => Some(q.try_into()?), - }; - let reservation = - match iter.next().context("Missing 'reservation'")?.parse::()? { - 0 => None, - r => Some(r.try_into()?), - }; - let compression = iter.next().context("Missing 'compression'")?.to_string(); - - Ok(DatasetProperties { - id, - name, - avail, - used, - quota, - reservation, - compression, - }) + fn try_from( + props: &DatasetProperties, + ) -> Result { + Ok(SharedDatasetConfig { + compression: props.compression.parse()?, + quota: props.quota, + reservation: props.reservation, + }) + } } -impl FromStr for DatasetProperties { - type Err = anyhow::Error; +impl DatasetProperties { + /// Parses dataset properties, assuming that the caller is providing the + /// output of the following command as stdout: + /// + /// zfs get \ + /// [maybe depth arguments] \ + /// -Hpo name,property,value,source $ZFS_GET_PROPS $DATASETS + fn parse_many( + stdout: &str, + ) -> Result, anyhow::Error> { + let name_prop_val_source_list = stdout.trim().split('\n'); + + let mut datasets: BTreeMap<&str, BTreeMap<&str, _>> = BTreeMap::new(); + for name_prop_val_source in name_prop_val_source_list { + // "-H" indicates that these columns are tab-separated; + // each column may internally have whitespace. 
+ let mut iter = name_prop_val_source.split('\t'); + + let (name, prop, val, source) = ( + iter.next().context("Missing 'name'")?, + iter.next().context("Missing 'property'")?, + iter.next().context("Missing 'value'")?, + iter.next().context("Missing 'source'")?, + ); + if let Some(extra) = iter.next() { + bail!("Unexpected column data: '{extra}'"); + } + + let props = datasets.entry(name).or_default(); + props.insert(prop, (val, source)); + } - fn from_str(s: &str) -> Result { - dataset_properties_parse(s) - .with_context(|| format!("Failed to parse: {s}")) + datasets + .into_iter() + .map(|(dataset_name, props)| { + let id = props + .get("oxide:uuid") + .filter(|(prop, source)| { + // Dataset UUIDs are properties that are optionally attached to + // datasets. However, some datasets are nested - to avoid them + // from propagating, we explicitly ignore this value if it is + // inherited. + // + // This can be the case for the "zone" filesystem root, which + // can propagate this property to a child zone without it set. + !source.starts_with("inherited") && *prop != "-" + }) + .map(|(prop, _source)| { + prop.parse::() + .context("Failed to parse UUID") + }) + .transpose()?; + let name = dataset_name.to_string(); + let avail = props + .get("available") + .map(|(prop, _source)| prop) + .ok_or(anyhow!("Missing 'available'"))? + .parse::() + .context("Failed to parse 'available'")? + .try_into()?; + let used = props + .get("used") + .map(|(prop, _source)| prop) + .ok_or(anyhow!("Missing 'used'"))? + .parse::() + .context("Failed to parse 'used'")? + .try_into()?; + + // The values of "quota" and "reservation" can be either "-" or + // "0" when they are not actually set. To be cautious, we treat + // both of these values as "the value has not been set + // explicitly". As a result, setting either of these values + // explicitly to zero is indistinguishable from setting them + // with a value of "none". 
+ let quota = props + .get("quota") + .filter(|(prop, _source)| *prop != "-" && *prop != "0") + .map(|(prop, _source)| { + prop.parse::().context("Failed to parse 'quota'") + }) + .transpose()? + .and_then(|v| ByteCount::try_from(v).ok()); + let reservation = props + .get("reservation") + .filter(|(prop, _source)| *prop != "-" && *prop != "0") + .map(|(prop, _source)| { + prop.parse::() + .context("Failed to parse 'reservation'") + }) + .transpose()? + .and_then(|v| ByteCount::try_from(v).ok()); + let compression = props + .get("compression") + .map(|(prop, _source)| prop.to_string()) + .ok_or_else(|| anyhow!("Missing 'compression'"))?; + + Ok(DatasetProperties { + id, + name, + avail, + used, + quota, + reservation, + compression, + }) + }) + .collect::, _>>() } } @@ -311,7 +384,40 @@ impl fmt::Display for PropertySource { } } -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] +#[derive(Copy, Clone, Debug)] +pub enum WhichDatasets { + SelfOnly, + SelfAndChildren, +} + +fn build_zfs_set_key_value_pairs( + size_details: Option, + dataset_id: Option, +) -> Vec<(&'static str, String)> { + let mut props = Vec::new(); + if let Some(SizeDetails { quota, reservation, compression }) = size_details + { + let quota = quota + .map(|q| q.to_bytes().to_string()) + .unwrap_or_else(|| String::from("none")); + props.push(("quota", quota)); + + let reservation = reservation + .map(|r| r.to_bytes().to_string()) + .unwrap_or_else(|| String::from("none")); + props.push(("reservation", reservation)); + + let compression = compression.to_string(); + props.push(("compression", compression)); + } + + if let Some(id) = dataset_id { + props.push(("oxide:uuid", id.to_string())); + } + + props +} + impl Zfs { /// Lists all datasets within a pool or existing dataset. /// @@ -335,6 +441,9 @@ impl Zfs { } /// Get information about datasets within a list of zpools / datasets. 
+ /// Returns properties for all input datasets, and optionally, for + /// their children (depending on the value of [WhichDatasets] is provided + /// as input). /// /// This function is similar to [Zfs::list_datasets], but provides a more /// substantial results about the datasets found. @@ -342,28 +451,34 @@ impl Zfs { /// Sorts results and de-duplicates them by name. pub fn get_dataset_properties( datasets: &[String], + which: WhichDatasets, ) -> Result, anyhow::Error> { let mut command = std::process::Command::new(ZFS); - let cmd = command.args(&["list", "-d", "1", "-rHpo"]); + let cmd = command.arg("get"); + match which { + WhichDatasets::SelfOnly => (), + WhichDatasets::SelfAndChildren => { + cmd.args(&["-d", "1"]); + } + } + cmd.args(&["-Hpo", "name,property,value,source"]); // Note: this is tightly coupled with the layout of DatasetProperties - cmd.arg(DatasetProperties::ZFS_LIST_STR); + cmd.arg(DatasetProperties::ZFS_GET_PROPS); cmd.args(datasets); - let output = execute(cmd).with_context(|| { - format!("Failed to get dataset properties for {datasets:?}") + // We are intentionally ignoring the output status of this command. + // + // If one or more dataset doesn't exist, we can still read stdout to + // see about the ones that do exist. + let output = cmd.output().map_err(|err| { + anyhow!( + "Failed to get dataset properties for {datasets:?}: {err:?}" + ) })?; let stdout = String::from_utf8(output.stdout)?; - let mut datasets = stdout - .trim() - .split('\n') - .map(|row| row.parse::()) - .collect::, _>>()?; - - datasets.sort_by(|d1, d2| d1.name.partial_cmp(&d2.name).unwrap()); - datasets.dedup_by(|d1, d2| d1.name.eq(&d2.name)); - Ok(datasets) + DatasetProperties::parse_many(&stdout) } /// Return the name of a dataset for a ZFS object. 
@@ -427,23 +542,19 @@ impl Zfs { do_format: bool, encryption_details: Option, size_details: Option, + id: Option, additional_options: Option>, ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; + + let props = build_zfs_set_key_value_pairs(size_details, id); if exists { - if let Some(SizeDetails { quota, reservation, compression }) = - size_details - { - // apply quota and compression mode (in case they've changed across - // sled-agent versions since creation) - Self::apply_properties( - name, - &mountpoint, - quota, - reservation, - compression, - )?; - } + Self::set_values(name, props.as_slice()).map_err(|err| { + EnsureFilesystemError { + name: name.to_string(), + err: err.err.into(), + } + })?; if encryption_details.is_none() { // If the dataset exists, we're done. Unencrypted datasets are @@ -455,14 +566,13 @@ impl Zfs { return Ok(()); } // We need to load the encryption key and mount the filesystem - return Self::mount_encrypted_dataset(name, &mountpoint); + return Self::mount_encrypted_dataset(name); } } if !do_format { return Err(EnsureFilesystemError { name: name.to_string(), - mountpoint, err: EnsureFilesystemErrorRaw::NotFoundNotFormatted, }); } @@ -498,7 +608,6 @@ impl Zfs { execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: err.into(), })?; @@ -511,82 +620,27 @@ impl Zfs { let cmd = command.args(["chown", "-R", &user, &mount]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: err.into(), })?; } - if let Some(SizeDetails { quota, reservation, compression }) = - size_details - { - // Apply any quota and compression mode. - Self::apply_properties( - name, - &mountpoint, - quota, - reservation, - compression, - )?; - } - - Ok(()) - } - - /// Applies the following properties to the filesystem. 
- /// - /// If any of the options are not supplied, a default "none" or "off" - /// value is supplied. - fn apply_properties( - name: &str, - mountpoint: &Mountpoint, - quota: Option, - reservation: Option, - compression: CompressionAlgorithm, - ) -> Result<(), EnsureFilesystemError> { - let quota = quota - .map(|q| q.to_bytes().to_string()) - .unwrap_or_else(|| String::from("none")); - let reservation = reservation - .map(|r| r.to_bytes().to_string()) - .unwrap_or_else(|| String::from("none")); - let compression = compression.to_string(); - - if let Err(err) = Self::set_value(name, "quota", "a) { - return Err(EnsureFilesystemError { + Self::set_values(name, props.as_slice()).map_err(|err| { + EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError err: err.err.into(), - }); - } - if let Err(err) = Self::set_value(name, "reservation", &reservation) { - return Err(EnsureFilesystemError { - name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError - err: err.err.into(), - }); - } - if let Err(err) = Self::set_value(name, "compression", &compression) { - return Err(EnsureFilesystemError { - name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError - err: err.err.into(), - }); - } + } + })?; + Ok(()) } fn mount_encrypted_dataset( name: &str, - mountpoint: &Mountpoint, ) -> Result<(), EnsureFilesystemError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-l", name]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::MountEncryptedFsFailed(err), })?; Ok(()) @@ -594,13 +648,11 @@ impl Zfs { pub fn mount_overlay_dataset( name: &str, - mountpoint: &Mountpoint, ) -> Result<(), EnsureFilesystemError> { let mut command = std::process::Command::new(PFEXEC); let 
cmd = command.args(&[ZFS, "mount", "-O", name]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err), })?; Ok(()) @@ -626,7 +678,6 @@ impl Zfs { if &values[..3] != &[name, "filesystem", &mountpoint.to_string()] { return Err(EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), }); } @@ -651,13 +702,29 @@ impl Zfs { name: &str, value: &str, ) -> Result<(), SetValueError> { + Self::set_values(filesystem_name, &[(name, value)]) + } + + fn set_values( + filesystem_name: &str, + name_values: &[(K, V)], + ) -> Result<(), SetValueError> { + if name_values.is_empty() { + return Ok(()); + } + let mut command = std::process::Command::new(PFEXEC); - let value_arg = format!("{}={}", name, value); - let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); + let cmd = command.args(&[ZFS, "set"]); + for (name, value) in name_values { + cmd.arg(format!("{name}={value}")); + } + cmd.arg(filesystem_name); execute(cmd).map_err(|err| SetValueError { filesystem: filesystem_name.to_string(), - name: name.to_string(), - value: value.to_string(), + values: name_values + .iter() + .map(|(k, v)| format!("{k}={v}")) + .join(","), err, })?; Ok(()) @@ -745,10 +812,7 @@ impl Zfs { err, }) } -} -// These methods don't work with mockall, so they exist in a separate impl block -impl Zfs { /// Calls "zfs get" to acquire multiple values /// /// - `names`: The properties being acquired @@ -859,42 +923,68 @@ mod test { #[test] fn parse_dataset_props() { - let input = - "- dataset_name 1234 5678 0 0 off"; - let props = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let props = DatasetProperties::parse_many(&input) 
.expect("Should have parsed data"); + assert_eq!(props.len(), 1); + + assert_eq!(props[0].id, None); + assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].avail.to_bytes(), 1234); + assert_eq!(props[0].used.to_bytes(), 5678); + assert_eq!(props[0].quota, None); + assert_eq!(props[0].reservation, None); + assert_eq!(props[0].compression, "off"); + } - assert_eq!(props.id, None); - assert_eq!(props.name, "dataset_name"); - assert_eq!(props.avail.to_bytes(), 1234); - assert_eq!(props.used.to_bytes(), 5678); - assert_eq!(props.quota, None); - assert_eq!(props.reservation, None); - assert_eq!(props.compression, "off"); + #[test] + fn parse_dataset_too_many_columns() { + let input = "dataset_name\tavailable\t1234\t-\tEXTRA\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let err = DatasetProperties::parse_many(&input) + .expect_err("Should have parsed data"); + assert!( + err.to_string().contains("Unexpected column data: 'EXTRA'"), + "{err}" + ); } #[test] fn parse_dataset_props_with_optionals() { - let input = "d4e1e554-7b98-4413-809e-4a42561c3d0c dataset_name 1234 5678 111 222 off"; - let props = DatasetProperties::from_str(&input) + let input = + "dataset_name\toxide:uuid\td4e1e554-7b98-4413-809e-4a42561c3d0c\tlocal\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); - + assert_eq!(props.len(), 1); assert_eq!( - props.id, + props[0].id, Some("d4e1e554-7b98-4413-809e-4a42561c3d0c".parse().unwrap()) ); - assert_eq!(props.name, "dataset_name"); - assert_eq!(props.avail.to_bytes(), 1234); - assert_eq!(props.used.to_bytes(), 5678); - assert_eq!(props.quota.map(|q| q.to_bytes()), Some(111)); - assert_eq!(props.reservation.map(|r| 
r.to_bytes()), Some(222)); - assert_eq!(props.compression, "off"); + assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].avail.to_bytes(), 1234); + assert_eq!(props[0].used.to_bytes(), 5678); + assert_eq!(props[0].quota.map(|q| q.to_bytes()), Some(111)); + assert_eq!(props[0].reservation.map(|r| r.to_bytes()), Some(222)); + assert_eq!(props[0].compression, "off"); } #[test] fn parse_dataset_bad_uuid() { - let input = "bad dataset_name 1234 5678 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\toxide:uuid\tbad\t-\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-"; + + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("error parsing UUID (dataset)"), @@ -904,8 +994,9 @@ mod test { #[test] fn parse_dataset_bad_avail() { - let input = "- dataset_name BADAVAIL 5678 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\tBADAVAIL\t-\n\ + dataset_name\tused\t5678\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -915,8 +1006,9 @@ mod test { #[test] fn parse_dataset_bad_usage() { - let input = "- dataset_name 1234 BADUSAGE 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\tBADUSAGE\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -926,8 +1018,10 @@ mod test { #[test] fn parse_dataset_bad_quota() { - let input = "- dataset_name 1234 5678 BADQUOTA 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\tBADQUOTA\t-"; + let err = 
DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -937,8 +1031,11 @@ mod test { #[test] fn parse_dataset_bad_reservation() { - let input = "- dataset_name 1234 5678 111 BADRES off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\tBADRES\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -949,24 +1046,102 @@ mod test { #[test] fn parse_dataset_missing_fields() { let expect_missing = |input: &str, what: &str| { - let err = DatasetProperties::from_str(input) + let err = DatasetProperties::parse_many(input) .expect_err("Should have failed to parse"); let err = format!("{err:#}"); assert!(err.contains(&format!("Missing {what}")), "{err}"); }; expect_missing( - "- dataset_name 1234 5678 111 222", - "'compression'", + "dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited", + "'available'", ); expect_missing( - "- dataset_name 1234 5678 111", - "'reservation'", + "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited", + "'used'", ); - expect_missing("- dataset_name 1234 5678", "'quota'"); - expect_missing("- dataset_name 1234", "'used'"); - expect_missing("- dataset_name", "'avail'"); - expect_missing("-", "'name'"); - expect_missing("", "UUID"); + expect_missing( + "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-", + "'compression'", + ); + } + + #[test] + fn parse_dataset_uuid_ignored_if_inherited() { + let input = + 
"dataset_name\toxide:uuid\tb8698ede-60c2-4e16-b792-d28c165cfd12\tinherited from parent\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].id, None); + } + + #[test] + fn parse_dataset_uuid_ignored_if_dash() { + let input = "dataset_name\toxide:uuid\t-\t-\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].id, None); + } + + #[test] + fn parse_quota_ignored_if_default() { + let input = "dataset_name\tquota\t0\tdefault\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].quota, None); + } + + #[test] + fn parse_reservation_ignored_if_default() { + let input = "dataset_name\treservation\t0\tdefault\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].reservation, None); + } + + #[test] + fn parse_sorts_and_dedups() { + let input = "foo\tavailable\t111\t-\n\ + foo\tused\t111\t-\n\ + foo\tcompression\toff\t-\n\ + foo\tavailable\t111\t-\n\ + foo\tused\t111\t-\n\ + foo\tcompression\toff\t-\n\ + bar\tavailable\t222\t-\n\ + bar\tused\t222\t-\n\ + bar\tcompression\toff\t-"; + + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 2); + assert_eq!(props[0].name, "bar"); + assert_eq!(props[0].used, 222.into()); + 
assert_eq!(props[1].name, "foo"); + assert_eq!(props[1].used, 111.into()); } } diff --git a/nexus/db-model/src/support_bundle.rs b/nexus/db-model/src/support_bundle.rs index 0e53070877..a4b14d363b 100644 --- a/nexus/db-model/src/support_bundle.rs +++ b/nexus/db-model/src/support_bundle.rs @@ -51,7 +51,7 @@ impl SupportBundleState { Destroying => vec![Active, Collecting, Failing], Failing => vec![Collecting, Active], // The "Failed" state is terminal. - Failed => vec![Collecting, Active, Failing], + Failed => vec![Active, Collecting, Failing], } } } @@ -69,8 +69,9 @@ impl From for SupportBundleStateView { // whether or not the bundle record can be safely deleted. // // Either way, it should be possible to delete the bundle. + // If a user requests that we delete a bundle in these states: // - "Failing" bundles will become "Destroying" - // - "Failed" bundles will be destroyed immediately + // - "Failed" bundles can be deleted immediately Failing => SupportBundleStateView::Failed, Failed => SupportBundleStateView::Failed, } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 0c73ae1ae2..4210f2bee2 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -16,7 +16,6 @@ use crate::db::DbConnection; use crate::db::TransactionError; use crate::transaction_retry::OptionalError; use anyhow::Context; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::DateTime; use chrono::Utc; @@ -106,7 +105,7 @@ impl DataStore { blueprint: &Blueprint, ) -> Result<(), Error> { let conn = self.pool_connection_authorized(opctx).await?; - Self::blueprint_insert_on_connection(&conn, opctx, blueprint).await + self.blueprint_insert_on_connection(&conn, opctx, blueprint).await } /// Creates a transaction iff the current blueprint is "bp_id". 
@@ -182,6 +181,7 @@ impl DataStore { /// Variant of [Self::blueprint_insert] which may be called from a /// transaction context. pub(crate) async fn blueprint_insert_on_connection( + &self, conn: &async_bb8_diesel::Connection, opctx: &OpContext, blueprint: &Blueprint, @@ -340,7 +340,8 @@ impl DataStore { // as most of the operations should be insertions rather than in-place // modifications of existing tables. #[allow(clippy::disallowed_methods)] - conn.transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("blueprint_insert") + .transaction(&conn, |conn| async move { // Insert the row for the blueprint. { use db::schema::blueprint::dsl; diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 3f0f7828fa..114d553aac 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -20,7 +20,6 @@ use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::TransactionError; use crate::transaction_retry::OptionalError; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use futures::future::BoxFuture; @@ -453,20 +452,24 @@ impl DataStore { // This method is used in nested transactions, which are not supported // with retryable transactions. 
- #[allow(clippy::disallowed_methods)] - conn.transaction_async(|c| async move { - let version = self - .dns_group_latest_version_conn(opctx, conn, update.dns_group) - .await?; - self.dns_write_version_internal( - &c, - update, - zones, - Generation(version.version.next()), - ) + self.transaction_non_retry_wrapper("dns_update_incremental") + .transaction(&conn, |c| async move { + let version = self + .dns_group_latest_version_conn( + opctx, + conn, + update.dns_group, + ) + .await?; + self.dns_write_version_internal( + &c, + update, + zones, + Generation(version.version.next()), + ) + .await + }) .await - }) - .await } // This must only be used inside a transaction. Otherwise, it may make diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 9269b233f3..08c64a3254 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -278,13 +278,13 @@ impl DataStore { // batch rather than making a bunch of round-trips to the database. // We'd do that if we had an interface for doing that with bound // parameters, etc. See oxidecomputer/omicron#973. - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; // The risk of a serialization error is possible here, but low, // as most of the operations should be insertions rather than in-place // modifications of existing tables. - #[allow(clippy::disallowed_methods)] - pool.transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("inventory_insert_collection") + .transaction(&conn, |conn| async move { // Insert records (and generate ids) for any baseboards that do not // already exist in the database. These rows are not scoped to a // particular collection. 
They contain only immutable data -- diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 2b4556d604..a12e0abc1d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -322,6 +322,14 @@ impl DataStore { ) } + /// Constructs a non-retryable transaction helper + pub fn transaction_non_retry_wrapper( + &self, + name: &'static str, + ) -> crate::transaction_retry::NonRetryHelper { + crate::transaction_retry::NonRetryHelper::new(&self.log, name) + } + #[cfg(test)] pub(crate) fn transaction_retry_producer( &self, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index dc3175c22d..067b8ed3ce 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -26,7 +26,6 @@ use crate::db::model::Zpool; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::TransactionError; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -676,11 +675,9 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - let rack = self - .pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| { + let conn = self.pool_connection_authorized(opctx).await?; + let rack = self.transaction_non_retry_wrapper("rack_set_initialized") + .transaction(&conn, |conn| { let err = err.clone(); let log = log.clone(); let authz_service_pool = authz_service_pool.clone(); @@ -752,7 +749,7 @@ impl DataStore { } // Insert the RSS-generated blueprint. 
- Self::blueprint_insert_on_connection( + self.blueprint_insert_on_connection( &conn, opctx, &blueprint, ) .await diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs index ed8ec6fcd9..757658351b 100644 --- a/nexus/db-queries/src/db/datastore/role.rs +++ b/nexus/db-queries/src/db/datastore/role.rs @@ -20,7 +20,6 @@ use crate::db::model::RoleAssignment; use crate::db::model::RoleBuiltin; use crate::db::pagination::paginated_multicolumn; use crate::db::pool::DbConnection; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use nexus_db_fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS; @@ -213,10 +212,9 @@ impl DataStore { // This method should probably be retryable, but this is slightly // complicated by the cloning semantics of the queries, which // must be Clone to be retried. - #[allow(clippy::disallowed_methods)] - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_non_retry_wrapper("role_assignment_replace_visible") + .transaction(&conn, |conn| async move { delete_old_query.execute_async(&conn).await?; Ok(insert_new_query.get_results_async(&conn).await?) }) diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index b862f3c461..c963e13ae4 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -24,7 +24,6 @@ use crate::db::model::VirtualProvisioningCollection; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -227,9 +226,9 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. 
- #[allow(clippy::disallowed_methods)] - let silo = conn - .transaction_async(|conn| async move { + let silo = self + .transaction_non_retry_wrapper("silo_create") + .transaction(&conn, |conn| async move { let silo = silo_create_query .get_result_async(&conn) .await @@ -429,48 +428,49 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - conn.transaction_async(|conn| async move { - let updated_rows = diesel::update(silo::dsl::silo) - .filter(silo::dsl::time_deleted.is_null()) - .filter(silo::dsl::id.eq(id)) - .filter(silo::dsl::rcgen.eq(rcgen)) - .set(silo::dsl::time_deleted.eq(now)) - .execute_async(&conn) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_silo), - ) - })?; + self.transaction_non_retry_wrapper("silo_delete") + .transaction(&conn, |conn| async move { + let updated_rows = diesel::update(silo::dsl::silo) + .filter(silo::dsl::time_deleted.is_null()) + .filter(silo::dsl::id.eq(id)) + .filter(silo::dsl::rcgen.eq(rcgen)) + .set(silo::dsl::time_deleted.eq(now)) + .execute_async(&conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_silo), + ) + })?; - if updated_rows == 0 { - return Err(TxnError::CustomError(Error::invalid_request( - "silo deletion failed due to concurrent modification", - ))); - } + if updated_rows == 0 { + return Err(TxnError::CustomError(Error::invalid_request( + "silo deletion failed due to concurrent modification", + ))); + } - self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; + self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; - self.virtual_provisioning_collection_delete_on_connection( - &opctx.log, &conn, id, - ) - .await?; + self.virtual_provisioning_collection_delete_on_connection( + &opctx.log, &conn, id, + ) + .await?; - self.dns_update_incremental(dns_opctx, &conn, dns_update).await?; + 
self.dns_update_incremental(dns_opctx, &conn, dns_update) + .await?; - info!(opctx.log, "deleted silo {}", id); + info!(opctx.log, "deleted silo {}", id); - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::CustomError(e) => e, - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + Ok(()) + }) + .await + .map_err(|e| match e { + TxnError::CustomError(e) => e, + TxnError::Database(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; // TODO-correctness This needs to happen in a saga or some other // mechanism that ensures it happens even if we crash at this point. diff --git a/nexus/db-queries/src/db/datastore/silo_group.rs b/nexus/db-queries/src/db/datastore/silo_group.rs index e6168f4e42..4d5543cc9c 100644 --- a/nexus/db-queries/src/db/datastore/silo_group.rs +++ b/nexus/db-queries/src/db/datastore/silo_group.rs @@ -16,7 +16,6 @@ use crate::db::model::SiloGroup; use crate::db::model::SiloGroupMembership; use crate::db::pagination::paginated; use crate::db::IncompleteOnConflictExt; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -198,12 +197,11 @@ impl DataStore { type TxnError = TransactionError; let group_id = authz_silo_group.id(); + let conn = self.pool_connection_authorized(opctx).await?; // Prefer to use "transaction_retry_wrapper" - #[allow(clippy::disallowed_methods)] - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("silo_group_delete") + .transaction(&conn, |conn| async move { use db::schema::silo_group_membership; // Don't delete groups that still have memberships diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index 26822c7f5f..3f2af6d65a 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -36,8 +36,8 @@ use uuid::Uuid; const CANNOT_ALLOCATE_ERR_MSG: &'static str = "Current policy limits support bundle creation to 'one per external disk', and \ - no disks are available. Either delete old support bundles or add additional \ - disks"; + no disks are available. You must delete old support bundles before new ones \ + can be created"; const FAILURE_REASON_NO_DATASET: &'static str = "Allocated dataset no longer exists"; @@ -50,8 +50,8 @@ pub struct SupportBundleExpungementReport { /// Bundles marked "failed" because the datasets storing them have been /// expunged. pub bundles_failed_missing_datasets: usize, - /// Bundles deleted because the datasets storing them have been - /// expunged. + /// Bundles already in the "destroying" state that have been deleted because + /// the datasets storing them have been expunged. 
pub bundles_deleted_missing_datasets: usize, /// Bundles marked "destroying" because the nexuses managing them have been @@ -79,7 +79,7 @@ impl DataStore { reason_for_creation: &'static str, this_nexus_id: OmicronZoneUuid, ) -> CreateResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; #[derive(Debug)] @@ -164,7 +164,7 @@ impl DataStore { opctx: &OpContext, id: SupportBundleUuid, ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -182,7 +182,7 @@ impl DataStore { opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -202,7 +202,7 @@ impl DataStore { nexus_id: OmicronZoneUuid, states: Vec, ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -223,7 +223,7 @@ impl DataStore { opctx: &OpContext, blueprint: &nexus_types::deployment::Blueprint, ) -> Result { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // For this blueprint: The set of all expunged Nexus zones let invalid_nexus_zones = blueprint @@ -327,7 +327,6 @@ impl DataStore { // the dataset expungement speeds up the process. 
let bundles_deleted_missing_datasets = diesel::delete(dsl::support_bundle) - .filter(dsl::state.eq_any(state.valid_old_states())) .filter(dsl::id.eq_any(bundles_to_delete)) // This check should be redundant (we already // partitioned above based on this state) but out of @@ -424,7 +423,7 @@ impl DataStore { id: SupportBundleUuid, state: SupportBundleState, ) -> Result<(), Error> { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; use db::schema::support_bundle::dsl; let conn = self.pool_connection_authorized(opctx).await?; @@ -457,7 +456,7 @@ impl DataStore { opctx: &OpContext, id: SupportBundleUuid, ) -> Result<(), Error> { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; use db::schema::support_bundle::dsl; @@ -913,7 +912,7 @@ mod test { let observed_bundles = datastore .support_bundle_list(&opctx, &pagparams) .await - .expect("Should be able to get bundle we just created"); + .expect("Should be able to query when no bundles exist"); assert!(observed_bundles.is_empty()); db.terminate().await; @@ -1087,6 +1086,121 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_bundle_deleted_from_expunged_dataset() { + static TEST_NAME: &str = "test_bundle_deleted_from_expunged_dataset"; + let logctx = dev::test_setup_log(TEST_NAME); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let mut rng = SimRngState::from_seed(TEST_NAME); + let (_example, mut bp1) = ExampleSystemBuilder::new_with_rng( + &logctx.log, + rng.next_system_rng(), + ) + .build(); + + // Weirdly, the "ExampleSystemBuilder" blueprint has a parent blueprint, + // but which isn't exposed through the API. 
Since we're only able to see + // the blueprint it emits, that means we can't actually make it the + // target because "the parent blueprint is not the current target". + // + // Instead of dealing with that, we lie: claim this is the primordial + // blueprint, with no parent. + // + // Regardless, make this starter blueprint our target. + bp1.parent_blueprint_id = None; + bp_insert_and_make_target(&opctx, &datastore, &bp1).await; + + // Manually perform the equivalent of blueprint execution to populate + // database records. + let sleds = TestSled::new_from_blueprint(&bp1); + for sled in &sleds { + sled.create_database_records(&datastore, &opctx).await; + } + + // Extract Nexus and Dataset information from the generated blueprint. + let this_nexus_id = get_nexuses_from_blueprint( + &bp1, + BlueprintZoneFilter::ShouldBeRunning, + ) + .get(0) + .map(|id| *id) + .expect("There should be a Nexus in the example blueprint"); + let debug_datasets = get_debug_datasets_from_blueprint( + &bp1, + BlueprintDatasetFilter::InService, + ); + assert!(!debug_datasets.is_empty()); + + // When we create a bundle, it should exist on a dataset provisioned by + // the blueprint. + let bundle = datastore + .support_bundle_create(&opctx, "for the test", this_nexus_id) + .await + .expect("Should be able to create bundle"); + assert_eq!(bundle.assigned_nexus, Some(this_nexus_id.into())); + assert!( + debug_datasets.contains(&DatasetUuid::from(bundle.dataset_id)), + "Bundle should have been allocated from a blueprint dataset" + ); + + // Start the deletion of this bundle + datastore + .support_bundle_update( + &opctx, + bundle.id.into(), + SupportBundleState::Destroying, + ) + .await + .expect("Should have been able to update state"); + + // If we try to "fail support bundles" from expunged datasets/nexuses, + // we should see a no-op. Nothing has been expunged yet! 
+ let report = + datastore.support_bundle_fail_expunged(&opctx, &bp1).await.expect( + "Should have been able to perform no-op support bundle failure", + ); + assert_eq!(SupportBundleExpungementReport::default(), report); + + // Expunge the bundle's dataset (manually) + let bp2 = { + let mut bp2 = bp1.clone(); + bp2.id = Uuid::new_v4(); + bp2.parent_blueprint_id = Some(bp1.id); + expunge_dataset_for_bundle(&mut bp2, &bundle); + bp2 + }; + bp_insert_and_make_target(&opctx, &datastore, &bp2).await; + + datastore + .support_bundle_fail_expunged(&opctx, &bp1) + .await + .expect_err("bp1 is no longer the target; this should fail"); + let report = datastore + .support_bundle_fail_expunged(&opctx, &bp2) + .await + .expect("Should have been able to mark bundle state as failed"); + assert_eq!( + SupportBundleExpungementReport { + bundles_deleted_missing_datasets: 1, + ..Default::default() + }, + report + ); + + // Should observe no bundles (it should have been deleted) + let pagparams = DataPageParams::max_page(); + let observed_bundles = datastore + .support_bundle_list(&opctx, &pagparams) + .await + .expect("Should be able to query when no bundles exist"); + assert!(observed_bundles.is_empty()); + + db.terminate().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_bundle_failed_from_expunged_nexus_no_reassign() { static TEST_NAME: &str = diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 003310f920..c02c781c8e 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -38,4 +38,10 @@ mod probes { // Fires when we fail to find a VNI in the provided range. 
fn vni__search__range__empty(_: &usdt::UniqueId) {} + + // Fires when a transaction has started + fn transaction__start(conn_id: uuid::Uuid, name: &str) {} + + // Fires when a transaction has completed + fn transaction__done(conn_id: uuid::Uuid, name: &str) {} } diff --git a/nexus/db-queries/src/transaction_retry.rs b/nexus/db-queries/src/transaction_retry.rs index 02d00f8215..cf8ee22376 100644 --- a/nexus/db-queries/src/transaction_retry.rs +++ b/nexus/db-queries/src/transaction_retry.rs @@ -106,9 +106,15 @@ impl RetryHelper { + Send + Sync, { + crate::probes::transaction__start!(|| { + (conn.as_sync_conn().id(), self.name) + }); let result = conn .transaction_async_with_retry(f, || self.retry_callback()) .await; + crate::probes::transaction__done!(|| { + (conn.as_sync_conn().id(), self.name) + }); let retry_info = self.inner.lock().unwrap(); if retry_info.has_retried() { @@ -179,6 +185,48 @@ impl oximeter::Producer for Producer { } } +/// Helper utility to have a similar interface to RetryHelper (also emitting +/// probes) but for non-retryable transactions. +pub struct NonRetryHelper { + log: Logger, + name: &'static str, +} + +impl NonRetryHelper { + pub(crate) fn new(log: &Logger, name: &'static str) -> Self { + Self { log: log.new(o!("transaction" => name)), name } + } + + /// Calls the function "f" in an asynchronous, non-retryable transaction. 
+ pub async fn transaction( + self, + conn: &async_bb8_diesel::Connection, + f: Func, + ) -> Result + where + R: Send + 'static, + E: From + std::fmt::Debug + Send + 'static, + Fut: std::future::Future> + Send, + Func: FnOnce(async_bb8_diesel::Connection) -> Fut + + Send + + Sync, + { + crate::probes::transaction__start!(|| { + (conn.as_sync_conn().id(), self.name) + }); + + #[allow(clippy::disallowed_methods)] + let result = conn.transaction_async(f).await; + crate::probes::transaction__done!(|| { + (conn.as_sync_conn().id(), self.name) + }); + if let Err(err) = result.as_ref() { + warn!(self.log, "Non-retryable transaction failure"; "err" => ?err); + } + result + } +} + /// Helper utility for passing non-retryable errors out-of-band from /// transactions. /// diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 4c266f87b5..4fc92b18d8 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -33,7 +33,10 @@ probe_view GET /experimental/v1/probes/{probe support_bundle_create POST /experimental/v1/system/support-bundles support_bundle_delete DELETE /experimental/v1/system/support-bundles/{support_bundle} support_bundle_download GET /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_download_file GET /experimental/v1/system/support-bundles/{support_bundle}/download/{file} support_bundle_head HEAD /experimental/v1/system/support-bundles/{support_bundle}/download +support_bundle_head_file HEAD /experimental/v1/system/support-bundles/{support_bundle}/download/{file} +support_bundle_index GET /experimental/v1/system/support-bundles/{support_bundle}/index support_bundle_list GET /experimental/v1/system/support-bundles support_bundle_view GET /experimental/v1/system/support-bundles/{support_bundle} timeseries_query POST /v1/timeseries/query diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 6429cfd06b..54ba3ab34b 
100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2793,7 +2793,7 @@ pub trait NexusExternalApi { query_params: Query, ) -> Result>, HttpError>; - /// View a single support bundle + /// View a support bundle #[endpoint { method = GET, path = "/experimental/v1/system/support-bundles/{support_bundle}", @@ -2804,7 +2804,18 @@ pub trait NexusExternalApi { path_params: Path, ) -> Result, HttpError>; - /// Download the contents of a single support bundle + /// Download the index of a support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}/index", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_index( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Download the contents of a support bundle #[endpoint { method = GET, path = "/experimental/v1/system/support-bundles/{support_bundle}/download", @@ -2813,10 +2824,20 @@ pub trait NexusExternalApi { async fn support_bundle_download( rqctx: RequestContext, path_params: Path, - body: TypedBody, ) -> Result, HttpError>; - /// Download the metadata of a single support bundle + /// Download a file within a support bundle + #[endpoint { + method = GET, + path = "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_download_file( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Download the metadata of a support bundle #[endpoint { method = HEAD, path = "/experimental/v1/system/support-bundles/{support_bundle}/download", @@ -2825,7 +2846,17 @@ pub trait NexusExternalApi { async fn support_bundle_head( rqctx: RequestContext, path_params: Path, - body: TypedBody, + ) -> Result, HttpError>; + + /// Download the metadata of a file within the support bundle + #[endpoint { + method = HEAD, + path = 
"/experimental/v1/system/support-bundles/{support_bundle}/download/{file}", + tags = ["hidden"], // system/support-bundles: only one tag is allowed + }] + async fn support_bundle_head_file( + rqctx: RequestContext, + path_params: Path, ) -> Result, HttpError>; /// Create a new support bundle diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 43a65ad085..efed173a71 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -16,6 +16,7 @@ illumos-utils.workspace = true indexmap.workspace = true internal-dns-resolver.workspace = true ipnet.workspace = true +itertools.workspace = true nexus-config.workspace = true nexus-inventory.workspace = true nexus-sled-agent-shared.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 394133132b..74e91ff943 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -228,6 +228,14 @@ pub struct SledEditCounts { } impl SledEditCounts { + pub fn zeroes() -> Self { + Self { + disks: EditCounts::zeroes(), + datasets: EditCounts::zeroes(), + zones: EditCounts::zeroes(), + } + } + fn has_nonzero_counts(&self) -> bool { let Self { disks, datasets, zones } = self; disks.has_nonzero_counts() @@ -548,7 +556,7 @@ impl<'a> BlueprintBuilder<'a> { generation: Generation::new(), datasets: BTreeMap::new(), }); - let editor = SledEditor::new( + let editor = SledEditor::for_existing( state, zones.clone(), disks, @@ -565,8 +573,7 @@ impl<'a> BlueprintBuilder<'a> { // that weren't in the parent blueprint. (These are newly-added sleds.) 
for sled_id in input.all_sled_ids(SledFilter::Commissioned) { if let Entry::Vacant(slot) = sled_editors.entry(sled_id) { - slot.insert(SledEditor::new_empty( - SledState::Active, + slot.insert(SledEditor::for_new_active( build_preexisting_dataset_ids(sled_id)?, )); } @@ -735,8 +742,8 @@ impl<'a> BlueprintBuilder<'a> { .retain(|sled_id, _| in_service_sled_ids.contains(sled_id)); // If we have the clickhouse cluster setup enabled via policy and we - // don't yet have a `ClickhouseClusterConfiguration`, then we must create - // one and feed it to our `ClickhouseAllocator`. + // don't yet have a `ClickhouseClusterConfiguration`, then we must + // create one and feed it to our `ClickhouseAllocator`. let clickhouse_allocator = if self.input.clickhouse_cluster_enabled() { let parent_config = self .parent_blueprint @@ -815,18 +822,18 @@ impl<'a> BlueprintBuilder<'a> { } /// Set the desired state of the given sled. - pub fn set_sled_state( + pub fn set_sled_decommissioned( &mut self, sled_id: SledUuid, - desired_state: SledState, ) -> Result<(), Error> { let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "tried to set sled state for unknown sled {sled_id}" )) })?; - editor.set_state(desired_state); - Ok(()) + editor + .decommission() + .map_err(|err| Error::SledEditError { sled_id, err }) } /// Within tests, set an RNG for deterministic results. 
@@ -1046,15 +1053,18 @@ impl<'a> BlueprintBuilder<'a> { // blueprint for (disk_id, (zpool, disk)) in database_disks { database_disk_ids.insert(disk_id); - editor.ensure_disk( - BlueprintPhysicalDiskConfig { - disposition: BlueprintPhysicalDiskDisposition::InService, - identity: disk.disk_identity.clone(), - id: disk_id, - pool_id: *zpool, - }, - &mut self.rng, - ); + editor + .ensure_disk( + BlueprintPhysicalDiskConfig { + disposition: + BlueprintPhysicalDiskDisposition::InService, + identity: disk.disk_identity.clone(), + id: disk_id, + pool_id: *zpool, + }, + &mut self.rng, + ) + .map_err(|err| Error::SledEditError { sled_id, err })?; } // Remove any disks that appear in the blueprint, but not the database diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs index 13094b97a4..995cc306a5 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs @@ -7,18 +7,25 @@ use crate::blueprint_builder::SledEditCounts; use crate::planner::PlannerRng; use illumos_utils::zpool::ZpoolName; +use itertools::Either; +use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintDatasetsConfig; use nexus_types::deployment::BlueprintPhysicalDiskConfig; use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::DiskFilter; use nexus_types::external_api::views::SledState; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; +use 
omicron_uuid_kinds::ZpoolUuid; +use std::mem; mod datasets; mod disks; @@ -50,6 +57,32 @@ pub enum SledInputError { #[derive(Debug, thiserror::Error)] pub enum SledEditError { + #[error("editing a decommissioned sled is not allowed")] + EditDecommissioned, + #[error( + "sled is not decommissionable: \ + disk {disk_id} (zpool {zpool_id}) is in service" + )] + NonDecommissionableDiskInService { + disk_id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, + }, + #[error( + "sled is not decommissionable: \ + dataset {dataset_id} (kind {kind:?}) is in service" + )] + NonDecommissionableDatasetInService { + dataset_id: DatasetUuid, + kind: DatasetKind, + }, + #[error( + "sled is not decommissionable: \ + zone {zone_id} (kind {kind:?}) is not expunged" + )] + NonDecommissionableZoneNotExpunged { + zone_id: OmicronZoneUuid, + kind: ZoneKind, + }, #[error("failed to edit disks")] EditDisks(#[from] DisksEditError), #[error("failed to edit datasets")] @@ -74,8 +107,196 @@ pub enum SledEditError { } #[derive(Debug)] -pub(crate) struct SledEditor { - state: SledState, +pub(crate) struct SledEditor(InnerSledEditor); + +#[derive(Debug)] +enum InnerSledEditor { + // Internally, `SledEditor` has a variant for each variant of `SledState`, + // as the operations allowed in different states are substantially different + // (i.e., an active sled allows any edit; a decommissioned sled allows + // none). 
+ Active(ActiveSledEditor), + Decommissioned(EditedSled), +} + +impl SledEditor { + pub fn for_existing( + state: SledState, + zones: BlueprintZonesConfig, + disks: BlueprintPhysicalDisksConfig, + datasets: BlueprintDatasetsConfig, + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Result { + match state { + SledState::Active => { + let inner = ActiveSledEditor::new( + zones, + disks, + datasets, + preexisting_dataset_ids, + )?; + Ok(Self(InnerSledEditor::Active(inner))) + } + SledState::Decommissioned => { + let inner = EditedSled { + state, + zones, + disks, + datasets, + edit_counts: SledEditCounts::zeroes(), + }; + Ok(Self(InnerSledEditor::Decommissioned(inner))) + } + } + } + + pub fn for_new_active( + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Self { + Self(InnerSledEditor::Active(ActiveSledEditor::new_empty( + preexisting_dataset_ids, + ))) + } + + pub fn finalize(self) -> EditedSled { + match self.0 { + InnerSledEditor::Active(editor) => editor.finalize(), + InnerSledEditor::Decommissioned(edited) => edited, + } + } + + pub fn edit_counts(&self) -> SledEditCounts { + match &self.0 { + InnerSledEditor::Active(editor) => editor.edit_counts(), + InnerSledEditor::Decommissioned(edited) => edited.edit_counts, + } + } + + pub fn decommission(&mut self) -> Result<(), SledEditError> { + match &mut self.0 { + InnerSledEditor::Active(editor) => { + // Decommissioning a sled is a one-way trip that has many + // preconditions. We can't check all of them here (e.g., we + // should kick the sled out of trust quorum before + // decommissioning, which is entirely outside the realm of + // `SledEditor`. But we can do some basic checks: all of the + // disks, datasets, and zones for this sled should be expunged. + editor.validate_decommisionable()?; + + // We can't take ownership of `editor` from the `&mut self` + // reference we have, and we need ownership to call + // `finalize()`. Steal the contents via `mem::swap()` with an + // empty editor. 
This isn't panic safe (i.e., if we panic + // between the `mem::swap()` and the reassignment to `self.0` + // below, we'll be left in the active state with an empty sled + // editor), but omicron in general is not panic safe and aborts + // on panic. Plus `finalize()` should never panic. + let mut stolen = ActiveSledEditor::new_empty( + DatasetIdsBackfillFromDb::empty(), + ); + mem::swap(editor, &mut stolen); + + let mut finalized = stolen.finalize(); + finalized.state = SledState::Decommissioned; + self.0 = InnerSledEditor::Decommissioned(finalized); + } + // If we're already decommissioned, there's nothing to do. + InnerSledEditor::Decommissioned(_) => (), + } + Ok(()) + } + + pub fn disks( + &self, + filter: DiskFilter, + ) -> impl Iterator { + match &self.0 { + InnerSledEditor::Active(editor) => { + Either::Left(editor.disks(filter)) + } + InnerSledEditor::Decommissioned(edited) => Either::Right( + edited + .disks + .disks + .iter() + .filter(move |disk| disk.disposition.matches(filter)), + ), + } + } + + pub fn zones( + &self, + filter: BlueprintZoneFilter, + ) -> impl Iterator { + match &self.0 { + InnerSledEditor::Active(editor) => { + Either::Left(editor.zones(filter)) + } + InnerSledEditor::Decommissioned(edited) => Either::Right( + edited + .zones + .zones + .iter() + .filter(move |zone| zone.disposition.matches(filter)), + ), + } + } + + fn as_active_mut( + &mut self, + ) -> Result<&mut ActiveSledEditor, SledEditError> { + match &mut self.0 { + InnerSledEditor::Active(editor) => Ok(editor), + InnerSledEditor::Decommissioned(_) => { + Err(SledEditError::EditDecommissioned) + } + } + } + + pub fn ensure_disk( + &mut self, + disk: BlueprintPhysicalDiskConfig, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.ensure_disk(disk, rng); + Ok(()) + } + + pub fn expunge_disk( + &mut self, + disk_id: &PhysicalDiskUuid, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.expunge_disk(disk_id) + } + + pub fn add_zone( + &mut 
self, + zone: BlueprintZoneConfig, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.add_zone(zone, rng) + } + + pub fn expunge_zone( + &mut self, + zone_id: &OmicronZoneUuid, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.expunge_zone(zone_id) + } + + /// Backwards compatibility / test helper: If we're given a blueprint that + /// has zones but wasn't created via `SledEditor`, it might not have + /// datasets for all its zones. This method backfills them. + pub fn ensure_datasets_for_running_zones( + &mut self, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.ensure_datasets_for_running_zones(rng) + } +} + +#[derive(Debug)] +struct ActiveSledEditor { zones: ZonesEditor, disks: DisksEditor, datasets: DatasetsEditor, @@ -90,16 +311,14 @@ pub(crate) struct EditedSled { pub edit_counts: SledEditCounts, } -impl SledEditor { +impl ActiveSledEditor { pub fn new( - state: SledState, zones: BlueprintZonesConfig, disks: BlueprintPhysicalDisksConfig, datasets: BlueprintDatasetsConfig, preexisting_dataset_ids: DatasetIdsBackfillFromDb, ) -> Result { Ok(Self { - state, zones: zones.try_into()?, disks: disks.try_into()?, datasets: DatasetsEditor::new(datasets, preexisting_dataset_ids)?, @@ -107,11 +326,9 @@ impl SledEditor { } pub fn new_empty( - state: SledState, preexisting_dataset_ids: DatasetIdsBackfillFromDb, ) -> Self { Self { - state, zones: ZonesEditor::empty(), disks: DisksEditor::empty(), datasets: DatasetsEditor::empty(preexisting_dataset_ids), @@ -123,7 +340,7 @@ impl SledEditor { let (datasets, datasets_counts) = self.datasets.finalize(); let (zones, zones_counts) = self.zones.finalize(); EditedSled { - state: self.state, + state: SledState::Active, zones, disks, datasets, @@ -135,6 +352,24 @@ impl SledEditor { } } + fn validate_decommisionable(&self) -> Result<(), SledEditError> { + // ... and all zones are expunged. 
+ if let Some(zone) = self.zones(BlueprintZoneFilter::All).find(|zone| { + match zone.disposition { + BlueprintZoneDisposition::InService + | BlueprintZoneDisposition::Quiesced => true, + BlueprintZoneDisposition::Expunged => false, + } + }) { + return Err(SledEditError::NonDecommissionableZoneNotExpunged { + zone_id: zone.id, + kind: zone.zone_type.kind(), + }); + } + + Ok(()) + } + pub fn edit_counts(&self) -> SledEditCounts { SledEditCounts { disks: self.disks.edit_counts(), @@ -143,10 +378,6 @@ impl SledEditor { } } - pub fn set_state(&mut self, new_state: SledState) { - self.state = new_state; - } - pub fn disks( &self, filter: DiskFilter, diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs index de397b9caa..211af59aab 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs @@ -7,7 +7,6 @@ use crate::planner::PlannerRng; use illumos_utils::zpool::ZpoolName; use nexus_types::deployment::BlueprintDatasetConfig; use nexus_types::deployment::BlueprintDatasetDisposition; -use nexus_types::deployment::BlueprintDatasetFilter; use nexus_types::deployment::BlueprintDatasetsConfig; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolFilter; @@ -24,6 +23,9 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::SocketAddrV6; +#[cfg(test)] +use nexus_types::deployment::BlueprintDatasetFilter; + #[derive(Debug, thiserror::Error)] #[error( "invalid blueprint input: multiple datasets with kind {kind:?} \ @@ -269,7 +271,7 @@ impl DatasetsEditor { self.counts } - #[allow(dead_code)] // currently only used by tests; this will change soon + #[cfg(test)] pub fn datasets( &self, filter: BlueprintDatasetFilter, diff --git a/nexus/reconfigurator/planning/src/planner.rs 
b/nexus/reconfigurator/planning/src/planner.rs index 56fc671667..24ef80b253 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -159,8 +159,7 @@ impl<'a> Planner<'a> { let num_instances_assigned = 0; if all_zones_expunged && num_instances_assigned == 0 { - self.blueprint - .set_sled_state(sled_id, SledState::Decommissioned)?; + self.blueprint.set_sled_decommissioned(sled_id)?; } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index bc451e2a76..cfc9f99851 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -81,7 +81,6 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::Probe; use omicron_common::api::external::RouterRoute; use omicron_common::api::external::RouterRouteKind; -use omicron_common::api::external::SupportBundleGetQueryParams; use omicron_common::api::external::SwitchPort; use omicron_common::api::external::SwitchPortSettings; use omicron_common::api::external::SwitchPortSettingsView; @@ -6074,10 +6073,55 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn support_bundle_index( + rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn support_bundle_download( rqctx: RequestContext, _path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + 
.unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_download_file( + rqctx: RequestContext, + _path_params: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { @@ -6101,7 +6145,29 @@ impl NexusExternalApi for NexusExternalApiImpl { async fn support_bundle_head( rqctx: RequestContext, _path_params: Path, - _body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + Err(nexus + .unimplemented_todo(&opctx, crate::app::Unimpl::Public) + .await + .into()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn support_bundle_head_file( + rqctx: RequestContext, + _path_params: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 7e5441c16a..5e5268fb55 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -399,8 +399,7 @@ pub async fn execute_timeseries_query( if rsp.status.is_client_error() { let text = std::str::from_utf8(&rsp.body) .expect("Timeseries query response body should be UTF-8"); - if text.contains("Schema for timeseries") && text.contains("not found") - { + if text.starts_with("Timeseries not found for: ") { return None; } } diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index 13fb389d6e..0a9e62707f 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -6,9 +6,12 @@ probe_view (get "/experimental/v1/probes/{probe support_bundle_list (get 
"/experimental/v1/system/support-bundles") support_bundle_view (get "/experimental/v1/system/support-bundles/{support_bundle}") support_bundle_download (get "/experimental/v1/system/support-bundles/{support_bundle}/download") +support_bundle_download_file (get "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}") +support_bundle_index (get "/experimental/v1/system/support-bundles/{support_bundle}/index") ping (get "/v1/ping") networking_switch_port_status (get "/v1/system/hardware/switch-port/{port}/status") support_bundle_head (head "/experimental/v1/system/support-bundles/{support_bundle}/download") +support_bundle_head_file (head "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 4a4af8b052..4e616e698f 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -143,6 +143,15 @@ impl From for SiloSelector { } } +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct SupportBundleFilePath { + #[serde(flatten)] + pub bundle: SupportBundlePath, + + /// The file within the bundle to download + pub file: String, +} + #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct OptionalSiloSelector { /// Name or ID of the silo diff --git a/openapi/nexus.json b/openapi/nexus.json index fff479b9df..bc043059dd 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -390,7 +390,7 @@ "tags": [ "hidden" ], - "summary": "View a single support bundle", + "summary": "View a support bundle", "operationId": "support_bundle_view", "parameters": [ { @@ -460,7 +460,7 @@ "tags": [ "hidden" ], - "summary": "Download the contents of a single support bundle", + "summary": "Download the contents of a support bundle", "operationId": 
"support_bundle_download", "parameters": [ { @@ -474,16 +474,75 @@ } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + } + } + }, + "head": { + "tags": [ + "hidden" + ], + "summary": "Download the metadata of a support bundle", + "operationId": "support_bundle_head", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}/download/{file}": { + "get": { + "tags": [ + "hidden" + ], + "summary": "Download a file within a support bundle", + "operationId": "support_bundle_download_file", + "parameters": [ + { + "in": "path", + "name": "file", + "description": "The file within the bundle to download", + "required": true, + "schema": { + "type": "string" + } }, - "required": true - }, + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], "responses": { "default": { "description": "", @@ -499,9 +558,18 @@ "tags": [ "hidden" ], - "summary": "Download the metadata of a single support bundle", - "operationId": "support_bundle_head", + "summary": "Download the metadata of a file within the support bundle", + "operationId": "support_bundle_head_file", "parameters": [ + { + "in": "path", + "name": "file", + "description": "The file within the bundle to download", + "required": true, + "schema": { + "type": "string" + } + }, { "in": "path", "name": "support_bundle", @@ -513,16 +581,37 @@ } } ], - "requestBody": { - "content": { - "application/json": { - 
"schema": { - "$ref": "#/components/schemas/SupportBundleGetQueryParams" + "responses": { + "default": { + "description": "", + "content": { + "*/*": { + "schema": {} } } - }, - "required": true - }, + } + } + } + }, + "/experimental/v1/system/support-bundles/{support_bundle}/index": { + "get": { + "tags": [ + "hidden" + ], + "summary": "Download the index of a support bundle", + "operationId": "support_bundle_index", + "parameters": [ + { + "in": "path", + "name": "support_bundle", + "description": "ID of the support bundle", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], "responses": { "default": { "description": "", @@ -20293,18 +20382,6 @@ "items" ] }, - "SupportBundleGetQueryParams": { - "description": "Query parameters for reading the support bundle", - "type": "object", - "properties": { - "query_type": { - "$ref": "#/components/schemas/SupportBundleQueryType" - } - }, - "required": [ - "query_type" - ] - }, "SupportBundleInfo": { "type": "object", "properties": { @@ -20354,60 +20431,6 @@ "items" ] }, - "SupportBundleQueryType": { - "description": "Describes the type of access to the support bundle", - "oneOf": [ - { - "description": "Access the whole support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "whole" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access the names of all files within the support bundle", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "index" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Access a specific file within the support bundle", - "type": "object", - "properties": { - "file_path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "path" - ] - } - }, - "required": [ - "file_path", - "type" - ] - } - ] - }, "SupportBundleState": { "oneOf": [ { diff --git a/package-manifest.toml b/package-manifest.toml index 809c1ce6ca..83b1ba8168 100644 
--- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,6 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, + { from = "tools/dtrace/nexus", to = "/opt/oxide/dtrace/nexus" }, ] output.type = "zone" setup_hint = """ diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 399b06570a..37380f6e43 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2449,7 +2449,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS one_bundle_per_dataset ON omicron.public.suppo dataset_id ); -CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle ( +CREATE INDEX IF NOT EXISTS lookup_bundle_by_nexus ON omicron.public.support_bundle ( assigned_nexus ); diff --git a/schema/crdb/support-bundles/up04.sql b/schema/crdb/support-bundles/up04.sql index 6df95640fd..58b903e500 100644 --- a/schema/crdb/support-bundles/up04.sql +++ b/schema/crdb/support-bundles/up04.sql @@ -1,4 +1,4 @@ -CREATE INDEX IF NOT EXISTS lookup_bundle_by_neuxs ON omicron.public.support_bundle ( +CREATE INDEX IF NOT EXISTS lookup_bundle_by_nexus ON omicron.public.support_bundle ( assigned_nexus ); diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 10b4ba1cdb..88c758dc31 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -81,6 +81,7 @@ sha3.workspace = true sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true +sled-diagnostics.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 265c0152e2..d24f85ad80 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -150,6 +150,7 @@ pub(crate) fn ensure_backing_fs( true, // do_format None, // encryption_details, size_details, + 
None, Some(vec!["canmount=noauto".to_string()]), // options )?; @@ -180,7 +181,7 @@ pub(crate) fn ensure_backing_fs( info!(log, "Mounting {} on {}", dataset, mountpoint); - Zfs::mount_overlay_dataset(&dataset, &mountpoint)?; + Zfs::mount_overlay_dataset(&dataset)?; if let Some(subdirs) = bfs.subdirs { for dir in subdirs { diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 5b89506242..19a63cb71d 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -293,6 +293,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { encryption_details, quota, None, + None, ) .map_err(StartError::EnsureZfsRamdiskDataset) } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 844e13151a..88259537d2 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,7 +6,6 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; -use crate::support_bundle::queries::SupportBundleCommandHttpOutput; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -53,6 +52,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, StorageLimit, ZoneBundleId, ZoneBundleMetadata, }; +use sled_diagnostics::SledDiagnosticsCommandHttpOutput; use std::collections::BTreeMap; type SledApiDescription = ApiDescription; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 80dbe72ea3..b9bf703933 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,10 +19,6 @@ use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; -use crate::support_bundle::queries::{ - dladm_info, ipadm_info, zoneadm_info, SupportBundleCmdError, - SupportBundleCmdOutput, -}; use 
crate::support_bundle::storage::SupportBundleManager; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; @@ -76,6 +72,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, PriorityOrder, StorageLimit, ZoneBundleMetadata, }; +use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; @@ -1367,20 +1364,20 @@ impl SledAgent { pub(crate) async fn support_zoneadm_info( &self, - ) -> Result { - zoneadm_info().await + ) -> Result { + sled_diagnostics::zoneadm_info().await } pub(crate) async fn support_ipadm_info( &self, - ) -> Vec> { - ipadm_info().await + ) -> Vec> { + sled_diagnostics::ipadm_info().await } pub(crate) async fn support_dladm_info( &self, - ) -> Vec> { - dladm_info().await + ) -> Vec> { + sled_diagnostics::dladm_info().await } } diff --git a/sled-agent/src/support_bundle/mod.rs b/sled-agent/src/support_bundle/mod.rs index 314edfaec8..a1c4942751 100644 --- a/sled-agent/src/support_bundle/mod.rs +++ b/sled-agent/src/support_bundle/mod.rs @@ -2,5 +2,4 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-pub mod queries; pub mod storage; diff --git a/sled-diagnostics/.gitignore b/sled-diagnostics/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/sled-diagnostics/.gitignore @@ -0,0 +1 @@ +/target diff --git a/sled-diagnostics/Cargo.toml b/sled-diagnostics/Cargo.toml new file mode 100644 index 0000000000..98ac59b674 --- /dev/null +++ b/sled-diagnostics/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "sled-diagnostics" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +futures.workspace = true +omicron-workspace-hack.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } diff --git a/sled-diagnostics/src/lib.rs b/sled-diagnostics/src/lib.rs new file mode 100644 index 0000000000..466c1ec446 --- /dev/null +++ b/sled-diagnostics/src/lib.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diagnostics for an Oxide sled that exposes common support commands. + +use futures::{stream::FuturesUnordered, StreamExt}; + +mod queries; +pub use crate::queries::{ + SledDiagnosticsCmdError, SledDiagnosticsCmdOutput, + SledDiagnosticsCommandHttpOutput, +}; +use queries::*; + +/// List all zones on a sled. +pub async fn zoneadm_info( +) -> Result { + execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await +} + +/// Retrieve various `ipadm` command output for the system. +pub async fn ipadm_info( +) -> Vec> { + [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +/// Retrieve various `dladm` command output for the system. 
+pub async fn dladm_info( +) -> Vec> { + [ + dladm_show_phys(), + dladm_show_ether(), + dladm_show_link(), + dladm_show_vnic(), + dladm_show_linkprop(), + ] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} diff --git a/sled-agent/src/support_bundle/queries.rs b/sled-diagnostics/src/queries.rs similarity index 69% rename from sled-agent/src/support_bundle/queries.rs rename to sled-diagnostics/src/queries.rs index 2313d9e08d..9a66842cb2 100644 --- a/sled-agent/src/support_bundle/queries.rs +++ b/sled-diagnostics/src/queries.rs @@ -1,18 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Wrapper for command execution with timeout. + use std::{process::Command, time::Duration}; -use futures::{stream::FuturesUnordered, StreamExt}; -use illumos_utils::{dladm::DLADM, zone::IPADM, PFEXEC, ZONEADM}; use thiserror::Error; use tokio::io::AsyncReadExt; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +const DLADM: &str = "/usr/sbin/dladm"; +const IPADM: &str = "/usr/sbin/ipadm"; +const PFEXEC: &str = "/usr/bin/pfexec"; +const ZONEADM: &str = "/usr/sbin/zoneadm"; + +pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); -pub trait SupportBundleCommandHttpOutput { +pub trait SledDiagnosticsCommandHttpOutput { fn get_output(self) -> String; } #[derive(Error, Debug)] -pub enum SupportBundleCmdError { +pub enum SledDiagnosticsCmdError { #[error("Failed to duplicate pipe for command [{command}]: {error}")] Dup { command: String, error: std::io::Error }, #[error("Failed to proccess output for command [{command}]: {error}")] @@ -32,13 +41,13 @@ pub enum SupportBundleCmdError { } #[derive(Debug)] -pub struct SupportBundleCmdOutput { +pub struct SledDiagnosticsCmdOutput { pub command: String, pub stdio: 
String, pub exit_status: String, } -impl std::fmt::Display for SupportBundleCmdOutput { +impl std::fmt::Display for SledDiagnosticsCmdOutput { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "Command executed [{}]:", self.command)?; writeln!(f, " ==== stdio ====\n{}", self.stdio)?; @@ -46,8 +55,8 @@ impl std::fmt::Display for SupportBundleCmdOutput { } } -impl SupportBundleCommandHttpOutput - for Result +impl SledDiagnosticsCommandHttpOutput + for Result { fn get_output(self) -> String { match self { @@ -76,16 +85,19 @@ fn command_to_string(command: &Command) -> String { /// and stderr as they occur. async fn execute( cmd: Command, -) -> Result { +) -> Result { let cmd_string = command_to_string(&cmd); let (sender, mut rx) = tokio::net::unix::pipe::pipe().map_err(|e| { - SupportBundleCmdError::Pipe { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Pipe { command: cmd_string.clone(), error: e } })?; let pipe = sender.into_nonblocking_fd().map_err(|e| { - SupportBundleCmdError::OwnedFd { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::OwnedFd { + command: cmd_string.clone(), + error: e, + } })?; let pipe_dup = pipe.try_clone().map_err(|e| { - SupportBundleCmdError::Dup { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Dup { command: cmd_string.clone(), error: e } })?; // TODO MTZ: We may eventually want to reuse some of the process contract @@ -95,9 +107,8 @@ async fn execute( cmd.stdout(pipe); cmd.stderr(pipe_dup); - let mut child = cmd.spawn().map_err(|e| SupportBundleCmdError::Spawn { - command: cmd_string.clone(), - error: e, + let mut child = cmd.spawn().map_err(|e| { + SledDiagnosticsCmdError::Spawn { command: cmd_string.clone(), error: e } })?; // NB: This drop call is load-bearing and prevents a deadlock. 
The command // struct holds onto the write half of the pipe preventing the read side @@ -107,85 +118,88 @@ async fn execute( let mut stdio = String::new(); rx.read_to_string(&mut stdio).await.map_err(|e| { - SupportBundleCmdError::Output { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Output { + command: cmd_string.clone(), + error: e, + } })?; let exit_status = child.wait().await.map(|es| format!("{es}")).map_err(|e| { - SupportBundleCmdError::Wait { + SledDiagnosticsCmdError::Wait { command: cmd_string.clone(), error: e, } })?; - Ok(SupportBundleCmdOutput { command: cmd_string, stdio, exit_status }) + Ok(SledDiagnosticsCmdOutput { command: cmd_string, stdio, exit_status }) } /// Spawn a command that's allowed to execute within a given time limit. -async fn execute_command_with_timeout( +pub async fn execute_command_with_timeout( command: Command, duration: Duration, -) -> Result { +) -> Result { let cmd_string = command_to_string(&command); let tokio_command = execute(command); match tokio::time::timeout(duration, tokio_command).await { Ok(res) => res, - Err(_elapsed) => Err(SupportBundleCmdError::Timeout { + Err(_elapsed) => Err(SledDiagnosticsCmdError::Timeout { command: cmd_string, duration, }), } } -fn zoneadm_list() -> Command { +pub fn zoneadm_list() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(ZONEADM).arg("list").arg("-cip"); cmd } -fn ipadm_show_interface() -> Command { +pub fn ipadm_show_interface() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-if"); cmd } -fn ipadm_show_addr() -> Command { +pub fn ipadm_show_addr() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-addr"); cmd } -fn ipadm_show_prop() -> Command { +pub fn ipadm_show_prop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-prop"); cmd } -fn dladm_show_phys() -> Command { +pub fn 
dladm_show_phys() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).args(["show-phys", "-m"]); cmd } -fn dladm_show_ether() -> Command { +pub fn dladm_show_ether() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-ether"); cmd } -fn dladm_show_link() -> Command { +pub fn dladm_show_link() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-link"); cmd } -fn dladm_show_vnic() -> Command { +pub fn dladm_show_vnic() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-vnic"); cmd } -fn dladm_show_linkprop() -> Command { +pub fn dladm_show_linkprop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-linkprop"); cmd @@ -195,44 +209,6 @@ fn dladm_show_linkprop() -> Command { * Public API */ -/// List all zones on a sled. -pub async fn zoneadm_info( -) -> Result { - execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await -} - -/// Retrieve various `ipadm` command output for the system. -pub async fn ipadm_info( -) -> Vec> { - [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - -/// Retrieve various `dladm` command output for the system. -pub async fn dladm_info( -) -> Vec> { - [ - dladm_show_phys(), - dladm_show_ether(), - dladm_show_link(), - dladm_show_vnic(), - dladm_show_linkprop(), - ] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - #[cfg(test)] mod test { use super::*; @@ -246,7 +222,7 @@ mod test { match execute_command_with_timeout(command, Duration::from_millis(500)) .await { - Err(SupportBundleCmdError::Timeout { .. 
}) => (), + Err(SledDiagnosticsCmdError::Timeout { .. }) => (), _ => panic!("command should have timed out"), } } @@ -267,7 +243,7 @@ mod test { #[tokio::test] async fn test_command_stderr_is_correct() { let mut command = Command::new("bash"); - command.env_clear().args(&["-c", "echo oxide computer > /dev/stderr"]); + command.env_clear().args(["-c", "echo oxide computer > /dev/stderr"]); let res = execute_command_with_timeout(command, Duration::from_secs(5)) .await @@ -279,7 +255,7 @@ mod test { #[tokio::test] async fn test_command_stdout_stderr_are_interleaved() { let mut command = Command::new("bash"); - command.env_clear().args(&[ + command.env_clear().args([ "-c", "echo one > /dev/stdout \ && echo two > /dev/stderr \ diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 2439c52aa7..d47a89e0ae 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -28,6 +28,7 @@ sled-hardware.workspace = true slog.workspace = true thiserror.workspace = true tokio.workspace = true +tokio-stream.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index e90a4c0092..717595ad9e 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -269,6 +269,7 @@ pub(crate) async fn ensure_zpool_has_datasets( Some(encryption_details), None, None, + None, ); keyfile.zero_and_unlink().await.map_err(|error| { @@ -331,6 +332,7 @@ pub(crate) async fn ensure_zpool_has_datasets( encryption_details, size_details, None, + None, )?; if dataset.wipe { diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 351dd7f353..f00e35e654 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -77,6 +77,9 @@ pub enum Error { #[error("Not ready to manage U.2s (key manager is not ready)")] KeyManagerNotReady, + #[error("Physical disk configuration changed for the same generation number: {generation}")] + PhysicalDiskConfigurationChanged { 
generation: Generation }, + #[error("Physical disk configuration out-of-date (asked for {requested}, but latest is {current})")] PhysicalDiskConfigurationOutdated { requested: Generation, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index a760285d3f..b1832ca92b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -14,7 +14,9 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use debug_ignore::DebugIgnore; use futures::future::FutureExt; -use illumos_utils::zfs::{DatasetProperties, Mountpoint, Zfs}; +use futures::Stream; +use futures::StreamExt; +use illumos_utils::zfs::{DatasetProperties, Mountpoint, WhichDatasets, Zfs}; use illumos_utils::zpool::{ZpoolName, ZPOOL_MOUNTPOINT_ROOT}; use key_manager::StorageKeyRequester; use omicron_common::disk::{ @@ -26,6 +28,7 @@ use omicron_common::ledger::Ledger; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use slog::{error, info, o, warn, Logger}; +use std::collections::BTreeMap; use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; @@ -929,21 +932,107 @@ impl StorageManager { // includes details about all possible errors that may occur on // a per-dataset granularity. async fn datasets_ensure_internal( - &mut self, + &self, log: &Logger, config: &DatasetsConfig, ) -> DatasetsManagementResult { - let mut status = vec![]; - for dataset in config.datasets.values() { - status.push(self.dataset_ensure_internal(log, dataset).await); - } + // Gather properties about these datasets, if they exist. + // + // This pre-fetching lets us avoid individually querying them later. 
+ let old_datasets = Zfs::get_dataset_properties( + config + .datasets + .values() + .map(|config| config.name.full_name()) + .collect::>() + .as_slice(), + WhichDatasets::SelfOnly, + ) + .unwrap_or_default() + .into_iter() + .map(|props| (props.name.clone(), props)) + .collect::>(); + + let futures = config.datasets.values().map(|dataset| async { + self.dataset_ensure_internal( + log, + dataset, + old_datasets.get(&dataset.name.full_name()), + ) + .await + }); + + // This "Box::pin" is a workaround for: https://github.com/rust-lang/rust/issues/64552 + // + // Ideally, we would just use: + // + // ``` + // let status: Vec<_> = futures::stream::iter(futures) + // .buffered(...) + // .collect() + // .await; + // ``` + const DATASET_ENSURE_CONCURRENCY_LIMIT: usize = 16; + let results: std::pin::Pin + Send>> = Box::pin( + futures::stream::iter(futures) + .buffered(DATASET_ENSURE_CONCURRENCY_LIMIT), + ); + + let status: Vec = results.collect().await; + DatasetsManagementResult { status } } + fn should_skip_dataset_ensure( + log: &Logger, + config: &DatasetConfig, + old_dataset: Option<&DatasetProperties>, + ) -> Result { + let Some(old_dataset) = old_dataset else { + info!(log, "This dataset did not exist"); + return Ok(false); + }; + + let Some(old_id) = old_dataset.id else { + info!(log, "Old properties missing UUID"); + return Ok(false); + }; + + if old_id != config.id { + return Err(Error::UuidMismatch { + name: config.name.full_name(), + old: old_id.into_untyped_uuid(), + new: config.id.into_untyped_uuid(), + }); + } + + let old_props = match SharedDatasetConfig::try_from(old_dataset) { + Ok(old_props) => old_props, + Err(err) => { + warn!(log, "Failed to parse old properties"; "err" => #%err); + return Ok(false); + } + }; + + info!(log, "Parsed old dataset properties"; "props" => ?old_props); + if old_props != config.inner { + info!( + log, + "Dataset properties changed"; + "old_props" => ?old_props, + "requested_props" => ?config.inner, + ); + return Ok(false); + 
} + info!(log, "No changes necessary, returning early"); + return Ok(true); + } + async fn dataset_ensure_internal( - &mut self, + &self, log: &Logger, config: &DatasetConfig, + old_dataset: Option<&DatasetProperties>, ) -> DatasetManagementStatus { let log = log.new(o!("name" => config.name.full_name())); info!(log, "Ensuring dataset"); @@ -952,6 +1041,15 @@ impl StorageManager { err: None, }; + match Self::should_skip_dataset_ensure(&log, config, old_dataset) { + Ok(true) => return status, + Ok(false) => (), + Err(err) => { + status.err = Some(err.to_string()); + return status; + } + } + let mountpoint_root = &self.resources.disks().mount_config().root; let mountpoint_path = config.name.mountpoint(mountpoint_root); let details = DatasetCreationDetails { @@ -961,9 +1059,9 @@ impl StorageManager { }; if let Err(err) = self - .ensure_dataset_with_id( + .ensure_dataset( config.name.pool(), - config.id, + Some(config.id), &config.inner, &details, ) @@ -1017,8 +1115,11 @@ impl StorageManager { ]; info!(log, "Listing datasets within zpool"; "zpool" => zpool.to_string()); - Zfs::get_dataset_properties(datasets_of_interest.as_slice()) - .map_err(Error::Other) + Zfs::get_dataset_properties( + datasets_of_interest.as_slice(), + WhichDatasets::SelfAndChildren, + ) + .map_err(Error::Other) } // Ensures that a dataset exists, nested somewhere arbitrary within @@ -1039,8 +1140,13 @@ impl StorageManager { full_name: config.name.full_name(), }; - self.ensure_dataset(config.name.root.pool(), &config.inner, &details) - .await?; + self.ensure_dataset( + config.name.root.pool(), + None, + &config.inner, + &details, + ) + .await?; Ok(()) } @@ -1073,15 +1179,18 @@ impl StorageManager { info!(log, "Listing nested datasets"); let full_name = name.full_name(); - let properties = - Zfs::get_dataset_properties(&[full_name]).map_err(|e| { - warn!( - log, - "Failed to access nested dataset"; - "name" => ?name - ); - crate::dataset::DatasetError::Other(e) - })?; + let properties = 
Zfs::get_dataset_properties( + &[full_name], + WhichDatasets::SelfAndChildren, + ) + .map_err(|e| { + warn!( + log, + "Failed to access nested dataset"; + "name" => ?name + ); + crate::dataset::DatasetError::Other(e) + })?; let root_path = name.root.full_name(); Ok(properties @@ -1159,12 +1268,29 @@ impl StorageManager { requested: config.generation, current: ledger_data.generation, }); - } - - // TODO: If the generation is equal, check that the values are - // also equal. + } else if config.generation == ledger_data.generation { + info!( + log, + "Requested generation number matches prior request", + ); - info!(log, "Request looks newer than prior requests"); + if ledger_data != &config { + error!( + log, + "Requested configuration changed (with the same generation)"; + "generation" => ?config.generation + ); + return Err(Error::PhysicalDiskConfigurationChanged { + generation: config.generation, + }); + } + info!( + log, + "Request looks identical to last request, re-sending" + ); + } else { + info!(log, "Request looks newer than prior requests"); + } ledger } None => { @@ -1291,40 +1417,13 @@ impl StorageManager { } } - // Invokes [Self::ensure_dataset] and also ensures the dataset has an - // expected UUID as a ZFS property. - async fn ensure_dataset_with_id( - &mut self, - zpool: &ZpoolName, - id: DatasetUuid, - config: &SharedDatasetConfig, - details: &DatasetCreationDetails, - ) -> Result<(), Error> { - self.ensure_dataset(zpool, config, details).await?; - - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&details.full_name, "uuid") { - if let Ok(found_id) = id_str.parse::() { - if found_id != id { - return Err(Error::UuidMismatch { - name: details.full_name.clone(), - old: found_id.into_untyped_uuid(), - new: id.into_untyped_uuid(), - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value(&details.full_name, "uuid", &id.to_string())?; - Ok(()) - } - // Ensures a dataset exists within a zpool. 
// // Confirms that the zpool exists and is managed by this sled. async fn ensure_dataset( - &mut self, + &self, zpool: &ZpoolName, + dataset_id: Option, config: &SharedDatasetConfig, details: &DatasetCreationDetails, ) -> Result<(), Error> { @@ -1365,6 +1464,7 @@ impl StorageManager { do_format, encryption_details, size_details, + dataset_id, None, )?; @@ -1401,6 +1501,7 @@ impl StorageManager { do_format, encryption_details, size_details, + Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), None, )?; // Ensure the dataset has a usable UUID. @@ -1725,7 +1826,7 @@ mod tests { ); let mut harness = StorageManagerTestHarness::new(&logctx.log).await; - // Queue up a disks, as we haven't told the `StorageManager` that + // Queue up a disk, as we haven't told the `StorageManager` that // the `KeyManager` is ready yet. let raw_disks = harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; @@ -1996,6 +2097,111 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn ensure_many_datasets() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_many_datasets"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add U.2s and an M.2, adopt them into the "control plane" + // for usage. 
+ harness.handle().key_manager_ready().await; + let raw_disks = harness + .add_vdevs(&[ + "u2_0.vdev", + "u2_1.vdev", + "u2_2.vdev", + "u2_3.vdev", + "u2_4.vdev", + "u2_5.vdev", + "u2_6.vdev", + "u2_7.vdev", + "u2_8.vdev", + "u2_9.vdev", + "m2_helping.vdev", + ]) + .await; + let config = harness.make_config(1, &raw_disks); + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create datasets on the newly formatted U.2s + let mut datasets = BTreeMap::new(); + for i in 0..10 { + let zpool_name = ZpoolName::new_external(config.disks[i].pool_id); + + let id = DatasetUuid::new_v4(); + let name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + + let id = DatasetUuid::new_v4(); + let name = DatasetName::new(zpool_name.clone(), DatasetKind::Debug); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + + let id = DatasetUuid::new_v4(); + let name = DatasetName::new( + zpool_name.clone(), + DatasetKind::TransientZoneRoot, + ); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + } + // "Generation = 1" is reserved as "no requests seen yet", so we jump + // past it. 
+ let generation = Generation::new().next(); + let config = DatasetsConfig { generation, datasets }; + + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // List datasets, expect to see what we just created + let observed_config = + harness.handle().datasets_config_list().await.unwrap(); + assert_eq!(config, observed_config); + + // Calling "datasets_ensure" with the same input should succeed. + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn nested_dataset() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); diff --git a/smf/chrony-setup/manifest.xml b/smf/chrony-setup/manifest.xml index 7882693495..8127ca80df 100644 --- a/smf/chrony-setup/manifest.xml +++ b/smf/chrony-setup/manifest.xml @@ -14,7 +14,7 @@ - + diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml index 67ddbe48b8..6ed18b1bca 100644 --- a/smf/cockroachdb/manifest.xml +++ b/smf/cockroachdb/manifest.xml @@ -19,7 +19,7 @@ - + diff --git a/smf/ntp/manifest/manifest.xml b/smf/ntp/manifest/manifest.xml index 5b45406a47..8fa1b4ebc4 100644 --- a/smf/ntp/manifest/manifest.xml +++ b/smf/ntp/manifest/manifest.xml @@ -66,7 +66,7 @@ - + diff --git a/tools/console_version b/tools/console_version index 08078c264e..85ca41f755 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="927c8b63a6f97c230cd8766a80fa1cfef6429eb4" -SHA2="96550b6e485aaee1c6ced00a4a1aeec86267c99fc79a4b2b253141cf0222d346" +COMMIT="c1ebd8d9acae4ff7a09b2517265fba52ebdfe82e" +SHA2="840dbfda1c0def66212e7602d7be6e8acf1b26ba218f10ce3e627df49f5ce9e2" diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/nexus/aggregate-query-latency.d similarity index 100% rename from tools/dtrace/aggregate-query-latency.d rename to tools/dtrace/nexus/aggregate-query-latency.d 
diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/nexus/slowest-queries.d similarity index 99% rename from tools/dtrace/slowest-queries.d rename to tools/dtrace/nexus/slowest-queries.d index 76e22de22f..08b40d4d79 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/nexus/slowest-queries.d @@ -35,7 +35,7 @@ diesel_db$target:::query-done query[this->conn_id] = NULL; } -tick-5s +tick-10s { printf("\n%Y\n", walltimestamp); trunc(@, 5); diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/nexus/trace-db-queries.d similarity index 100% rename from tools/dtrace/trace-db-queries.d rename to tools/dtrace/nexus/trace-db-queries.d diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d new file mode 100755 index 0000000000..baf7818f72 --- /dev/null +++ b/tools/dtrace/nexus/trace-transactions.d @@ -0,0 +1,67 @@ +#!/usr/sbin/dtrace -qs + +/* Trace all transactions to the control plane database with their latency */ + +dtrace:::BEGIN +{ + printf("Tracing all database transactions for nexus PID %d, use Ctrl-C to exit\n", $target); +} + +/* + * Record the start and number of statements for each transaction. + * + * Note that we're using the Nexus-provided transaction start / done probes. + * This lets us associate the other data we might collect (number of statements, + * ustacks, etc) with the Nexus code itself. Although there are transaction + * start / done probes in `diesel-dtrace`, the existing way we run transactions + * with `async-bb8-diesel` involves spawning a future to run the transactions on + * a blocking thread-pool. That spawning makes it impossible to associate the + * context in which the `diesel-dtrace` probes fire with the Nexus code that + * actually spawned the transaction itself. 
+ */ +nexus_db_queries$target:::transaction-start +{ + this->key = copyinstr(arg0); + transaction_names[this->key] = copyinstr(arg1); + ts[this->key] = timestamp; + n_statements[this->key] = 0; + printf( + "Started transaction '%s' on conn %s\n", + transaction_names[this->key], + json(this->key, "ok") + ); +} + +/* + * When a query runs in the context of a transaction (on the same connection), + * bump the statement counter. + */ +diesel_db$target:::query-start +/ts[copyinstr(arg1)]/ +{ + n_statements[copyinstr(arg1)] += 1 +} + +/* + * As transactions complete, print the number of statements we ran and the + * duration. + */ +nexus_db_queries$target:::transaction-done +/ts[copyinstr(arg0)]/ +{ + this->key = copyinstr(arg0); + this->conn_id = json(this->key, "ok"); + this->latency = (timestamp - ts[this->key]) / 1000; + this->n_statements = n_statements[this->key]; + printf( + "%s %d statement(s) in transaction '%s' on connection %s (%d us)\n", + arg2 ? "COMMIT" : "ROLLBACK", + n_statements[this->key], + transaction_names[this->key], + this->conn_id, + this->latency + ); + ts[this->key] = 0; + n_statements[this->key] = 0; + transaction_names[this->key] = 0; +} diff --git a/tools/permslip_production b/tools/permslip_production index ce73c3c6da..5c84faad8f 100644 --- a/tools/permslip_production +++ b/tools/permslip_production @@ -1,2 +1,2 @@ a72a5f931bcfd3d931df407fbbba6d851165c4637adf39568a94f755966b6c9c manifest-oxide-rot-1-v1.0.30.toml -610ebce44b1fb622eb56591534fb2569340fdba9b5ba62ca1b02f0b2d2e973dc manifest-bootleby-v1.3.1.toml +9d5faa910e8e8e7aaeb74df972badcdf371615d4bbabdb9ddccf4d0d32517f7d manifest-bootleby-v1.3.3.toml diff --git a/tools/permslip_staging b/tools/permslip_staging index 9c413ddc6e..146a9d615e 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,5 +1,5 @@ c33a381e716127e05da928c39b3a4d5f5278e43f526ff8c5c817708c378a5c87 manifest-gimlet-v1.0.32.toml 2cda350adba506b3ab67813db932d07c7a7836b5731d5351e57d49302f41dbf4 
manifest-oxide-rot-1-v1.0.30.toml 70de21757b47e3e6c15d4c8701efe80e8cc90125afdd2883ff160045aed20956 manifest-psc-v1.0.31.toml -499ee08eb77ed3600564239f3f3efdcf79f122ffc4b93b168790c24358ae1e3c manifest-sidecar-v1.0.31.toml -6f8459afe22c27d5920356878e4d8d639464f39a15ce7b5b040c2d908d52a570 manifest-bootleby-v1.3.1.toml +222ae9df38699037b75e98eb7a8b441f6cda958b8a79e57e72e410b054f1d8eb manifest-sidecar-v1.0.32.toml +14c20540fe785dea65ef03446d5c4665a5f3d9106eb176691b35646faa54f61f manifest-bootleby-v1.3.3.toml diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b0bf8858d5..678170b25e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -62,15 +62,15 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.15.0" } +hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.5.0", features = ["full"] } indexmap = { version = "2.6.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.162", features = ["extra_traits"] } @@ -101,7 +101,7 @@ regex-automata = { version = "0.4.8", default-features = false, 
features = ["dfa regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.9", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } rsa = { version = "0.9.6", features = ["serde", "sha2"] } -rustls = { version = "0.23.14", features = ["ring"] } +rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } @@ -182,15 +182,15 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.15.0" } +hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.5.0", features = ["full"] } indexmap = { version = "2.6.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.162", features = ["extra_traits"] } @@ -221,7 +221,7 @@ regex-automata = { version = "0.4.8", default-features = false, features = ["dfa regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.9", features = ["blocking", "cookies", "json", 
"rustls-tls", "stream"] } rsa = { version = "0.9.6", features = ["serde", "sha2"] } -rustls = { version = "0.23.14", features = ["ring"] } +rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" }