diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs
index 73ccec996cb..47c567dbe0e 100644
--- a/common/src/nexus_config.rs
+++ b/common/src/nexus_config.rs
@@ -372,6 +372,8 @@ pub struct PackageConfig {
     pub dendrite: HashMap<SwitchLocation, DpdConfig>,
     /// Background task configuration
     pub background_tasks: BackgroundTaskConfig,
+    /// Default Crucible region allocation strategy
+    pub default_region_allocation_strategy: RegionAllocationStrategy,
 }
 
 #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
@@ -594,6 +596,9 @@ mod test {
             dns_external.period_secs_propagation = 7
             dns_external.max_concurrent_server_updates = 8
             external_endpoints.period_secs = 9
+            [default_region_allocation_strategy]
+            type = "random"
+            seed = 0
             "##,
         )
         .unwrap();
@@ -677,6 +682,10 @@ mod test {
                             period_secs: Duration::from_secs(9),
                         }
                     },
+                    default_region_allocation_strategy:
+                        crate::nexus_config::RegionAllocationStrategy::Random {
+                            seed: Some(0)
+                        }
                 },
             }
         );
@@ -724,6 +733,8 @@ mod test {
             dns_external.period_secs_propagation = 7
             dns_external.max_concurrent_server_updates = 8
             external_endpoints.period_secs = 9
+            [default_region_allocation_strategy]
+            type = "random"
             "##,
         )
         .unwrap();
@@ -894,3 +905,30 @@ mod test {
         );
     }
 }
+
+/// Defines a strategy for choosing what physical disks to use when allocating
+/// new crucible regions.
+///
+/// NOTE: More strategies can - and should! - be added.
+///
+/// See <https://rfd.shared.oxide.computer/rfd/0205> for more discussion.
+///
+/// Longer-term, we should consider:
+/// - Storage size + remaining free space
+/// - Sled placement of datasets
+/// - What sort of loads we'd like to create (even split across all disks
+///   may not be preferable, especially if maintenance is expected)
+// Serde form is internally tagged: `type = "random"` plus optional `seed`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum RegionAllocationStrategy {
+    /// Choose disks pseudo-randomly. An optional seed may be provided to make
+    /// the ordering deterministic, otherwise the current time in nanoseconds
+    /// will be used. Ordering is based on sorting the output of `md5(UUID of
+    /// candidate dataset + seed)`. The seed does not need to come from a
+    /// cryptographically secure source.
+    Random { seed: Option<u64> },
+
+    /// Like [`Self::Random`], but each region is allocated on its own sled.
+    RandomWithDistinctSleds { seed: Option<u64> },
+}
diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs
index 43fac3c9a6c..2025e79fb88 100644
--- a/nexus/db-model/src/queries/region_allocation.rs
+++ b/nexus/db-model/src/queries/region_allocation.rs
@@ -47,6 +47,13 @@ table! {
     }
 }
 
+table! {
+    shuffled_candidate_datasets {
+        id -> Uuid,
+        pool_id -> Uuid,
+    }
+}
+
 table! {
     candidate_regions {
         id -> Uuid,
@@ -89,6 +96,19 @@ table! {
     }
 }
 
+table! {
+    one_zpool_per_sled (pool_id) {
+        pool_id -> Uuid
+    }
+}
+
+table! {
+    one_dataset_per_zpool {
+        id -> Uuid,
+        pool_id -> Uuid
+    }
+}
+
 table! {
     inserted_regions {
         id -> Uuid,
@@ -141,6 +161,7 @@ diesel::allow_tables_to_appear_in_same_query!(
 );
 
 diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,);
+diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,);
 
 diesel::allow_tables_to_appear_in_same_query!(
     inserted_regions,
@@ -149,6 +170,7 @@ diesel::allow_tables_to_appear_in_same_query!(
 
 diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,);
 diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,);
+diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset);
 
 // == Needed for random region allocation ==
 
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 13fb132abb6..c3788aec8ef 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -286,43 +286,6 @@ pub enum UpdatePrecondition<T> {
     Value(T),
 }
 
-/// Defines a strategy for choosing what physical disks to use when allocating
-/// new crucible regions.
-///
-/// NOTE: More strategies can - and should! - be added.
-///
-/// See <https://rfd.shared.oxide.computer/rfd/0205> for a more
-/// complete discussion.
-///
-/// Longer-term, we should consider:
-/// - Storage size + remaining free space
-/// - Sled placement of datasets
-/// - What sort of loads we'd like to create (even split across all disks
-///   may not be preferable, especially if maintenance is expected)
-#[derive(Debug, Clone)]
-pub enum RegionAllocationStrategy {
-    /// Choose disks that have the least data usage in the rack. This strategy
-    /// can lead to bad failure states wherein the disks with the least usage
-    /// have the least usage because regions on them are actually failing in
-    /// some way. Further retried allocations will then continue to try to
-    /// allocate onto the disk, perpetuating the problem. Currently this
-    /// strategy only exists so we can test that using different allocation
-    /// strategies actually results in different allocation patterns, hence the
-    /// `#[cfg(test)]`.
-    ///
-    /// See https://github.com/oxidecomputer/omicron/issues/3416 for more on the
-    /// failure-states associated with this strategy
-    #[cfg(test)]
-    LeastUsedDisk,
-
-    /// Choose disks pseudo-randomly. An optional seed may be provided to make
-    /// the ordering deterministic, otherwise the current time in nanoseconds
-    /// will be used. Ordering is based on sorting the output of `md5(UUID of
-    /// candidate dataset + seed)`. The seed does not need to come from a
-    /// cryptographically secure source.
-    Random(Option<u128>),
-}
-
 /// Constructs a DataStore for use in test suites that has preloaded the
 /// built-in users, roles, and role assignments that are needed for basic
 /// operation
@@ -400,7 +363,9 @@ mod test {
     use omicron_common::api::external::{
         self, ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name,
     };
+    use omicron_common::nexus_config::RegionAllocationStrategy;
     use omicron_test_utils::dev;
+    use std::collections::HashMap;
     use std::collections::HashSet;
     use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6};
     use std::num::NonZeroU32;
@@ -683,12 +648,18 @@ mod test {
         }
     }
 
+    struct TestDataset {
+        sled_id: Uuid,
+        dataset_id: Uuid,
+    }
+
     async fn create_test_datasets_for_region_allocation(
         opctx: &OpContext,
         datastore: Arc<DataStore>,
-    ) -> Vec<Uuid> {
+        number_of_sleds: usize,
+    ) -> Vec<TestDataset> {
         // Create sleds...
-        let sled_ids: Vec<Uuid> = stream::iter(0..REGION_REDUNDANCY_THRESHOLD)
+        let sled_ids: Vec<Uuid> = stream::iter(0..number_of_sleds)
             .then(|_| create_test_sled(&datastore))
             .collect()
             .await;
@@ -719,48 +690,69 @@ mod test {
             .collect()
             .await;
 
+        #[derive(Copy, Clone)]
+        struct Zpool {
+            sled_id: Uuid,
+            pool_id: Uuid,
+        }
+
         // 1 pool per disk
-        let zpool_ids: Vec<Uuid> = stream::iter(physical_disks)
+        let zpools: Vec<Zpool> = stream::iter(physical_disks)
             .then(|disk| {
-                create_test_zpool(&datastore, disk.sled_id, disk.disk_id)
+                let pool_id_future =
+                    create_test_zpool(&datastore, disk.sled_id, disk.disk_id);
+                async move {
+                    let pool_id = pool_id_future.await;
+                    Zpool { sled_id: disk.sled_id, pool_id }
+                }
             })
             .collect()
             .await;
 
         let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
 
-        // 1 dataset per zpool
-        let dataset_ids: Vec<Uuid> = stream::iter(zpool_ids)
-            .then(|zpool_id| {
-                let id = Uuid::new_v4();
-                let dataset = Dataset::new(
-                    id,
-                    zpool_id,
-                    bogus_addr,
-                    DatasetKind::Crucible,
-                );
-                let datastore = datastore.clone();
-                async move {
-                    datastore.dataset_upsert(dataset).await.unwrap();
-                    id
-                }
+        let datasets: Vec<TestDataset> = stream::iter(zpools)
+            .map(|zpool| {
+                // 3 datasets per zpool, to test that pools are distinct
+                let zpool_iter: Vec<Zpool> = (0..3).map(|_| zpool).collect();
+                stream::iter(zpool_iter).then(|zpool| {
+                    let id = Uuid::new_v4();
+                    let dataset = Dataset::new(
+                        id,
+                        zpool.pool_id,
+                        bogus_addr,
+                        DatasetKind::Crucible,
+                    );
+
+                    let datastore = datastore.clone();
+                    async move {
+                        datastore.dataset_upsert(dataset).await.unwrap();
+
+                        TestDataset { sled_id: zpool.sled_id, dataset_id: id }
+                    }
+                })
             })
+            .flatten()
             .collect()
             .await;
 
-        dataset_ids
+        datasets
     }
 
     #[tokio::test]
     /// Note that this test is currently non-deterministic. It can be made
     /// deterministic by generating deterministic *dataset* Uuids. The sled and
     /// pool IDs should not matter.
-    async fn test_region_allocation() {
+    async fn test_region_allocation_strat_random() {
         let logctx = dev::test_setup_log("test_region_allocation");
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
-        create_test_datasets_for_region_allocation(&opctx, datastore.clone())
-            .await;
+        create_test_datasets_for_region_allocation(
+            &opctx,
+            datastore.clone(),
+            REGION_REDUNDANCY_THRESHOLD,
+        )
+        .await;
 
         // Allocate regions from the datasets for this disk. Do it a few times
         // for good measure.
@@ -778,7 +770,9 @@ mod test {
                     volume_id,
                     &params.disk_source,
                     params.size,
-                    &RegionAllocationStrategy::Random(Some(alloc_seed as u128)),
+                    &RegionAllocationStrategy::Random {
+                        seed: Some(alloc_seed),
+                    },
                 )
                 .await
                 .unwrap();
@@ -788,8 +782,79 @@ mod test {
             let mut disk_datasets = HashSet::new();
             let mut disk_zpools = HashSet::new();
 
-            // TODO: When allocation chooses 3 distinct sleds, uncomment this.
-            // let mut disk1_sleds = HashSet::new();
+            for (dataset, region) in dataset_and_regions {
+                // Must be 3 unique datasets
+                assert!(disk_datasets.insert(dataset.id()));
+
+                // Must be 3 unique zpools
+                assert!(disk_zpools.insert(dataset.pool_id));
+
+                assert_eq!(volume_id, region.volume_id());
+                assert_eq!(ByteCount::from(4096), region.block_size());
+                let (_, extent_count) = DataStore::get_crucible_allocation(
+                    &BlockSize::AdvancedFormat,
+                    params.size,
+                );
+                assert_eq!(extent_count, region.extent_count());
+            }
+        }
+
+        let _ = db.cleanup().await;
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    /// Test the [`RegionAllocationStrategy::RandomWithDistinctSleds`] strategy.
+    /// It should always pick datasets where no two datasets are on the same
+    /// zpool and no two zpools are on the same sled.
+    async fn test_region_allocation_strat_random_with_distinct_sleds() {
+        let logctx = dev::test_setup_log(
+            "test_region_allocation_strat_random_with_distinct_sleds",
+        );
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        // Create a rack with exactly as many sleds as an allocation needs.
+        let test_datasets = create_test_datasets_for_region_allocation(
+            &opctx,
+            datastore.clone(),
+            REGION_REDUNDANCY_THRESHOLD,
+        )
+        .await;
+
+        // The query returns no sled IDs: build a dataset ID -> sled ID map.
+        let sled_id_map: HashMap<Uuid, Uuid> = test_datasets
+            .into_iter()
+            .map(|test_dataset| (test_dataset.dataset_id, test_dataset.sled_id))
+            .collect();
+
+        // Allocate regions a few times, with a different seed each time.
+        for alloc_seed in 0..10 {
+            let params = create_test_disk_create_params(
+                &format!("disk{}", alloc_seed),
+                ByteCount::from_mebibytes_u32(1),
+            );
+            let volume_id = Uuid::new_v4();
+
+            let expected_region_count = REGION_REDUNDANCY_THRESHOLD;
+            let dataset_and_regions = datastore
+                .region_allocate(
+                    &opctx,
+                    volume_id,
+                    &params.disk_source,
+                    params.size,
+                    &RegionAllocationStrategy::RandomWithDistinctSleds {
+                        seed: Some(alloc_seed),
+                    },
+                )
+                .await
+                .unwrap();
+
+            // Verify the allocation.
+            assert_eq!(expected_region_count, dataset_and_regions.len());
+            let mut disk_datasets = HashSet::new();
+            let mut disk_zpools = HashSet::new();
+            let mut disk_sleds = HashSet::new();
             for (dataset, region) in dataset_and_regions {
                 // Must be 3 unique datasets
                 assert!(disk_datasets.insert(dataset.id()));
@@ -798,8 +863,8 @@ mod test {
                 assert!(disk_zpools.insert(dataset.pool_id));
 
                 // Must be 3 unique sleds
-                // TODO: When allocation chooses 3 distinct sleds, uncomment this.
-                // assert!(disk1_sleds.insert(Err(dataset)));
+                let sled_id = sled_id_map.get(&dataset.id()).unwrap();
+                assert!(disk_sleds.insert(*sled_id));
 
                 assert_eq!(volume_id, region.volume_id());
                 assert_eq!(ByteCount::from(4096), region.block_size());
@@ -815,14 +880,70 @@ mod test {
         logctx.cleanup_successful();
     }
 
+    #[tokio::test]
+    /// Ensure the [`RegionAllocationStrategy::RandomWithDistinctSleds`]
+    /// strategy fails when there aren't enough distinct sleds.
+    async fn test_region_allocation_strat_random_with_distinct_sleds_fails() {
+        let logctx = dev::test_setup_log(
+            "test_region_allocation_strat_random_with_distinct_sleds_fails",
+        );
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        // Create one sled fewer than an allocation on distinct sleds requires.
+        create_test_datasets_for_region_allocation(
+            &opctx,
+            datastore.clone(),
+            REGION_REDUNDANCY_THRESHOLD - 1,
+        )
+        .await;
+
+        // Every attempt must fail, whichever seed is used.
+        for alloc_seed in 0..10 {
+            let params = create_test_disk_create_params(
+                &format!("disk{}", alloc_seed),
+                ByteCount::from_mebibytes_u32(1),
+            );
+            let volume_id = Uuid::new_v4();
+
+            let err = datastore
+                .region_allocate(
+                    &opctx,
+                    volume_id,
+                    &params.disk_source,
+                    params.size,
+                    &RegionAllocationStrategy::RandomWithDistinctSleds {
+                        seed: Some(alloc_seed),
+                    },
+                )
+                .await
+                .unwrap_err();
+
+            let expected = "Not enough zpool space to allocate disks";
+            assert!(
+                err.to_string().contains(expected),
+                "Saw error: \'{err}\', but expected \'{expected}\'"
+            );
+
+            assert!(matches!(err, Error::ServiceUnavailable { .. }));
+        }
+
+        let _ = db.cleanup().await;
+        logctx.cleanup_successful();
+    }
+
     #[tokio::test]
     async fn test_region_allocation_is_idempotent() {
         let logctx =
             dev::test_setup_log("test_region_allocation_is_idempotent");
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
-        create_test_datasets_for_region_allocation(&opctx, datastore.clone())
-            .await;
+        create_test_datasets_for_region_allocation(
+            &opctx,
+            datastore.clone(),
+            REGION_REDUNDANCY_THRESHOLD,
+        )
+        .await;
 
         // Allocate regions from the datasets for this volume.
         let params = create_test_disk_create_params(
@@ -836,7 +957,7 @@ mod test {
                 volume_id,
                 &params.disk_source,
                 params.size,
-                &RegionAllocationStrategy::Random(Some(0)),
+                &RegionAllocationStrategy::Random { seed: Some(0) },
             )
             .await
             .unwrap();
@@ -849,7 +970,7 @@ mod test {
                 volume_id,
                 &params.disk_source,
                 params.size,
-                &RegionAllocationStrategy::Random(Some(1)),
+                &RegionAllocationStrategy::Random { seed: Some(1) },
             )
             .await
             .unwrap();
@@ -938,7 +1059,7 @@ mod test {
                 volume1_id,
                 &params.disk_source,
                 params.size,
-                &RegionAllocationStrategy::Random(Some(0)),
+                &RegionAllocationStrategy::Random { seed: Some(0) },
             )
             .await
             .unwrap_err();
@@ -962,8 +1083,12 @@ mod test {
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
 
-        create_test_datasets_for_region_allocation(&opctx, datastore.clone())
-            .await;
+        create_test_datasets_for_region_allocation(
+            &opctx,
+            datastore.clone(),
+            REGION_REDUNDANCY_THRESHOLD,
+        )
+        .await;
 
         let disk_size = test_zpool_size();
         let alloc_size = ByteCount::try_from(disk_size.to_bytes() * 2).unwrap();
@@ -976,7 +1101,7 @@ mod test {
                 volume1_id,
                 &params.disk_source,
                 params.size,
-                &RegionAllocationStrategy::Random(Some(0)),
+                &RegionAllocationStrategy::Random { seed: Some(0) },
             )
             .await
             .is_err());
diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs
index 6bfea9085d0..a26442280d5 100644
--- a/nexus/db-queries/src/db/datastore/region.rs
+++ b/nexus/db-queries/src/db/datastore/region.rs
@@ -5,7 +5,6 @@
 //! [`DataStore`] methods on [`Region`]s.
 
 use super::DataStore;
-use super::RegionAllocationStrategy;
 use super::RunnableQuery;
 use crate::context::OpContext;
 use crate::db;
@@ -23,6 +22,7 @@ use omicron_common::api::external;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
 use omicron_common::backoff::{self, BackoffError};
+use omicron_common::nexus_config::RegionAllocationStrategy;
 use slog::Logger;
 use uuid::Uuid;
 
diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs
index 674a525c5c8..0922c533dd4 100644
--- a/nexus/db-queries/src/db/queries/region_allocation.rs
+++ b/nexus/db-queries/src/db/queries/region_allocation.rs
@@ -6,7 +6,6 @@
 
 use crate::db::alias::ExpressionAlias;
 use crate::db::cast_uuid_as_bytea::CastUuidToBytea;
-use crate::db::datastore::RegionAllocationStrategy;
 use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD;
 use crate::db::model::{Dataset, DatasetKind, Region};
 use crate::db::pool::DbConnection;
@@ -24,10 +23,11 @@ use diesel::{
 use nexus_db_model::queries::region_allocation::{
     candidate_datasets, candidate_regions, candidate_zpools, cockroach_md5,
     do_insert, inserted_regions, old_regions, old_zpool_usage,
-    proposed_dataset_changes, updated_datasets,
+    proposed_dataset_changes, shuffled_candidate_datasets, updated_datasets,
 };
 use nexus_db_model::schema;
 use omicron_common::api::external;
+use omicron_common::nexus_config::RegionAllocationStrategy;
 
 const NOT_ENOUGH_DATASETS_SENTINEL: &'static str = "Not enough datasets";
 const NOT_ENOUGH_ZPOOL_SPACE_SENTINEL: &'static str = "Not enough space";
@@ -91,6 +91,8 @@ impl OldRegions {
 /// This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to
 /// determine during dataset provisioning which devices should be considered for
 /// usage as Crucible storage.
+///
+/// We select only one dataset from each zpool.
 #[derive(Subquery, QueryId)]
 #[subquery(name = candidate_datasets)]
 struct CandidateDatasets {
@@ -98,71 +100,65 @@ struct CandidateDatasets {
 }
 
 impl CandidateDatasets {
-    fn new(
-        allocation_strategy: &RegionAllocationStrategy,
-        candidate_zpools: &CandidateZpools,
-    ) -> Self {
+    fn new(candidate_zpools: &CandidateZpools, seed: u128) -> Self {
         use crate::db::schema::dataset::dsl as dataset_dsl;
         use candidate_zpools::dsl as candidate_zpool_dsl;
 
-        let query = match allocation_strategy {
-            #[cfg(test)]
-            RegionAllocationStrategy::LeastUsedDisk => {
-                let query: Box<
-                    dyn CteQuery<SqlType = candidate_datasets::SqlType>,
-                > = Box::new(
-                    dataset_dsl::dataset
-                        .inner_join(
-                            candidate_zpools
-                                .query_source()
-                                .on(dataset_dsl::pool_id
-                                    .eq(candidate_zpool_dsl::pool_id)),
-                        )
-                        .filter(dataset_dsl::time_deleted.is_null())
-                        .filter(dataset_dsl::size_used.is_not_null())
-                        .filter(dataset_dsl::kind.eq(DatasetKind::Crucible))
-                        .order(dataset_dsl::size_used.asc())
-                        .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap())
-                        .select((dataset_dsl::id, dataset_dsl::pool_id)),
-                );
-                query
-            }
-            RegionAllocationStrategy::Random(seed) => {
-                let seed = seed.unwrap_or_else(|| {
-                    std::time::SystemTime::now()
-                        .duration_since(std::time::UNIX_EPOCH)
-                        .unwrap()
-                        .as_nanos()
-                });
-
-                let seed_bytes = seed.to_le_bytes();
-
-                let query: Box<
-                    dyn CteQuery<SqlType = candidate_datasets::SqlType>,
-                > = Box::new(
-                    dataset_dsl::dataset
-                        .inner_join(
-                            candidate_zpools
-                                .query_source()
-                                .on(dataset_dsl::pool_id
-                                    .eq(candidate_zpool_dsl::pool_id)),
-                        )
-                        .filter(dataset_dsl::time_deleted.is_null())
-                        .filter(dataset_dsl::size_used.is_not_null())
-                        .filter(dataset_dsl::kind.eq(DatasetKind::Crucible))
-                        // We order by md5 to shuffle the ordering of the datasets.
-                        // md5 has a uniform output distribution so it does the job.
-                        .order(cockroach_md5::dsl::md5(
+        let seed_bytes = seed.to_le_bytes();
+
+        let query: Box<dyn CteQuery<SqlType = candidate_datasets::SqlType>> =
+            Box::new(
+                dataset_dsl::dataset
+                    .inner_join(candidate_zpools.query_source().on(
+                        dataset_dsl::pool_id.eq(candidate_zpool_dsl::pool_id),
+                    ))
+                    .filter(dataset_dsl::time_deleted.is_null())
+                    .filter(dataset_dsl::size_used.is_not_null())
+                    .filter(dataset_dsl::kind.eq(DatasetKind::Crucible))
+                    .distinct_on(dataset_dsl::pool_id)
+                    .order_by((
+                        dataset_dsl::pool_id,
+                        cockroach_md5::dsl::md5(
                             CastUuidToBytea::new(dataset_dsl::id)
                                 .concat(seed_bytes.to_vec()),
-                        ))
-                        .select((dataset_dsl::id, dataset_dsl::pool_id))
-                        .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()),
-                );
-                query
-            }
-        };
+                        ),
+                    ))
+                    .select((dataset_dsl::id, dataset_dsl::pool_id)),
+            );
+        Self { query }
+    }
+}
+
+/// Shuffle the candidate datasets, and select REGION_REDUNDANCY_THRESHOLD
+/// datasets from them.
+#[derive(Subquery, QueryId)]
+#[subquery(name = shuffled_candidate_datasets)]
+struct ShuffledCandidateDatasets {
+    query: Box<dyn CteQuery<SqlType = shuffled_candidate_datasets::SqlType>>,
+}
 
+impl ShuffledCandidateDatasets {
+    fn new(candidate_datasets: &CandidateDatasets, seed: u128) -> Self {
+        use candidate_datasets::dsl as candidate_datasets_dsl;
+
+        let seed_bytes = seed.to_le_bytes();
+        let query: Box<
+            dyn CteQuery<SqlType = shuffled_candidate_datasets::SqlType>,
+        > = Box::new(
+            candidate_datasets
+                .query_source()
+                // We order by md5 to shuffle the ordering of the datasets.
+                // md5 has a uniform output distribution so it does the job.
+                .order(cockroach_md5::dsl::md5(
+                    CastUuidToBytea::new(candidate_datasets_dsl::id)
+                        .concat(seed_bytes.to_vec()),
+                ))
+                .select((
+                    candidate_datasets_dsl::id,
+                    candidate_datasets_dsl::pool_id,
+                ))
+                .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()),
+        );
         Self { query }
     }
 }
@@ -179,14 +175,14 @@ diesel::sql_function!(fn now() -> Timestamptz);
 
 impl CandidateRegions {
     fn new(
-        candidate_datasets: &CandidateDatasets,
+        shuffled_candidate_datasets: &ShuffledCandidateDatasets,
         volume_id: uuid::Uuid,
         block_size: u64,
         blocks_per_extent: u64,
         extent_count: u64,
     ) -> Self {
-        use candidate_datasets::dsl as candidate_datasets_dsl;
         use schema::region;
+        use shuffled_candidate_datasets::dsl as shuffled_candidate_datasets_dsl;
 
         let volume_id = volume_id.into_sql::<sql_types::Uuid>();
         let block_size = (block_size as i64).into_sql::<sql_types::BigInt>();
@@ -195,20 +191,22 @@ impl CandidateRegions {
         let extent_count =
             (extent_count as i64).into_sql::<sql_types::BigInt>();
         Self {
-            query: Box::new(candidate_datasets.query_source().select((
-                ExpressionAlias::new::<region::id>(gen_random_uuid()),
-                ExpressionAlias::new::<region::time_created>(now()),
-                ExpressionAlias::new::<region::time_modified>(now()),
-                ExpressionAlias::new::<region::dataset_id>(
-                    candidate_datasets_dsl::id,
+            query: Box::new(shuffled_candidate_datasets.query_source().select(
+                (
+                    ExpressionAlias::new::<region::id>(gen_random_uuid()),
+                    ExpressionAlias::new::<region::time_created>(now()),
+                    ExpressionAlias::new::<region::time_modified>(now()),
+                    ExpressionAlias::new::<region::dataset_id>(
+                        shuffled_candidate_datasets_dsl::id,
+                    ),
+                    ExpressionAlias::new::<region::volume_id>(volume_id),
+                    ExpressionAlias::new::<region::block_size>(block_size),
+                    ExpressionAlias::new::<region::blocks_per_extent>(
+                        blocks_per_extent,
+                    ),
+                    ExpressionAlias::new::<region::extent_count>(extent_count),
                 ),
-                ExpressionAlias::new::<region::volume_id>(volume_id),
-                ExpressionAlias::new::<region::block_size>(block_size),
-                ExpressionAlias::new::<region::blocks_per_extent>(
-                    blocks_per_extent,
-                ),
-                ExpressionAlias::new::<region::extent_count>(extent_count),
-            ))),
+            )),
         }
     }
 }
@@ -285,12 +283,14 @@ struct CandidateZpools {
 }
 
 impl CandidateZpools {
-    fn new(old_zpool_usage: &OldPoolUsage, zpool_size_delta: u64) -> Self {
+    fn new(
+        old_zpool_usage: &OldPoolUsage,
+        zpool_size_delta: u64,
+        seed: u128,
+        distinct_sleds: bool,
+    ) -> Self {
         use schema::zpool::dsl as zpool_dsl;
 
-        let with_zpool = zpool_dsl::zpool
-            .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id));
-
         // Why are we using raw `diesel::dsl::sql` here?
         //
         // When SQL performs the "SUM" operation on "bigint" type, the result
@@ -309,15 +309,40 @@ impl CandidateZpools {
             + diesel::dsl::sql(&zpool_size_delta.to_string()))
         .le(diesel::dsl::sql(zpool_dsl::total_size::NAME));
 
-        Self {
-            query: Box::new(
-                old_zpool_usage
-                    .query_source()
-                    .inner_join(with_zpool)
-                    .filter(it_will_fit)
-                    .select((old_zpool_usage::dsl::pool_id,)),
-            ),
-        }
+        let with_zpool = zpool_dsl::zpool
+            .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id));
+
+        let base_query = old_zpool_usage
+            .query_source()
+            .inner_join(with_zpool)
+            .filter(it_will_fit)
+            .select((old_zpool_usage::dsl::pool_id,));
+
+        let query = if distinct_sleds {
+            let seed_bytes = seed.to_le_bytes();
+
+            let query: Box<dyn CteQuery<SqlType = candidate_zpools::SqlType>> =
+                Box::new(
+                    base_query
+                        .order_by((
+                            zpool_dsl::sled_id,
+                            cockroach_md5::dsl::md5(
+                                CastUuidToBytea::new(zpool_dsl::id)
+                                    .concat(seed_bytes.to_vec()),
+                            ),
+                        ))
+                        .distinct_on(zpool_dsl::sled_id),
+                );
+
+            query
+        } else {
+            let query: Box<dyn CteQuery<SqlType = candidate_zpools::SqlType>> =
+                Box::new(base_query);
+
+            query
+        };
+
+        Self { query }
     }
 }
 
@@ -508,19 +533,47 @@ impl RegionAllocate {
         extent_count: u64,
         allocation_strategy: &RegionAllocationStrategy,
     ) -> Self {
+        let (seed, distinct_sleds) = {
+            let (input_seed, distinct_sleds) = match allocation_strategy {
+                RegionAllocationStrategy::Random { seed } => (seed, false),
+                RegionAllocationStrategy::RandomWithDistinctSleds { seed } => {
+                    (seed, true)
+                }
+            };
+            (
+                input_seed.map_or_else(
+                    || {
+                        std::time::SystemTime::now()
+                            .duration_since(std::time::UNIX_EPOCH)
+                            .unwrap()
+                            .as_nanos()
+                    },
+                    |seed| seed as u128,
+                ),
+                distinct_sleds,
+            )
+        };
+
         let size_delta = block_size * blocks_per_extent * extent_count;
 
         let old_regions = OldRegions::new(volume_id);
 
         let old_pool_usage = OldPoolUsage::new();
-        let candidate_zpools =
-            CandidateZpools::new(&old_pool_usage, size_delta);
+        let candidate_zpools = CandidateZpools::new(
+            &old_pool_usage,
+            size_delta,
+            seed,
+            distinct_sleds,
+        );
 
         let candidate_datasets =
-            CandidateDatasets::new(&allocation_strategy, &candidate_zpools);
+            CandidateDatasets::new(&candidate_zpools, seed);
+
+        let shuffled_candidate_datasets =
+            ShuffledCandidateDatasets::new(&candidate_datasets, seed);
 
         let candidate_regions = CandidateRegions::new(
-            &candidate_datasets,
+            &shuffled_candidate_datasets,
             volume_id,
             block_size,
             blocks_per_extent,
@@ -577,6 +630,7 @@ impl RegionAllocate {
             .add_subquery(old_pool_usage)
             .add_subquery(candidate_zpools)
             .add_subquery(candidate_datasets)
+            .add_subquery(shuffled_candidate_datasets)
             .add_subquery(candidate_regions)
             .add_subquery(proposed_changes)
             .add_subquery(do_insert)
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index f1b20c32a10..1a9afbc6bdc 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -92,3 +92,14 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# allocate regions on 3 random distinct zpools, each on a different sled.
+type = "random_with_distinct_sleds"
+
+# the same as random_with_distinct_sleds, but without requiring distinct sleds
+# type = "random"
+
+# setting `seed` to a fixed value makes dataset selection use the same
+# shuffling order for every region allocation.
+# seed = 0
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index 99ed75f14ba..7a03caca404 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -23,6 +23,7 @@ use omicron_common::address::DENDRITE_PORT;
 use omicron_common::address::MGS_PORT;
 use omicron_common::api::external::Error;
 use omicron_common::api::internal::shared::SwitchLocation;
+use omicron_common::nexus_config::RegionAllocationStrategy;
 use slog::Logger;
 use std::collections::HashMap;
 use std::net::Ipv6Addr;
@@ -152,6 +153,9 @@ pub struct Nexus {
 
     /// Background tasks
     background_tasks: background::BackgroundTasks,
+
+    /// Default Crucible region allocation strategy
+    default_region_allocation_strategy: RegionAllocationStrategy,
 }
 
 impl Nexus {
@@ -324,6 +328,10 @@ impl Nexus {
             external_resolver,
             dpd_clients,
             background_tasks,
+            default_region_allocation_strategy: config
+                .pkg
+                .default_region_allocation_strategy
+                .clone(),
         };
 
         // TODO-cleanup all the extra Arcs here seems wrong
diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs
index ff1d0b7174d..ef0eb79b75c 100644
--- a/nexus/src/app/sagas/disk_create.rs
+++ b/nexus/src/app/sagas/disk_create.rs
@@ -16,7 +16,6 @@ use crate::db::identity::{Asset, Resource};
 use crate::db::lookup::LookupPath;
 use crate::external_api::params;
 use crate::{authn, authz, db};
-use nexus_db_queries::db::datastore::RegionAllocationStrategy;
 use omicron_common::api::external::DiskState;
 use omicron_common::api::external::Error;
 use rand::{rngs::StdRng, RngCore, SeedableRng};
@@ -251,6 +250,9 @@ async fn sdc_alloc_regions(
         &sagactx,
         &params.serialized_authn,
     );
+
+    let strategy = &osagactx.nexus().default_region_allocation_strategy;
+
     let datasets_and_regions = osagactx
         .datastore()
         .region_allocate(
@@ -258,7 +260,7 @@ async fn sdc_alloc_regions(
             volume_id,
             &params.create_params.disk_source,
             params.create_params.size,
-            &RegionAllocationStrategy::Random(None),
+            &strategy,
         )
         .await
         .map_err(ActionError::action_failed)?;
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs
index 81212571e25..8d34bd7a58a 100644
--- a/nexus/src/app/sagas/snapshot_create.rs
+++ b/nexus/src/app/sagas/snapshot_create.rs
@@ -107,7 +107,6 @@ use crate::{authn, authz, db};
 use anyhow::anyhow;
 use crucible_agent_client::{types::RegionId, Client as CrucibleAgentClient};
 use nexus_db_model::Generation;
-use nexus_db_queries::db::datastore::RegionAllocationStrategy;
 use omicron_common::api::external;
 use omicron_common::api::external::Error;
 use rand::{rngs::StdRng, RngCore, SeedableRng};
@@ -328,6 +327,8 @@ async fn ssc_alloc_regions(
         .await
         .map_err(ActionError::action_failed)?;
 
+    let strategy = &osagactx.nexus().default_region_allocation_strategy;
+
     let datasets_and_regions = osagactx
         .datastore()
         .region_allocate(
@@ -340,7 +341,7 @@ async fn ssc_alloc_regions(
                 .map_err(|e| ActionError::action_failed(e.to_string()))?,
             },
             external::ByteCount::from(disk.size),
-            &RegionAllocationStrategy::Random(None),
+            &strategy,
         )
         .await
         .map_err(ActionError::action_failed)?;
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 6eeacceaedd..1b1ae2c9129 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -89,3 +89,8 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# we only have one sled in the test environment, so we need to use the
+# `Random` strategy, instead of `RandomWithDistinctSleds`
+type = "random"
\ No newline at end of file
diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml
index b29727c4aa5..2dfee81d026 100644
--- a/smf/nexus/config-partial.toml
+++ b/smf/nexus/config-partial.toml
@@ -38,3 +38,8 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+
+[default_region_allocation_strategy]
+# by default, allocate across 3 distinct sleds
+# seed is omitted so a new seed will be chosen with every allocation.
+type = "random_with_distinct_sleds"
\ No newline at end of file