Skip to content

Commit

Permalink
Merge branch 'main' into integrate-qorb
Browse files Browse the repository at this point in the history
  • Loading branch information
smklein committed Jun 17, 2024
2 parents 47b0932 + a8b3ce2 commit 5244907
Show file tree
Hide file tree
Showing 11 changed files with 1,060 additions and 84 deletions.
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(75, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(76, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(76, "lookup-region-snapshot-by-snapshot-id"),
KnownVersion::new(75, "add-cockroach-zone-id-to-node-id"),
KnownVersion::new(74, "add-migration-table"),
KnownVersion::new(73, "add-vlan-to-uplink"),
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ use nexus_db_model::AllSchemaVersions;
pub use probe::ProbeInfo;
pub use rack::RackInit;
pub use rack::SledUnderlayAllocationResult;
pub use region::RegionAllocationFor;
pub use region::RegionAllocationParameters;
pub use silo::Discoverability;
pub use sled::SledTransition;
pub use sled::TransitionError;
Expand Down
99 changes: 67 additions & 32 deletions nexus/db-queries/src/db/datastore/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,29 @@ use omicron_common::api::external::LookupResult;
use slog::Logger;
use uuid::Uuid;

/// Identifies which kind of volume a region allocation is being performed
/// for, so the allocation query can take any existing read-only resources
/// backing that volume into account.
pub enum RegionAllocationFor {
    /// Allocate region(s) for a disk volume
    DiskVolume { volume_id: Uuid },

    /// Allocate region(s) for a snapshot volume, which may have read-only
    /// targets.
    ///
    /// `snapshot_id` identifies the snapshot whose `region_snapshot` entries
    /// (read-only downstairs served from ZFS snapshots) must be excluded as
    /// allocation targets — presumably so a new region never lands on a
    /// zpool already hosting one of those snapshots; confirm against the
    /// allocation query.
    SnapshotVolume { volume_id: Uuid, snapshot_id: Uuid },
}

/// Describe the region(s) to be allocated
pub enum RegionAllocationParameters<'a> {
    /// Derive region dimensions from a disk creation request: the block size
    /// is looked up from the disk source, and the extent layout
    /// (blocks per extent / extent count) is computed from the requested
    /// total size.
    FromDiskSource {
        disk_source: &'a params::DiskSource,
        size: external::ByteCount,
    },

    /// Specify the region dimensions directly, with no lookup or derivation.
    FromRaw {
        // Block size in bytes.
        block_size: u64,
        blocks_per_extent: u64,
        extent_count: u64,
    },
}

impl DataStore {
pub(super) fn get_allocated_regions_query(
volume_id: Uuid,
Expand Down Expand Up @@ -156,9 +179,8 @@ impl DataStore {
) -> Result<Vec<(Dataset, Region)>, Error> {
self.arbitrary_region_allocate(
opctx,
volume_id,
disk_source,
size,
RegionAllocationFor::DiskVolume { volume_id },
RegionAllocationParameters::FromDiskSource { disk_source, size },
allocation_strategy,
REGION_REDUNDANCY_THRESHOLD,
)
Expand All @@ -175,47 +197,59 @@ impl DataStore {
/// level for a volume. If a single region is allocated in isolation this
/// could land on the same dataset as one of the existing volume's regions.
///
/// For allocating for snapshot volumes, it's important to take into account
/// `region_snapshot`s that may be used as some of the targets in the region
/// set, representing read-only downstairs served out of a ZFS snapshot
/// instead of a dataset.
///
/// Returns the allocated regions, as well as the datasets to which they
/// belong.
pub async fn arbitrary_region_allocate(
&self,
opctx: &OpContext,
volume_id: Uuid,
disk_source: &params::DiskSource,
size: external::ByteCount,
region_for: RegionAllocationFor,
region_parameters: RegionAllocationParameters<'_>,
allocation_strategy: &RegionAllocationStrategy,
num_regions_required: usize,
) -> Result<Vec<(Dataset, Region)>, Error> {
let block_size =
self.get_block_size_from_disk_source(opctx, &disk_source).await?;
let (blocks_per_extent, extent_count) =
Self::get_crucible_allocation(&block_size, size);
let (volume_id, maybe_snapshot_id) = match region_for {
RegionAllocationFor::DiskVolume { volume_id } => (volume_id, None),

self.arbitrary_region_allocate_direct(
opctx,
volume_id,
u64::from(block_size.to_bytes()),
blocks_per_extent,
extent_count,
allocation_strategy,
num_regions_required,
)
.await
}
RegionAllocationFor::SnapshotVolume { volume_id, snapshot_id } => {
(volume_id, Some(snapshot_id))
}
};

let (block_size, blocks_per_extent, extent_count) =
match region_parameters {
RegionAllocationParameters::FromDiskSource {
disk_source,
size,
} => {
let block_size = self
.get_block_size_from_disk_source(opctx, &disk_source)
.await?;

let (blocks_per_extent, extent_count) =
Self::get_crucible_allocation(&block_size, size);

(
u64::from(block_size.to_bytes()),
blocks_per_extent,
extent_count,
)
}

RegionAllocationParameters::FromRaw {
block_size,
blocks_per_extent,
extent_count,
} => (block_size, blocks_per_extent, extent_count),
};

#[allow(clippy::too_many_arguments)]
pub async fn arbitrary_region_allocate_direct(
&self,
opctx: &OpContext,
volume_id: Uuid,
block_size: u64,
blocks_per_extent: u64,
extent_count: u64,
allocation_strategy: &RegionAllocationStrategy,
num_regions_required: usize,
) -> Result<Vec<(Dataset, Region)>, Error> {
let query = crate::db::queries::region_allocation::allocation_query(
volume_id,
maybe_snapshot_id,
block_size,
blocks_per_extent,
extent_count,
Expand All @@ -234,6 +268,7 @@ impl DataStore {
self.log,
"Allocated regions for volume";
"volume_id" => %volume_id,
"maybe_snapshot_id" => ?maybe_snapshot_id,
"datasets_and_regions" => ?dataset_and_regions,
);

Expand Down
96 changes: 84 additions & 12 deletions nexus/db-queries/src/db/queries/region_allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,14 @@ type SelectableSql<T> = <
<T as diesel::Selectable<Pg>>::SelectExpression as diesel::Expression
>::SqlType;

/// For a given volume, idempotently allocate enough regions (according to some
/// allocation strategy) to meet some redundancy level. This should only be used
/// for the region set that is in the top level of the Volume (not the deeper
/// layers of the hierarchy). If that volume has region snapshots in the region
/// set, a `snapshot_id` should be supplied matching those entries.
pub fn allocation_query(
volume_id: uuid::Uuid,
snapshot_id: Option<uuid::Uuid>,
block_size: u64,
blocks_per_extent: u64,
extent_count: u64,
Expand Down Expand Up @@ -116,24 +122,42 @@ pub fn allocation_query(
SELECT
dataset.pool_id,
sum(dataset.size_used) AS size_used
FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),")

// Any zpool already have this volume's existing regions?
.sql("
existing_zpools AS (
SELECT
dataset.pool_id
FROM
dataset INNER JOIN old_regions ON (old_regions.dataset_id = dataset.id)
),")
FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),");

let builder = if let Some(snapshot_id) = snapshot_id {
// Any zpool already have this volume's existing regions, or host the
// snapshot volume's regions?
builder.sql("
existing_zpools AS ((
SELECT
dataset.pool_id
FROM
dataset INNER JOIN old_regions ON (old_regions.dataset_id = dataset.id)
) UNION (
select dataset.pool_id from
dataset inner join region_snapshot on (region_snapshot.dataset_id = dataset.id)
where region_snapshot.snapshot_id = ").param().sql(")),")
.bind::<sql_types::Uuid, _>(snapshot_id)
} else {
// Any zpool already have this volume's existing regions?
builder.sql("
existing_zpools AS (
SELECT
dataset.pool_id
FROM
dataset INNER JOIN old_regions ON (old_regions.dataset_id = dataset.id)
),")
};

// Identifies zpools with enough space for region allocation, that are not
// currently used by this Volume's existing regions.
//
// NOTE: 'distinct_sleds' changes the format of the underlying SQL query, as it uses
// distinct bind parameters depending on the conditional branch.
.sql("
candidate_zpools AS (");
let builder = builder.sql(
"
candidate_zpools AS (",
);
let builder = if distinct_sleds {
builder.sql("SELECT DISTINCT ON (zpool.sled_id) ")
} else {
Expand Down Expand Up @@ -384,10 +408,15 @@ mod test {
let blocks_per_extent = 4;
let extent_count = 8;

// Start with snapshot_id = None

let snapshot_id = None;

// First structure: "RandomWithDistinctSleds"

let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
Expand All @@ -406,6 +435,7 @@ mod test {

let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
Expand All @@ -417,6 +447,46 @@ mod test {
"tests/output/region_allocate_random_sleds.sql",
)
.await;

// Next, put a value in for snapshot_id

let snapshot_id = Some(Uuid::new_v4());

// First structure: "RandomWithDistinctSleds"

let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
&RegionAllocationStrategy::RandomWithDistinctSleds {
seed: Some(1),
},
REGION_REDUNDANCY_THRESHOLD,
);
expectorate_query_contents(
&region_allocate,
"tests/output/region_allocate_with_snapshot_distinct_sleds.sql",
)
.await;

// Second structure: "Random"

let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
&RegionAllocationStrategy::Random { seed: Some(1) },
REGION_REDUNDANCY_THRESHOLD,
);
expectorate_query_contents(
&region_allocate,
"tests/output/region_allocate_with_snapshot_random_sleds.sql",
)
.await;
}

// Explain the possible forms of the SQL query to ensure that it
Expand All @@ -439,6 +509,7 @@ mod test {

let region_allocate = allocation_query(
volume_id,
None,
block_size,
blocks_per_extent,
extent_count,
Expand All @@ -454,6 +525,7 @@ mod test {

let region_allocate = allocation_query(
volume_id,
None,
block_size,
blocks_per_extent,
extent_count,
Expand Down
Loading

0 comments on commit 5244907

Please sign in to comment.