Skip to content

Commit

Permalink
Implement record based Crucible reference counting
Browse files Browse the repository at this point in the history
Crucible volumes are created by layering read-write regions over a
hierarchy of read-only resources. Originally only a region snapshot
could be used as a read-only resource for a volume. With the
introduction of read-only regions (created during the region snapshot
replacement process) this is no longer true!

Read-only resources can be used by many volumes, and because of this
they need to have a reference count so they can be deleted when they're
not referenced anymore. The region_snapshot table uses a
`volume_references` column, which counts how many uses there are. The
region table does not have this column, and more over a simple integer
works for reference counting but does not tell you _what_ volume that
use is from. This can be determined (see omdb's validate volume
references command) but it's information that is tossed out, as Nexus
knows what volumes use what resources! Instead, record what read-only
resources a volume uses in a new table.

As part of the schema change to add the new `volume_resource_usage`
table, a migration is included that will create the appropriate records
for all region snapshots.

In testing, a few bugs were found: the worst being that read-only
regions did not have their read_only column set to true. This would be a
problem if read-only regions are created, but they're currently only
created during region snapshot replacement. To detect if any of these
regions were created, find all regions that were allocated for a
snapshot volume:

    SELECT id FROM region
    WHERE volume_id IN (SELECT volume_id FROM snapshot);

A similar bug was found in the simulated Crucible agent.

This commit also reverts oxidecomputer#6728, enabling region snapshot replacement
again - it was disabled due to a lack of read-only region reference
counting, so it can be enabled once again.
  • Loading branch information
jmpesp committed Oct 9, 2024
1 parent ba21231 commit ae4282a
Show file tree
Hide file tree
Showing 27 changed files with 3,678 additions and 512 deletions.
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ mod vmm;
mod vni;
mod volume;
mod volume_repair;
mod volume_resource_usage;
mod vpc;
mod vpc_firewall_rule;
mod vpc_route;
Expand Down Expand Up @@ -215,6 +216,7 @@ pub use vmm_state::*;
pub use vni::*;
pub use volume::*;
pub use volume_repair::*;
pub use volume_resource_usage::*;
pub use vpc::*;
pub use vpc_firewall_rule::*;
pub use vpc_route::*;
Expand Down
16 changes: 16 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1967,3 +1967,19 @@ joinable!(instance_ssh_key -> instance (instance_id));
allow_tables_to_appear_in_same_query!(sled, sled_instance);

joinable!(network_interface -> probe (parent_id));

table! {
volume_resource_usage (usage_id) {
usage_id -> Uuid,

volume_id -> Uuid,

usage_type -> crate::VolumeResourceUsageTypeEnum,

region_id -> Nullable<Uuid>,

region_snapshot_dataset_id -> Nullable<Uuid>,
region_snapshot_region_id -> Nullable<Uuid>,
region_snapshot_snapshot_id -> Nullable<Uuid>,
}
}
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(107, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(108, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(108, "crucible-ref-count-records"),
KnownVersion::new(107, "add-instance-boot-disk"),
KnownVersion::new(106, "dataset-kinds-update"),
KnownVersion::new(105, "inventory-nvme-firmware"),
Expand Down
132 changes: 132 additions & 0 deletions nexus/db-model/src/volume_resource_usage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::impl_enum_type;
use crate::schema::volume_resource_usage;
use uuid::Uuid;

impl_enum_type!(
#[derive(SqlType, Debug, QueryId)]
#[diesel(
postgres_type(name = "volume_resource_usage_type", schema = "public")
)]
pub struct VolumeResourceUsageTypeEnum;

#[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq, Eq, Hash)]
#[diesel(sql_type = VolumeResourceUsageTypeEnum)]
pub enum VolumeResourceUsageType;

ReadOnlyRegion => b"read_only_region"
RegionSnapshot => b"region_snapshot"
);

/// Crucible volumes are created by layering read-write regions over a hierarchy
/// of read-only resources. Originally only a region snapshot could be used as a
/// read-only resource for a volume. With the introduction of read-only regions
/// (created during the region snapshot replacement process) this is no longer
/// true.
///
/// Read-only resources can be used by many volumes, and because of this they
/// need to have a reference count so they can be deleted when they're not
/// referenced anymore. The region_snapshot table uses a `volume_references`
/// column, which counts how many uses there are. The region table does not have
/// this column, and more over a simple integer works for reference counting but
/// does not tell you _what_ volume that use is from. This can be determined
/// (see omdb's validate volume references command) but it's information that is
/// tossed out, as Nexus knows what volumes use what resources! Instead, record
/// what read-only resources a volume uses here.
///
/// Note: users should not use this object directly, and instead use the
/// [`VolumeResourceUsage`] enum, which is type-safe and will convert to and
/// from a [`VolumeResourceUsageRecord`] when interacting with the DB.
#[derive(
Queryable, Insertable, Debug, Clone, Selectable, PartialEq, Eq, Hash,
)]
#[diesel(table_name = volume_resource_usage)]
pub struct VolumeResourceUsageRecord {
pub usage_id: Uuid,

pub volume_id: Uuid,

pub usage_type: VolumeResourceUsageType,

pub region_id: Option<Uuid>,

pub region_snapshot_dataset_id: Option<Uuid>,
pub region_snapshot_region_id: Option<Uuid>,
pub region_snapshot_snapshot_id: Option<Uuid>,
}

#[derive(Debug, Clone)]
pub enum VolumeResourceUsage {
ReadOnlyRegion { region_id: Uuid },

RegionSnapshot { dataset_id: Uuid, region_id: Uuid, snapshot_id: Uuid },
}

impl VolumeResourceUsageRecord {
pub fn new(volume_id: Uuid, usage: VolumeResourceUsage) -> Self {
match usage {
VolumeResourceUsage::ReadOnlyRegion { region_id } => {
VolumeResourceUsageRecord {
usage_id: Uuid::new_v4(),
volume_id,
usage_type: VolumeResourceUsageType::ReadOnlyRegion,

region_id: Some(region_id),

region_snapshot_dataset_id: None,
region_snapshot_region_id: None,
region_snapshot_snapshot_id: None,
}
}

VolumeResourceUsage::RegionSnapshot {
dataset_id,
region_id,
snapshot_id,
} => VolumeResourceUsageRecord {
usage_id: Uuid::new_v4(),
volume_id,
usage_type: VolumeResourceUsageType::RegionSnapshot,

region_id: None,

region_snapshot_dataset_id: Some(dataset_id),
region_snapshot_region_id: Some(region_id),
region_snapshot_snapshot_id: Some(snapshot_id),
},
}
}
}

impl From<VolumeResourceUsageRecord> for VolumeResourceUsage {
fn from(record: VolumeResourceUsageRecord) -> VolumeResourceUsage {
match record.usage_type {
VolumeResourceUsageType::ReadOnlyRegion => {
VolumeResourceUsage::ReadOnlyRegion {
region_id: record
.region_id
.expect("valid read-only region usage record"),
}
}

VolumeResourceUsageType::RegionSnapshot => {
VolumeResourceUsage::RegionSnapshot {
dataset_id: record
.region_snapshot_dataset_id
.expect("valid region snapshot usage record"),

region_id: record
.region_snapshot_region_id
.expect("valid region snapshot usage record"),

snapshot_id: record
.region_snapshot_snapshot_id
.expect("valid region snapshot usage record"),
}
}
}
}
}
4 changes: 2 additions & 2 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ mod test {
}

#[derive(Debug)]
struct TestDatasets {
pub(crate) struct TestDatasets {
// eligible and ineligible aren't currently used, but are probably handy
// for the future.
#[allow(dead_code)]
Expand All @@ -810,7 +810,7 @@ mod test {
type SledToDatasetMap = HashMap<SledUuid, Vec<Uuid>>;

impl TestDatasets {
async fn create(
pub(crate) async fn create(
opctx: &OpContext,
datastore: Arc<DataStore>,
num_eligible_sleds: usize,
Expand Down
2 changes: 1 addition & 1 deletion nexus/db-queries/src/db/datastore/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ impl DataStore {
block_size,
blocks_per_extent,
extent_count,
read_only: false,
read_only: maybe_snapshot_id.is_some(),
},
allocation_strategy,
num_regions_required,
Expand Down
Loading

0 comments on commit ae4282a

Please sign in to comment.