From 25ddbcec609f8fc703a1ce015b3c15748d4edcef Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Wed, 27 Sep 2023 17:40:46 -0700 Subject: [PATCH] [db] omdb db disk for physical disks (#4141) New omdb command and database schema updates This bumps the database version to 5.0.0. This adds a new subcommand: `physical <UUID>` to `omdb db disks`. This will show the disk resources that reside on a physical disk. This change requires additional indexes in the zpool and dataset tables as we look up which zpool is on a physical disk, and we look up which datasets are on a given zpool. The usage here is just for the omdb tool, but those same searches will need to happen when Omicron needs to figure out what resources are on a physical disk. Tested both initial install and schema update. --------- Co-authored-by: Alan Hanson --- dev-tools/omdb/src/bin/omdb/db.rs | 163 +++++++++++++++++++++++++++++ dev-tools/omdb/tests/env.out | 6 +- dev-tools/omdb/tests/successes.out | 12 +-- nexus/db-model/src/schema.rs | 2 +- schema/crdb/5.0.0/up1.sql | 1 + schema/crdb/5.0.0/up2.sql | 1 + schema/crdb/dbinit.sql | 14 ++- 7 files changed, 188 insertions(+), 11 deletions(-) create mode 100644 schema/crdb/5.0.0/up1.sql create mode 100644 schema/crdb/5.0.0/up2.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 268e74a622..42f4d53730 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -24,13 +24,16 @@ use clap::ValueEnum; use diesel::expression::SelectableHelper; use diesel::query_dsl::QueryDsl; use diesel::ExpressionMethods; +use nexus_db_model::Dataset; use nexus_db_model::Disk; use nexus_db_model::DnsGroup; use nexus_db_model::DnsName; use nexus_db_model::DnsVersion; use nexus_db_model::DnsZone; use nexus_db_model::Instance; +use nexus_db_model::Region; use nexus_db_model::Sled; +use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; @@ -45,6
+48,7 @@ use omicron_common::api::external::Generation; use omicron_common::postgres_config::PostgresConfigWithUrl; use std::cmp::Ordering; use std::collections::BTreeMap; +use std::collections::HashSet; use std::fmt::Display; use std::num::NonZeroU32; use std::sync::Arc; @@ -96,6 +100,8 @@ enum DiskCommands { Info(DiskInfoArgs), /// Summarize current disks List, + /// Determine what crucible resources are on the given physical disk. + Physical(DiskPhysicalArgs), } #[derive(Debug, Args)] @@ -104,6 +110,12 @@ struct DiskInfoArgs { uuid: Uuid, } +#[derive(Debug, Args)] +struct DiskPhysicalArgs { + /// The UUID of the physical disk + uuid: Uuid, +} + #[derive(Debug, Args)] struct DnsArgs { #[command(subcommand)] @@ -214,6 +226,12 @@ impl DbArgs { DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { cmd_db_disk_list(&datastore, self.fetch_limit).await } + DbCommands::Disks(DiskArgs { + command: DiskCommands::Physical(uuid), + }) => { + cmd_db_disk_physical(&opctx, &datastore, self.fetch_limit, uuid) + .await + } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await } @@ -509,6 +527,151 @@ async fn cmd_db_disk_info( Ok(()) } +/// Run `omdb db disk physical `. +async fn cmd_db_disk_physical( + opctx: &OpContext, + datastore: &DataStore, + limit: NonZeroU32, + args: &DiskPhysicalArgs, +) -> Result<(), anyhow::Error> { + // We start by finding any zpools that are using the physical disk. + use db::schema::zpool::dsl as zpool_dsl; + let zpools = zpool_dsl::zpool + .filter(zpool_dsl::time_deleted.is_null()) + .filter(zpool_dsl::physical_disk_id.eq(args.uuid)) + .select(Zpool::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading zpool from pysical disk id")?; + + let mut sled_ids = HashSet::new(); + let mut dataset_ids = HashSet::new(); + + // The current plan is a single zpool per physical disk, so we expect that + // this will have a single item. 
However, If single zpool per disk ever + // changes, this code will still work. + for zp in zpools { + // zpool has the sled id, record that so we can find the serial number. + sled_ids.insert(zp.sled_id); + + // Next, we find all the datasets that are on our zpool. + use db::schema::dataset::dsl as dataset_dsl; + let datasets = dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq(zp.id())) + .select(Dataset::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading dataset")?; + + // Add all the datasets ids that are using this pool. + for ds in datasets { + dataset_ids.insert(ds.id()); + } + } + + // If we do have more than one sled ID, then something is wrong, but + // go ahead and print out whatever we have found. + for sid in sled_ids { + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(sid) + .fetch() + .await + .context("failed to look up sled")?; + + println!( + "Physical disk: {} found on sled: {}", + args.uuid, + my_sled.serial_number() + ); + } + + let mut volume_ids = HashSet::new(); + // Now, take the list of datasets we found and search all the regions + // to see if any of them are on the dataset. If we find a region that + // is on one of our datasets, then record the volume ID of that region. + for did in dataset_ids.clone().into_iter() { + use db::schema::region::dsl as region_dsl; + let regions = region_dsl::region + .filter(region_dsl::dataset_id.eq(did)) + .select(Region::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading region")?; + + for rs in regions { + volume_ids.insert(rs.volume_id()); + } + } + + // At this point, we have a list of volume IDs that contain a region + // that is part of a dataset on a pool on our disk. The final step is + // to find the virtual disks associated with these volume IDs and + // display information about those disks. 
+ use db::schema::disk::dsl; + let disks = dsl::disk + .filter(dsl::time_deleted.is_null()) + .filter(dsl::volume_id.eq_any(volume_ids)) + .limit(i64::from(u32::from(limit))) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading disks")?; + + check_limit(&disks, limit, || "listing disks".to_string()); + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DiskRow { + name: String, + id: String, + state: String, + instance_name: String, + } + + let mut rows = Vec::new(); + + for disk in disks { + // If the disk is attached to an instance, determine the name of the + // instance. + let instance_name = + if let Some(instance_uuid) = disk.runtime().attach_instance_id { + // Get the instance this disk is attached to + use db::schema::instance::dsl as instance_dsl; + let instance = instance_dsl::instance + .filter(instance_dsl::id.eq(instance_uuid)) + .limit(1) + .select(Instance::as_select()) + .load_async(datastore.pool_for_tests().await?) 
+ .await + .context("loading requested instance")?; + + if let Some(instance) = instance.into_iter().next() { + instance.name().to_string() + } else { + "???".to_string() + } + } else { + "-".to_string() + }; + + rows.push(DiskRow { + name: disk.name().to_string(), + id: disk.id().to_string(), + state: disk.runtime().disk_state, + instance_name: instance_name, + }); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + Ok(()) +} + // SERVICES #[derive(Tabled)] diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index e7a50da935..eb4cd0d32d 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -7,7 +7,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] termination: Exited(2) @@ -172,7 +172,7 @@ stderr: note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] termination: Exited(0) @@ -185,5 +185,5 @@ stderr: note: database URL not specified. Will search DNS. 
note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 7532e9b61e..b1464cb824 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -8,7 +8,7 @@ external oxide-dev.test 2 create silo: "tes --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] termination: Exited(0) @@ -24,7 +24,7 @@ changes: names added: 1, names removed: 0 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] termination: Exited(0) @@ -36,7 +36,7 @@ External zone: oxide-dev.test --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] termination: Exited(0) @@ -52,7 +52,7 @@ Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_ --------------------------------------------- stderr: note: using database URL 
postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] termination: Exited(0) @@ -71,7 +71,7 @@ sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) @@ -82,7 +82,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (4.0.0) +note: database schema version matches expected (5.0.0) ============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 3fde9ee715..94a770e2ca 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1130,7 +1130,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(5, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/schema/crdb/5.0.0/up1.sql b/schema/crdb/5.0.0/up1.sql new file mode 100644 index 0000000000..0976070947 --- /dev/null +++ b/schema/crdb/5.0.0/up1.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS lookup_zpool_by_disk ON omicron.public.zpool (physical_disk_id, id) WHERE physical_disk_id IS NOT NULL AND time_deleted IS NULL; diff --git a/schema/crdb/5.0.0/up2.sql b/schema/crdb/5.0.0/up2.sql new file mode 100644 index 0000000000..c209dab96b --- /dev/null +++ b/schema/crdb/5.0.0/up2.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS lookup_dataset_by_zpool ON omicron.public.dataset (pool_id, id) WHERE pool_id IS NOT NULL AND time_deleted IS NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4b38b7dfe4..ad09092f8f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -388,6 +388,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.zpool ( total_size INT NOT NULL ); +/* Create an index on the physical disk id */ +CREATE INDEX IF NOT EXISTS lookup_zpool_by_disk on omicron.public.zpool ( + physical_disk_id, + id +) WHERE physical_disk_id IS NOT NULL AND time_deleted IS NULL; + CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', @@ -437,6 +443,12 @@ CREATE INDEX IF NOT EXISTS lookup_dataset_by_size_used on omicron.public.dataset size_used ) WHERE size_used IS NOT NULL AND time_deleted IS NULL; +/* Create an index on the zpool id */ +CREATE INDEX IF NOT EXISTS lookup_dataset_by_zpool on omicron.public.dataset ( + pool_id, + id +) WHERE pool_id IS NOT NULL AND time_deleted IS NULL; + /* * A region of space allocated to Crucible Downstairs, within a dataset. 
*/ @@ -2562,7 +2574,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '4.0.0', NULL) + ( TRUE, NOW(), NOW(), '5.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;