From 748a1d7b46279de5b7ccc5f3c1a98bbb091df887 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jul 2024 16:55:48 -0700 Subject: [PATCH] [nexus] Expunge disk internal API, omdb commands (#5994) Provides an internal API to remove disks, and wires it into omdb. Additionally, expands omdb commands for visibility. - `omdb db physical-disks` can be used to view all "control plane physical disks". This is similar to, but distinct from, the `omdb db inventory physical-disks` command, as it reports control plane disks that have been adopted in the control plane. This command is necessary for identifying the UUID of the associated control plane object, which is not observable via inventory. - `omdb nexus sleds expunge-disk` can be used to expunge a physical disk from a sled. This relies on many prior patches to operate correctly, but with the combination of: #5987, #5965, #5931, #5952, #5601, #5599, we can observe the following behavior: expunging a disk leads to all "users" of that disk (zone filesystems, datasets, zone bundles, etc) being removed. I tested this PR on a4x2 using the following steps: ```bash # Boot a4x2, confirm the Nexus zone is running # From g0, zlogin oxz_switch $ omdb db sleds SERIAL IP ROLE POLICY STATE ID g2 [fd00:1122:3344:103::1]:12345 - in service active 29fede5f-37e4-4528-bcf2-f3ee94924894 g0 [fd00:1122:3344:101::1]:12345 scrimlet in service active 6a2c7019-d055-4256-8bad-042b97aa0e5e g1 [fd00:1122:3344:102::1]:12345 - in service active a611b43e-3995-4cd4-9603-89ca6aca3dc5 g3 [fd00:1122:3344:104::1]:12345 scrimlet in service active f62f2cfe-d17b-4bd6-ae64-57e8224d3672 # We'll plan on expunging a disk on g1, and observing the effects. $ export SLED_ID=a611b43e-3995-4cd4-9603-89ca6aca3dc5 $ export OMDB_SLED_AGENT_URL=http://[fd00:1122:3344:102::1]:12345 $ omdb sled-agent zones list "oxz_cockroachdb_b3fecda8-2eb8-4ff3-9cf6-90c94fba7c50" "oxz_crucible_19831c98-3137-4af4-a93d-fc1a17c138f2" "oxz_crucible_6adcb8ec-6c9e-4e8a-a8d4-bbf9ad44e2c4" "oxz_crucible_74b2f587-10ce-4131-97fd-9832c52c8a41" "oxz_crucible_9e422508-f4d5-4c24-8dde-0080c0916419" "oxz_crucible_a47e9625-d189-4001-877a-cc3aa5b1f3eb" "oxz_crucible_pantry_c3b4e3cb-3e23-4f5e-921b-04e4801924fd" "oxz_external_dns_7e669b6f-a3fe-47a9-addd-20e42c58b8bb" "oxz_internal_dns_1a45a6e8-5b03-4ab4-a3db-e83fb7767767" "oxz_ntp_209ad0d0-a5e7-4ab8-ac8f-e99902697b32" "oxz_oximeter_864efebb-790f-4b7a-8377-b2c82c87f5b8" $ omdb db physical-disks | grep $SLED_ID ID SERIAL VENDOR MODEL SLED_ID POLICY STATE 23524716-a331-4d57-aa71-8bd4dbc916f8 synthetic-serial-g1_0 synthetic-vendor synthetic-model-U2 a611b43e-3995-4cd4-9603-89ca6aca3dc5 in service active 3ca1812b-55e3-47ed-861f-f667f626c8a0 synthetic-serial-g1_3 synthetic-vendor synthetic-model-U2 a611b43e-3995-4cd4-9603-89ca6aca3dc5 in service active 40139afb-7076-45d9-84cf-b96eefe7acf8 synthetic-serial-g1_1 synthetic-vendor synthetic-model-U2 a611b43e-3995-4cd4-9603-89ca6aca3dc5 in service active 5c8e33dd-1230-4214-af78-9be892d9f421 synthetic-serial-g1_4 synthetic-vendor synthetic-model-U2 a611b43e-3995-4cd4-9603-89ca6aca3dc5 in service active 85780bbf-8e2d-481e-9013-34611572f191 synthetic-serial-g1_2 synthetic-vendor synthetic-model-U2 a611b43e-3995-4cd4-9603-89ca6aca3dc5 in service active # Let's expunge the "0th" disk here. 
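# Note: `expunge-disk` prompts for confirmation before acting -- per the code added in this patch, a "y/N" prompt if the disk still appears in the latest inventory collection, then the disk's serial number; those interactive prompts are not shown in the transcript below.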
$ omdb nexus sleds expunge-disk 23524716-a331-4d57-aa71-8bd4dbc916f8 -w $ omdb nexus blueprints regenerate -w $ omdb nexus blueprints show $NEW_BLUEPRINT_ID # Observe that the new blueprint for the sled expunges some zones -- minimally, # the Crucible zone -- and no longer lists the "g1_0" disk. This should also be # summarized in the blueprint metadata comment. $ omdb nexus blueprints target set $NEW_BLUEPRINT_ID enabled -w $ omdb sled-agent zones list zones: "oxz_crucible_19831c98-3137-4af4-a93d-fc1a17c138f2" "oxz_crucible_74b2f587-10ce-4131-97fd-9832c52c8a41" "oxz_crucible_9e422508-f4d5-4c24-8dde-0080c0916419" "oxz_crucible_a47e9625-d189-4001-877a-cc3aa5b1f3eb" "oxz_crucible_pantry_c3b4e3cb-3e23-4f5e-921b-04e4801924fd" "oxz_ntp_209ad0d0-a5e7-4ab8-ac8f-e99902697b32" "oxz_oximeter_864efebb-790f-4b7a-8377-b2c82c87f5b8" # As we can see, the expunged zones have been removed. # We can also access the sled agent logs from g1 to observe that the expected requests have been sent # to adjust the set of control plane disks and expunge the expected zones. ``` This is a major part of https://github.com/oxidecomputer/omicron/issues/4719 Fixes https://github.com/oxidecomputer/omicron/issues/5370 --- dev-tools/omdb/src/bin/omdb/db.rs | 94 ++++++++- dev-tools/omdb/src/bin/omdb/nexus.rs | 197 +++++++++++++++--- dev-tools/omdb/tests/usage_errors.out | 9 +- nexus/db-model/src/physical_disk.rs | 72 +++++++ .../src/db/datastore/physical_disk.rs | 5 +- nexus/internal-api/src/lib.rs | 17 +- nexus/src/app/sled.rs | 29 ++- nexus/src/external_api/http_entrypoints.rs | 2 +- nexus/src/internal_api/http_entrypoints.rs | 19 ++ nexus/types/src/deployment/planning_input.rs | 56 ++++- openapi/nexus-internal.json | 41 ++++ 11 files changed, 497 insertions(+), 44 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index f0f7be0b83..44b34b0220 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -62,6 +62,7 @@ use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; use nexus_db_model::NetworkInterface; use nexus_db_model::NetworkInterfaceKind; +use nexus_db_model::PhysicalDisk; use nexus_db_model::Probe; use nexus_db_model::Project; use nexus_db_model::Region; @@ -96,7 +97,10 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::DiskFilter; use nexus_types::deployment::SledFilter; +use nexus_types::external_api::views::PhysicalDiskPolicy; +use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; use nexus_types::identity::Resource; @@ -281,12 +285,14 @@ pub struct DbFetchOptions { enum DbCommands { /// Print information about the rack Rack(RackArgs), - /// Print information about disks + /// Print information about virtual disks Disks(DiskArgs), /// Print information about internal and external DNS Dns(DnsArgs), /// Print information about collected hardware/software inventory Inventory(InventoryArgs), + /// Print information about physical disks + PhysicalDisks(PhysicalDisksArgs), /// Save the current Reconfigurator inputs to a file ReconfiguratorSave(ReconfiguratorSaveArgs), /// Print information about regions @@ -407,8 +413,8 @@ enum InventoryCommands { Cabooses, /// list and show details from particular collections Collections(CollectionsArgs), - /// show all 
physical disks every found - PhysicalDisks(PhysicalDisksArgs), + /// show all physical disks ever found + PhysicalDisks(InvPhysicalDisksArgs), /// list all root of trust pages ever found RotPages, } @@ -437,7 +443,7 @@ struct CollectionsShowArgs { } #[derive(Debug, Args, Clone, Copy)] -struct PhysicalDisksArgs { +struct InvPhysicalDisksArgs { #[clap(long)] collection_id: Option<CollectionUuid>, @@ -445,6 +451,13 @@ struct PhysicalDisksArgs { sled_id: Option<SledUuid>, } +#[derive(Debug, Args)] +struct PhysicalDisksArgs { + /// Show disks that match the given filter + #[clap(short = 'F', long, value_enum)] + filter: Option<DiskFilter>, +} + #[derive(Debug, Args)] struct ReconfiguratorSaveArgs { /// where to save the output @@ -611,6 +624,15 @@ impl DbArgs { ) .await } + DbCommands::PhysicalDisks(args) => { + cmd_db_physical_disks( + &opctx, + &datastore, + &self.fetch_opts, + args, + ) + .await + } DbCommands::ReconfiguratorSave(reconfig_save_args) => { cmd_db_reconfigurator_save( + &opctx, @@ -1385,6 +1407,68 @@ async fn cmd_db_disk_physical( Ok(()) } +#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct PhysicalDiskRow { + id: Uuid, + serial: String, + vendor: String, + model: String, + sled_id: Uuid, + policy: PhysicalDiskPolicy, + state: PhysicalDiskState, +} + +impl From<PhysicalDisk> for PhysicalDiskRow { + fn from(d: PhysicalDisk) -> Self { + PhysicalDiskRow { + id: d.id(), + serial: d.serial.clone(), + vendor: d.vendor.clone(), + model: d.model.clone(), + sled_id: d.sled_id, + policy: d.disk_policy.into(), + state: d.disk_state.into(), + } + } +} + +/// Run `omdb db physical-disks`. +async fn cmd_db_physical_disks( + opctx: &OpContext, + datastore: &DataStore, + fetch_opts: &DbFetchOptions, + args: &PhysicalDisksArgs, +) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; + let filter = match args.filter { + Some(filter) => filter, + None => { + eprintln!( + "note: listing all in-service disks \ + (use -F to filter, e.g. 
-F in-service)" + ); + DiskFilter::InService + } + }; + + let sleds = datastore + .physical_disk_list(&opctx, &first_page(limit), filter) + .await + .context("listing physical disks")?; + check_limit(&sleds, limit, || String::from("listing physical disks")); + + let rows = sleds.into_iter().map(|s| PhysicalDiskRow::from(s)); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(1, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + // SERVICES // Snapshots @@ -3187,7 +3271,7 @@ async fn cmd_db_inventory_cabooses( async fn cmd_db_inventory_physical_disks( conn: &DataStoreConnection<'_>, limit: NonZeroU32, - args: PhysicalDisksArgs, + args: InvPhysicalDisksArgs, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index fb74ddd89b..f699466505 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -24,6 +24,7 @@ use nexus_client::types::BackgroundTask; use nexus_client::types::BackgroundTasksActivateRequest; use nexus_client::types::CurrentStatus; use nexus_client::types::LastResult; +use nexus_client::types::PhysicalDiskPath; use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; @@ -33,6 +34,7 @@ use nexus_types::internal_api::background::RegionReplacementDriverStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; @@ -256,6 +258,8 @@ enum SledsCommands { Add(SledAddArgs), /// Expunge a sled (DANGEROUS) Expunge(SledExpungeArgs), + /// Expunge a disk (DANGEROUS) + ExpungeDisk(DiskExpungeArgs), } #[derive(Debug, Args)] @@ -277,6 +281,17 @@ struct SledExpungeArgs { sled_id: SledUuid, } +#[derive(Debug, Args)] +struct DiskExpungeArgs { + // expunge is _extremely_ dangerous, so we also require a database + // connection to perform some safety checks + #[clap(flatten)] + db_url_opts: DbUrlOptions, + + /// Physical disk ID + physical_disk_id: PhysicalDiskUuid, +} + impl NexusArgs { /// Run a `omdb nexus` subcommand. 
pub(crate) async fn run_cmd( @@ -401,6 +416,13 @@ impl NexusArgs { let token = omdb.check_allow_destructive()?; cmd_nexus_sled_expunge(&client, args, omdb, log, token).await } + NexusCommands::Sleds(SledsArgs { + command: SledsCommands::ExpungeDisk(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_nexus_sled_expunge_disk(&client, args, omdb, log, token) + .await + } } } } @@ -1458,6 +1480,39 @@ async fn cmd_nexus_sled_add( Ok(()) } +struct ConfirmationPrompt(Reedline); + +impl ConfirmationPrompt { + fn new() -> Self { + Self(Reedline::create()) + } + + fn read(&mut self, message: &str) -> Result { + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic(message.to_string()), + DefaultPromptSegment::Empty, + ); + if let Ok(reedline::Signal::Success(input)) = self.0.read_line(&prompt) + { + Ok(input) + } else { + bail!("expungement aborted") + } + } + + fn read_and_validate( + &mut self, + message: &str, + expected: &str, + ) -> Result<(), anyhow::Error> { + let input = self.read(message)?; + if input != expected { + bail!("Aborting, input did not match expected value"); + } + Ok(()) + } +} + /// Runs `omdb nexus sleds expunge` async fn cmd_nexus_sled_expunge( client: &nexus_client::Client, @@ -1487,20 +1542,7 @@ async fn cmd_nexus_sled_expunge( .with_context(|| format!("failed to find sled {}", args.sled_id))?; // Helper to get confirmation messages from the user. - let mut line_editor = Reedline::create(); - let mut read_with_prompt = move |message: &str| { - let prompt = DefaultPrompt::new( - DefaultPromptSegment::Basic(message.to_string()), - DefaultPromptSegment::Empty, - ); - if let Ok(reedline::Signal::Success(input)) = - line_editor.read_line(&prompt) - { - Ok(input) - } else { - bail!("expungement aborted") - } - }; + let mut prompt = ConfirmationPrompt::new(); // Now check whether its sled-agent or SP were found in the most recent // inventory collection. @@ -1530,11 +1572,7 @@ async fn cmd_nexus_sled_expunge( proceed anyway?", args.sled_id, collection.time_done, ); - let confirm = read_with_prompt("y/N")?; - if confirm != "y" { - eprintln!("expungement not confirmed: aborting"); - return Ok(()); - } + prompt.read_and_validate("y/N", "y")?; } } None => { @@ -1552,11 +1590,7 @@ async fn cmd_nexus_sled_expunge( args.sled_id, sled.serial_number(), ); - let confirm = read_with_prompt("sled serial number")?; - if confirm != sled.serial_number() { - eprintln!("sled serial number not confirmed: aborting"); - return Ok(()); - } + prompt.read_and_validate("sled serial number", sled.serial_number())?; let old_policy = client .sled_expunge(&SledSelector { sled: args.sled_id.into_untyped_uuid() }) @@ -1569,3 +1603,118 @@ async fn cmd_nexus_sled_expunge( ); Ok(()) } + +/// Runs `omdb nexus sleds expunge-disk` +async fn cmd_nexus_sled_expunge_disk( + client: &nexus_client::Client, + args: &DiskExpungeArgs, + omdb: &Omdb, + log: &slog::Logger, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + use nexus_db_queries::context::OpContext; + + let datastore = args.db_url_opts.connect(omdb, log).await?; + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + let opctx = &opctx; + + // First, we need to look up the disk so we can lookup identity information. 
+ let (_authz_physical_disk, physical_disk) = + LookupPath::new(opctx, &datastore) + .physical_disk(args.physical_disk_id.into_untyped_uuid()) + .fetch() + .await + .with_context(|| { + format!( + "failed to find physical disk {}", + args.physical_disk_id + ) + })?; + + // Helper to get confirmation messages from the user. + let mut prompt = ConfirmationPrompt::new(); + + // Now check whether its sled-agent was found in the most recent + // inventory collection. + match datastore + .inventory_get_latest_collection(opctx) + .await + .context("loading latest collection")? + { + Some(collection) => { + let disk_identity = omicron_common::disk::DiskIdentity { + vendor: physical_disk.vendor.clone(), + serial: physical_disk.serial.clone(), + model: physical_disk.model.clone(), + }; + + let mut sleds_containing_disk = vec![]; + + for (sled_id, sled_agent) in collection.sled_agents { + for sled_disk in sled_agent.disks { + if sled_disk.identity == disk_identity { + sleds_containing_disk.push(sled_id); + } + } + } + + match sleds_containing_disk.len() { + 0 => {} + 1 => { + eprintln!( + "WARNING: physical disk {} is PRESENT in the most \ + recent inventory collection (spotted at {}). Although \ + expunging a running disk is supported, it is safer \ + to expunge a disk from a system where it has been \ + removed. Are you sure you want to proceed anyway?", + args.physical_disk_id, collection.time_done, + ); + prompt.read_and_validate("y/N", "y")?; + } + _ => { + // This should be impossible due to a unique database index, + // "vendor_serial_model_unique". + // + // Even if someone tried moving a disk, it would need to be + // decommissioned before being re-commissioned elsewhere. + // + // However, we still print out an error message here in the + // (unlikely) event that it happens anyway. + eprintln!( + "ERROR: physical disk {} is PRESENT MULTIPLE TIMES in \ + the most recent inventory collection (spotted at {}). \ + This should not be possible, and is an indication of a \ + database issue.", + args.physical_disk_id, collection.time_done, + ); + bail!("Physical Disk appeared on multiple sleds"); + } + } + } + None => { + eprintln!( + "ERROR: cannot verify the physical disk inventory status \ + because there are no inventory collections present. Please \ + ensure that inventory may be collected." + ); + bail!("No inventory"); + } + } + + eprintln!( + "WARNING: This operation will PERMANENTLY and IRRECOVERABLY mark physical disk \ + {} ({}) expunged. 
To proceed, type the physical disk's serial number.", + args.physical_disk_id, + physical_disk.serial, + ); + prompt.read_and_validate("disk serial number", &physical_disk.serial)?; + + client + .physical_disk_expunge(&PhysicalDiskPath { + disk_id: args.physical_disk_id.into_untyped_uuid(), + }) + .await + .context("expunging disk")?; + eprintln!("expunged disk {}", args.physical_disk_id); + Ok(()) +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 8762907e81..3d6f2af112 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -105,9 +105,10 @@ Usage: omdb db [OPTIONS] Commands: rack Print information about the rack - disks Print information about disks + disks Print information about virtual disks dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory + physical-disks Print information about physical disks reconfigurator-save Save the current Reconfigurator inputs to a file region Print information about regions region-replacement Query for information about region replacements, optionally manually @@ -146,9 +147,10 @@ Usage: omdb db [OPTIONS] Commands: rack Print information about the rack - disks Print information about disks + disks Print information about virtual disks dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory + physical-disks Print information about physical disks reconfigurator-save Save the current Reconfigurator inputs to a file region Print information about regions region-replacement Query for information about region replacements, optionally manually @@ -185,7 +187,7 @@ termination: Exited(2) stdout: --------------------------------------------- stderr: -Print information about disks +Print information about virtual disks Usage: omdb db disks [OPTIONS] @@ -526,6 +528,7 @@ Commands: list-uninitialized List all uninitialized sleds add Add an uninitialized sled expunge Expunge a sled (DANGEROUS) + expunge-disk Expunge a disk (DANGEROUS) help Print this message or the help of the given subcommand(s) Options: diff --git a/nexus/db-model/src/physical_disk.rs b/nexus/db-model/src/physical_disk.rs index c6ef97ee1f..d4a1dcd33c 100644 --- a/nexus/db-model/src/physical_disk.rs +++ b/nexus/db-model/src/physical_disk.rs @@ -85,3 +85,75 @@ impl DatastoreCollectionConfig for PhysicalDisk { type CollectionTimeDeletedColumn = physical_disk::dsl::time_deleted; type CollectionIdColumn = zpool::dsl::sled_id; } + +mod diesel_util { + use diesel::{ + helper_types::{And, EqAny}, + prelude::*, + query_dsl::methods::FilterDsl, + }; + use nexus_types::{ + deployment::DiskFilter, + external_api::views::{PhysicalDiskPolicy, PhysicalDiskState}, + }; + + /// An extension trait to apply a [`DiskFilter`] to a Diesel expression. + /// + /// This is applicable to any Diesel expression which includes the `physical_disk` + /// table. + /// + /// This needs to live here, rather than in `nexus-db-queries`, because it + /// names the `DbPhysicalDiskPolicy` type which is private to this crate. + pub trait ApplyPhysicalDiskFilterExt { + type Output; + + /// Applies a [`DiskFilter`] to a Diesel expression. 
+ fn physical_disk_filter(self, filter: DiskFilter) -> Self::Output; + } + + impl<E> ApplyPhysicalDiskFilterExt for E + where + E: FilterDsl<PhysicalDiskFilterQuery>, + { + type Output = E::Output; + + fn physical_disk_filter(self, filter: DiskFilter) -> Self::Output { + use crate::schema::physical_disk::dsl as physical_disk_dsl; + + // These are only boxed for ease of reference above. + let all_matching_policies: BoxedIterator< + crate::PhysicalDiskPolicy, + > = Box::new( + PhysicalDiskPolicy::all_matching(filter).map(Into::into), + ); + let all_matching_states: BoxedIterator<crate::PhysicalDiskState> = + Box::new( + PhysicalDiskState::all_matching(filter).map(Into::into), + ); + + FilterDsl::filter( + self, + physical_disk_dsl::disk_policy + .eq_any(all_matching_policies) + .and( + physical_disk_dsl::disk_state + .eq_any(all_matching_states), + ), + ) + } + } + + type BoxedIterator<T> = Box<dyn Iterator<Item = T>>; + type PhysicalDiskFilterQuery = And< + EqAny< + crate::schema::physical_disk::disk_policy, + BoxedIterator<crate::PhysicalDiskPolicy>, + >, + EqAny< + crate::schema::physical_disk::disk_state, + BoxedIterator<crate::PhysicalDiskState>, + >, + >; +} + +pub use diesel_util::ApplyPhysicalDiskFilterExt; diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index e51d59075e..11e056d19b 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -26,7 +26,8 @@ use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; -use nexus_types::deployment::SledFilter; +use nexus_db_model::ApplyPhysicalDiskFilterExt; +use nexus_types::deployment::{DiskFilter, SledFilter}; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -247,11 +248,13 @@ impl DataStore { &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, + disk_filter: DiskFilter, ) -> ListResultVec<PhysicalDisk> { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::physical_disk::dsl; paginated(dsl::physical_disk, dsl::id, pagparams) .filter(dsl::time_deleted.is_null()) + .physical_disk_filter(disk_filter) .select(PhysicalDisk::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) .await diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index b2d68036bb..b6de85486a 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -14,7 +14,7 @@ use nexus_types::{ Blueprint, BlueprintMetadata, BlueprintTarget, BlueprintTargetSet, }, external_api::{ - params::{SledSelector, UninitializedSledId}, + params::{PhysicalDiskPath, SledSelector, UninitializedSledId}, shared::{ProbeInfo, UninitializedSled}, views::SledPolicy, }, @@ -472,6 +472,21 @@ pub trait NexusInternalApi { sled: TypedBody<SledSelector>, ) -> Result<HttpResponseOk<SledPolicy>, HttpError>; + /// Mark a physical disk as expunged + /// + /// This is an irreversible process! It should only be called after + /// sufficient warning to the operator. + /// + /// This is idempotent. + #[endpoint { + method = POST, + path = "/physical-disk/expunge", + }] + async fn physical_disk_expunge( + rqctx: RequestContext<Self::Context>, + disk: TypedBody<PhysicalDiskPath>, + ) -> Result<HttpResponseUpdatedNoContent, HttpError>; + + /// Get all the probes associated with a given sled. 
#[endpoint { method = GET, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index fd5341ae80..0165b2d261 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -13,7 +13,9 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; use nexus_db_queries::db::model::DatasetKind; +use nexus_types::deployment::DiskFilter; use nexus_types::deployment::SledFilter; +use nexus_types::external_api::views::PhysicalDiskPolicy; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use omicron_common::api::external::DataPageParams; @@ -186,7 +188,7 @@ impl super::Nexus { // Physical disks - pub async fn physical_disk_lookup<'a>( + pub fn physical_disk_lookup<'a>( &'a self, opctx: &'a OpContext, disk_selector: ¶ms::PhysicalDiskPath, @@ -211,7 +213,9 @@ impl super::Nexus { opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { - self.db_datastore.physical_disk_list(&opctx, pagparams).await + self.db_datastore + .physical_disk_list(&opctx, pagparams, DiskFilter::InService) + .await } /// Upserts a physical disk into the database, updating it if it already exists. @@ -240,6 +244,27 @@ impl super::Nexus { Ok(()) } + /// Mark a physical disk as expunged + /// + /// This is an irreversible process! It should only be called after + /// sufficient warning to the operator. + pub(crate) async fn physical_disk_expunge( + &self, + opctx: &OpContext, + disk: params::PhysicalDiskPath, + ) -> Result<(), Error> { + let physical_disk_lookup = self.physical_disk_lookup(opctx, &disk)?; + let (authz_disk,) = + physical_disk_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore + .physical_disk_update_policy( + opctx, + authz_disk.id(), + PhysicalDiskPolicy::Expunged.into(), + ) + .await + } + // Zpools (contained within sleds) /// Upserts a Zpool into the database, updating it if it already exists. 
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index d23f0d035a..9d616c7e9c 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6142,7 +6142,7 @@ async fn physical_disk_view( let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let (.., physical_disk) = - nexus.physical_disk_lookup(&opctx, &path).await?.fetch().await?; + nexus.physical_disk_lookup(&opctx, &path)?.fetch().await?; Ok(HttpResponseOk(physical_disk.into())) }; apictx diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index f324ea787d..28ff712c24 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -24,6 +24,7 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; +use nexus_types::external_api::params::PhysicalDiskPath; use nexus_types::external_api::params::SledSelector; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::ProbeInfo; @@ -827,6 +828,24 @@ impl NexusInternalApi for NexusInternalApiImpl { .await } + async fn physical_disk_expunge( + rqctx: RequestContext, + disk: TypedBody, + ) -> Result { + let apictx = &rqctx.context().context; + let nexus = &apictx.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_internal_api(&rqctx).await; + nexus.physical_disk_expunge(&opctx, disk.into_inner()).await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx + .internal_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn probes_get( rqctx: RequestContext, path_params: Path, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 8a230469d5..a8f3989da4 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -340,7 +340,7 @@ impl SledDisk { } /// Filters that apply to disks. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, ValueEnum)] pub enum DiskFilter { /// All disks All, @@ -355,16 +355,58 @@ impl DiskFilter { policy: PhysicalDiskPolicy, state: PhysicalDiskState, ) -> bool { + policy.matches(self) && state.matches(self) + } +} + +impl PhysicalDiskPolicy { + /// Returns true if self matches the filter + pub fn matches(self, filter: DiskFilter) -> bool { match self { - DiskFilter::All => true, - DiskFilter::InService => match (policy, state) { - (PhysicalDiskPolicy::InService, PhysicalDiskState::Active) => { - true - } - _ => false, + PhysicalDiskPolicy::InService => match filter { + DiskFilter::All => true, + DiskFilter::InService => true, + }, + PhysicalDiskPolicy::Expunged => match filter { + DiskFilter::All => true, + DiskFilter::InService => false, }, } } + + /// Returns all policies matching the given filter. + /// + /// This is meant for database access, and is generally paired with + /// [`PhysicalDiskState::all_matching`]. See `ApplyPhysicalDiskFilterExt` in + /// nexus-db-model. 
+ pub fn all_matching(filter: DiskFilter) -> impl Iterator { + Self::iter().filter(move |state| state.matches(filter)) + } +} + +impl PhysicalDiskState { + /// Returns true if self matches the filter + pub fn matches(self, filter: DiskFilter) -> bool { + match self { + PhysicalDiskState::Active => match filter { + DiskFilter::All => true, + DiskFilter::InService => true, + }, + PhysicalDiskState::Decommissioned => match filter { + DiskFilter::All => true, + DiskFilter::InService => false, + }, + } + } + + /// Returns all state matching the given filter. + /// + /// This is meant for database access, and is generally paired with + /// [`PhysicalDiskPolicy::all_matching`]. See `ApplyPhysicalDiskFilterExt` in + /// nexus-db-model. + pub fn all_matching(filter: DiskFilter) -> impl Iterator { + Self::iter().filter(move |state| state.matches(filter)) + } } /// Filters that apply to zpools. diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 27430c7599..c5fc2c3b56 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -909,6 +909,34 @@ } } }, + "/physical-disk/expunge": { + "post": { + "summary": "Mark a physical disk as expunged", + "description": "This is an irreversible process! It should only be called after sufficient warning to the operator.\nThis is idempotent.", + "operationId": "physical_disk_expunge", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PhysicalDiskPath" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/probes/{sled}": { "get": { "summary": "Get all the probes associated with a given sled.", @@ -3794,6 +3822,19 @@ "u2" ] }, + "PhysicalDiskPath": { + "type": "object", + "properties": { + "disk_id": { + "description": "ID of the physical disk", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "disk_id" + ] + }, "PhysicalDiskPutRequest": { "type": "object", "properties": {