From ee96a64ec5efb6f5c51f4e641424d6bd2f81d662 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 8 Mar 2024 16:59:51 -0500 Subject: [PATCH 1/9] nexus internal API: add sled-expunge --- nexus/src/app/sled.rs | 18 ++++ nexus/src/internal_api/http_entrypoints.rs | 28 ++++++ nexus/types/src/external_api/params.rs | 1 + openapi/nexus-internal.json | 111 +++++++++++++++++++++ 4 files changed, 158 insertions(+) diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 88f70c7d0d..0ba89e5c8d 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -14,6 +14,7 @@ use nexus_db_queries::db; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; +use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -74,6 +75,23 @@ impl super::Nexus { Ok(()) } + /// Mark a sled as expunged + /// + /// This is an irreversible process! It should only be called after + /// sufficient warning to the operator. + /// + /// This is idempotent, and it returns the old policy of the sled. + pub(crate) async fn sled_expunge( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result { + let sled_lookup = self.sled_lookup(opctx, &sled_id)?; + let (authz_sled,) = + sled_lookup.lookup_for(authz::Action::Modify).await?; + self.db_datastore.sled_set_policy_to_expunged(opctx, &authz_sled).await + } + pub(crate) async fn sled_request_firewall_rules( &self, opctx: &OpContext, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index e298935fee..ed49ab56fd 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -29,8 +29,10 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; +use nexus_types::external_api::params::SledSelector; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::UninitializedSled; +use nexus_types::external_api::views::SledPolicy; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -93,6 +95,7 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(sled_list_uninitialized)?; api.register(sled_add)?; + api.register(sled_expunge)?; Ok(()) } @@ -864,3 +867,28 @@ async fn sled_add( }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } + +/// Mark a sled as expunged +/// +/// This is an irreversible process! It should only be called after +/// sufficient warning to the operator. +/// +/// This is idempotent, and it returns the old policy of the sled. +#[endpoint { + method = POST, + path = "/sleds/expunge", +}] +async fn sled_expunge( + rqctx: RequestContext>, + sled: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.nexus; + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let previous_policy = + nexus.sled_expunge(&opctx, sled.into_inner().sled).await?; + Ok(HttpResponseOk(previous_policy)) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 31cb1d3e5c..49dfc8137c 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -91,6 +91,7 @@ id_path_param!(SwitchPath, switch_id, "switch"); // Internal API parameters id_path_param!(BlueprintPath, blueprint_id, "blueprint"); +#[derive(Clone, Copy, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SledSelector { /// ID of the sled pub sled: Uuid, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index fb3ff976ae..dc4c9b0e86 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -978,6 +978,41 @@ } } }, + "/sleds/expunge": { + "post": { + "summary": "Mark a sled as expunged", + "description": "This is an irreversible process! It should only be called after sufficient warning to the operator.\nThis is idempotent, and it returns the old policy of the sled.", + "operationId": "sled_expunge", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledSelector" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledPolicy" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sleds/uninitialized": { "get": { "summary": "List uninitialized sleds", @@ -6529,6 +6564,69 @@ "vmm_state" ] }, + "SledPolicy": { + "description": "The operator-defined policy of a sled.", + "oneOf": [ + { + "description": "The operator has indicated that the sled is in-service.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "in_service" + ] + }, + "provision_policy": { + "description": "Determines whether new resources can be provisioned onto the sled.", + "allOf": [ + { + "$ref": "#/components/schemas/SledProvisionPolicy" + } + ] + } + }, + "required": [ + "kind", + "provision_policy" + ] + }, + { + "description": "The operator has indicated that the sled has been permanently removed from service.\n\nThis is a terminal state: once a particular sled ID is expunged, it will never return to service. (The actual hardware may be reused, but it will be treated as a brand-new sled.)\n\nAn expunged sled is always non-provisionable.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "expunged" + ] + } + }, + "required": [ + "kind" + ] + } + ] + }, + "SledProvisionPolicy": { + "description": "The operator-defined provision policy of a sled.\n\nThis controls whether new resources are going to be provisioned on this sled.", + "oneOf": [ + { + "description": "New resources will be provisioned on this sled.", + "type": "string", + "enum": [ + "provisionable" + ] + }, + { + "description": "New resources will not be provisioned on this sled. However, if the sled is currently in service, existing resources will continue to be on this sled unless manually migrated off.", + "type": "string", + "enum": [ + "non_provisionable" + ] + } + ] + }, "SledRole": { "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", "oneOf": [ @@ -6548,6 +6646,19 @@ } ] }, + "SledSelector": { + "type": "object", + "properties": { + "sled": { + "description": "ID of the sled", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "sled" + ] + }, "SourceNatConfig": { "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", "type": "object", From cd1fa55df65d4b026322a55af45511f4de921520 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 8 Mar 2024 17:53:35 -0500 Subject: [PATCH 2/9] add `omdb nexus sleds expunge ` --- Cargo.lock | 1 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 112 ++++++++++++++---------- dev-tools/omdb/src/bin/omdb/nexus.rs | 124 +++++++++++++++++++++++++++ nexus/db-model/src/sled.rs | 4 + 5 files changed, 198 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 416766b9cb..0815cd2070 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5199,6 +5199,7 @@ dependencies = [ "oximeter-client", "pq-sys", "ratatui", + "reedline", "serde", "serde_json", "sled-agent-client", diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 2b0480be2d..f4850b21d0 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -36,6 +36,7 @@ oximeter-client.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" ratatui.workspace = true +reedline.workspace = true serde.workspace = true serde_json.workspace = true sled-agent-client.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 339c257d8e..57c44a1b64 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -147,9 +147,8 @@ impl Display for MaybeSledId { #[derive(Debug, Args)] pub struct DbArgs { - /// URL of the database SQL interface - #[clap(long, env("OMDB_DB_URL"))] - db_url: Option, + #[clap(flatten)] + db_url_opts: DbUrlOptions, #[clap(flatten)] fetch_opts: DbFetchOptions, @@ -158,6 +157,71 @@ pub struct DbArgs { command: DbCommands, } +#[derive(Debug, Args)] +pub struct DbUrlOptions { + /// URL of the database SQL interface + #[clap(long, env("OMDB_DB_URL"))] + db_url: Option, +} + +impl DbUrlOptions { + async fn resolve_pg_url( + &self, + omdb: &Omdb, + log: &slog::Logger, + ) -> anyhow::Result { + match &self.db_url { + Some(cli_or_env_url) => Ok(cli_or_env_url.clone()), + None => { + eprintln!( + "note: database URL not specified. Will search DNS." + ); + eprintln!("note: (override with --db-url or OMDB_DB_URL)"); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Cockroach, + ) + .await?; + + format!( + "postgresql://root@{}/omicron?sslmode=disable", + addrs + .into_iter() + .map(|a| a.to_string()) + .collect::>() + .join(",") + ) + .parse() + .context("failed to parse constructed postgres URL") + } + } + } + + pub async fn connect( + &self, + omdb: &Omdb, + log: &slog::Logger, + ) -> anyhow::Result> { + let db_url = self.resolve_pg_url(omdb, log).await?; + eprintln!("note: using database URL {}", &db_url); + + let db_config = db::Config { url: db_url.clone() }; + let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); + + // Being a dev tool, we want to try this operation even if the schema + // doesn't match what we expect. So we use `DataStore::new_unchecked()` + // here. We will then check the schema version explicitly and warn the + // user if it doesn't match. + let datastore = Arc::new( + DataStore::new_unchecked(log.clone(), pool) + .map_err(|e| anyhow!(e).context("creating datastore"))?, + ); + check_schema_version(&datastore).await; + Ok(datastore) + } +} + #[derive(Debug, Args)] pub struct DbFetchOptions { /// limit to apply to queries that fetch rows @@ -404,47 +468,7 @@ impl DbArgs { omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - let db_url = match &self.db_url { - Some(cli_or_env_url) => cli_or_env_url.clone(), - None => { - eprintln!( - "note: database URL not specified. Will search DNS." - ); - eprintln!("note: (override with --db-url or OMDB_DB_URL)"); - let addrs = omdb - .dns_lookup_all( - log.clone(), - internal_dns::ServiceName::Cockroach, - ) - .await?; - - format!( - "postgresql://root@{}/omicron?sslmode=disable", - addrs - .into_iter() - .map(|a| a.to_string()) - .collect::>() - .join(",") - ) - .parse() - .context("failed to parse constructed postgres URL")? - } - }; - eprintln!("note: using database URL {}", &db_url); - - let db_config = db::Config { url: db_url.clone() }; - let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); - - // Being a dev tool, we want to try this operation even if the schema - // doesn't match what we expect. So we use `DataStore::new_unchecked()` - // here. We will then check the schema version explicitly and warn the - // user if it doesn't match. - let datastore = Arc::new( - DataStore::new_unchecked(log.clone(), pool) - .map_err(|e| anyhow!(e).context("creating datastore"))?, - ); - check_schema_version(&datastore).await; - + let datastore = self.db_url_opts.connect(omdb, log).await?; let opctx = OpContext::for_tests(log.clone(), datastore.clone()); match &self.command { DbCommands::Rack(RackArgs { command: RackCommands::List }) => { diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 4705da2a32..e6f754651b 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -4,6 +4,7 @@ //! omdb commands that query or update specific Nexus instances +use crate::db::DbUrlOptions; use crate::Omdb; use anyhow::Context; use chrono::DateTime; @@ -17,7 +18,11 @@ use nexus_client::types::ActivationReason; use nexus_client::types::BackgroundTask; use nexus_client::types::CurrentStatus; use nexus_client::types::LastResult; +use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; +use reedline::DefaultPrompt; +use reedline::DefaultPromptSegment; +use reedline::Reedline; use serde::Deserialize; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; @@ -150,6 +155,8 @@ enum SledsCommands { ListUninitialized, /// Add an uninitialized sled Add(SledAddArgs), + /// Expunge a sled (DANGEROUS) + Expunge(SledExpungeArgs), } #[derive(Debug, Args)] @@ -160,6 +167,17 @@ struct SledAddArgs { part: String, } +#[derive(Debug, Args)] +struct SledExpungeArgs { + // expunge is _extremely_ dangerous, so we also require a database + // connection to perform some safety checks + #[clap(flatten)] + db_url_opts: DbUrlOptions, + + /// sled ID + sled_id: Uuid, +} + impl NexusArgs { /// Run a `omdb nexus` subcommand. pub(crate) async fn run_cmd( @@ -253,6 +271,12 @@ impl NexusArgs { omdb.check_allow_destructive()?; cmd_nexus_sled_add(&client, args).await } + NexusCommands::Sleds(SledsArgs { + command: SledsCommands::Expunge(args), + }) => { + omdb.check_allow_destructive()?; + cmd_nexus_sled_expunge(&client, args, omdb, log).await + } } } } @@ -1048,3 +1072,103 @@ async fn cmd_nexus_sled_add( eprintln!("added sled {} ({})", args.serial, args.part); Ok(()) } + +/// Runs `omdb nexus sleds expunge` +async fn cmd_nexus_sled_expunge( + client: &nexus_client::Client, + args: &SledExpungeArgs, + omdb: &Omdb, + log: &slog::Logger, +) -> Result<(), anyhow::Error> { + // This is an extremely dangerous and irreversible operation. We put a + // couple of safeguards in place to ensure this cannot be called without + // due consideration: + // + // 1. We'll require manual input on stdin to confirm the sled to be removed + // 2. We'll warn sternly if the sled-to-be-expunged is still present in the + // most recent inventory collection + use nexus_db_model::Sled; + use nexus_db_queries::context::OpContext; + + // First, we need to look up the sled so we know it's serial number. + let datastore = args.db_url_opts.connect(omdb, log).await?; + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + let opctx = &opctx; + + let sled = { + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; + use diesel::SelectableHelper; + use nexus_db_queries::db::schema::sled::dsl; + + let conn = datastore.pool_connection_for_tests().await?; + dsl::sled + .filter(dsl::id.eq(args.sled_id)) + .select(Sled::as_select()) + .first_async::(&*conn) + .await + .with_context(|| format!("failed to find sled {}", args.sled_id))? + }; + + // Now check whether its sled-agent or SP were found in the most recent + // inventory collection. + match datastore + .inventory_get_latest_collection(opctx) + .await + .context("loading latest collection")? + { + Some(collection) => { + let sled_present = + collection.sled_agents.contains_key(&args.sled_id) + || collection.sps.keys().any(|baseboard| { + baseboard.part_number == sled.part_number() + && baseboard.serial_number == sled.serial_number() + }); + if sled_present { + eprintln!( + "WARNING: sled {} IS PRESENT in the most recent inventory \ + collection; are you sure you want to mark it expunged?", + args.sled_id, + ); + } + } + None => { + eprintln!("WARNING: no inventory collections present"); + } + } + + eprintln!( + "WARNING: This operation will PERMANENTLY and IRRECOVABLY mark sled \ + {} ({}) expunged. To proceed, type the sled's serial number.", + args.sled_id, + sled.serial_number(), + ); + let mut line_editor = Reedline::create(); + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic("sled serial".to_string()), + DefaultPromptSegment::Empty, + ); + if let Ok(reedline::Signal::Success(input)) = + line_editor.read_line(&prompt) + { + if input != sled.serial_number() { + eprintln!("serial number mismatch; aborting"); + return Ok(()); + } + } else { + eprintln!("expungment aborted"); + return Ok(()); + } + + let old_policy = client + .sled_expunge(&SledSelector { sled: args.sled_id }) + .await + .context("expunging sled")? + .into_inner(); + eprintln!( + "expunged sled {} (previous policy: {old_policy:?})", + args.sled_id + ); + Ok(()) +} diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index c2cbb65bd6..a603f28d57 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -100,6 +100,10 @@ impl Sled { &self.serial_number } + pub fn part_number(&self) -> &str { + &self.part_number + } + /// The policy here is the `views::SledPolicy` because we expect external /// users to always use that. pub fn policy(&self) -> views::SledPolicy { From a806902ee57cecc5565ed9ca63f28f9ce140a53f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 8 Mar 2024 17:56:31 -0500 Subject: [PATCH 3/9] cargo fmt --- dev-tools/omdb/src/bin/omdb/nexus.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index e6f754651b..0c35660e33 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1149,8 +1149,7 @@ async fn cmd_nexus_sled_expunge( DefaultPromptSegment::Basic("sled serial".to_string()), DefaultPromptSegment::Empty, ); - if let Ok(reedline::Signal::Success(input)) = - line_editor.read_line(&prompt) + if let Ok(reedline::Signal::Success(input)) = line_editor.read_line(&prompt) { if input != sled.serial_number() { eprintln!("serial number mismatch; aborting"); From 45a782abc073bf34eaf58bcab853db127dc24bbb Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 8 Mar 2024 17:57:54 -0500 Subject: [PATCH 4/9] silence clippy enum variant warnings --- dev-tools/omdb/src/bin/omdb/nexus.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 0c35660e33..34dbeda441 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -41,6 +41,7 @@ pub struct NexusArgs { } /// Subcommands for the "omdb nexus" subcommand +#[allow(clippy::large_enum_variant)] #[derive(Debug, Subcommand)] enum NexusCommands { /// print information about background tasks @@ -150,6 +151,7 @@ struct SledsArgs { } #[derive(Debug, Subcommand)] +#[allow(clippy::large_enum_variant)] enum SledsCommands { /// List all uninitialized sleds ListUninitialized, From 6effe7d180ff0f466df96f999bd45bc5f3a49fa0 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 11 Mar 2024 16:16:35 -0400 Subject: [PATCH 5/9] PR feedback --- dev-tools/omdb/src/bin/omdb/nexus.rs | 60 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 34dbeda441..732b100eac 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -6,6 +6,7 @@ use crate::db::DbUrlOptions; use crate::Omdb; +use anyhow::bail; use anyhow::Context; use chrono::DateTime; use chrono::SecondsFormat; @@ -20,6 +21,7 @@ use nexus_client::types::CurrentStatus; use nexus_client::types::LastResult; use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; +use nexus_db_queries::db::lookup::LookupPath; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; @@ -1089,28 +1091,33 @@ async fn cmd_nexus_sled_expunge( // 1. We'll require manual input on stdin to confirm the sled to be removed // 2. We'll warn sternly if the sled-to-be-expunged is still present in the // most recent inventory collection - use nexus_db_model::Sled; use nexus_db_queries::context::OpContext; - // First, we need to look up the sled so we know it's serial number. let datastore = args.db_url_opts.connect(omdb, log).await?; let opctx = OpContext::for_tests(log.clone(), datastore.clone()); let opctx = &opctx; - let sled = { - use async_bb8_diesel::AsyncRunQueryDsl; - use diesel::ExpressionMethods; - use diesel::QueryDsl; - use diesel::SelectableHelper; - use nexus_db_queries::db::schema::sled::dsl; - - let conn = datastore.pool_connection_for_tests().await?; - dsl::sled - .filter(dsl::id.eq(args.sled_id)) - .select(Sled::as_select()) - .first_async::(&*conn) - .await - .with_context(|| format!("failed to find sled {}", args.sled_id))? + // First, we need to look up the sled so we know its serial number. + let (_authz_sled, sled) = LookupPath::new(opctx, &datastore) + .sled_id(args.sled_id) + .fetch() + .await + .with_context(|| format!("failed to find sled {}", args.sled_id))?; + + // Helper to get confirmation messages from the user. + let mut line_editor = Reedline::create(); + let mut read_with_prompt = move |message: &str| { + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic(message.to_string()), + DefaultPromptSegment::Empty, + ); + if let Ok(reedline::Signal::Success(input)) = + line_editor.read_line(&prompt) + { + Ok(input) + } else { + bail!("expungement aborted") + } }; // Now check whether its sled-agent or SP were found in the most recent @@ -1133,6 +1140,11 @@ async fn cmd_nexus_sled_expunge( collection; are you sure you want to mark it expunged?", args.sled_id, ); + let confirm = read_with_prompt("y/N")?; + if confirm != "y" { + eprintln!("expungement not confirmed: aborting"); + return Ok(()); + } } } None => { @@ -1146,19 +1158,9 @@ async fn cmd_nexus_sled_expunge( args.sled_id, sled.serial_number(), ); - let mut line_editor = Reedline::create(); - let prompt = DefaultPrompt::new( - DefaultPromptSegment::Basic("sled serial".to_string()), - DefaultPromptSegment::Empty, - ); - if let Ok(reedline::Signal::Success(input)) = line_editor.read_line(&prompt) - { - if input != sled.serial_number() { - eprintln!("serial number mismatch; aborting"); - return Ok(()); - } - } else { - eprintln!("expungment aborted"); + let confirm = read_with_prompt("sled serial number")?; + if confirm != sled.serial_number() { + eprintln!("sled serial number not confirmed: aborting"); return Ok(()); } From 27812fde50c61322d8276d88e6102f89c49358f9 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 11 Mar 2024 16:24:48 -0400 Subject: [PATCH 6/9] omdb destructive operation check returns a token --- dev-tools/omdb/src/bin/omdb/main.rs | 26 +++++++++++++++------ dev-tools/omdb/src/bin/omdb/nexus.rs | 35 +++++++++++++++++----------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index 0fb9ba0121..0ad00430a3 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -92,16 +92,28 @@ struct Omdb { command: OmdbCommands, } -impl Omdb { - fn check_allow_destructive(&self) -> anyhow::Result<()> { - anyhow::ensure!( - self.allow_destructive, - "This command is potentially destructive. \ +mod check_allow_destructive { + /// Zero-size type that potentially-destructive functions can accept to + /// ensure `Omdb::check_allow_destructive` has been called. + // This is tucked away inside a module to prevent it from being constructed + // by anything other than `Omdb::check_allow_destructive`. + pub(crate) struct DestructiveOperationToken(()); + + impl super::Omdb { + pub(crate) fn check_allow_destructive( + &self, + ) -> anyhow::Result { + anyhow::ensure!( + self.allow_destructive, + "This command is potentially destructive. \ Pass the `-w` / `--destructive` flag to allow it." - ); - Ok(()) + ); + Ok(DestructiveOperationToken(())) + } } +} +impl Omdb { async fn dns_lookup_all( &self, log: slog::Logger, diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 732b100eac..caa63ba532 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -4,6 +4,7 @@ //! omdb commands that query or update specific Nexus instances +use crate::check_allow_destructive::DestructiveOperationToken; use crate::db::DbUrlOptions; use crate::Omdb; use anyhow::bail; @@ -234,8 +235,8 @@ impl NexusArgs { NexusCommands::Blueprints(BlueprintsArgs { command: BlueprintsCommands::Delete(args), }) => { - omdb.check_allow_destructive()?; - cmd_nexus_blueprints_delete(&client, args).await + let token = omdb.check_allow_destructive()?; + cmd_nexus_blueprints_delete(&client, args, token).await } NexusCommands::Blueprints(BlueprintsArgs { command: @@ -249,21 +250,23 @@ impl NexusArgs { command: BlueprintTargetCommands::Set(args), }), }) => { - omdb.check_allow_destructive()?; - cmd_nexus_blueprints_target_set(&client, args).await + let token = omdb.check_allow_destructive()?; + cmd_nexus_blueprints_target_set(&client, args, token).await } NexusCommands::Blueprints(BlueprintsArgs { command: BlueprintsCommands::Regenerate, }) => { - omdb.check_allow_destructive()?; - cmd_nexus_blueprints_regenerate(&client).await + let token = omdb.check_allow_destructive()?; + cmd_nexus_blueprints_regenerate(&client, token).await } NexusCommands::Blueprints(BlueprintsArgs { command: BlueprintsCommands::GenerateFromCollection(args), }) => { - omdb.check_allow_destructive()?; - cmd_nexus_blueprints_generate_from_collection(&client, args) - .await + let token = omdb.check_allow_destructive()?; + cmd_nexus_blueprints_generate_from_collection( + &client, args, token, + ) + .await } NexusCommands::Sleds(SledsArgs { @@ -272,14 +275,14 @@ impl NexusArgs { NexusCommands::Sleds(SledsArgs { command: SledsCommands::Add(args), }) => { - omdb.check_allow_destructive()?; - cmd_nexus_sled_add(&client, args).await + let token = omdb.check_allow_destructive()?; + cmd_nexus_sled_add(&client, args, token).await } NexusCommands::Sleds(SledsArgs { command: SledsCommands::Expunge(args), }) => { - omdb.check_allow_destructive()?; - cmd_nexus_sled_expunge(&client, args, omdb, log).await + let token = omdb.check_allow_destructive()?; + cmd_nexus_sled_expunge(&client, args, omdb, log, token).await } } } @@ -937,6 +940,7 @@ async fn cmd_nexus_blueprints_diff( async fn cmd_nexus_blueprints_delete( client: &nexus_client::Client, args: &BlueprintIdArgs, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { let _ = client .blueprint_delete(&args.blueprint_id) @@ -962,6 +966,7 @@ async fn cmd_nexus_blueprints_target_show( async fn cmd_nexus_blueprints_target_set( client: &nexus_client::Client, args: &BlueprintTargetSetArgs, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { let enabled = match args.enabled { BlueprintTargetSetEnabled::Enabled => true, @@ -997,6 +1002,7 @@ async fn cmd_nexus_blueprints_target_set( async fn cmd_nexus_blueprints_generate_from_collection( client: &nexus_client::Client, args: &CollectionIdArgs, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { let blueprint = client .blueprint_generate_from_collection( @@ -1012,6 +1018,7 @@ async fn cmd_nexus_blueprints_generate_from_collection( async fn cmd_nexus_blueprints_regenerate( client: &nexus_client::Client, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { let blueprint = client.blueprint_regenerate().await.context("generating blueprint")?; @@ -1065,6 +1072,7 @@ async fn cmd_nexus_sleds_list_uninitialized( async fn cmd_nexus_sled_add( client: &nexus_client::Client, args: &SledAddArgs, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { client .sled_add(&UninitializedSledId { @@ -1083,6 +1091,7 @@ async fn cmd_nexus_sled_expunge( args: &SledExpungeArgs, omdb: &Omdb, log: &slog::Logger, + _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { // This is an extremely dangerous and irreversible operation. We put a // couple of safeguards in place to ensure this cannot be called without From 41427790c7b7241187024cc80b580e740e75495b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 11 Mar 2024 16:36:01 -0400 Subject: [PATCH 7/9] use collection's interned baseboards instead of checking `sps` --- dev-tools/omdb/src/bin/omdb/nexus.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index caa63ba532..6ca67aba7e 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -23,6 +23,7 @@ use nexus_client::types::LastResult; use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::inventory::BaseboardId; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; @@ -1137,15 +1138,21 @@ async fn cmd_nexus_sled_expunge( .context("loading latest collection")? { Some(collection) => { - let sled_present = + let sled_baseboard = BaseboardId { + part_number: sled.part_number().to_string(), + serial_number: sled.serial_number().to_string(), + }; + // Check the collection for either the sled-id or the baseboard. In + // general a sled-agent's self report should result in both the + // sled-id and the baseboard being present in the collection, but + // there are some test environments (e.g., `omicron-dev run-all`) + // where that isn't guaranteed. + let sled_present_in_collection = collection.sled_agents.contains_key(&args.sled_id) - || collection.sps.keys().any(|baseboard| { - baseboard.part_number == sled.part_number() - && baseboard.serial_number == sled.serial_number() - }); - if sled_present { + || collection.baseboards.contains(&sled_baseboard); + if sled_present_in_collection { eprintln!( - "WARNING: sled {} IS PRESENT in the most recent inventory \ + "WARNING: sled {} is PRESENT in the most recent inventory \ collection; are you sure you want to mark it expunged?", args.sled_id, ); From 2475a275d0b5aa2ec5ff57200e6c064ead36c436 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 12 Mar 2024 10:10:06 -0400 Subject: [PATCH 8/9] attach #[must_use] to DestructiveOperationToken --- dev-tools/omdb/src/bin/omdb/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index 0ad00430a3..17de22c2fa 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -97,6 +97,7 @@ mod check_allow_destructive { /// ensure `Omdb::check_allow_destructive` has been called. // This is tucked away inside a module to prevent it from being constructed // by anything other than `Omdb::check_allow_destructive`. + #[must_use] pub(crate) struct DestructiveOperationToken(()); impl super::Omdb { @@ -106,7 +107,7 @@ mod check_allow_destructive { anyhow::ensure!( self.allow_destructive, "This command is potentially destructive. \ - Pass the `-w` / `--destructive` flag to allow it." + Pass the `-w` / `--destructive` flag to allow it." ); Ok(DestructiveOperationToken(())) } From 83e8c4850009fe97681515a7bbbf9b6c16bbc8f4 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 13 Mar 2024 10:28:07 -0400 Subject: [PATCH 9/9] clarify omdb sled expungement user warnings --- dev-tools/omdb/src/bin/omdb/nexus.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 6ca67aba7e..ecf2a8072d 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1153,8 +1153,10 @@ async fn cmd_nexus_sled_expunge( if sled_present_in_collection { eprintln!( "WARNING: sled {} is PRESENT in the most recent inventory \ - collection; are you sure you want to mark it expunged?", - args.sled_id, + collection (spotted at {}). It is dangerous to expunge a \ + sled that is still running. Are you sure you want to \ + proceed anyway?", + args.sled_id, collection.time_done, ); let confirm = read_with_prompt("y/N")?; if confirm != "y" { @@ -1164,7 +1166,11 @@ async fn cmd_nexus_sled_expunge( } } None => { - eprintln!("WARNING: no inventory collections present"); + eprintln!( + "WARNING: cannot verify that the sled is physically gone \ + because there are no inventory collections present. Please \ + make sure that the sled has been physically removed." + ); } }