diff --git a/Cargo.lock b/Cargo.lock index 121e31550f..74c01d3411 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4698,6 +4698,7 @@ version = "0.1.0" dependencies = [ "anyhow", "base64", + "camino", "clap 4.4.3", "dropshot", "expectorate", @@ -4723,6 +4724,7 @@ dependencies = [ "signal-hook-tokio", "slog", "slog-dtrace", + "slog-error-chain", "sp-sim", "subprocess", "thiserror", @@ -7842,6 +7844,25 @@ dependencies = [ "slog-term", ] +[[package]] +name = "slog-error-chain" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f69041f45774602108e47fb25e705dc23acfb2" +dependencies = [ + "slog", + "slog-error-chain-derive", +] + +[[package]] +name = "slog-error-chain-derive" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f69041f45774602108e47fb25e705dc23acfb2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.32", +] + [[package]] name = "slog-json" version = "2.6.1" diff --git a/Cargo.toml b/Cargo.toml index 841c7bb16b..ca134536f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -339,6 +339,7 @@ slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "relea slog-async = "2.8" slog-dtrace = "0.2" slog-envlogger = "2.2" +slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" smf = "0.2" snafu = "0.7" diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 75c31e9977..f2e5f83a8a 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true base64.workspace = true +camino.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -25,6 +26,7 @@ signal-hook.workspace = true signal-hook-tokio.workspace = true slog.workspace = true slog-dtrace.workspace = true +slog-error-chain.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tokio-stream.workspace = true diff --git a/gateway/src/bin/mgs.rs b/gateway/src/bin/mgs.rs index 6917d4f174..39810ea06a 100644 --- a/gateway/src/bin/mgs.rs +++ b/gateway/src/bin/mgs.rs @@ -5,6 +5,7 @@ //! Executable program to run gateway, the management gateway service use anyhow::{anyhow, Context}; +use camino::Utf8PathBuf; use clap::Parser; use futures::StreamExt; use omicron_common::cmd::{fatal, CmdError}; @@ -12,7 +13,6 @@ use omicron_gateway::{run_openapi, start_server, Config, MgsArguments}; use signal_hook::consts::signal; use signal_hook_tokio::Signals; use std::net::SocketAddrV6; -use std::path::PathBuf; use uuid::Uuid; #[derive(Debug, Parser)] @@ -24,7 +24,7 @@ enum Args { /// Start an MGS server Run { #[clap(name = "CONFIG_FILE_PATH", action)] - config_file_path: PathBuf, + config_file_path: Utf8PathBuf, /// Read server ID and address(es) for dropshot server from our SMF /// properties (only valid when running as a service on illumos) @@ -81,9 +81,7 @@ async fn do_run() -> Result<(), CmdError> { address, } => { let config = Config::from_file(&config_file_path) - .with_context(|| { - format!("failed to parse {}", config_file_path.display()) - }) + .map_err(anyhow::Error::new) .map_err(CmdError::Failure)?; let mut signals = Signals::new([signal::SIGUSR1]) diff --git a/gateway/src/config.rs b/gateway/src/config.rs index adbd16c6a1..afdb046881 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -6,10 +6,11 @@ //! configuration use crate::management_switch::SwitchConfig; +use camino::Utf8Path; +use camino::Utf8PathBuf; use dropshot::ConfigLogging; use serde::{Deserialize, Serialize}; -use std::path::Path; -use std::path::PathBuf; +use slog_error_chain::SlogInlineError; use thiserror::Error; /// Configuration for a gateway server @@ -30,13 +31,11 @@ impl Config { /// Load a `Config` from the given TOML file /// /// This config object can then be used to create a new gateway server. - // The format is described in the README. // TODO add a README - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); + pub fn from_file(path: &Utf8Path) -> Result { let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; + .map_err(|err| LoadError::Io { path: path.into(), err })?; let config_parsed: Config = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; + .map_err(|err| LoadError::Parse { path: path.into(), err })?; Ok(config_parsed) } } @@ -46,32 +45,18 @@ pub struct PartialDropshotConfig { pub request_body_max_bytes: usize, } -#[derive(Debug, Error)] +#[derive(Debug, Error, SlogInlineError)] pub enum LoadError { - #[error("error reading \"{}\": {}", path.display(), err)] - Io { path: PathBuf, err: std::io::Error }, - #[error("error parsing \"{}\": {}", path.display(), err)] - Parse { path: PathBuf, err: toml::de::Error }, -} - -impl From<(PathBuf, std::io::Error)> for LoadError { - fn from((path, err): (PathBuf, std::io::Error)) -> Self { - LoadError::Io { path, err } - } -} - -impl From<(PathBuf, toml::de::Error)> for LoadError { - fn from((path, err): (PathBuf, toml::de::Error)) -> Self { - LoadError::Parse { path, err } - } -} - -impl std::cmp::PartialEq for LoadError { - fn eq(&self, other: &std::io::Error) -> bool { - if let LoadError::Io { err, .. } = self { - err.kind() == other.kind() - } else { - false - } - } + #[error("error reading \"{path}\"")] + Io { + path: Utf8PathBuf, + #[source] + err: std::io::Error, + }, + #[error("error parsing \"{path}\"")] + Parse { + path: Utf8PathBuf, + #[source] + err: toml::de::Error, + }, } diff --git a/gateway/src/error.rs b/gateway/src/error.rs index 6daf9312ba..5933daa340 100644 --- a/gateway/src/error.rs +++ b/gateway/src/error.rs @@ -5,16 +5,17 @@ //! Error handling facilities for the management gateway. use crate::management_switch::SpIdentifier; -use anyhow::anyhow; use dropshot::HttpError; use gateway_messages::SpError; pub use gateway_sp_comms::error::CommunicationError; use gateway_sp_comms::error::UpdateError; use gateway_sp_comms::BindError; +use slog_error_chain::InlineErrorChain; +use slog_error_chain::SlogInlineError; use std::time::Duration; use thiserror::Error; -#[derive(Debug, Error)] +#[derive(Debug, Error, SlogInlineError)] pub enum StartupError { #[error("invalid configuration file: {}", .reasons.join(", "))] InvalidConfig { reasons: Vec }, @@ -23,116 +24,137 @@ pub enum StartupError { BindError(#[from] BindError), } -#[derive(Debug, Error)] +#[derive(Debug, Error, SlogInlineError)] pub enum SpCommsError { #[error("discovery process not yet complete")] DiscoveryNotYetComplete, #[error("location discovery failed: {reason}")] DiscoveryFailed { reason: String }, - #[error("nonexistent SP (type {:?}, slot {})", .0.typ, .0.slot)] + #[error("nonexistent SP {0:?}")] SpDoesNotExist(SpIdentifier), - #[error( - "unknown socket address for SP (type {:?}, slot {})", - .0.typ, - .0.slot, - )] + #[error("unknown socket address for SP {0:?}")] SpAddressUnknown(SpIdentifier), #[error( "timeout ({timeout:?}) elapsed communicating with {sp:?} on port {port}" )] Timeout { timeout: Duration, port: usize, sp: Option }, - #[error("error communicating with SP: {0}")] - SpCommunicationFailed(#[from] CommunicationError), - #[error("updating SP failed: {0}")] - UpdateFailed(#[from] UpdateError), + #[error("error communicating with SP {sp:?}")] + SpCommunicationFailed { + sp: SpIdentifier, + #[source] + err: CommunicationError, + }, + #[error("updating SP {sp:?} failed")] + UpdateFailed { + sp: SpIdentifier, + #[source] + err: UpdateError, + }, } impl From for HttpError { - fn from(err: SpCommsError) -> Self { - match err { + fn from(error: SpCommsError) -> Self { + match error { SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request( Some("InvalidSp".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed( - CommunicationError::SpError( - SpError::SerialConsoleAlreadyAttached, - ), - ) => HttpError::for_bad_request( + SpCommsError::SpCommunicationFailed { + err: + CommunicationError::SpError( + SpError::SerialConsoleAlreadyAttached, + ), + .. + } => HttpError::for_bad_request( Some("SerialConsoleAttached".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed( - CommunicationError::SpError(SpError::RequestUnsupportedForSp), - ) => HttpError::for_bad_request( + SpCommsError::SpCommunicationFailed { + err: + CommunicationError::SpError(SpError::RequestUnsupportedForSp), + .. + } => HttpError::for_bad_request( Some("RequestUnsupportedForSp".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed( - CommunicationError::SpError( - SpError::RequestUnsupportedForComponent, - ), - ) => HttpError::for_bad_request( + SpCommsError::SpCommunicationFailed { + err: + CommunicationError::SpError( + SpError::RequestUnsupportedForComponent, + ), + .. + } => HttpError::for_bad_request( Some("RequestUnsupportedForComponent".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed( - CommunicationError::SpError(SpError::InvalidSlotForComponent), - ) => HttpError::for_bad_request( + SpCommsError::SpCommunicationFailed { + err: + CommunicationError::SpError(SpError::InvalidSlotForComponent), + .. + } => HttpError::for_bad_request( Some("InvalidSlotForComponent".to_string()), - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::UpdateFailed(UpdateError::ImageTooLarge) => { - HttpError::for_bad_request( - Some("ImageTooLarge".to_string()), - format!("{:#}", anyhow!(err)), - ) - } - SpCommsError::UpdateFailed(UpdateError::Communication( - CommunicationError::SpError(SpError::UpdateSlotBusy), - )) => http_err_with_message( + SpCommsError::UpdateFailed { + err: UpdateError::ImageTooLarge, + .. + } => HttpError::for_bad_request( + Some("ImageTooLarge".to_string()), + InlineErrorChain::new(&error).to_string(), + ), + SpCommsError::UpdateFailed { + err: + UpdateError::Communication(CommunicationError::SpError( + SpError::UpdateSlotBusy, + )), + .. + } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateSlotBusy", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::UpdateFailed(UpdateError::Communication( - CommunicationError::SpError(SpError::UpdateInProgress { - .. - }), - )) => http_err_with_message( + SpCommsError::UpdateFailed { + err: + UpdateError::Communication(CommunicationError::SpError( + SpError::UpdateInProgress { .. }, + )), + .. + } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateInProgress", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::DiscoveryNotYetComplete => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "DiscoveryNotYetComplete", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::SpAddressUnknown(_) => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "SpAddressUnknown", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::DiscoveryFailed { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "DiscoveryFailed ", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), SpCommsError::Timeout { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "Timeout ", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), - SpCommsError::SpCommunicationFailed(_) => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "SpCommunicationFailed", - format!("{:#}", anyhow!(err)), - ), - SpCommsError::UpdateFailed(_) => http_err_with_message( + SpCommsError::SpCommunicationFailed { .. } => { + http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "SpCommunicationFailed", + InlineErrorChain::new(&error).to_string(), + ) + } + SpCommsError::UpdateFailed { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "UpdateFailed", - format!("{:#}", anyhow!(err)), + InlineErrorChain::new(&error).to_string(), ), } } diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index 2db6121f1d..e33e8dd4a6 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -566,10 +566,12 @@ async fn sp_get( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp_id = path.into_inner().sp; - let sp = apictx.mgmt_switch.sp(sp_id.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; - let state = sp.state().await.map_err(SpCommsError::from)?; + let state = sp.state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(state.into())) } @@ -588,9 +590,12 @@ async fn sp_startup_options_get( ) -> Result, HttpError> { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - let options = sp.get_startup_options().await.map_err(SpCommsError::from)?; + let options = sp.get_startup_options().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(options.into())) } @@ -610,11 +615,12 @@ async fn sp_startup_options_set( ) -> Result { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - sp.set_startup_options(body.into_inner().into()) - .await - .map_err(SpCommsError::from)?; + sp.set_startup_options(body.into_inner().into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -632,8 +638,11 @@ async fn sp_component_list( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - let inventory = sp.inventory().await.map_err(SpCommsError::from)?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; + let inventory = sp.inventory().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(inventory.into())) } @@ -653,11 +662,13 @@ async fn sp_component_get( ) -> Result>, HttpError> { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let details = - sp.component_details(component).await.map_err(SpCommsError::from)?; + let details = sp.component_details(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(details.entries.into_iter().map(Into::into).collect())) } @@ -690,7 +701,8 @@ async fn sp_component_caboose_get( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let ComponentCabooseSlot { firmware_slot } = query_params.into_inner(); let component = component_from_str(&component)?; @@ -714,19 +726,31 @@ async fn sp_component_caboose_get( CABOOSE_KEY_GIT_COMMIT, ) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let board = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_BOARD) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let name = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_NAME) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let version = sp .read_component_caboose(component, firmware_slot, CABOOSE_KEY_VERSION) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; @@ -752,10 +776,13 @@ async fn sp_component_clear_status( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - sp.component_clear_status(component).await.map_err(SpCommsError::from)?; + sp.component_clear_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -775,13 +802,13 @@ async fn sp_component_active_slot_get( ) -> Result, HttpError> { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let slot = sp - .component_active_slot(component) - .await - .map_err(SpCommsError::from)?; + let slot = sp.component_active_slot(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) } @@ -809,14 +836,15 @@ async fn sp_component_active_slot_set( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let slot = body.into_inner().slot; let persist = query_params.into_inner().persist; - sp.set_component_active_slot(component, slot, persist) - .await - .map_err(SpCommsError::from)?; + sp.set_component_active_slot(component, slot, persist).await.map_err( + |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, + )?; Ok(HttpResponseUpdatedNoContent {}) } @@ -843,21 +871,27 @@ async fn sp_component_serial_console_attach( ) -> WebsocketEndpointResult { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); + let sp_id = sp.into(); let component = component_from_str(&component)?; // Ensure we can attach to this SP's serial console. let console = apictx .mgmt_switch - .sp(sp.into())? + .sp(sp_id)? .serial_console_attach(component) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; let log = apictx.log.new(slog::o!("sp" => format!("{sp:?}"))); // We've successfully attached to the SP's serial console: upgrade the // websocket and run our side of that connection. - websocket.handle(move |conn| crate::serial_console::run(console, conn, log)) + websocket.handle(move |conn| { + crate::serial_console::run(sp_id, console, conn, log) + }) } /// Detach the websocket connection attached to the given SP component's serial @@ -875,9 +909,12 @@ async fn sp_component_serial_console_detach( // TODO-cleanup: "component" support for the serial console is half baked; // we don't use it at all to detach. let PathSpComponent { sp, component: _ } = path.into_inner(); + let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp.into())?; - sp.serial_console_detach().await.map_err(SpCommsError::from)?; + let sp = apictx.mgmt_switch.sp(sp_id)?; + sp.serial_console_detach().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -927,13 +964,17 @@ async fn sp_component_reset( ) -> Result { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; sp.reset_component_prepare(component) .and_then(|()| sp.reset_component_trigger(component)) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -964,7 +1005,8 @@ async fn sp_component_update( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let ComponentUpdateIdSlot { id, firmware_slot } = query_params.into_inner(); @@ -973,7 +1015,7 @@ async fn sp_component_update( sp.start_update(component, id, firmware_slot, image) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -993,11 +1035,13 @@ async fn sp_component_update_status( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; - let status = - sp.update_status(component).await.map_err(SpCommsError::from)?; + let status = sp.update_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(status.into())) } @@ -1020,11 +1064,14 @@ async fn sp_component_update_abort( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let component = component_from_str(&component)?; let UpdateAbortBody { id } = body.into_inner(); - sp.update_abort(component, id).await.map_err(SpCommsError::from)?; + sp.update_abort(component, id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1043,6 +1090,7 @@ async fn sp_rot_cmpa_get( let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); + let sp_id = sp.into(); // Ensure the caller knows they're asking for the RoT if component_from_str(&component)? != SpComponent::ROT { @@ -1052,8 +1100,10 @@ async fn sp_rot_cmpa_get( )); } - let sp = apictx.mgmt_switch.sp(sp.into())?; - let data = sp.read_rot_cmpa().await.map_err(SpCommsError::from)?; + let sp = apictx.mgmt_switch.sp(sp_id)?; + let data = sp.read_rot_cmpa().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; let base64_data = base64::engine::general_purpose::STANDARD.encode(data); @@ -1076,6 +1126,7 @@ async fn sp_rot_cfpa_get( let PathSpComponent { sp, component } = path.into_inner(); let GetCfpaParams { slot } = params.into_inner(); + let sp_id = sp.into(); // Ensure the caller knows they're asking for the RoT if component_from_str(&component)? != SpComponent::ROT { @@ -1085,13 +1136,13 @@ async fn sp_rot_cfpa_get( )); } - let sp = apictx.mgmt_switch.sp(sp.into())?; + let sp = apictx.mgmt_switch.sp(sp_id)?; let data = match slot { RotCfpaSlot::Active => sp.read_rot_active_cfpa().await, RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await, RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await, } - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp: sp_id, err })?; let base64_data = base64::engine::general_purpose::STANDARD.encode(data); @@ -1141,16 +1192,19 @@ async fn ignition_get( let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; - let sp = path.into_inner().sp; - let ignition_target = mgmt_switch.ignition_target(sp.into())?; + let sp_id = path.into_inner().sp.into(); + let ignition_target = mgmt_switch.ignition_target(sp_id)?; let state = mgmt_switch .ignition_controller() .ignition_state(ignition_target) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; - let info = SpIgnitionInfo { id: sp, details: state.into() }; + let info = SpIgnitionInfo { id: sp_id.into(), details: state.into() }; Ok(HttpResponseOk(info)) } @@ -1173,13 +1227,17 @@ async fn ignition_command( let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; let PathSpIgnitionCommand { sp, command } = path.into_inner(); - let ignition_target = mgmt_switch.ignition_target(sp.into())?; + let sp_id = sp.into(); + let ignition_target = mgmt_switch.ignition_target(sp_id)?; mgmt_switch .ignition_controller() .ignition_command(ignition_target, command.into()) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1197,9 +1255,12 @@ async fn sp_power_state_get( path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = sp.power_state().await.map_err(SpCommsError::from)?; + let power_state = sp.power_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseOk(power_state.into())) } @@ -1218,10 +1279,13 @@ async fn sp_power_state_set( body: TypedBody, ) -> Result { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let power_state = body.into_inner(); - sp.set_power_state(power_state.into()).await.map_err(SpCommsError::from)?; + sp.set_power_state(power_state.into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1241,7 +1305,8 @@ async fn sp_installinator_image_id_set( use ipcc_key_value::Key; let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; let image_id = ipcc_key_value::InstallinatorImageId::from(body.into_inner()); @@ -1251,7 +1316,7 @@ async fn sp_installinator_image_id_set( image_id.serialize(), ) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp: sp_id, err })?; Ok(HttpResponseUpdatedNoContent {}) } @@ -1268,12 +1333,16 @@ async fn sp_installinator_image_id_delete( use ipcc_key_value::Key; let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let sp_id = path.into_inner().sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; // We clear the image ID by setting it to a 0-length vec. sp.set_ipcc_key_lookup_value(Key::InstallinatorImageId as u8, Vec::new()) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; Ok(HttpResponseUpdatedNoContent {}) } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 10fcf1539c..5aa833f6e2 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -35,6 +35,7 @@ use slog::info; use slog::o; use slog::warn; use slog::Logger; +use slog_error_chain::InlineErrorChain; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::mem; @@ -138,7 +139,10 @@ impl Server { match gateway_sp_comms::register_probes() { Ok(_) => debug!(log, "successfully registered DTrace USDT probes"), Err(err) => { - warn!(log, "failed to register DTrace USDT probes: {}", err); + warn!( + log, "failed to register DTrace USDT probes"; + InlineErrorChain::new(&err), + ); } } @@ -328,9 +332,9 @@ pub async fn start_server( ); let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { - let msg = format!("failed to register DTrace probes: {}", e); - error!(log, "{}", msg); - return Err(msg); + let err = InlineErrorChain::new(&e); + error!(log, "failed to register DTrace probes"; &err); + return Err(format!("failed to register DTrace probes: {err}")); } else { debug!(log, "registered DTrace probes"); } diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs index 03fdda2cca..0571dc051e 100644 --- a/gateway/src/management_switch.rs +++ b/gateway/src/management_switch.rs @@ -383,7 +383,14 @@ impl ManagementSwitch { > { let controller = self.ignition_controller(); let location_map = self.location_map()?; - let bulk_state = controller.bulk_ignition_state().await?; + let bulk_state = + controller.bulk_ignition_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { + sp: location_map + .port_to_id(self.local_ignition_controller_port), + err, + } + })?; Ok(bulk_state.into_iter().enumerate().filter_map(|(target, state)| { // If the SP returns an ignition target we don't have a port @@ -402,11 +409,8 @@ impl ManagementSwitch { None => { warn!( self.log, - concat!( - "ignoring unknown ignition target {}", - " returned by ignition controller SP" - ), - target, + "ignoring unknown ignition target {target} \ + returned by ignition controller SP", ); None } diff --git a/gateway/src/serial_console.rs b/gateway/src/serial_console.rs index 3e49f8526a..49aa807e55 100644 --- a/gateway/src/serial_console.rs +++ b/gateway/src/serial_console.rs @@ -5,6 +5,7 @@ // Copyright 2022 Oxide Computer Company use crate::error::SpCommsError; +use crate::SpIdentifier; use dropshot::WebsocketChannelResult; use dropshot::WebsocketConnection; use futures::stream::SplitSink; @@ -19,6 +20,7 @@ use slog::error; use slog::info; use slog::warn; use slog::Logger; +use slog_error_chain::SlogInlineError; use std::borrow::Cow; use std::ops::Deref; use std::ops::DerefMut; @@ -34,7 +36,7 @@ use tokio_tungstenite::tungstenite::protocol::WebSocketConfig; use tokio_tungstenite::tungstenite::Message; use tokio_tungstenite::WebSocketStream; -#[derive(Debug, thiserror::Error)] +#[derive(Debug, thiserror::Error, SlogInlineError)] enum SerialTaskError { #[error(transparent)] SpCommsError(#[from] SpCommsError), @@ -43,6 +45,7 @@ enum SerialTaskError { } pub(crate) async fn run( + sp: SpIdentifier, console: AttachedSerialConsole, conn: WebsocketConnection, log: Logger, @@ -80,7 +83,7 @@ pub(crate) async fn run( let (console_tx, mut console_rx) = console.split(); let console_tx = DetachOnDrop::new(console_tx); let mut ws_recv_handle = - tokio::spawn(ws_recv_task(ws_stream, console_tx, log.clone())); + tokio::spawn(ws_recv_task(sp, ws_stream, console_tx, log.clone())); loop { tokio::select! { @@ -112,7 +115,9 @@ pub(crate) async fn run( Ok(()) => (), Err(TrySendError::Full(data)) => { warn!( - log, "channel full; discarding serial console data from SP"; + log, + "channel full; discarding serial \ + console data from SP"; "length" => data.len(), ); } @@ -160,6 +165,7 @@ async fn ws_sink_task( } async fn ws_recv_task( + sp: SpIdentifier, mut ws_stream: SplitStream>, mut console_tx: DetachOnDrop, log: Logger, @@ -175,7 +181,7 @@ async fn ws_recv_task( console_tx .write(data) .await - .map_err(SpCommsError::from)?; + .map_err(|err| SpCommsError::SpCommunicationFailed { sp, err })?; keepalive.reset(); } Some(Ok(Message::Close(_))) | None => { @@ -194,7 +200,7 @@ async fn ws_recv_task( } _= keepalive.tick() => { - console_tx.keepalive().await.map_err(SpCommsError::from)?; + console_tx.keepalive().await.map_err(|err| SpCommsError::SpCommunicationFailed { sp, err })?; } } }