diff --git a/Cargo.lock b/Cargo.lock index edd71efda1..c5f22e8dd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9113,7 +9113,6 @@ dependencies = [ "serde", "sled-agent-types", "sled-hardware-types", - "sled-storage", "uuid", ] @@ -9163,7 +9162,6 @@ dependencies = [ "serde_json", "sha3", "sled-hardware-types", - "sled-storage", "slog", "thiserror", "toml 0.8.19", diff --git a/common/src/disk.rs b/common/src/disk.rs index c9093371a3..d8b4c2e0a1 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -118,6 +118,76 @@ impl From for DiskVariant { } } +/// Identifies how a single disk management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DiskManagementStatus { + pub identity: DiskIdentity, + pub err: Option<DiskManagementError>, +} + +/// The result from attempting to manage underlying disks. +/// +/// This is more complex than a simple "Error" type because it's possible +/// for some disks to be initialized correctly, while others can fail. +/// +/// This structure provides a mechanism for callers to learn about partial +/// failures, and handle them appropriately on a per-disk basis. 
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] +pub struct DisksManagementResult { + pub status: Vec<DiskManagementStatus>, +} + +impl DisksManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; + } + } + false + } + + pub fn has_retryable_error(&self) -> bool { + for status in &self.status { + if let Some(err) = &status.err { + if err.retryable() { + return true; + } + } + } + false + } +} + +#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum DiskManagementError { + #[error("Disk requested by control plane, but not found on device")] + NotFound, + + #[error("Expected zpool UUID of {expected}, but saw {observed}")] + ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, + + #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] + KeyManager(String), + + #[error("Other error starting disk management: {0}")] + Other(String), +} + +impl DiskManagementError { + fn retryable(&self) -> bool { + match self { + DiskManagementError::KeyManager(_) => true, + _ => false, + } + } +} + /// Describes an M.2 slot, often in the context of writing a system image to /// it. 
#[derive( diff --git a/sled-agent/api/Cargo.toml b/sled-agent/api/Cargo.toml index 69c798097d..046f17574b 100644 --- a/sled-agent/api/Cargo.toml +++ b/sled-agent/api/Cargo.toml @@ -18,5 +18,4 @@ schemars.workspace = true serde.workspace = true sled-agent-types.workspace = true sled-hardware-types.workspace = true -sled-storage.workspace = true uuid.workspace = true diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index e7235e999a..c44b24d712 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -21,7 +21,7 @@ use omicron_common::{ SwitchPorts, VirtualNetworkInterfaceHost, }, }, - disk::{DiskVariant, OmicronPhysicalDisksConfig}, + disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig}, }; use omicron_uuid_kinds::{InstanceUuid, ZpoolUuid}; use schemars::JsonSchema; @@ -46,7 +46,6 @@ use sled_agent_types::{ ZoneBundleId, ZoneBundleMetadata, }, }; -use sled_storage::resources::DisksManagementResult; use uuid::Uuid; #[dropshot::api_description] diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 767a583fa8..2bf8067d1c 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -27,7 +27,9 @@ use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }; -use omicron_common::disk::{DiskVariant, M2Slot, OmicronPhysicalDisksConfig}; +use omicron_common::disk::{ + DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, +}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use sled_agent_api::*; use sled_agent_types::boot_disk::{ @@ -48,7 +50,6 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, StorageLimit, ZoneBundleId, ZoneBundleMetadata, }; -use sled_storage::resources::DisksManagementResult; use std::collections::BTreeMap; type SledApiDescription = ApiDescription<SledAgent>; diff --git 
a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 7bf4e67432..c219a747ce 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -21,6 +21,7 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; @@ -34,7 +35,6 @@ use sled_agent_types::instance::InstancePutStateBody; use sled_agent_types::instance::InstancePutStateResponse; use sled_agent_types::instance::InstanceUnregisterResponse; use sled_agent_types::sled::AddSledRequest; -use sled_storage::resources::DisksManagementResult; use std::sync::Arc; use uuid::Uuid; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 865271e418..10536c8c80 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -35,7 +35,8 @@ use omicron_common::api::internal::shared::{ VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDisksConfig, + DiskIdentity, DiskVariant, DisksManagementResult, + OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -52,7 +53,6 @@ use sled_agent_types::instance::{ InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, }; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 948ac96bcd..556388ce93 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -19,15 +19,15 @@ use 
dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; use omicron_common::disk::DiskIdentity; +use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; -use sled_storage::resources::DiskManagementStatus; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 04e34b41f9..50e5611027 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -53,7 +53,7 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::OmicronPhysicalDisksConfig; +use omicron_common::disk::{DisksManagementResult, OmicronPhysicalDisksConfig}; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use sled_agent_api::Zpool; @@ -74,7 +74,6 @@ use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; use sled_storage::manager::StorageHandle; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml index 7406651a98..e01d40db28 100644 --- a/sled-agent/types/Cargo.toml +++ b/sled-agent/types/Cargo.toml @@ -28,7 +28,6 @@ serde_human_bytes.workspace = true serde_json.workspace = true sha3.workspace = true sled-hardware-types.workspace = true -sled-storage.workspace = 
true slog.workspace = true thiserror.workspace = true toml.workspace = true diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 8168f32cea..3cbf00530a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -10,7 +10,7 @@ use crate::config::MountConfig; use crate::dataset::{DatasetName, CONFIG_DATASET}; use crate::disk::RawDisk; use crate::error::Error; -use crate::resources::{AllDisks, DisksManagementResult, StorageResources}; +use crate::resources::{AllDisks, StorageResources}; use camino::Utf8PathBuf; use debug_ignore::DebugIgnore; use futures::future::FutureExt; @@ -18,7 +18,8 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDisksConfig, + DiskIdentity, DiskVariant, DisksManagementResult, + OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; use slog::{info, o, warn, Logger}; @@ -826,10 +827,10 @@ mod tests { use crate::dataset::DatasetType; use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; - use crate::resources::DiskManagementError; use super::*; use camino_tempfile::tempdir_in; + use omicron_common::disk::DiskManagementError; use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; use sled_hardware::DiskFirmware; diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 98d6398d8b..425aafb12d 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -14,12 +14,10 @@ use illumos_utils::zpool::{PathInPool, ZpoolName}; use key_manager::StorageKeyRequester; use omicron_common::api::external::Generation; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDiskConfig, + DiskIdentity, DiskManagementError, DiskManagementStatus, DiskVariant, + DisksManagementResult, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, }; -use 
omicron_uuid_kinds::ZpoolUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use sled_hardware::DiskFirmware; use slog::{info, o, warn, Logger}; use std::collections::BTreeMap; @@ -32,76 +30,6 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum DiskManagementError { - #[error("Disk requested by control plane, but not found on device")] - NotFound, - - #[error("Expected zpool UUID of {expected}, but saw {observed}")] - ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, - - #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] - KeyManager(String), - - #[error("Other error starting disk management: {0}")] - Other(String), -} - -impl DiskManagementError { - fn retryable(&self) -> bool { - match self { - DiskManagementError::KeyManager(_) => true, - _ => false, - } - } -} - -/// Identifies how a single disk management operation may have succeeded or -/// failed. -#[derive(Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub struct DiskManagementStatus { - pub identity: DiskIdentity, - pub err: Option<DiskManagementError>, -} - -/// The result from attempting to manage underlying disks. -/// -/// This is more complex than a simple "Error" type because it's possible -/// for some disks to be initialized correctly, while others can fail. -/// -/// This structure provides a mechanism for callers to learn about partial -/// failures, and handle them appropriately on a per-disk basis. 
-#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] -pub struct DisksManagementResult { - pub status: Vec<DiskManagementStatus>, -} - -impl DisksManagementResult { - pub fn has_error(&self) -> bool { - for status in &self.status { - if status.err.is_some() { - return true; - } - } - false - } - - pub fn has_retryable_error(&self) -> bool { - for status in &self.status { - if let Some(err) = &status.err { - if err.retryable() { - return true; - } - } - } - false - } -} - // The Sled Agent is responsible for both observing disks and managing them at // the request of the broader control plane. This enum encompasses that duality, // by representing all disks that can exist, managed or not.