diff --git a/Cargo.lock b/Cargo.lock index 3f7b669e37..b588f2738d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3822,6 +3822,7 @@ name = "installinator-client" version = "0.1.0" dependencies = [ "installinator-common", + "omicron-common", "omicron-workspace-hack", "progenitor", "regress", @@ -3842,6 +3843,7 @@ dependencies = [ "camino", "illumos-utils", "libc", + "omicron-common", "omicron-workspace-hack", "proptest", "schemars", @@ -5139,8 +5141,10 @@ dependencies = [ "omicron-workspace-hack", "schemars", "serde", + "serde_json", "sled-hardware-types", "strum", + "thiserror", "uuid", ] @@ -5842,6 +5846,7 @@ dependencies = [ "serde_with", "similar-asserts", "sled-agent-client", + "sled-agent-types", "slog", "slog-async", "slog-dtrace", @@ -6071,8 +6076,6 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "once_cell", - "openapi-lint", - "openapiv3", "opte-ioctl", "oximeter", "oximeter-instruments", @@ -6090,6 +6093,7 @@ dependencies = [ "serde_human_bytes", "serde_json", "sha3", + "sled-agent-api", "sled-agent-client", "sled-agent-types", "sled-hardware", @@ -6369,6 +6373,7 @@ dependencies = [ "oximeter-api", "serde_json", "similar", + "sled-agent-api", "supports-color", "wicketd-api", ] @@ -9138,6 +9143,23 @@ dependencies = [ "parking_lot 0.11.2", ] +[[package]] +name = "sled-agent-api" +version = "0.1.0" +dependencies = [ + "camino", + "dropshot", + "nexus-sled-agent-shared", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", + "sled-agent-types", + "sled-hardware-types", + "uuid", +] + [[package]] name = "sled-agent-client" version = "0.1.0" @@ -9165,21 +9187,25 @@ name = "sled-agent-types" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "bootstore", "camino", "camino-tempfile", + "chrono", "nexus-sled-agent-shared", "omicron-common", "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", "oxnet", + "propolis-client 0.1.0 
(git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", "rcgen", "schemars", "serde", + "serde_human_bytes", "serde_json", + "sha3", "sled-hardware-types", - "sled-storage", "slog", "thiserror", "toml 0.8.19", diff --git a/Cargo.toml b/Cargo.toml index bb899f8825..962bfb82de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,7 @@ members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/api", "sled-agent/bootstrap-agent-api", "sled-agent/types", "sled-hardware", @@ -196,6 +197,7 @@ default-members = [ "passwords", "rpaths", "sled-agent", + "sled-agent/api", "sled-agent/bootstrap-agent-api", "sled-agent/types", "sled-hardware", @@ -517,6 +519,7 @@ similar-asserts = "1.5.0" # are still doing mupdate a change to the on-disk format will break existing DNS # server zones. sled = "=0.34.7" +sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } sled-hardware = { path = "sled-hardware" } diff --git a/clients/installinator-client/Cargo.toml b/clients/installinator-client/Cargo.toml index ca2de0476a..ba869d79bd 100644 --- a/clients/installinator-client/Cargo.toml +++ b/clients/installinator-client/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] installinator-common.workspace = true +omicron-common.workspace = true progenitor.workspace = true regress.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } diff --git a/clients/installinator-client/src/lib.rs b/clients/installinator-client/src/lib.rs index a39ff3ff80..3b7abc333b 100644 --- a/clients/installinator-client/src/lib.rs +++ b/clients/installinator-client/src/lib.rs @@ -21,7 +21,7 @@ progenitor::generate_api!( replace = { Duration = std::time::Duration, EventReportForInstallinatorSpec = installinator_common::EventReport, - M2Slot = installinator_common::M2Slot, + M2Slot = omicron_common::disk::M2Slot, ProgressEventForGenericSpec = 
installinator_common::ProgressEvent, ProgressEventForInstallinatorSpec = installinator_common::ProgressEvent, StepEventForGenericSpec = installinator_common::StepEvent, diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 3856a472ab..089ff9b324 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -19,6 +19,8 @@ use std::{ }; use uuid::Uuid; +use super::nexus::HostIdentifier; + /// The type of network interface #[derive( Clone, @@ -635,6 +637,53 @@ pub struct ResolvedVpcRoute { pub target: RouterTarget, } +/// VPC firewall rule after object name resolution has been performed by Nexus +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct ResolvedVpcFirewallRule { + pub status: external::VpcFirewallRuleStatus, + pub direction: external::VpcFirewallRuleDirection, + pub targets: Vec, + pub filter_hosts: Option>, + pub filter_ports: Option>, + pub filter_protocols: Option>, + pub action: external::VpcFirewallRuleAction, + pub priority: external::VpcFirewallRulePriority, +} + +/// A mapping from a virtual NIC to a physical host +#[derive( + Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct VirtualNetworkInterfaceHost { + pub virtual_ip: IpAddr, + pub virtual_mac: external::MacAddr, + pub physical_host_ip: Ipv6Addr, + pub vni: external::Vni, +} + +/// DHCP configuration for a port +/// +/// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we +/// use `InstanceRuntimeState::hostname` for this value. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DhcpConfig { + /// DNS servers to send to the instance + /// + /// (DHCPv4 option 6; DHCPv6 option 23) + pub dns_servers: Vec, + + /// DNS zone this instance's hostname belongs to (e.g. 
the `project.example` + /// part of `instance1.project.example`) + /// + /// (DHCPv4 option 15; used in DHCPv6 option 39) + pub host_domain: Option, + + /// DNS search domains + /// + /// (DHCPv4 option 119; DHCPv6 option 24) + pub search_domains: Vec, +} + /// The target for a given router entry. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, diff --git a/common/src/disk.rs b/common/src/disk.rs index 4b4cd2e69d..d8b4c2e0a1 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -4,6 +4,9 @@ //! Disk related types shared among crates +use std::fmt; + +use anyhow::bail; use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -114,3 +117,115 @@ impl From for DiskVariant { } } } + +/// Identifies how a single disk management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DiskManagementStatus { + pub identity: DiskIdentity, + pub err: Option, +} + +/// The result from attempting to manage underlying disks. +/// +/// This is more complex than a simple "Error" type because it's possible +/// for some disks to be initialized correctly, while others can fail. +/// +/// This structure provides a mechanism for callers to learn about partial +/// failures, and handle them appropriately on a per-disk basis. 
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] +pub struct DisksManagementResult { + pub status: Vec, +} + +impl DisksManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; + } + } + false + } + + pub fn has_retryable_error(&self) -> bool { + for status in &self.status { + if let Some(err) = &status.err { + if err.retryable() { + return true; + } + } + } + false + } +} + +#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum DiskManagementError { + #[error("Disk requested by control plane, but not found on device")] + NotFound, + + #[error("Expected zpool UUID of {expected}, but saw {observed}")] + ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, + + #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] + KeyManager(String), + + #[error("Other error starting disk management: {0}")] + Other(String), +} + +impl DiskManagementError { + fn retryable(&self) -> bool { + match self { + DiskManagementError::KeyManager(_) => true, + _ => false, + } + } +} + +/// Describes an M.2 slot, often in the context of writing a system image to +/// it. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Deserialize, + Serialize, + JsonSchema, +)] +pub enum M2Slot { + A, + B, +} + +impl fmt::Display for M2Slot { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::A => f.write_str("A"), + Self::B => f.write_str("B"), + } + } +} + +impl TryFrom for M2Slot { + type Error = anyhow::Error; + + fn try_from(value: i64) -> Result { + match value { + // Gimlet should have 2 M.2 drives: drive A is assigned slot 17, and + // drive B is assigned slot 18. 
+ 17 => Ok(Self::A), + 18 => Ok(Self::B), + _ => bail!("unexpected M.2 slot {value}"), + } + } +} diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index e60000cc06..85d27aaafd 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -27,6 +27,7 @@ openapi-lint.workspace = true owo-colors.workspace = true oximeter-api.workspace = true serde_json.workspace = true +sled-agent-api.workspace = true similar.workspace = true supports-color.workspace = true wicketd-api.workspace = true diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index f991d35ec4..37a657ee93 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -87,6 +87,16 @@ pub fn all_apis() -> Vec { filename: "oximeter.json", extra_validation: None, }, + ApiSpec { + title: "Oxide Sled Agent API", + version: "0.0.1", + description: "API for interacting with individual sleds", + boundary: ApiBoundary::Internal, + api_description: + sled_agent_api::sled_agent_api_mod::stub_api_description, + filename: "sled-agent.json", + extra_validation: None, + }, ApiSpec { title: "Oxide Technician Port Control Service", version: "0.0.1", diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs index 4dcb390e9e..26ab4d6218 100644 --- a/illumos-utils/src/opte/firewall_rules.rs +++ b/illumos-utils/src/opte/firewall_rules.rs @@ -5,7 +5,6 @@ //! Convert Omicron VPC firewall rules to OPTE firewall rules. 
use super::net_to_cidr; -use crate::opte::params::VpcFirewallRule; use crate::opte::Vni; use macaddr::MacAddr6; use omicron_common::api::external::VpcFirewallRuleAction; @@ -13,6 +12,7 @@ use omicron_common::api::external::VpcFirewallRuleDirection; use omicron_common::api::external::VpcFirewallRuleProtocol; use omicron_common::api::external::VpcFirewallRuleStatus; use omicron_common::api::internal::nexus::HostIdentifier; +use omicron_common::api::internal::shared::ResolvedVpcFirewallRule; use oxide_vpc::api::Address; use oxide_vpc::api::Direction; use oxide_vpc::api::Filters; @@ -34,7 +34,7 @@ trait FromVpcFirewallRule { fn protos(&self) -> Vec; } -impl FromVpcFirewallRule for VpcFirewallRule { +impl FromVpcFirewallRule for ResolvedVpcFirewallRule { fn action(&self) -> FirewallAction { match self.action { VpcFirewallRuleAction::Allow => FirewallAction::Allow, @@ -118,7 +118,7 @@ impl FromVpcFirewallRule for VpcFirewallRule { /// a single host address and protocol, so we must unroll rules with multiple /// hosts/protocols. pub fn opte_firewall_rules( - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], vni: &Vni, mac: &MacAddr6, ) -> Vec { diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index d7fd96b0c0..9a86711ae6 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -13,7 +13,6 @@ cfg_if::cfg_if! { } mod firewall_rules; -pub mod params; mod port; mod port_manager; diff --git a/illumos-utils/src/opte/params.rs b/illumos-utils/src/opte/params.rs deleted file mode 100644 index 17c61d680f..0000000000 --- a/illumos-utils/src/opte/params.rs +++ /dev/null @@ -1,65 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use omicron_common::api::external; -use omicron_common::api::internal::nexus::HostIdentifier; -use omicron_common::api::internal::shared::NetworkInterface; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::net::IpAddr; -use std::net::Ipv6Addr; - -/// Update firewall rules for a VPC -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct VpcFirewallRulesEnsureBody { - pub vni: external::Vni, - pub rules: Vec, -} - -/// VPC firewall rule after object name resolution has been performed by Nexus -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct VpcFirewallRule { - pub status: external::VpcFirewallRuleStatus, - pub direction: external::VpcFirewallRuleDirection, - pub targets: Vec, - pub filter_hosts: Option>, - pub filter_ports: Option>, - pub filter_protocols: Option>, - pub action: external::VpcFirewallRuleAction, - pub priority: external::VpcFirewallRulePriority, -} - -/// A mapping from a virtual NIC to a physical host -#[derive( - Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, -)] -pub struct VirtualNetworkInterfaceHost { - pub virtual_ip: IpAddr, - pub virtual_mac: external::MacAddr, - pub physical_host_ip: Ipv6Addr, - pub vni: external::Vni, -} - -/// DHCP configuration for a port -/// -/// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we -/// use `InstanceRuntimeState::hostname` for this value. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct DhcpConfig { - /// DNS servers to send to the instance - /// - /// (DHCPv4 option 6; DHCPv6 option 23) - pub dns_servers: Vec, - - /// DNS zone this instance's hostname belongs to (e.g. 
the `project.example` - /// part of `instance1.project.example`) - /// - /// (DHCPv4 option 15; used in DHCPv6 option 39) - pub host_domain: Option, - - /// DNS search domains - /// - /// (DHCPv4 option 119; DHCPv6 option 24) - pub search_domains: Vec, -} diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 93c646cfab..735428907e 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -6,8 +6,6 @@ use crate::dladm::OPTE_LINK_PREFIX; use crate::opte::opte_firewall_rules; -use crate::opte::params::VirtualNetworkInterfaceHost; -use crate::opte::params::VpcFirewallRule; use crate::opte::port::PortData; use crate::opte::Error; use crate::opte::Gateway; @@ -17,6 +15,7 @@ use ipnetwork::IpNetwork; use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::ResolvedVpcFirewallRule; use omicron_common::api::internal::shared::ResolvedVpcRoute; use omicron_common::api::internal::shared::ResolvedVpcRouteSet; use omicron_common::api::internal::shared::ResolvedVpcRouteState; @@ -24,6 +23,7 @@ use omicron_common::api::internal::shared::RouterId; use omicron_common::api::internal::shared::RouterTarget as ApiRouterTarget; use omicron_common::api::internal::shared::RouterVersion; use omicron_common::api::internal::shared::SourceNatConfig; +use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DhcpCfg; @@ -96,7 +96,7 @@ pub struct PortCreateParams<'a> { pub source_nat: Option, pub ephemeral_ip: Option, pub floating_ips: &'a [IpAddr], - pub firewall_rules: &'a [VpcFirewallRule], + pub firewall_rules: &'a [ResolvedVpcFirewallRule], pub dhcp_config: DhcpCfg, pub is_service: bool, } @@ -664,7 +664,7 @@ impl PortManager { pub fn 
firewall_rules_ensure( &self, vni: external::Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { use opte_ioctl::OpteHdl; @@ -705,7 +705,7 @@ impl PortManager { pub fn firewall_rules_ensure( &self, vni: external::Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { info!( self.inner.log, diff --git a/installinator-common/Cargo.toml b/installinator-common/Cargo.toml index 4c5560148f..039304c9de 100644 --- a/installinator-common/Cargo.toml +++ b/installinator-common/Cargo.toml @@ -11,6 +11,7 @@ workspace = true anyhow.workspace = true camino.workspace = true illumos-utils.workspace = true +omicron-common.workspace = true libc.workspace = true schemars.workspace = true serde.workspace = true diff --git a/installinator-common/src/progress.rs b/installinator-common/src/progress.rs index 900fe70028..9078da6ba5 100644 --- a/installinator-common/src/progress.rs +++ b/installinator-common/src/progress.rs @@ -4,9 +4,9 @@ use std::{collections::BTreeSet, fmt, net::SocketAddr}; -use anyhow::bail; use camino::Utf8PathBuf; use illumos_utils::zpool; +use omicron_common::disk::M2Slot; use schemars::{ gen::SchemaGenerator, schema::{Schema, SchemaObject}, @@ -165,47 +165,6 @@ impl WriteOutput { } } -/// An M.2 slot that was written. -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Deserialize, - Serialize, - JsonSchema, -)] -pub enum M2Slot { - A, - B, -} - -impl fmt::Display for M2Slot { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::A => f.write_str("A"), - Self::B => f.write_str("B"), - } - } -} - -impl TryFrom for M2Slot { - type Error = anyhow::Error; - - fn try_from(value: i64) -> Result { - match value { - // Gimlet should have 2 M.2 drives: drive A is assigned slot 17, and - // drive B is assigned slot 18. 
- 17 => Ok(Self::A), - 18 => Ok(Self::B), - _ => bail!("unexpected M.2 slot {value}"), - } - } -} - /// The specification for write events. #[derive(JsonSchema)] pub enum WriteSpec {} diff --git a/installinator/src/write.rs b/installinator/src/write.rs index c7710baff7..fdc83cffa2 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -16,11 +16,14 @@ use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::zpool::{Zpool, ZpoolName}; use installinator_common::{ - ControlPlaneZonesSpec, ControlPlaneZonesStepId, M2Slot, RawDiskWriter, - StepContext, StepProgress, StepResult, StepSuccess, UpdateEngine, - WriteComponent, WriteError, WriteOutput, WriteSpec, WriteStepId, + ControlPlaneZonesSpec, ControlPlaneZonesStepId, RawDiskWriter, StepContext, + StepProgress, StepResult, StepSuccess, UpdateEngine, WriteComponent, + WriteError, WriteOutput, WriteSpec, WriteStepId, +}; +use omicron_common::{ + disk::M2Slot, + update::{ArtifactHash, ArtifactHashId}, }; -use omicron_common::update::{ArtifactHash, ArtifactHashId}; use sha2::{Digest, Sha256}; use slog::{info, warn, Logger}; use tokio::{ diff --git a/nexus-sled-agent-shared/Cargo.toml b/nexus-sled-agent-shared/Cargo.toml index 544cebfbe4..144c755f34 100644 --- a/nexus-sled-agent-shared/Cargo.toml +++ b/nexus-sled-agent-shared/Cargo.toml @@ -11,8 +11,11 @@ omicron-common.workspace = true omicron-passwords.workspace = true omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true +# TODO: replace uses of propolis_client with local types schemars.workspace = true serde.workspace = true +serde_json.workspace = true sled-hardware-types.workspace = true strum.workspace = true +thiserror.workspace = true uuid.workspace = true diff --git a/nexus-sled-agent-shared/README.md b/nexus-sled-agent-shared/README.md index eeb3492eea..77b4d64486 100644 --- a/nexus-sled-agent-shared/README.md +++ b/nexus-sled-agent-shared/README.md @@ -3,7 +3,15 @@ Internal types shared between Nexus and 
sled-agent, with extra dependencies not in omicron-common. -**This crate should only be used for internal types and data structures.** +## Guidelines + +This crate should only be used for **internal types and data structures.** + +It should only be used for types that are used by **both `sled-agent-types` and `nexus-types`**. Prefer to put types in `sled-agent-types` or `nexus-types` if possible. + +- If a type is used by `sled-agent-api`, as well as any part of Nexus except `nexus-types`, put it in `sled-agent-types`. +- If a type is used by `nexus-internal-api`, as well as any part of sled-agent except `sled-agent-types`, put it in `nexus-types`. +- Only if a type is used by both `sled-agent-types` and `nexus-types` should it go here. ## Why not omicron-common? @@ -28,9 +36,10 @@ tokio-postgres, a dependency that is not a necessary component of sled-agent. ## Why not sled-agent-types or nexus-types? Types that are primarily used by sled-agent or nexus should continue to go in -those crates. However, types shared by both should go here. `sled-agent-types` -and `nexus-types` can thus avoid a dependency on each other: they're both "on -the same level" and neither dependency direction is clearly correct. +those crates. However, types used by both `nexus-types` and `sled-agent-types` +should go here. `sled-agent-types` and `nexus-types` can thus avoid a +dependency on each other: they're both "on the same level" and neither +dependency direction is clearly correct. ## Why not Progenitor-generated types? diff --git a/nexus-sled-agent-shared/src/lib.rs b/nexus-sled-agent-shared/src/lib.rs index 6781568d62..12fc040bbb 100644 --- a/nexus-sled-agent-shared/src/lib.rs +++ b/nexus-sled-agent-shared/src/lib.rs @@ -5,6 +5,14 @@ //! Internal types shared between Nexus and sled-agent, with extra dependencies //! not in omicron-common. //! +//! Only types that are shared between `nexus-types` and `sled-agent-types` +//! should go here. +//! +//! 
- If a type is used by `sled-agent-api` and Nexus, but is not required by +//! `nexus-types`, it should go in `sled-agent-types` instead. +//! - If a type is used by `nexus-internal-api` and sled-agent, but is not +//! required by `sled-agent-types`, it should go in `nexus-types` instead. +//! //! For more information, see the crate [README](../README.md). pub mod inventory; diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index a949b31f0d..86d9abc460 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -137,6 +137,7 @@ pretty_assertions.workspace = true rcgen.workspace = true regex.workspace = true similar-asserts.workspace = true +sled-agent-types.workspace = true sp-sim.workspace = true rustls.workspace = true subprocess.workspace = true diff --git a/nexus/networking/src/firewall_rules.rs b/nexus/networking/src/firewall_rules.rs index 4ba66ec9f3..8491092353 100644 --- a/nexus/networking/src/firewall_rules.rs +++ b/nexus/networking/src/firewall_rules.rs @@ -49,7 +49,7 @@ pub async fn resolve_firewall_rules_for_sled_agent( vpc: &db::model::Vpc, rules: &[db::model::VpcFirewallRule], log: &Logger, -) -> Result, Error> { +) -> Result, Error> { // Collect the names of instances, subnets, and VPCs that are either // targets or host filters. 
We have to find the sleds for all the // targets, and we'll need information about the IP addresses or @@ -417,16 +417,18 @@ pub async fn resolve_firewall_rules_for_sled_agent( .as_ref() .map(|protocols| protocols.iter().map(|v| v.0.into()).collect()); - sled_agent_rules.push(sled_agent_client::types::VpcFirewallRule { - status: rule.status.0.into(), - direction: rule.direction.0.into(), - targets, - filter_hosts, - filter_ports, - filter_protocols, - action: rule.action.0.into(), - priority: rule.priority.0 .0, - }); + sled_agent_rules.push( + sled_agent_client::types::ResolvedVpcFirewallRule { + status: rule.status.0.into(), + direction: rule.direction.0.into(), + targets, + filter_hosts, + filter_ports, + filter_protocols, + action: rule.action.0.into(), + priority: rule.priority.0 .0, + }, + ); } debug!( log, diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index b18ac3109f..a14054cf66 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -346,6 +346,7 @@ pub(crate) mod test { }; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::SimpleIdentity; + use sled_agent_types::instance::InstanceExternalIpBody; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -437,14 +438,12 @@ pub(crate) mod test { // Sled agent has a record of the new external IPs. 
let mut eips = sled_agent.external_ips.lock().await; let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); - assert!(my_eips.iter().any(|v| matches!( - v, - omicron_sled_agent::params::InstanceExternalIpBody::Floating(_) - ))); - assert!(my_eips.iter().any(|v| matches!( - v, - omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_) - ))); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_)))); // DB has records for SNAT plus the new IPs. let db_eips = datastore diff --git a/nexus/src/app/vpc.rs b/nexus/src/app/vpc.rs index b3605945d3..56a7777f0e 100644 --- a/nexus/src/app/vpc.rs +++ b/nexus/src/app/vpc.rs @@ -260,7 +260,8 @@ impl super::Nexus { opctx: &OpContext, vpc: &db::model::Vpc, rules: &[db::model::VpcFirewallRule], - ) -> Result, Error> { + ) -> Result, Error> + { nexus_networking::resolve_firewall_rules_for_sled_agent( &self.db_datastore, opctx, diff --git a/openapi/installinator.json b/openapi/installinator.json index 0631344b25..6419760fbd 100644 --- a/openapi/installinator.json +++ b/openapi/installinator.json @@ -397,7 +397,7 @@ ] }, "M2Slot": { - "description": "An M.2 slot that was written.", + "description": "Describes an M.2 slot, often in the context of writing a system image to it.", "type": "string", "enum": [ "A", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 21e1451689..1241248a5e 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -68,7 +68,6 @@ }, "/boot-disk/{boot_disk}/os/write/status": { "get": { - "summary": "Get the status of writing a new host OS", "operationId": "host_os_write_status_get", "parameters": [ { @@ -2945,7 +2944,7 @@ "firewall_rules": { "type": "array", "items": { - "$ref": "#/components/schemas/VpcFirewallRule" + "$ref": "#/components/schemas/ResolvedVpcFirewallRule" } }, "floating_ips": { @@ -4251,6 +4250,60 @@ "rack_subnet" ] }, 
+ "ResolvedVpcFirewallRule": { + "description": "VPC firewall rule after object name resolution has been performed by Nexus", + "type": "object", + "properties": { + "action": { + "$ref": "#/components/schemas/VpcFirewallRuleAction" + }, + "direction": { + "$ref": "#/components/schemas/VpcFirewallRuleDirection" + }, + "filter_hosts": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/HostIdentifier" + } + }, + "filter_ports": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/L4PortRange" + } + }, + "filter_protocols": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/VpcFirewallRuleProtocol" + } + }, + "priority": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "status": { + "$ref": "#/components/schemas/VpcFirewallRuleStatus" + }, + "targets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NetworkInterface" + } + } + }, + "required": [ + "action", + "direction", + "priority", + "status", + "targets" + ] + }, "ResolvedVpcRoute": { "description": "A VPC route resolved into a concrete target.", "type": "object", @@ -5101,60 +5154,6 @@ } ] }, - "VpcFirewallRule": { - "description": "VPC firewall rule after object name resolution has been performed by Nexus", - "type": "object", - "properties": { - "action": { - "$ref": "#/components/schemas/VpcFirewallRuleAction" - }, - "direction": { - "$ref": "#/components/schemas/VpcFirewallRuleDirection" - }, - "filter_hosts": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/HostIdentifier" - } - }, - "filter_ports": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/L4PortRange" - } - }, - "filter_protocols": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/VpcFirewallRuleProtocol" - } - }, - "priority": { - "type": "integer", - "format": "uint16", - "minimum": 0 - }, - "status": 
{ - "$ref": "#/components/schemas/VpcFirewallRuleStatus" - }, - "targets": { - "type": "array", - "items": { - "$ref": "#/components/schemas/NetworkInterface" - } - } - }, - "required": [ - "action", - "direction", - "priority", - "status", - "targets" - ] - }, "VpcFirewallRuleAction": { "type": "string", "enum": [ @@ -5192,7 +5191,7 @@ "rules": { "type": "array", "items": { - "$ref": "#/components/schemas/VpcFirewallRule" + "$ref": "#/components/schemas/ResolvedVpcFirewallRule" } }, "vni": { @@ -5317,7 +5316,7 @@ "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" }, "M2Slot": { - "description": "An M.2 slot that was written.", + "description": "Describes an M.2 slot, often in the context of writing a system image to it.", "type": "string", "enum": [ "A", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 52889d8fa2..2aefd8f464 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -69,6 +69,7 @@ serde.workspace = true serde_human_bytes.workspace = true serde_json = { workspace = true, features = ["raw_value"] } sha3.workspace = true +sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true sled-hardware.workspace = true @@ -103,8 +104,6 @@ guppy.workspace = true http.workspace = true hyper.workspace = true omicron-test-utils.workspace = true -openapi-lint.workspace = true -openapiv3.workspace = true pretty_assertions.workspace = true rcgen.workspace = true subprocess.workspace = true diff --git a/sled-agent/api/Cargo.toml b/sled-agent/api/Cargo.toml new file mode 100644 index 0000000000..046f17574b --- /dev/null +++ b/sled-agent/api/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "sled-agent-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +camino.workspace = true +dropshot.workspace = true +nexus-sled-agent-shared.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = 
true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +sled-agent-types.workspace = true +sled-hardware-types.workspace = true +uuid.workspace = true diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs new file mode 100644 index 0000000000..c44b24d712 --- /dev/null +++ b/sled-agent/api/src/lib.rs @@ -0,0 +1,549 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::{collections::BTreeMap, time::Duration}; + +use camino::Utf8PathBuf; +use dropshot::{ + FreeformBody, HttpError, HttpResponseCreated, HttpResponseDeleted, + HttpResponseHeaders, HttpResponseOk, HttpResponseUpdatedNoContent, Path, + Query, RequestContext, StreamingBody, TypedBody, +}; +use nexus_sled_agent_shared::inventory::{ + Inventory, OmicronZonesConfig, SledRole, +}; +use omicron_common::{ + api::internal::{ + nexus::{DiskRuntimeState, SledInstanceState, UpdateArtifactId}, + shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, + SwitchPorts, VirtualNetworkInterfaceHost, + }, + }, + disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig}, +}; +use omicron_uuid_kinds::{InstanceUuid, ZpoolUuid}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_agent_types::{ + boot_disk::{ + BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, + BootDiskWriteStartQueryParams, + }, + bootstore::BootstoreStatus, + disk::DiskEnsureBody, + early_networking::EarlyNetworkConfig, + firewall_rules::VpcFirewallRulesEnsureBody, + instance::{ + InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, + }, + sled::AddSledRequest, + time_sync::TimeSync, + zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, PriorityOrder, + ZoneBundleId, ZoneBundleMetadata, + 
}, +}; +use uuid::Uuid; + +#[dropshot::api_description] +pub trait SledAgentApi { + type Context; + + /// List all zone bundles that exist, even for now-deleted zones. + #[endpoint { + method = GET, + path = "/zones/bundles", + }] + async fn zone_bundle_list_all( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError>; + + /// List the zone bundles that are available for a running zone. + #[endpoint { + method = GET, + path = "/zones/bundles/{zone_name}", + }] + async fn zone_bundle_list( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError>; + + /// Ask the sled agent to create a zone bundle. + #[endpoint { + method = POST, + path = "/zones/bundles/{zone_name}", + }] + async fn zone_bundle_create( + rqctx: RequestContext, + params: Path, + ) -> Result, HttpError>; + + /// Fetch the binary content of a single zone bundle. + #[endpoint { + method = GET, + path = "/zones/bundles/{zone_name}/{bundle_id}", + }] + async fn zone_bundle_get( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError>; + + /// Delete a zone bundle. + #[endpoint { + method = DELETE, + path = "/zones/bundles/{zone_name}/{bundle_id}", + }] + async fn zone_bundle_delete( + rqctx: RequestContext, + params: Path, + ) -> Result; + + /// Return utilization information about all zone bundles. + #[endpoint { + method = GET, + path = "/zones/bundle-cleanup/utilization", + }] + async fn zone_bundle_utilization( + rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// Return context used by the zone-bundle cleanup task. + #[endpoint { + method = GET, + path = "/zones/bundle-cleanup/context", + }] + async fn zone_bundle_cleanup_context( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Update context used by the zone-bundle cleanup task. 
+ #[endpoint { + method = PUT, + path = "/zones/bundle-cleanup/context", + }] + async fn zone_bundle_cleanup_context_update( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Trigger a zone bundle cleanup. + #[endpoint { + method = POST, + path = "/zones/bundle-cleanup", + }] + async fn zone_bundle_cleanup( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// List the zones that are currently managed by the sled agent. + #[endpoint { + method = GET, + path = "/zones", + }] + async fn zones_list( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + #[endpoint { + method = GET, + path = "/omicron-zones", + }] + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/omicron-zones", + }] + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = GET, + path = "/omicron-physical-disks", + }] + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/omicron-physical-disks", + }] + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/zpools", + }] + async fn zpools_get( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + #[endpoint { + method = GET, + path = "/sled-role", + }] + async fn sled_role_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Initializes a CockroachDB cluster + #[endpoint { + method = POST, + path = "/cockroachdb", + }] + async fn cockroachdb_init( + rqctx: RequestContext, + ) -> Result; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}", + }] + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = DELETE, + path = "/instances/{instance_id}", + }] + async fn instance_unregister( + rqctx: 
RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}/state", + }] + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/instances/{instance_id}/state", + }] + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", + }] + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", + }] + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = PUT, + path = "/disks/{disk_id}", + }] + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = POST, + path = "/update" + }] + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result; + + /// Take a snapshot of a disk that is attached to an instance + #[endpoint { + method = POST, + path = "/instances/{instance_id}/disks/{disk_id}/snapshot", + }] + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + >; + + #[endpoint { + method = PUT, + path = "/vpc/{vpc_id}/firewall/rules", + }] + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + /// Create a mapping from a virtual NIC to a physical host + // Keep interface_id to maintain parity with the simulated sled agent, which + // requires interface_id on the path. 
+ #[endpoint { + method = PUT, + path = "/v2p/", + }] + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Delete a mapping from a virtual NIC to a physical host + // Keep interface_id to maintain parity with the simulated sled agent, which + // requires interface_id on the path. + #[endpoint { + method = DELETE, + path = "/v2p/", + }] + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// List v2p mappings present on sled + // Used by nexus background task + #[endpoint { + method = GET, + path = "/v2p/", + }] + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + #[endpoint { + method = GET, + path = "/timesync", + }] + async fn timesync_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = POST, + path = "/switch-ports", + }] + async fn uplink_ensure( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// This API endpoint is only reading the local sled agent's view of the + /// bootstore. The bootstore is a distributed data store that is eventually + /// consistent. Reads from individual nodes may not represent the latest state.
+ #[endpoint { + method = GET, + path = "/network-bootstore-config", + }] + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/network-bootstore-config", + }] + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Add a sled to a rack that was already initialized via RSS + #[endpoint { + method = PUT, + path = "/sleds" + }] + async fn sled_add( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Write a new host OS image to the specified boot disk + #[endpoint { + method = POST, + path = "/boot-disk/{boot_disk}/os/write", + }] + async fn host_os_write_start( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + body: StreamingBody, + ) -> Result; + + #[endpoint { + method = GET, + path = "/boot-disk/{boot_disk}/os/write/status", + }] + async fn host_os_write_status_get( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Clear the status of a completed write of a new host OS + #[endpoint { + method = DELETE, + path = "/boot-disk/{boot_disk}/os/write/status/{update_id}", + }] + async fn host_os_write_status_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Fetch basic information about this sled + #[endpoint { + method = GET, + path = "/inventory", + }] + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Fetch sled identifiers + #[endpoint { + method = GET, + path = "/sled-identifiers", + }] + async fn sled_identifiers( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Get the internal state of the local bootstore node + #[endpoint { + method = GET, + path = "/bootstore/status", + }] + async fn bootstore_status( + request_context: RequestContext, + ) -> Result, HttpError>; + + /// Get the current versions of VPC routing rules. 
+ #[endpoint { + method = GET, + path = "/vpc-routes", + }] + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Update VPC routing rules. + #[endpoint { + method = PUT, + path = "/vpc-routes", + }] + async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, + ) -> Result; +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ZoneBundleFilter { + /// An optional substring used to filter zone bundles. + pub filter: Option, +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ZonePathParam { + /// The name of the zone. + pub zone_name: String, +} + +/// Parameters used to update the zone bundle cleanup context. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct CleanupContextUpdate { + /// The new period on which automatic cleanups are run. + pub period: Option, + /// The priority ordering for preserving old zone bundles. + pub priority: Option, + /// The new limit on the underlying dataset quota allowed for bundles. 
+ pub storage_limit: Option, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: ZpoolUuid, + pub disk_type: DiskType, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub enum DiskType { + U2, + M2, +} + +impl From for DiskType { + fn from(v: DiskVariant) -> Self { + match v { + DiskVariant::U2 => Self::U2, + DiskVariant::M2 => Self::M2, + } + } +} + +/// Path parameters for Instance requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct InstancePathParam { + pub instance_id: InstanceUuid, +} + +/// Path parameters for Disk requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct DiskPathParam { + pub disk_id: Uuid, +} + +#[derive(Deserialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestPathParam { + pub instance_id: Uuid, + pub disk_id: Uuid, +} + +#[derive(Deserialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestBody { + pub snapshot_id: Uuid, +} + +#[derive(Serialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestResponse { + pub snapshot_id: Uuid, +} + +/// Path parameters for VPC requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct VpcPathParam { + pub vpc_id: Uuid, +} diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 6feeffd302..446103e982 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -6,20 +6,14 @@ use anyhow::anyhow; use camino::Utf8PathBuf; -use clap::{Parser, Subcommand}; +use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::server as bootstrap_server; use omicron_sled_agent::bootstrap::RssAccessError; -use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; +use omicron_sled_agent::config::Config as SledConfig; use sled_agent_types::rack_init::RackInitializeRequest; -#[derive(Subcommand, Debug)] -enum 
OpenapiFlavor { - /// Generates sled agent openapi spec - Sled, -} - #[derive(Debug, Parser)] #[clap( name = "sled_agent", @@ -27,10 +21,6 @@ enum OpenapiFlavor { version )] enum Args { - /// Generates the OpenAPI specification. - #[command(subcommand)] - Openapi(OpenapiFlavor), - /// Runs the Sled Agent server. Run { #[clap(name = "CONFIG_FILE_PATH", action)] @@ -49,10 +39,6 @@ async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); match args { - Args::Openapi(flavor) => match flavor { - OpenapiFlavor::Sled => sled_server::run_openapi() - .map_err(|err| CmdError::Failure(anyhow!(err))), - }, Args::Run { config_path } => { let config = SledConfig::from_file(&config_path) .map_err(|e| CmdError::Failure(anyhow!(e)))?; diff --git a/sled-agent/src/boot_disk_os_writer.rs b/sled-agent/src/boot_disk_os_writer.rs index a0798ed174..59e79c418f 100644 --- a/sled-agent/src/boot_disk_os_writer.rs +++ b/sled-agent/src/boot_disk_os_writer.rs @@ -5,8 +5,6 @@ //! This module provides `BootDiskOsWriter`, via which sled-agent can write new //! OS images to its boot disks. 
-use crate::http_entrypoints::BootDiskOsWriteProgress; -use crate::http_entrypoints::BootDiskOsWriteStatus; use async_trait::async_trait; use bytes::Bytes; use camino::Utf8PathBuf; @@ -14,10 +12,12 @@ use display_error_chain::DisplayErrorChain; use dropshot::HttpError; use futures::Stream; use futures::TryStreamExt; -use installinator_common::M2Slot; use installinator_common::RawDiskWriter; +use omicron_common::disk::M2Slot; use sha3::Digest; use sha3::Sha3_256; +use sled_agent_types::boot_disk::BootDiskOsWriteProgress; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; use slog::Logger; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -37,18 +37,16 @@ use tokio::sync::oneshot::error::TryRecvError; use tokio::sync::watch; use uuid::Uuid; -impl BootDiskOsWriteStatus { - fn from_result( - update_id: Uuid, - result: &Result<(), Arc>, - ) -> Self { - match result { - Ok(()) => Self::Complete { update_id }, - Err(err) => Self::Failed { - update_id, - message: DisplayErrorChain::new(err).to_string(), - }, - } +fn to_boot_disk_status( + update_id: Uuid, + result: &Result<(), Arc>, +) -> BootDiskOsWriteStatus { + match result { + Ok(()) => BootDiskOsWriteStatus::Complete { update_id }, + Err(err) => BootDiskOsWriteStatus::Failed { + update_id, + message: DisplayErrorChain::new(err).to_string(), + }, } } @@ -393,9 +391,7 @@ impl BootDiskOsWriter { match running.complete_rx.try_recv() { Ok(result) => { let update_id = running.update_id; - let status = BootDiskOsWriteStatus::from_result( - update_id, &result, - ); + let status = to_boot_disk_status(update_id, &result); slot.insert(WriterState::Complete(TaskCompleteState { update_id, result, @@ -413,9 +409,7 @@ impl BootDiskOsWriter { let update_id = running.update_id; let result = Err(Arc::new(BootDiskOsWriteError::TaskPanic)); - let status = BootDiskOsWriteStatus::from_result( - update_id, &result, - ); + let status = to_boot_disk_status(update_id, &result); 
slot.insert(WriterState::Complete(TaskCompleteState { update_id, result, @@ -425,10 +419,7 @@ impl BootDiskOsWriter { } } WriterState::Complete(complete) => { - BootDiskOsWriteStatus::from_result( - complete.update_id, - &complete.result, - ) + to_boot_disk_status(complete.update_id, &complete.result) } } } diff --git a/sled-agent/src/bootstrap/client.rs b/sled-agent/src/bootstrap/client.rs index 10f1ab6f25..bfdaf6e6d4 100644 --- a/sled-agent/src/bootstrap/client.rs +++ b/sled-agent/src/bootstrap/client.rs @@ -7,10 +7,10 @@ use super::params::version; use super::params::Request; use super::params::RequestEnvelope; -use super::params::StartSledAgentRequest; use super::views::SledAgentResponse; use crate::bootstrap::views::Response; use crate::bootstrap::views::ResponseEnvelope; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::borrow::Cow; use std::io; diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 9fe399419f..5aedf848fe 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,181 +4,9 @@ //! Request types for the bootstrap agent -use anyhow::Result; -use async_trait::async_trait; -use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; -use omicron_common::ledger::Ledgerable; -use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use sha3::{Digest, Sha3_256}; +use sled_agent_types::sled::StartSledAgentRequest; use std::borrow::Cow; -use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; -use uuid::Uuid; - -/// A representation of a Baseboard ID as used in the inventory subsystem -/// This type is essentially the same as a `Baseboard` except it doesn't have a -/// revision or HW type (Gimlet, PC, Unknown). 
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct BaseboardId { - /// Oxide Part Number - pub part_number: String, - /// Serial number (unique for a given part number) - pub serial_number: String, -} - -/// A request to Add a given sled after rack initialization has occurred -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct AddSledRequest { - pub sled_id: BaseboardId, - pub start_request: StartSledAgentRequest, -} - -// A wrapper around StartSledAgentRequestV0 that was used -// for the ledger format. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -struct PersistentSledAgentRequest { - request: StartSledAgentRequestV0, -} - -/// The version of `StartSledAgentRequest` we originally shipped with. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequestV0 { - /// Uuid of the Sled Agent to be created. - pub id: Uuid, - - /// Uuid of the rack to which this sled agent belongs. - pub rack_id: Uuid, - - /// The external NTP servers to use - pub ntp_servers: Vec, - - /// The external DNS servers to use - pub dns_servers: Vec, - - /// Use trust quorum for key generation - pub use_trust_quorum: bool, - - // Note: The order of these fields is load bearing, because we serialize - // `SledAgentRequest`s as toml. `subnet` serializes as a TOML table, so it - // must come after non-table fields. - /// Portion of the IP space to be managed by the Sled Agent. - pub subnet: Ipv6Subnet, -} - -/// Configuration information for launching a Sled Agent. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequest { - /// The current generation number of data as stored in CRDB. - /// - /// The initial generation is set during RSS time and then only mutated - /// by Nexus. For now, we don't actually anticipate mutating this data, - /// but we leave open the possiblity. 
- pub generation: u64, - - // Which version of the data structure do we have. This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: StartSledAgentRequestBody, -} - -/// This is the actual app level data of `StartSledAgentRequest` -/// -/// We nest it below the "header" of `generation` and `schema_version` so that -/// we can perform partial deserialization of `EarlyNetworkConfig` to only read -/// the header and defer deserialization of the body once we know the schema -/// version. This is possible via the use of [`serde_json::value::RawValue`] in -/// future (post-v1) deserialization paths. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequestBody { - /// Uuid of the Sled Agent to be created. - pub id: Uuid, - - /// Uuid of the rack to which this sled agent belongs. - pub rack_id: Uuid, - - /// Use trust quorum for key generation - pub use_trust_quorum: bool, - - /// Is this node an LRTQ learner node? - /// - /// We only put the node into learner mode if `use_trust_quorum` is also - /// true. - pub is_lrtq_learner: bool, - - /// Portion of the IP space to be managed by the Sled Agent. - pub subnet: Ipv6Subnet, -} - -impl StartSledAgentRequest { - pub fn sled_address(&self) -> SocketAddrV6 { - address::get_sled_address(self.body.subnet) - } - - pub fn switch_zone_ip(&self) -> Ipv6Addr { - address::get_switch_zone_address(self.body.subnet) - } - - /// Compute the sha3_256 digest of `self.rack_id` to use as a `salt` - /// for disk encryption. We don't want to include other values that are - /// consistent across sleds as it would prevent us from moving drives - /// between sleds. 
- pub fn hash_rack_id(&self) -> [u8; 32] { - // We know the unwrap succeeds as a Sha3_256 digest is 32 bytes - Sha3_256::digest(self.body.rack_id.as_bytes()) - .as_slice() - .try_into() - .unwrap() - } -} - -impl From for StartSledAgentRequest { - fn from(v0: StartSledAgentRequestV0) -> Self { - StartSledAgentRequest { - generation: 0, - schema_version: 1, - body: StartSledAgentRequestBody { - id: v0.id, - rack_id: v0.rack_id, - use_trust_quorum: v0.use_trust_quorum, - is_lrtq_learner: false, - subnet: v0.subnet, - }, - } - } -} - -#[async_trait] -impl Ledgerable for StartSledAgentRequest { - fn is_newer_than(&self, other: &Self) -> bool { - self.generation > other.generation - } - - fn generation_bump(&mut self) { - // DO NOTHING! - // - // Generation bumps must only ever come from nexus and will be encoded - // in the struct itself - } - - // Attempt to deserialize the v1 or v0 version and return - // the v1 version. - fn deserialize( - s: &str, - ) -> Result { - // Try to deserialize the latest version of the data structure (v1). If - // that succeeds we are done. - if let Ok(val) = serde_json::from_str::(s) { - return Ok(val); - } - - // We don't have the latest version. Try to deserialize v0 and then - // convert it to the latest version. 
- let v0 = serde_json::from_str::(s)?.request; - Ok(v0.into()) - } -} #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum Request<'a> { @@ -200,6 +28,10 @@ pub(super) mod version { mod tests { use std::net::Ipv6Addr; + use omicron_common::address::Ipv6Subnet; + use sled_agent_types::sled::StartSledAgentRequestBody; + use uuid::Uuid; + use super::*; #[test] @@ -227,34 +59,4 @@ mod tests { assert!(envelope == deserialized, "serialization round trip failed"); } - - #[test] - fn serialize_start_sled_agent_v0_deserialize_v1() { - let v0 = PersistentSledAgentRequest { - request: StartSledAgentRequestV0 { - id: Uuid::new_v4(), - rack_id: Uuid::new_v4(), - ntp_servers: vec![String::from("test.pool.example.com")], - dns_servers: vec!["1.1.1.1".parse().unwrap()], - use_trust_quorum: false, - subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST), - }, - }; - let serialized = serde_json::to_string(&v0).unwrap(); - let expected = StartSledAgentRequest { - generation: 0, - schema_version: 1, - body: StartSledAgentRequestBody { - id: v0.request.id, - rack_id: v0.request.rack_id, - use_trust_quorum: v0.request.use_trust_quorum, - is_lrtq_learner: false, - subnet: v0.request.subnet, - }, - }; - - let actual: StartSledAgentRequest = - Ledgerable::deserialize(&serialized).unwrap(); - assert_eq!(expected, actual); - } } diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index 73f7537853..eee7eed085 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -5,7 +5,6 @@ //! 
sled-agent's handle to the Rack Setup Service it spawns use super::client as bootstrap_agent_client; -use super::params::StartSledAgentRequest; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; use ::bootstrap_agent_client::Client as BootstrapAgentClient; @@ -16,6 +15,7 @@ use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::sled::StartSledAgentRequest; use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index fa1d781a96..6681f396b4 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -6,7 +6,6 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; -use super::params::StartSledAgentRequest; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; @@ -41,6 +40,7 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::RackInitUuid; use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::sled::StartSledAgentRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -714,11 +714,10 @@ impl Inner { #[cfg(test)] mod tests { - use crate::bootstrap::params::StartSledAgentRequestBody; - use super::*; use omicron_common::address::Ipv6Subnet; use omicron_test_utils::dev::test_setup_log; + use sled_agent_types::sled::StartSledAgentRequestBody; use std::net::Ipv6Addr; use uuid::Uuid; diff --git a/sled-agent/src/bootstrap/sprockets_server.rs b/sled-agent/src/bootstrap/sprockets_server.rs index 796883b578..8d92970d54 100644 --- a/sled-agent/src/bootstrap/sprockets_server.rs +++ 
b/sled-agent/src/bootstrap/sprockets_server.rs @@ -7,10 +7,10 @@ use crate::bootstrap::params::version; use crate::bootstrap::params::Request; use crate::bootstrap::params::RequestEnvelope; -use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::views::Response; use crate::bootstrap::views::ResponseEnvelope; use crate::bootstrap::views::SledAgentResponse; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::io; use std::net::SocketAddrV6; diff --git a/sled-agent/src/common/disk.rs b/sled-agent/src/common/disk.rs index 54c56825eb..7bef28ac7c 100644 --- a/sled-agent/src/common/disk.rs +++ b/sled-agent/src/common/disk.rs @@ -4,12 +4,12 @@ //! Describes the states of network-attached storage. -use crate::params::DiskStateRequested; use chrono::Utc; use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use propolis_client::types::DiskAttachmentState as PropolisDiskState; +use sled_agent_types::disk::DiskStateRequested; use uuid::Uuid; /// Action to be taken on behalf of state transition. diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 820ec746b8..2bf8067d1c 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,26 +5,17 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::sled_agent::SledAgent; -use crate::bootstrap::params::AddSledRequest; -use crate::params::{ - BootstoreStatus, CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, - InstanceExternalIpBody, InstancePutStateBody, InstancePutStateResponse, - InstanceUnregisterResponse, TimeSync, VpcFirewallRulesEnsureBody, - ZoneBundleId, ZoneBundleMetadata, Zpool, -}; use crate::sled_agent::Error as SledAgentError; -use crate::zone_bundle; +use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; use display_error_chain::DisplayErrorChain; use dropshot::{ - endpoint, ApiDescription, ApiDescriptionRegisterError, FreeformBody, - HttpError, HttpResponseCreated, HttpResponseDeleted, HttpResponseHeaders, - HttpResponseOk, HttpResponseUpdatedNoContent, Path, Query, RequestContext, - StreamingBody, TypedBody, + ApiDescription, FreeformBody, HttpError, HttpResponseCreated, + HttpResponseDeleted, HttpResponseHeaders, HttpResponseOk, + HttpResponseUpdatedNoContent, Path, Query, RequestContext, StreamingBody, + TypedBody, }; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; -use installinator_common::M2Slot; use nexus_sled_agent_shared::inventory::{ Inventory, OmicronZonesConfig, SledRole, }; @@ -34,1017 +25,696 @@ use omicron_common::api::internal::nexus::{ }; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, + VirtualNetworkInterfaceHost, +}; +use omicron_common::disk::{ + DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, }; -use omicron_common::disk::{DiskVariant, OmicronPhysicalDisksConfig}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::{ + BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, + 
BootDiskWriteStartQueryParams, +}; +use sled_agent_types::bootstore::BootstoreStatus; +use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; +use sled_agent_types::instance::{ + InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, +}; +use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, + StorageLimit, ZoneBundleId, ZoneBundleMetadata, +}; use std::collections::BTreeMap; -use uuid::Uuid; type SledApiDescription = ApiDescription; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(disk_put)?; - api.register(cockroachdb_init)?; - api.register(instance_issue_disk_snapshot_request)?; - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(zones_list)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(zone_bundle_list)?; - api.register(zone_bundle_list_all)?; - api.register(zone_bundle_create)?; - api.register(zone_bundle_get)?; - api.register(zone_bundle_delete)?; - api.register(zone_bundle_utilization)?; - api.register(zone_bundle_cleanup_context)?; - api.register(zone_bundle_cleanup_context_update)?; - api.register(zone_bundle_cleanup)?; - api.register(sled_role_get)?; - api.register(list_v2p)?; - api.register(set_v2p)?; - 
api.register(del_v2p)?; - api.register(timesync_get)?; - api.register(update_artifact)?; - api.register(vpc_firewall_rules_put)?; - api.register(zpools_get)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config_cache)?; - api.register(write_network_bootstore_config)?; - api.register(sled_add)?; - api.register(host_os_write_start)?; - api.register(host_os_write_status_get)?; - api.register(host_os_write_status_delete)?; - api.register(inventory)?; - api.register(sled_identifiers)?; - api.register(bootstore_status)?; - api.register(list_vpc_routes)?; - api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api + sled_agent_api_mod::api_description::() + .expect("registered entrypoints") } -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -struct ZonePathParam { - /// The name of the zone. - zone_name: String, -} +enum SledAgentImpl {} -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -struct ZoneBundleFilter { - /// An optional substring used to filter zone bundles. - filter: Option, -} +impl SledAgentApi for SledAgentImpl { + type Context = SledAgent; -/// List all zone bundles that exist, even for now-deleted zones. -#[endpoint { - method = GET, - path = "/zones/bundles", -}] -async fn zone_bundle_list_all( - rqctx: RequestContext, - query: Query, -) -> Result>, HttpError> { - let sa = rqctx.context(); - let filter = query.into_inner().filter; - sa.list_all_zone_bundles(filter.as_deref()) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} - -/// List the zone bundles that are available for a running zone. 
-#[endpoint { - method = GET, - path = "/zones/bundles/{zone_name}", -}] -async fn zone_bundle_list( - rqctx: RequestContext, - params: Path, -) -> Result>, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let sa = rqctx.context(); - sa.list_zone_bundles(&zone_name) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} + async fn zone_bundle_list_all( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + let filter = query.into_inner().filter; + sa.list_all_zone_bundles(filter.as_deref()) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Ask the sled agent to create a zone bundle. -#[endpoint { - method = POST, - path = "/zones/bundles/{zone_name}", -}] -async fn zone_bundle_create( - rqctx: RequestContext, - params: Path, -) -> Result, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let sa = rqctx.context(); - sa.create_zone_bundle(&zone_name) - .await - .map(HttpResponseCreated) - .map_err(HttpError::from) -} + async fn zone_bundle_list( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError> { + let params = params.into_inner(); + let zone_name = params.zone_name; + let sa = rqctx.context(); + sa.list_zone_bundles(&zone_name) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Fetch the binary content of a single zone bundle. -#[endpoint { - method = GET, - path = "/zones/bundles/{zone_name}/{bundle_id}", -}] -async fn zone_bundle_get( - rqctx: RequestContext, - params: Path, -) -> Result>, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let bundle_id = params.bundle_id; - let sa = rqctx.context(); - let Some(path) = sa - .get_zone_bundle_paths(&zone_name, &bundle_id) - .await - .map_err(HttpError::from)? 
- .into_iter() - .next() - else { - return Err(HttpError::for_not_found( - None, - format!( - "No zone bundle for zone '{}' with ID '{}'", - zone_name, bundle_id - ), - )); - }; - let f = tokio::fs::File::open(&path).await.map_err(|e| { - HttpError::for_internal_error(format!( - "failed to open zone bundle file at {}: {:?}", - path, e, - )) - })?; - let stream = hyper_staticfile::FileBytesStream::new(f); - let body = FreeformBody(stream.into_body()); - let mut response = HttpResponseHeaders::new_unnamed(HttpResponseOk(body)); - response.headers_mut().append( - http::header::CONTENT_TYPE, - "application/gzip".try_into().unwrap(), - ); - Ok(response) -} + async fn zone_bundle_create( + rqctx: RequestContext, + params: Path, + ) -> Result, HttpError> { + let params = params.into_inner(); + let zone_name = params.zone_name; + let sa = rqctx.context(); + sa.create_zone_bundle(&zone_name) + .await + .map(HttpResponseCreated) + .map_err(HttpError::from) + } -/// Delete a zone bundle. -#[endpoint { - method = DELETE, - path = "/zones/bundles/{zone_name}/{bundle_id}", -}] -async fn zone_bundle_delete( - rqctx: RequestContext, - params: Path, -) -> Result { - let params = params.into_inner(); - let zone_name = params.zone_name; - let bundle_id = params.bundle_id; - let sa = rqctx.context(); - let paths = sa - .get_zone_bundle_paths(&zone_name, &bundle_id) - .await - .map_err(HttpError::from)?; - if paths.is_empty() { - return Err(HttpError::for_not_found( - None, - format!( - "No zone bundle for zone '{}' with ID '{}'", - zone_name, bundle_id - ), - )); - }; - for path in paths.into_iter() { - tokio::fs::remove_file(&path).await.map_err(|e| { + async fn zone_bundle_get( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError> + { + let params = params.into_inner(); + let zone_name = params.zone_name; + let bundle_id = params.bundle_id; + let sa = rqctx.context(); + let Some(path) = sa + .get_zone_bundle_paths(&zone_name, &bundle_id) + .await + 
.map_err(HttpError::from)? + .into_iter() + .next() + else { + return Err(HttpError::for_not_found( + None, + format!( + "No zone bundle for zone '{}' with ID '{}'", + zone_name, bundle_id + ), + )); + }; + let f = tokio::fs::File::open(&path).await.map_err(|e| { HttpError::for_internal_error(format!( - "Failed to delete zone bundle: {e}" + "failed to open zone bundle file at {}: {:?}", + path, e, )) })?; + let stream = hyper_staticfile::FileBytesStream::new(f); + let body = FreeformBody(stream.into_body()); + let mut response = + HttpResponseHeaders::new_unnamed(HttpResponseOk(body)); + response.headers_mut().append( + http::header::CONTENT_TYPE, + "application/gzip".try_into().unwrap(), + ); + Ok(response) } - Ok(HttpResponseDeleted()) -} -/// Return utilization information about all zone bundles. -#[endpoint { - method = GET, - path = "/zones/bundle-cleanup/utilization", -}] -async fn zone_bundle_utilization( - rqctx: RequestContext, -) -> Result< - HttpResponseOk>, - HttpError, -> { - let sa = rqctx.context(); - sa.zone_bundle_utilization() - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} + async fn zone_bundle_delete( + rqctx: RequestContext, + params: Path, + ) -> Result { + let params = params.into_inner(); + let zone_name = params.zone_name; + let bundle_id = params.bundle_id; + let sa = rqctx.context(); + let paths = sa + .get_zone_bundle_paths(&zone_name, &bundle_id) + .await + .map_err(HttpError::from)?; + if paths.is_empty() { + return Err(HttpError::for_not_found( + None, + format!( + "No zone bundle for zone '{}' with ID '{}'", + zone_name, bundle_id + ), + )); + }; + for path in paths.into_iter() { + tokio::fs::remove_file(&path).await.map_err(|e| { + HttpError::for_internal_error(format!( + "Failed to delete zone bundle: {e}" + )) + })?; + } + Ok(HttpResponseDeleted()) + } -/// Return context used by the zone-bundle cleanup task. 
-#[endpoint { - method = GET, - path = "/zones/bundle-cleanup/context", -}] -async fn zone_bundle_cleanup_context( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zone_bundle_cleanup_context().await)) -} + async fn zone_bundle_utilization( + rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + let sa = rqctx.context(); + sa.zone_bundle_utilization() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } + + async fn zone_bundle_cleanup_context( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zone_bundle_cleanup_context().await)) + } -/// Update context used by the zone-bundle cleanup task. -#[endpoint { - method = PUT, - path = "/zones/bundle-cleanup/context", -}] -async fn zone_bundle_cleanup_context_update( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let params = body.into_inner(); - let new_period = params - .period - .map(zone_bundle::CleanupPeriod::new) - .transpose() - .map_err(|e| HttpError::from(SledAgentError::from(e)))?; - let new_priority = params.priority; - let new_limit = params - .storage_limit - .map(zone_bundle::StorageLimit::new) - .transpose() - .map_err(|e| HttpError::from(SledAgentError::from(e)))?; - sa.update_zone_bundle_cleanup_context(new_period, new_limit, new_priority) + async fn zone_bundle_cleanup_context_update( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let params = body.into_inner(); + let new_period = + params.period.map(CleanupPeriod::new).transpose().map_err(|e| { + HttpError::from(SledAgentError::from(BundleError::from(e))) + })?; + let new_priority = params.priority; + let new_limit = + params.storage_limit.map(StorageLimit::new).transpose().map_err( + |e| HttpError::from(SledAgentError::from(BundleError::from(e))), + )?; + sa.update_zone_bundle_cleanup_context( + new_period, + new_limit, + 
new_priority, + ) .await .map(|_| HttpResponseUpdatedNoContent()) .map_err(HttpError::from) -} + } -/// Trigger a zone bundle cleanup. -#[endpoint { - method = POST, - path = "/zones/bundle-cleanup", -}] -async fn zone_bundle_cleanup( - rqctx: RequestContext, -) -> Result< - HttpResponseOk>, - HttpError, -> { - let sa = rqctx.context(); - sa.zone_bundle_cleanup().await.map(HttpResponseOk).map_err(HttpError::from) -} + async fn zone_bundle_cleanup( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); + sa.zone_bundle_cleanup() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// List the zones that are currently managed by the sled agent. -#[endpoint { - method = GET, - path = "/zones", -}] -async fn zones_list( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); - sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) -} + async fn zones_list( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) + } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await?)) -} + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await?)) + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + 
sa.omicron_zones_ensure(body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) -} + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } -#[endpoint { - method = PUT, - path = "/omicron-zones", -}] -async fn omicron_zones_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = GET, - path = "/zpools", -}] -async fn zpools_get( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await)) -} + async fn zpools_get( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zpools_get().await)) + } -#[endpoint { - method = GET, - path = "/sled-role", -}] -async fn sled_role_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.get_role())) -} + async fn sled_role_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.get_role())) + } -/// Initializes a CockroachDB cluster -#[endpoint { - method = POST, - path = "/cockroachdb", -}] -async fn cockroachdb_init( - rqctx: RequestContext, -) -> Result { - let sa = rqctx.context(); - 
sa.cockroachdb_initialize().await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn cockroachdb_init( + rqctx: RequestContext, + ) -> Result { + let sa = rqctx.context(); + sa.cockroachdb_initialize().await?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_registered( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.propolis_addr, + body_args.metadata, + ) + .await?, + )) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_registered( - instance_id, - body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.propolis_addr, - body_args.metadata, - ) - .await?, - )) -} + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) + } -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - 
Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) -} + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_state(instance_id, body_args.state).await?, + )) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + } -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - 
let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await + .map_err(|e| Error::from(e))?, + )) + } -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.update_artifact(artifact.into_inner()).await.map_err(Error::from)?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = 
path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + > { + let sa = rqctx.context(); + let path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.disk_id, + body.snapshot_id, ) - .await - .map_err(|e| Error::from(e))?, - )) -} + .await?; -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.update_artifact(artifact.into_inner()).await.map_err(Error::from)?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - disk_id: Uuid, -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let body_args = body.into_inner(); -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, -} + sa.firewall_rules_ensure(body_args.vni, &body_args.rules[..]) + .await + .map_err(Error::from)?; -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + Ok(HttpResponseUpdatedNoContent()) + } -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - 
rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - ) - .await?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: Uuid, -} + sa.set_virtual_nic_host(&body_args).await.map_err(Error::from)?; -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let body_args = body.into_inner(); - - sa.firewall_rules_ensure(body_args.vni, &body_args.rules[..]) - .await - .map_err(Error::from)?; - - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -/// Create a mapping from a virtual NIC to a physical host -// Keep interface_id to maintain parity with the simulated sled agent, which -// requires interface_id on the path. 
-#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - - sa.set_virtual_nic_host(&body_args).await.map_err(Error::from)?; - - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -/// Delete a mapping from a virtual NIC to a physical host -// Keep interface_id to maintain parity with the simulated sled agent, which -// requires interface_id on the path. -#[endpoint { - method = DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - - sa.unset_virtual_nic_host(&body_args).await.map_err(Error::from)?; - - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args).await.map_err(Error::from)?; -/// List v2p mappings present on sled -// Used by nexus background task -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); + Ok(HttpResponseUpdatedNoContent()) + } - let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); - Ok(HttpResponseOk(vnics)) -} + let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; -#[endpoint { - method = GET, - path = "/timesync", -}] -async fn timesync_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.timesync_get().await.map_err(|e| Error::from(e))?)) -} + Ok(HttpResponseOk(vnics)) + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - 
sa.ensure_scrimlet_host_ports(body.into_inner().uplinks).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn timesync_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.timesync_get().await.map_err(|e| Error::from(e))?)) + } -/// This API endpoint is only reading the local sled agent's view of the -/// bootstore. The boostore is a distributed data store that is eventually -/// consistent. Reads from individual nodes may not represent the latest state. -#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config_cache( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - let bs = sa.bootstore(); - - let config = bs.get_network_config().await.map_err(|e| { - HttpError::for_internal_error(format!("failed to get bootstore: {e}")) - })?; - - let config = match config { - Some(config) => EarlyNetworkConfig::deserialize_bootstore_config( - &rqctx.log, &config, - ) - .map_err(|e| { - HttpError::for_internal_error(format!( - "deserialize early network config: {e}" - )) - })?, - None => { - return Err(HttpError::for_unavail( - None, - "early network config does not exist yet".into(), - )); - } - }; + async fn uplink_ensure( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.ensure_scrimlet_host_ports(body.into_inner().uplinks).await?; + Ok(HttpResponseUpdatedNoContent()) + } - Ok(HttpResponseOk(config)) -} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let bs = sa.bootstore(); -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let bs = sa.bootstore(); - let config = body.into_inner(); - - bs.update_network_config(NetworkConfig::from(config)).await.map_err( - 
|e| { + let config = bs.get_network_config().await.map_err(|e| { HttpError::for_internal_error(format!( - "failed to write updated config to boot store: {e}" + "failed to get bootstore: {e}" )) - }, - )?; - - Ok(HttpResponseUpdatedNoContent()) -} + })?; -/// Add a sled to a rack that was already initialized via RSS -#[endpoint { - method = PUT, - path = "/sleds" -}] -async fn sled_add( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let request = body.into_inner(); - - // Perform some minimal validation - if request.start_request.body.use_trust_quorum - && !request.start_request.body.is_lrtq_learner - { - return Err(HttpError::for_bad_request( - None, - "New sleds must be LRTQ learners if trust quorum is in use" - .to_string(), - )); + let config = match config { + Some(config) => EarlyNetworkConfig::deserialize_bootstore_config( + &rqctx.log, &config, + ) + .map_err(|e| { + HttpError::for_internal_error(format!( + "deserialize early network config: {e}" + )) + })?, + None => { + return Err(HttpError::for_unavail( + None, + "early network config does not exist yet".into(), + )); + } + }; + + Ok(HttpResponseOk(config)) } - crate::sled_agent::sled_add( - sa.logger().clone(), - request.sled_id, - request.start_request, - ) - .await - .map_err(|e| { - let message = format!("Failed to add sled to rack cluster: {e}"); - HttpError { - status_code: http::StatusCode::INTERNAL_SERVER_ERROR, - error_code: None, - external_message: message.clone(), - internal_message: message, - } - })?; - Ok(HttpResponseUpdatedNoContent()) -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskPathParams { - pub boot_disk: M2Slot, -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskUpdatePathParams { - pub boot_disk: M2Slot, - pub update_id: Uuid, -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskWriteStartQueryParams { - pub 
update_id: Uuid, - // TODO do we already have sha2-256 hashes of the OS images, and if so - // should we use that instead? Another option is to use the external API - // `Digest` type, although it predates `serde_human_bytes` so just stores - // the hash as a `String`. - #[serde(with = "serde_human_bytes::hex_array")] - #[schemars(schema_with = "omicron_common::hex_schema::<32>")] - pub sha3_256_digest: [u8; 32], -} - -/// Write a new host OS image to the specified boot disk -#[endpoint { - method = POST, - path = "/boot-disk/{boot_disk}/os/write", -}] -async fn host_os_write_start( - request_context: RequestContext, - path_params: Path, - query_params: Query, - body: StreamingBody, -) -> Result { - let sa = request_context.context(); - let boot_disk = path_params.into_inner().boot_disk; - - // Find our corresponding disk. - let maybe_disk_path = - sa.storage().get_latest_disks().await.iter_managed().find_map( - |(_identity, disk)| { - // Synthetic disks panic if asked for their `slot()`, so filter - // them out first; additionally, filter out any non-M2 disks. - if disk.is_synthetic() || disk.variant() != DiskVariant::M2 { - return None; - } - - // Convert this M2 disk's slot to an M2Slot, and skip any that - // don't match the requested boot_disk. 
- let Ok(slot) = M2Slot::try_from(disk.slot()) else { - return None; - }; - if slot != boot_disk { - return None; - } - - let raw_devs_path = true; - Some(disk.boot_image_devfs_path(raw_devs_path)) + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let bs = sa.bootstore(); + let config = body.into_inner(); + + bs.update_network_config(NetworkConfig::from(config)).await.map_err( + |e| { + HttpError::for_internal_error(format!( + "failed to write updated config to boot store: {e}" + )) }, - ); + )?; - let disk_path = match maybe_disk_path { - Some(Ok(path)) => path, - Some(Err(err)) => { - let message = format!( - "failed to find devfs path for {boot_disk:?}: {}", - DisplayErrorChain::new(&err) - ); - return Err(HttpError { - status_code: http::StatusCode::SERVICE_UNAVAILABLE, - error_code: None, - external_message: message.clone(), - internal_message: message, - }); + Ok(HttpResponseUpdatedNoContent()) + } + + async fn sled_add( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let request = body.into_inner(); + + // Perform some minimal validation + if request.start_request.body.use_trust_quorum + && !request.start_request.body.is_lrtq_learner + { + return Err(HttpError::for_bad_request( + None, + "New sleds must be LRTQ learners if trust quorum is in use" + .to_string(), + )); } - None => { - let message = format!("no disk found for slot {boot_disk:?}",); - return Err(HttpError { - status_code: http::StatusCode::SERVICE_UNAVAILABLE, + + crate::sled_agent::sled_add( + sa.logger().clone(), + request.sled_id, + request.start_request, + ) + .await + .map_err(|e| { + let message = format!("Failed to add sled to rack cluster: {e}"); + HttpError { + status_code: http::StatusCode::INTERNAL_SERVER_ERROR, error_code: None, external_message: message.clone(), internal_message: message, - }); - } - }; - - let BootDiskWriteStartQueryParams { update_id, 
sha3_256_digest } = - query_params.into_inner(); - sa.boot_disk_os_writer() - .start_update( - boot_disk, - disk_path, - update_id, - sha3_256_digest, - body.into_stream(), - ) - .await - .map_err(|err| HttpError::from(&*err))?; - Ok(HttpResponseUpdatedNoContent()) -} + } + })?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Current progress of an OS image being written to disk. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema, Serialize, -)] -#[serde(tag = "state", rename_all = "snake_case")] -pub enum BootDiskOsWriteProgress { - /// The image is still being uploaded. - ReceivingUploadedImage { bytes_received: usize }, - /// The image is being written to disk. - WritingImageToDisk { bytes_written: usize }, - /// The image is being read back from disk for validation. - ValidatingWrittenImage { bytes_read: usize }, -} + async fn host_os_write_start( + request_context: RequestContext, + path_params: Path, + query_params: Query, + body: StreamingBody, + ) -> Result { + let sa = request_context.context(); + let boot_disk = path_params.into_inner().boot_disk; + + // Find our corresponding disk. + let maybe_disk_path = + sa.storage().get_latest_disks().await.iter_managed().find_map( + |(_identity, disk)| { + // Synthetic disks panic if asked for their `slot()`, so filter + // them out first; additionally, filter out any non-M2 disks. + if disk.is_synthetic() || disk.variant() != DiskVariant::M2 + { + return None; + } + + // Convert this M2 disk's slot to an M2Slot, and skip any that + // don't match the requested boot_disk. + let Ok(slot) = M2Slot::try_from(disk.slot()) else { + return None; + }; + if slot != boot_disk { + return None; + } + + let raw_devs_path = true; + Some(disk.boot_image_devfs_path(raw_devs_path)) + }, + ); -/// Status of an update to a boot disk OS. 
-#[derive(Debug, Clone, Deserialize, JsonSchema, Serialize)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum BootDiskOsWriteStatus { - /// No update has been started for this disk, or any previously-started - /// update has completed and had its status cleared. - NoUpdateStarted, - /// An update is currently running. - InProgress { update_id: Uuid, progress: BootDiskOsWriteProgress }, - /// The most recent update completed successfully. - Complete { update_id: Uuid }, - /// The most recent update failed. - Failed { update_id: Uuid, message: String }, -} + let disk_path = match maybe_disk_path { + Some(Ok(path)) => path, + Some(Err(err)) => { + let message = format!( + "failed to find devfs path for {boot_disk:?}: {}", + DisplayErrorChain::new(&err) + ); + return Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }); + } + None => { + let message = format!("no disk found for slot {boot_disk:?}",); + return Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }); + } + }; + + let BootDiskWriteStartQueryParams { update_id, sha3_256_digest } = + query_params.into_inner(); + sa.boot_disk_os_writer() + .start_update( + boot_disk, + disk_path, + update_id, + sha3_256_digest, + body.into_stream(), + ) + .await + .map_err(|err| HttpError::from(&*err))?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Get the status of writing a new host OS -#[endpoint { - method = GET, - path = "/boot-disk/{boot_disk}/os/write/status", -}] -async fn host_os_write_status_get( - request_context: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = request_context.context(); - let boot_disk = path_params.into_inner().boot_disk; - let status = sa.boot_disk_os_writer().status(boot_disk); - Ok(HttpResponseOk(status)) -} + async fn 
host_os_write_status_get( + request_context: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = request_context.context(); + let boot_disk = path_params.into_inner().boot_disk; + let status = sa.boot_disk_os_writer().status(boot_disk); + Ok(HttpResponseOk(status)) + } -/// Clear the status of a completed write of a new host OS -#[endpoint { - method = DELETE, - path = "/boot-disk/{boot_disk}/os/write/status/{update_id}", -}] -async fn host_os_write_status_delete( - request_context: RequestContext, - path_params: Path, -) -> Result { - let sa = request_context.context(); - let BootDiskUpdatePathParams { boot_disk, update_id } = - path_params.into_inner(); - sa.boot_disk_os_writer() - .clear_terminal_status(boot_disk, update_id) - .map_err(|err| HttpError::from(&err))?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn host_os_write_status_delete( + request_context: RequestContext, + path_params: Path, + ) -> Result { + let sa = request_context.context(); + let BootDiskUpdatePathParams { boot_disk, update_id } = + path_params.into_inner(); + sa.boot_disk_os_writer() + .clear_terminal_status(boot_disk, update_id) + .map_err(|err| HttpError::from(&err))?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = "/inventory", -}] -async fn inventory( - request_context: RequestContext, -) -> Result, HttpError> { - let sa = request_context.context(); - Ok(HttpResponseOk(sa.inventory().await?)) -} + async fn inventory( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.inventory().await?)) + } -/// Fetch sled identifiers -#[endpoint { - method = GET, - path = "/sled-identifiers", -}] -async fn sled_identifiers( - request_context: RequestContext, -) -> Result, HttpError> { - Ok(HttpResponseOk(request_context.context().sled_identifiers())) -} + async fn sled_identifiers( + request_context: RequestContext, + ) 
-> Result, HttpError> { + Ok(HttpResponseOk(request_context.context().sled_identifiers())) + } -/// Get the internal state of the local bootstore node -#[endpoint { - method = GET, - path = "/bootstore/status", -}] -async fn bootstore_status( - request_context: RequestContext, -) -> Result, HttpError> { - let sa = request_context.context(); - let bootstore = sa.bootstore(); - let status = bootstore - .get_status() - .await - .map_err(|e| { - HttpError::from(omicron_common::api::external::Error::from(e)) - })? - .into(); - Ok(HttpResponseOk(status)) -} + async fn bootstore_status( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = request_context.context(); + let bootstore = sa.bootstore(); + let status = bootstore + .get_status() + .await + .map_err(|e| { + HttpError::from(omicron_common::api::external::Error::from(e)) + })? + .into(); + Ok(HttpResponseOk(status)) + } -/// Get the current versions of VPC routing rules. -#[endpoint { - method = GET, - path = "/vpc-routes", -}] -async fn list_vpc_routes( - request_context: RequestContext, -) -> Result>, HttpError> { - let sa = request_context.context(); - Ok(HttpResponseOk(sa.list_vpc_routes())) -} + async fn list_vpc_routes( + request_context: RequestContext, + ) -> Result>, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.list_vpc_routes())) + } -/// Update VPC routing rules. 
-#[endpoint { - method = PUT, - path = "/vpc-routes", -}] -async fn set_vpc_routes( - request_context: RequestContext, - body: TypedBody>, -) -> Result { - let sa = request_context.context(); - sa.set_vpc_routes(body.into_inner())?; - Ok(HttpResponseUpdatedNoContent()) + async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = request_context.context(); + sa.set_vpc_routes(body.into_inner())?; + Ok(HttpResponseUpdatedNoContent()) + } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 5dc4e1e6a2..0bcbc97fd2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -13,13 +13,6 @@ use crate::instance_manager::{ }; use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; -use crate::params::ZoneBundleMetadata; -use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; -use crate::params::{ - InstanceHardware, InstanceMetadata, InstanceMigrationTargetParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, VpcFirewallRule, -}; use crate::profile::*; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -36,7 +29,7 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - NetworkInterface, SledIdentifiers, SourceNatConfig, + NetworkInterface, ResolvedVpcFirewallRule, SledIdentifiers, SourceNatConfig, }; use omicron_common::backoff; use omicron_common::zpool_name::ZpoolName; @@ -44,6 +37,8 @@ use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid}; use propolis_client::Client as PropolisClient; use rand::prelude::IteratorRandom; use rand::SeedableRng; +use sled_agent_types::instance::*; +use sled_agent_types::zone_bundle::{ZoneBundleCause, ZoneBundleMetadata}; use sled_storage::dataset::ZONE_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -225,7 +220,7 @@ enum InstanceRequest { tx: oneshot::Sender, 
}, PutState { - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, tx: oneshot::Sender>, }, Terminate { @@ -337,7 +332,7 @@ struct InstanceRunner { source_nat: SourceNatConfig, ephemeral_ip: Option, floating_ips: Vec, - firewall_rules: Vec, + firewall_rules: Vec, dhcp_config: DhcpCfg, // Disk related properties @@ -1158,7 +1153,7 @@ impl Instance { pub async fn put_state( &self, tx: oneshot::Sender>, - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, ) -> Result<(), Error> { self.tx .send(InstanceRequest::PutState { state, tx }) @@ -1305,7 +1300,7 @@ impl InstanceRunner { async fn put_state( &mut self, - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, ) -> Result { use propolis_client::types::InstanceStateRequested as PropolisRequest; let (propolis_state, next_published) = match state { @@ -1569,14 +1564,12 @@ mod tests { use crate::metrics; use crate::nexus::make_nexus_client_with_port; use crate::vmm_reservoir::VmmReservoirManagerHandle; - use crate::zone_bundle::CleanupContext; use camino_tempfile::Utf8TempDir; use dns_server::TransientServer; use dropshot::HttpServer; use illumos_utils::dladm::MockDladm; use illumos_utils::dladm::__mock_MockDladm::__create_vnic::Context as MockDladmCreateVnicContext; use illumos_utils::dladm::__mock_MockDladm::__delete_vnic::Context as MockDladmDeleteVnicContext; - use illumos_utils::opte::params::DhcpConfig; use illumos_utils::svc::__wait_for_service::Context as MockWaitForServiceContext; use illumos_utils::zone::MockZones; use illumos_utils::zone::__mock_MockZones::__boot::Context as MockZonesBootContext; @@ -1588,8 +1581,9 @@ mod tests { use omicron_common::api::internal::nexus::{ InstanceProperties, InstanceRuntimeState, VmmState, }; - use omicron_common::api::internal::shared::SledIdentifiers; + use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; use omicron_common::FileKv; + use 
sled_agent_types::zone_bundle::CleanupContext; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::Ipv6Addr; use std::net::SocketAddrV6; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index fe070464ad..63164ed290 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -8,13 +8,6 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; use crate::metrics::MetricsRequestQueue; use crate::nexus::NexusClient; -use crate::params::InstanceExternalIpBody; -use crate::params::InstanceMetadata; -use crate::params::ZoneBundleMetadata; -use crate::params::{ - InstanceHardware, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, -}; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -32,6 +25,8 @@ use omicron_common::api::internal::nexus::VmmRuntimeState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_types::instance::*; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; use sled_storage::manager::StorageHandle; use sled_storage::resources::AllDisks; use slog::Logger; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index e920ffc3fc..68389ccf43 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -21,9 +21,10 @@ use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; -use crate::zone_bundle::{CleanupContext, ZoneBundler}; +use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_types::zone_bundle::CleanupContext; use sled_hardware::{HardwareManager, 
SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index aa5e8fd26f..419e897d75 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,234 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::zone_bundle::PriorityOrder; -pub use crate::zone_bundle::ZoneBundleCause; -pub use crate::zone_bundle::ZoneBundleId; -pub use crate::zone_bundle::ZoneBundleMetadata; -pub use illumos_utils::opte::params::DhcpConfig; -pub use illumos_utils::opte::params::VpcFirewallRule; -pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; -use omicron_common::api::internal::nexus::{ - DiskRuntimeState, InstanceProperties, InstanceRuntimeState, - SledInstanceState, VmmRuntimeState, -}; -use omicron_common::api::internal::shared::{ - NetworkInterface, SourceNatConfig, -}; -use omicron_common::disk::DiskVariant; -use omicron_uuid_kinds::PropolisUuid; -use omicron_uuid_kinds::ZpoolUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; pub use sled_hardware::DendriteAsic; -use sled_hardware_types::Baseboard; use sled_storage::dataset::DatasetName; use sled_storage::dataset::DatasetType; -use std::collections::BTreeSet; -use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::{IpAddr, SocketAddr, SocketAddrV6}; -use std::time::Duration; -use uuid::Uuid; - -/// Used to request a Disk state change -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] -#[serde(rename_all = "lowercase", tag = "state", content = "instance")] -pub enum DiskStateRequested { - Detached, - Attached(Uuid), - Destroyed, - Faulted, -} - -impl DiskStateRequested { - /// Returns whether the requested state is attached to an Instance or not. 
- pub fn is_attached(&self) -> bool { - match self { - DiskStateRequested::Detached => false, - DiskStateRequested::Destroyed => false, - DiskStateRequested::Faulted => false, - - DiskStateRequested::Attached(_) => true, - } - } -} - -/// Sent from to a sled agent to establish the runtime state of a Disk -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct DiskEnsureBody { - /// Last runtime state of the Disk known to Nexus (used if the agent has - /// never seen this Disk before). - pub initial_runtime: DiskRuntimeState, - /// requested runtime state of the Disk - pub target: DiskStateRequested, -} - -/// Describes the instance hardware. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstanceHardware { - pub properties: InstanceProperties, - pub nics: Vec, - pub source_nat: SourceNatConfig, - /// Zero or more external IP addresses (either floating or ephemeral), - /// provided to an instance to allow inbound connectivity. - pub ephemeral_ip: Option, - pub floating_ips: Vec, - pub firewall_rules: Vec, - pub dhcp_config: DhcpConfig, - // TODO: replace `propolis_client::*` with locally-modeled request type - pub disks: Vec, - pub cloud_init_bytes: Option, -} - -/// Metadata used to track statistics about an instance. -/// -// NOTE: The instance ID is not here, since it's already provided in other -// pieces of the instance-related requests. It is pulled from there when -// publishing metrics for the instance. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct InstanceMetadata { - pub silo_id: Uuid, - pub project_id: Uuid, -} - -/// The body of a request to ensure that a instance and VMM are known to a sled -/// agent. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstanceEnsureBody { - /// A description of the instance's virtual hardware and the initial runtime - /// state this sled agent should store for this incarnation of the instance. 
- pub hardware: InstanceHardware, - - /// The instance runtime state for the instance being registered. - pub instance_runtime: InstanceRuntimeState, - - /// The initial VMM runtime state for the VMM being registered. - pub vmm_runtime: VmmRuntimeState, - - /// The ID of the VMM being registered. This may not be the active VMM ID in - /// the instance runtime state (e.g. if the new VMM is going to be a - /// migration target). - pub propolis_id: PropolisUuid, - - /// The address at which this VMM should serve a Propolis server API. - pub propolis_addr: SocketAddr, - - /// Metadata used to track instance statistics. - pub metadata: InstanceMetadata, -} - -/// The body of a request to move a previously-ensured instance into a specific -/// runtime state. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateBody { - /// The state into which the instance should be driven. - pub state: InstanceStateRequested, -} - -/// The response sent from a request to move an instance into a specific runtime -/// state. -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateResponse { - /// The current runtime state of the instance after handling the request to - /// change its state. If the instance's state did not change, this field is - /// `None`. - pub updated_runtime: Option, -} - -/// The response sent from a request to unregister an instance. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstanceUnregisterResponse { - /// The current state of the instance after handling the request to - /// unregister it. If the instance's state did not change, this field is - /// `None`. - pub updated_runtime: Option, -} - -/// Parameters used when directing Propolis to initialize itself via live -/// migration. -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrationTargetParams { - /// The Propolis ID of the migration source. 
- pub src_propolis_id: Uuid, - - /// The address of the Propolis server that will serve as the migration - /// source. - pub src_propolis_addr: SocketAddr, -} - -/// Requestable running state of an Instance. -/// -/// A subset of [`omicron_common::api::external::InstanceState`]. -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceStateRequested { - /// Run this instance by migrating in from a previous running incarnation of - /// the instance. - MigrationTarget(InstanceMigrationTargetParams), - /// Start the instance if it is not already running. - Running, - /// Stop the instance. - Stopped, - /// Immediately reset the instance, as though it had stopped and immediately - /// began to run again. - Reboot, -} - -impl Display for InstanceStateRequested { - fn fmt(&self, f: &mut Formatter) -> FormatResult { - write!(f, "{}", self.label()) - } -} - -impl InstanceStateRequested { - fn label(&self) -> &str { - match self { - InstanceStateRequested::MigrationTarget(_) => "migrating in", - InstanceStateRequested::Running => "running", - InstanceStateRequested::Stopped => "stopped", - InstanceStateRequested::Reboot => "reboot", - } - } - - /// Returns true if the state represents a stopped Instance. - pub fn is_stopped(&self) -> bool { - match self { - InstanceStateRequested::MigrationTarget(_) => false, - InstanceStateRequested::Running => false, - InstanceStateRequested::Stopped => true, - InstanceStateRequested::Reboot => false, - } - } -} - -/// Instance runtime state to update for a migration. 
-#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrationSourceParams { - pub migration_id: Uuid, - pub dst_propolis_id: PropolisUuid, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub enum DiskType { - U2, - M2, -} - -impl From for DiskType { - fn from(v: DiskVariant) -> Self { - match v { - DiskVariant::U2 => Self::U2, - DiskVariant::M2 => Self::M2, - } - } -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct Zpool { - pub id: ZpoolUuid, - pub disk_type: DiskType, -} +use std::net::SocketAddrV6; /// Extension trait for `OmicronZoneConfig`. /// @@ -312,88 +89,3 @@ impl OmicronZoneTypeExt for OmicronZoneConfig { &self.zone_type } } - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct TimeSync { - /// The synchronization state of the sled, true when the system clock - /// and the NTP clock are in sync (to within a small window). - pub sync: bool, - /// The NTP reference ID. - pub ref_id: u32, - /// The NTP reference IP address. - pub ip_addr: IpAddr, - /// The NTP stratum (our upstream's stratum plus one). - pub stratum: u8, - /// The NTP reference time (i.e. what chrony thinks the current time is, not - /// necessarily the current system time). - pub ref_time: f64, - // This could be f32, but there is a problem with progenitor/typify - // where, although the f32 correctly becomes "float" (and not "double") in - // the API spec, that "float" gets converted back to f64 when generating - // the client. - /// The current offset between the NTP clock and system clock. - pub correction: f64, -} - -/// Parameters used to update the zone bundle cleanup context. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct CleanupContextUpdate { - /// The new period on which automatic cleanups are run. - pub period: Option, - /// The priority ordering for preserving old zone bundles. 
- pub priority: Option, - /// The new limit on the underlying dataset quota allowed for bundles. - pub storage_limit: Option, -} - -/// Used to dynamically update external IPs attached to an instance. -#[derive( - Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, -)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceExternalIpBody { - Ephemeral(IpAddr), - Floating(IpAddr), -} - -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct EstablishedConnection { - baseboard: Baseboard, - addr: SocketAddrV6, -} - -impl From<(Baseboard, SocketAddrV6)> for EstablishedConnection { - fn from(value: (Baseboard, SocketAddrV6)) -> Self { - EstablishedConnection { baseboard: value.0, addr: value.1 } - } -} - -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootstoreStatus { - pub fsm_ledger_generation: u64, - pub network_config_ledger_generation: Option, - pub fsm_state: String, - pub peers: BTreeSet, - pub established_connections: Vec, - pub accepted_connections: BTreeSet, - pub negotiating_connections: BTreeSet, -} - -impl From for BootstoreStatus { - fn from(value: bootstore::schemes::v0::Status) -> Self { - BootstoreStatus { - fsm_ledger_generation: value.fsm_ledger_generation, - network_config_ledger_generation: value - .network_config_ledger_generation, - fsm_state: value.fsm_state.to_string(), - peers: value.peers, - established_connections: value - .connections - .into_iter() - .map(EstablishedConnection::from) - .collect(), - accepted_connections: value.accepted_connections, - negotiating_connections: value.negotiating_connections, - } - } -} diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 529ef392b7..42186f66e9 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -3,7 +3,6 @@ use crate::nexus::NexusClient; use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; use 
illumos_utils::link::VnicAllocator; -use illumos_utils::opte::params::VpcFirewallRule; use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::Zones; @@ -14,7 +13,9 @@ use omicron_common::api::external::{ Generation, VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, VpcFirewallRuleStatus, }; -use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::api::internal::shared::{ + NetworkInterface, ResolvedVpcFirewallRule, +}; use rand::prelude::IteratorRandom; use rand::SeedableRng; use sled_storage::dataset::ZONE_DATASET; @@ -308,7 +309,7 @@ impl ProbeManagerInner { source_nat: None, ephemeral_ip: Some(eip.ip), floating_ips: &[], - firewall_rules: &[VpcFirewallRule { + firewall_rules: &[ResolvedVpcFirewallRule { status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, targets: vec![nic.clone()], diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 471717989a..ff137f131f 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -4,7 +4,6 @@ //! Plan generation for "where should services be initialized". 
-use crate::bootstrap::params::StartSledAgentRequest; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -40,6 +39,7 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use sled_agent_types::rack_init::RackInitializeRequest as Config; +use sled_agent_types::sled::StartSledAgentRequest; use sled_storage::dataset::{DatasetName, DatasetType, CONFIG_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 3d5b90a22d..c511cf1447 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -4,16 +4,15 @@ //! Plan generation for "how should sleds be initialized". -use crate::bootstrap::params::StartSledAgentRequestBody; -use crate::bootstrap::{ - config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, -}; +use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_types::rack_init::back_compat::RackInitializeRequestV1 as ConfigV1; use sled_agent_types::rack_init::RackInitializeRequest as Config; +use sled_agent_types::sled::StartSledAgentRequest; +use sled_agent_types::sled::StartSledAgentRequestBody; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ac51912fe6..bead95be80 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -69,10 +69,9 @@ use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, }; -use crate::bootstrap::params::StartSledAgentRequest; use 
crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; -use crate::params::{OmicronZoneTypeExt, TimeSync}; +use crate::params::OmicronZoneTypeExt; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, }; @@ -120,6 +119,8 @@ use sled_agent_types::early_networking::{ use sled_agent_types::rack_init::{ BootstrapAddressDiscovery, RackInitializeRequest as Config, }; +use sled_agent_types::sled::StartSledAgentRequest; +use sled_agent_types::time_sync::TimeSync; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 43fb64914f..c4f3e1008f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -7,11 +7,11 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; -use crate::bootstrap::params::StartSledAgentRequest; use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns::resolver::Resolver; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; @@ -99,14 +99,3 @@ impl Server { self.http_server.close().await } } - -/// Runs the OpenAPI generator, emitting the spec to stdout. 
-pub fn run_openapi() -> Result<(), String> { - http_api() - .openapi("Oxide Sled Agent API", "0.0.1") - .description("API for interacting with individual sleds") - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e319b3fa15..b822ae2963 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -31,10 +31,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::BootstrapNetworking; use crate::config::SidecarRevision; use crate::metrics::MetricsRequestQueue; -use crate::params::{ - DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt, TimeSync, - ZoneBundleCause, ZoneBundleMetadata, -}; +use crate::params::{DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt}; use crate::profile::*; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -96,6 +93,10 @@ use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; +use sled_agent_types::{ + time_sync::TimeSync, + zone_bundle::{ZoneBundleCause, ZoneBundleMetadata}, +}; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::SledMode; diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index ffb7327ce7..6057d03f70 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -410,7 +410,6 @@ impl SimCollection { #[cfg(test)] mod test { - use crate::params::{DiskStateRequested, InstanceStateRequested}; use crate::sim::collection::SimObject; use crate::sim::disk::SimDisk; use crate::sim::instance::SimInstance; @@ -427,6 +426,8 @@ mod test { use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::PropolisUuid; + use 
sled_agent_types::disk::DiskStateRequested; + use sled_agent_types::instance::InstanceStateRequested; fn make_instance( logctx: &LogContext, diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index 284e424ebf..9661b1949b 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -5,7 +5,6 @@ //! Simulated sled agent implementation use crate::nexus::NexusClient; -use crate::params::DiskStateRequested; use crate::sim::simulatable::Simulatable; use async_trait::async_trait; use dropshot::ConfigLogging; @@ -20,6 +19,7 @@ use omicron_common::api::internal::nexus::ProducerKind; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; use propolis_client::types::DiskAttachmentState as PropolisDiskState; +use sled_agent_types::disk::DiskStateRequested; use std::net::{Ipv6Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index d042e19814..c219a747ce 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,12 +5,6 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; -use crate::bootstrap::params::AddSledRequest; -use crate::params::{ - DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, - InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - VpcFirewallRulesEnsureBody, -}; use dropshot::ApiDescription; use dropshot::HttpError; use dropshot::HttpResponseOk; @@ -19,20 +13,28 @@ use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; use dropshot::{endpoint, ApiDescriptionRegisterError}; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; +use sled_agent_types::instance::InstanceEnsureBody; +use sled_agent_types::instance::InstanceExternalIpBody; +use sled_agent_types::instance::InstancePutStateBody; +use sled_agent_types::instance::InstancePutStateResponse; +use sled_agent_types::instance::InstanceUnregisterResponse; +use sled_agent_types::sled::AddSledRequest; use std::sync::Arc; use uuid::Uuid; diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 8ee0130262..33bc1c40c1 100644 --- 
a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -8,7 +8,6 @@ use super::simulatable::Simulatable; use crate::common::instance::{ObservedPropolisState, PublishedVmmState}; use crate::nexus::NexusClient; -use crate::params::InstanceStateRequested; use async_trait::async_trait; use chrono::Utc; use nexus_client; @@ -21,6 +20,7 @@ use propolis_client::types::{ InstanceMigrationStatus as PropolisMigrationStatus, InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, }; +use sled_agent_types::instance::InstanceStateRequested; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Mutex; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 79d57a42e6..10536c8c80 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -11,18 +11,12 @@ use super::instance::{self, SimInstance}; use super::storage::CrucibleData; use super::storage::Storage; use crate::nexus::NexusClient; -use crate::params::{ - DiskStateRequested, InstanceExternalIpBody, InstanceHardware, - InstanceMetadata, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, -}; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; use anyhow::bail; use anyhow::Context; use dropshot::{HttpError, HttpServer}; use futures::lock::Mutex; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use nexus_sled_agent_shared::inventory::{ Inventory, InventoryDisk, InventoryZpool, OmicronZonesConfig, SledRole, }; @@ -38,9 +32,11 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::shared::{ RackNetworkConfig, ResolvedVpcRoute, ResolvedVpcRouteSet, ResolvedVpcRouteState, RouterId, RouterKind, RouterVersion, + VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDisksConfig, + DiskIdentity, DiskVariant, DisksManagementResult, + OmicronPhysicalDisksConfig, }; use 
omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -48,10 +44,15 @@ use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::instance::{ + InstanceExternalIpBody, InstanceHardware, InstanceMetadata, + InstancePutStateResponse, InstanceStateRequested, + InstanceUnregisterResponse, +}; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 948ac96bcd..556388ce93 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -19,15 +19,15 @@ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; use omicron_common::disk::DiskIdentity; +use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; -use sled_storage::resources::DiskManagementStatus; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d87df0d7c5..50e5611027 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -7,7 +7,6 @@ use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use 
crate::bootstrap::early_networking::EarlyNetworkSetupError; -use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest}; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; @@ -15,12 +14,7 @@ use crate::metrics::MetricsManager; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; -use crate::params::{ - DiskStateRequested, InstanceExternalIpBody, InstanceHardware, - InstanceMetadata, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, OmicronZoneTypeExt, TimeSync, VpcFirewallRule, - ZoneBundleMetadata, Zpool, -}; +use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; @@ -34,7 +28,6 @@ use derive_more::From; use dropshot::HttpError; use futures::stream::FuturesUnordered; use futures::StreamExt; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; @@ -49,8 +42,9 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - HostPortConfig, RackNetworkConfig, ResolvedVpcRouteSet, - ResolvedVpcRouteState, SledIdentifiers, + HostPortConfig, RackNetworkConfig, ResolvedVpcFirewallRule, + ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, + VirtualNetworkInterfaceHost, }; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -59,15 +53,27 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::OmicronPhysicalDisksConfig; +use omicron_common::disk::{DisksManagementResult, OmicronPhysicalDisksConfig}; use omicron_ddm_admin_client::Client as 
DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; +use sled_agent_api::Zpool; +use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; +use sled_agent_types::instance::{ + InstanceExternalIpBody, InstanceHardware, InstanceMetadata, + InstancePutStateResponse, InstanceStateRequested, + InstanceUnregisterResponse, +}; +use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, + PriorityOrder, StorageLimit, ZoneBundleMetadata, +}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; use sled_storage::manager::StorageHandle; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -233,8 +239,9 @@ impl From for dropshot::HttpError { BundleError::NoSuchZone { .. } => { HttpError::for_not_found(None, inner.to_string()) } - BundleError::InvalidStorageLimit - | BundleError::InvalidCleanupPeriod => { + BundleError::StorageLimitCreate(_) + | BundleError::CleanupPeriodCreate(_) + | BundleError::PriorityOrderCreate(_) => { HttpError::for_bad_request(None, inner.to_string()) } BundleError::InstanceTerminating => { @@ -772,18 +779,16 @@ impl SledAgent { } /// Fetch the zone bundle cleanup context. - pub async fn zone_bundle_cleanup_context( - &self, - ) -> zone_bundle::CleanupContext { + pub async fn zone_bundle_cleanup_context(&self) -> CleanupContext { self.inner.zone_bundler.cleanup_context().await } /// Update the zone bundle cleanup context. 
pub async fn update_zone_bundle_cleanup_context( &self, - period: Option, - storage_limit: Option, - priority: Option, + period: Option, + storage_limit: Option, + priority: Option, ) -> Result<(), Error> { self.inner .zone_bundler @@ -795,15 +800,14 @@ impl SledAgent { /// Fetch the current utilization of the relevant datasets for zone bundles. pub async fn zone_bundle_utilization( &self, - ) -> Result, Error> - { + ) -> Result, Error> { self.inner.zone_bundler.utilization().await.map_err(Error::from) } /// Trigger an explicit request to cleanup old zone bundles. pub async fn zone_bundle_cleanup( &self, - ) -> Result, Error> { + ) -> Result, Error> { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } @@ -1098,7 +1102,7 @@ impl SledAgent { pub async fn firewall_rules_ensure( &self, vpc_vni: Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { self.inner .port_manager diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 4062016597..46cee1c415 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -11,8 +11,6 @@ use anyhow::Context; use camino::FromPathBufError; use camino::Utf8Path; use camino::Utf8PathBuf; -use chrono::DateTime; -use chrono::Utc; use flate2::bufread::GzDecoder; use illumos_utils::running_zone::is_oxide_smf_log_file; use illumos_utils::running_zone::RunningZone; @@ -29,18 +27,12 @@ use illumos_utils::zfs::Snapshot; use illumos_utils::zfs::Zfs; use illumos_utils::zfs::ZFS; use illumos_utils::zone::AdmError; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; +use sled_agent_types::zone_bundle::*; use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; -use std::cmp::Ord; -use std::cmp::Ordering; -use std::cmp::PartialOrd; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::collections::HashSet; use std::io::Cursor; use std::sync::Arc; use 
std::time::Duration; @@ -55,104 +47,6 @@ use tokio::time::sleep; use tokio::time::Instant; use uuid::Uuid; -/// An identifier for a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleId { - /// The name of the zone this bundle is derived from. - pub zone_name: String, - /// The ID for this bundle itself. - pub bundle_id: Uuid, -} - -/// The reason or cause for a zone bundle, i.e., why it was created. -// -// NOTE: The ordering of the enum variants is important, and should not be -// changed without careful consideration. -// -// The ordering is used when deciding which bundles to remove automatically. In -// addition to time, the cause is used to sort bundles, so changing the variant -// order will change that priority. -#[derive( - Clone, - Copy, - Debug, - Default, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -#[serde(rename_all = "snake_case")] -#[non_exhaustive] -pub enum ZoneBundleCause { - /// Some other, unspecified reason. - #[default] - Other, - /// A zone bundle taken when a sled agent finds a zone that it does not - /// expect to be running. - UnexpectedZone, - /// An instance zone was terminated. - TerminatedInstance, - /// Generated in response to an explicit request to the sled agent. - ExplicitRequest, -} - -/// Metadata about a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleMetadata { - /// Identifier for this zone bundle - pub id: ZoneBundleId, - /// The time at which this zone bundle was created. - pub time_created: DateTime, - /// A version number for this zone bundle. - pub version: u8, - /// The reason or cause a bundle was created. - pub cause: ZoneBundleCause, -} - -impl ZoneBundleMetadata { - const VERSION: u8 = 0; - - /// Create a new set of metadata for the provided zone. 
- pub(crate) fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { - Self { - id: ZoneBundleId { - zone_name: zone_name.to_string(), - bundle_id: Uuid::new_v4(), - }, - time_created: Utc::now(), - version: Self::VERSION, - cause, - } - } -} - // The name of the snapshot created from the zone root filesystem. const ZONE_ROOT_SNAPSHOT_NAME: &'static str = "zone-root"; @@ -650,20 +544,14 @@ pub enum BundleError { #[error("Zone '{name}' cannot currently be bundled")] Unavailable { name: String }, - #[error("Storage limit must be expressed as a percentage in (0, 100]")] - InvalidStorageLimit, + #[error(transparent)] + StorageLimitCreate(#[from] StorageLimitCreateError), - #[error( - "Cleanup period must be between {min:?} and {max:?}, inclusive", - min = CleanupPeriod::MIN, - max = CleanupPeriod::MAX, - )] - InvalidCleanupPeriod, + #[error(transparent)] + CleanupPeriodCreate(#[from] CleanupPeriodCreateError), - #[error( - "Invalid priority ordering. Each element must appear exactly once." - )] - InvalidPriorityOrder, + #[error(transparent)] + PriorityOrderCreate(#[from] PriorityOrderCreateError), #[error("Cleanup failed")] Cleanup(#[source] anyhow::Error), @@ -1484,29 +1372,6 @@ async fn get_zone_bundle_paths( Ok(out) } -/// The portion of a debug dataset used for zone bundles. -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BundleUtilization { - /// The total dataset quota, in bytes. - pub dataset_quota: u64, - /// The total number of bytes available for zone bundles. - /// - /// This is `dataset_quota` multiplied by the context's storage limit. - pub bytes_available: u64, - /// Total bundle usage, in bytes. 
- pub bytes_used: u64, -} - -#[derive(Clone, Debug, PartialEq)] -struct ZoneBundleInfo { - // The raw metadata for the bundle - metadata: ZoneBundleMetadata, - // The full path to the bundle - path: Utf8PathBuf, - // The number of bytes consumed on disk by the bundle - bytes: u64, -} - // Enumerate all zone bundles under the provided directory. async fn enumerate_zone_bundles( log: &Logger, @@ -1577,15 +1442,6 @@ async fn enumerate_zone_bundles( Ok(out) } -/// The count of bundles / bytes removed during a cleanup operation. -#[derive(Clone, Copy, Debug, Default, Deserialize, JsonSchema, Serialize)] -pub struct CleanupCount { - /// The number of bundles removed. - bundles: u64, - /// The number of bytes removed. - bytes: u64, -} - // Run a cleanup, removing old bundles according to the strategy. // // Return the number of bundles removed and the new usage. @@ -1687,19 +1543,6 @@ async fn compute_bundle_utilization( Ok(out) } -/// Context provided for the zone bundle cleanup task. -#[derive( - Clone, Copy, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize, -)] -pub struct CleanupContext { - /// The period on which automatic checks and cleanup is performed. - pub period: CleanupPeriod, - /// The limit on the dataset quota available for zone bundles. - pub storage_limit: StorageLimit, - /// The priority ordering for keeping old bundles. - pub priority: PriorityOrder, -} - // Return the number of bytes occupied by the provided directory. // // This returns an error if: @@ -1814,258 +1657,10 @@ async fn zfs_quota(path: &Utf8PathBuf) -> Result { } } -/// The limit on space allowed for zone bundles, as a percentage of the overall -/// dataset's quota. 
-#[derive( - Clone, - Copy, - Debug, - Deserialize, - JsonSchema, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct StorageLimit(u8); - -impl std::fmt::Display for StorageLimit { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}%", self.as_u8()) - } -} - -impl Default for StorageLimit { - fn default() -> Self { - StorageLimit(25) - } -} - -impl StorageLimit { - /// Minimum percentage of dataset quota supported. - pub const MIN: Self = Self(0); - - /// Maximum percentage of dataset quota supported. - pub const MAX: Self = Self(50); - - /// Construct a new limit allowed for zone bundles. - /// - /// This should be expressed as a percentage, in the range (Self::MIN, - /// Self::MAX]. - pub const fn new(percentage: u8) -> Result { - if percentage > Self::MIN.0 && percentage <= Self::MAX.0 { - Ok(Self(percentage)) - } else { - Err(BundleError::InvalidStorageLimit) - } - } - - /// Return the contained quota percentage. - pub const fn as_u8(&self) -> u8 { - self.0 - } - - // Compute the number of bytes available from a dataset quota, in bytes. - const fn bytes_available(&self, dataset_quota: u64) -> u64 { - (dataset_quota * self.as_u8() as u64) / 100 - } -} - -/// A dimension along with bundles can be sorted, to determine priority. -#[derive( - Clone, - Copy, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Serialize, - Ord, - PartialEq, - PartialOrd, -)] -#[serde(rename_all = "snake_case")] -pub enum PriorityDimension { - /// Sorting by time, with older bundles with lower priority. - Time, - /// Sorting by the cause for creating the bundle. - Cause, - // TODO-completeness: Support zone or zone type (e.g., service vs instance)? -} - -/// The priority order for bundles during cleanup. -/// -/// Bundles are sorted along the dimensions in [`PriorityDimension`], with each -/// dimension appearing exactly once. During cleanup, lesser-priority bundles -/// are pruned first, to maintain the dataset quota. 
Note that bundles are -/// sorted by each dimension in the order in which they appear, with each -/// dimension having higher priority than the next. -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] -pub struct PriorityOrder([PriorityDimension; PriorityOrder::EXPECTED_SIZE]); - -impl std::ops::Deref for PriorityOrder { - type Target = [PriorityDimension; PriorityOrder::EXPECTED_SIZE]; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Default for PriorityOrder { - fn default() -> Self { - Self::DEFAULT - } -} - -impl PriorityOrder { - // NOTE: Must match the number of variants in `PriorityDimension`. - const EXPECTED_SIZE: usize = 2; - const DEFAULT: Self = - Self([PriorityDimension::Cause, PriorityDimension::Time]); - - /// Construct a new priority order. - /// - /// This requires that each dimension appear exactly once. - pub fn new(dims: &[PriorityDimension]) -> Result { - if dims.len() != Self::EXPECTED_SIZE { - return Err(BundleError::InvalidPriorityOrder); - } - let mut seen = HashSet::new(); - for dim in dims.iter() { - if !seen.insert(dim) { - return Err(BundleError::InvalidPriorityOrder); - } - } - Ok(Self(dims.try_into().unwrap())) - } - - // Order zone bundle info according to the contained priority. - // - // We sort the info by each dimension, in the order in which it appears. - // That means earlier dimensions have higher priority than later ones. - fn compare_bundles( - &self, - lhs: &ZoneBundleInfo, - rhs: &ZoneBundleInfo, - ) -> Ordering { - for dim in self.0.iter() { - let ord = match dim { - PriorityDimension::Cause => { - lhs.metadata.cause.cmp(&rhs.metadata.cause) - } - PriorityDimension::Time => { - lhs.metadata.time_created.cmp(&rhs.metadata.time_created) - } - }; - if matches!(ord, Ordering::Equal) { - continue; - } - return ord; - } - Ordering::Equal - } -} - -/// A period on which bundles are automatically cleaned up. 
-#[derive( - Clone, Copy, Deserialize, JsonSchema, PartialEq, PartialOrd, Serialize, -)] -pub struct CleanupPeriod(Duration); - -impl Default for CleanupPeriod { - fn default() -> Self { - Self(Duration::from_secs(600)) - } -} - -impl CleanupPeriod { - /// The minimum supported cleanup period. - pub const MIN: Self = Self(Duration::from_secs(60)); - - /// The maximum supported cleanup period. - pub const MAX: Self = Self(Duration::from_secs(60 * 60 * 24)); - - /// Construct a new cleanup period, checking that it's valid. - pub fn new(duration: Duration) -> Result { - if duration >= Self::MIN.as_duration() - && duration <= Self::MAX.as_duration() - { - Ok(Self(duration)) - } else { - Err(BundleError::InvalidCleanupPeriod) - } - } - - /// Return the period as a duration. - pub const fn as_duration(&self) -> Duration { - self.0 - } -} - -impl TryFrom for CleanupPeriod { - type Error = BundleError; - - fn try_from(duration: Duration) -> Result { - Self::new(duration) - } -} - -impl std::fmt::Debug for CleanupPeriod { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - self.0.fmt(f) - } -} - #[cfg(test)] mod tests { use super::disk_usage; - use super::PriorityDimension; - use super::PriorityOrder; - use super::StorageLimit; use super::Utf8PathBuf; - use super::ZoneBundleCause; - use super::ZoneBundleId; - use super::ZoneBundleInfo; - use super::ZoneBundleMetadata; - use chrono::TimeZone; - use chrono::Utc; - - #[test] - fn test_sort_zone_bundle_cause() { - use ZoneBundleCause::*; - let mut original = - [ExplicitRequest, Other, TerminatedInstance, UnexpectedZone]; - let expected = - [Other, UnexpectedZone, TerminatedInstance, ExplicitRequest]; - original.sort(); - assert_eq!(original, expected); - } - - #[test] - fn test_priority_dimension() { - assert!(PriorityOrder::new(&[]).is_err()); - assert!(PriorityOrder::new(&[PriorityDimension::Cause]).is_err()); - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Cause - ]) - 
.is_err()); - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Cause, - PriorityDimension::Time - ]) - .is_err()); - - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Time - ]) - .is_ok()); - assert_eq!( - PriorityOrder::new(&PriorityOrder::default().0).unwrap(), - PriorityOrder::default() - ); - } #[tokio::test] async fn test_disk_usage() { @@ -2081,95 +1676,6 @@ mod tests { let path = Utf8PathBuf::from("/some/nonexistent/path"); assert!(disk_usage(&path).await.is_err()); } - - #[test] - fn test_storage_limit_bytes_available() { - let pct = StorageLimit(1); - assert_eq!(pct.bytes_available(100), 1); - assert_eq!(pct.bytes_available(1000), 10); - - let pct = StorageLimit(100); - assert_eq!(pct.bytes_available(100), 100); - assert_eq!(pct.bytes_available(1000), 1000); - - let pct = StorageLimit(100); - assert_eq!(pct.bytes_available(99), 99); - - let pct = StorageLimit(99); - assert_eq!(pct.bytes_available(1), 0); - - // Test non-power of 10. 
- let pct = StorageLimit(25); - assert_eq!(pct.bytes_available(32768), 8192); - } - - #[test] - fn test_compare_bundles() { - use PriorityDimension::*; - let time_first = PriorityOrder([Time, Cause]); - let cause_first = PriorityOrder([Cause, Time]); - - fn make_info( - year: i32, - month: u32, - day: u32, - cause: ZoneBundleCause, - ) -> ZoneBundleInfo { - ZoneBundleInfo { - metadata: ZoneBundleMetadata { - id: ZoneBundleId { - zone_name: String::from("oxz_whatever"), - bundle_id: uuid::Uuid::new_v4(), - }, - time_created: Utc - .with_ymd_and_hms(year, month, day, 0, 0, 0) - .single() - .unwrap(), - cause, - version: 0, - }, - path: Utf8PathBuf::from("/some/path"), - bytes: 0, - } - } - - let info = [ - make_info(2020, 1, 2, ZoneBundleCause::TerminatedInstance), - make_info(2020, 1, 2, ZoneBundleCause::ExplicitRequest), - make_info(2020, 1, 1, ZoneBundleCause::TerminatedInstance), - make_info(2020, 1, 1, ZoneBundleCause::ExplicitRequest), - ]; - - let mut sorted = info.clone(); - sorted.sort_by(|lhs, rhs| time_first.compare_bundles(lhs, rhs)); - // Low -> high priority - // [old/terminated, old/explicit, new/terminated, new/explicit] - let expected = [ - info[2].clone(), - info[3].clone(), - info[0].clone(), - info[1].clone(), - ]; - assert_eq!( - sorted, expected, - "sorting zone bundles by time-then-cause failed" - ); - - let mut sorted = info.clone(); - sorted.sort_by(|lhs, rhs| cause_first.compare_bundles(lhs, rhs)); - // Low -> high priority - // [old/terminated, new/terminated, old/explicit, new/explicit] - let expected = [ - info[2].clone(), - info[0].clone(), - info[3].clone(), - info[1].clone(), - ]; - assert_eq!( - sorted, expected, - "sorting zone bundles by cause-then-time failed" - ); - } } #[cfg(all(target_os = "illumos", test))] @@ -2347,7 +1853,10 @@ mod illumos_tests { let new_context = CleanupContext { period: CleanupPeriod::new(ctx.context.period.as_duration() / 2) .unwrap(), - storage_limit: StorageLimit(ctx.context.storage_limit.as_u8() / 2), 
+ storage_limit: StorageLimit::new( + ctx.context.storage_limit.as_u8() / 2, + ) + .unwrap(), priority: PriorityOrder::new( &ctx.context.priority.iter().copied().rev().collect::>(), ) @@ -2529,7 +2038,11 @@ mod illumos_tests { // First, reduce the storage limit, so that we only need to add a few // bundles. ctx.bundler - .update_cleanup_context(None, Some(StorageLimit(2)), None) + .update_cleanup_context( + None, + Some(StorageLimit::new(2).unwrap()), + None, + ) .await .context("failed to update cleanup context")?; diff --git a/sled-agent/tests/integration_tests/commands.rs b/sled-agent/tests/integration_tests/commands.rs index 26c82e488e..8a5b355770 100644 --- a/sled-agent/tests/integration_tests/commands.rs +++ b/sled-agent/tests/integration_tests/commands.rs @@ -13,9 +13,7 @@ use expectorate::assert_contents; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; -use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use omicron_test_utils::dev::test_cmds::EXIT_USAGE; -use openapiv3::OpenAPI; use subprocess::Exec; /// name of the "sled-agent-sim" executable @@ -56,26 +54,3 @@ fn test_sled_agent_no_args() { assert_contents("tests/output/cmd-sled-agent-noargs-stdout", &stdout_text); assert_contents("tests/output/cmd-sled-agent-noargs-stderr", &stderr_text); } - -#[test] -fn test_sled_agent_openapi_sled() { - let exec = Exec::cmd(path_to_sled_agent()).arg("openapi").arg("sled"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - assert_contents( - "tests/output/cmd-sled-agent-openapi-sled-stderr", - &stderr_text, - ); - - let spec: OpenAPI = serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - - // Check for lint errors. 
- let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. - assert_contents("../openapi/sled-agent.json", &stdout_text); -} diff --git a/sled-agent/tests/output/cmd-sled-agent-noargs-stderr b/sled-agent/tests/output/cmd-sled-agent-noargs-stderr index ee397c0ef7..409d1ec0d8 100644 --- a/sled-agent/tests/output/cmd-sled-agent-noargs-stderr +++ b/sled-agent/tests/output/cmd-sled-agent-noargs-stderr @@ -3,9 +3,8 @@ See README.adoc for more information Usage: sled-agent Commands: - openapi Generates the OpenAPI specification - run Runs the Sled Agent server - help Print this message or the help of the given subcommand(s) + run Runs the Sled Agent server + help Print this message or the help of the given subcommand(s) Options: -h, --help Print help diff --git a/sled-agent/tests/output/cmd-sled-agent-openapi-bootstrap-stderr b/sled-agent/tests/output/cmd-sled-agent-openapi-bootstrap-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sled-agent/tests/output/cmd-sled-agent-openapi-sled-stderr b/sled-agent/tests/output/cmd-sled-agent-openapi-sled-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml index a9ed8fcb22..e01d40db28 100644 --- a/sled-agent/types/Cargo.toml +++ b/sled-agent/types/Cargo.toml @@ -9,8 +9,10 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true bootstore.workspace = true camino.workspace = true +chrono.workspace = true nexus-sled-agent-shared.workspace = true # Note: we're trying to avoid a dependency from sled-agent-types to nexus-types # because the correct direction of dependency is unclear. 
If there are types @@ -19,11 +21,13 @@ omicron-common.workspace = true omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true oxnet.workspace = true +propolis-client.workspace = true schemars.workspace = true serde.workspace = true +serde_human_bytes.workspace = true serde_json.workspace = true +sha3.workspace = true sled-hardware-types.workspace = true -sled-storage.workspace = true slog.workspace = true thiserror.workspace = true toml.workspace = true diff --git a/sled-agent/types/src/boot_disk.rs b/sled-agent/types/src/boot_disk.rs new file mode 100644 index 0000000000..30129d6c7e --- /dev/null +++ b/sled-agent/types/src/boot_disk.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common types related to boot disks. + +use omicron_common::disk::M2Slot; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskPathParams { + pub boot_disk: M2Slot, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskUpdatePathParams { + pub boot_disk: M2Slot, + pub update_id: Uuid, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskWriteStartQueryParams { + pub update_id: Uuid, + // TODO do we already have sha2-256 hashes of the OS images, and if so + // should we use that instead? Another option is to use the external API + // `Digest` type, although it predates `serde_human_bytes` so just stores + // the hash as a `String`. + #[serde(with = "serde_human_bytes::hex_array")] + #[schemars(schema_with = "omicron_common::hex_schema::<32>")] + pub sha3_256_digest: [u8; 32], +} + +/// Current progress of an OS image being written to disk. 
+#[derive( + Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema, Serialize, +)] +#[serde(tag = "state", rename_all = "snake_case")] +pub enum BootDiskOsWriteProgress { + /// The image is still being uploaded. + ReceivingUploadedImage { bytes_received: usize }, + /// The image is being written to disk. + WritingImageToDisk { bytes_written: usize }, + /// The image is being read back from disk for validation. + ValidatingWrittenImage { bytes_read: usize }, +} + +/// Status of an update to a boot disk OS. +#[derive(Debug, Clone, Deserialize, JsonSchema, Serialize)] +#[serde(tag = "status", rename_all = "snake_case")] +pub enum BootDiskOsWriteStatus { + /// No update has been started for this disk, or any previously-started + /// update has completed and had its status cleared. + NoUpdateStarted, + /// An update is currently running. + InProgress { update_id: Uuid, progress: BootDiskOsWriteProgress }, + /// The most recent update completed successfully. + Complete { update_id: Uuid }, + /// The most recent update failed. + Failed { update_id: Uuid, message: String }, +} diff --git a/sled-agent/types/src/bootstore.rs b/sled-agent/types/src/bootstore.rs new file mode 100644 index 0000000000..9c9e8257a4 --- /dev/null +++ b/sled-agent/types/src/bootstore.rs @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::{collections::BTreeSet, net::SocketAddrV6}; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware_types::Baseboard; + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootstoreStatus { + pub fsm_ledger_generation: u64, + pub network_config_ledger_generation: Option, + pub fsm_state: String, + pub peers: BTreeSet, + pub established_connections: Vec, + pub accepted_connections: BTreeSet, + pub negotiating_connections: BTreeSet, +} + +impl From for BootstoreStatus { + fn from(value: bootstore::schemes::v0::Status) -> Self { + BootstoreStatus { + fsm_ledger_generation: value.fsm_ledger_generation, + network_config_ledger_generation: value + .network_config_ledger_generation, + fsm_state: value.fsm_state.to_string(), + peers: value.peers, + established_connections: value + .connections + .into_iter() + .map(EstablishedConnection::from) + .collect(), + accepted_connections: value.accepted_connections, + negotiating_connections: value.negotiating_connections, + } + } +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct EstablishedConnection { + pub baseboard: Baseboard, + pub addr: SocketAddrV6, +} + +impl From<(Baseboard, SocketAddrV6)> for EstablishedConnection { + fn from(value: (Baseboard, SocketAddrV6)) -> Self { + EstablishedConnection { baseboard: value.0, addr: value.1 } + } +} diff --git a/sled-agent/types/src/disk.rs b/sled-agent/types/src/disk.rs new file mode 100644 index 0000000000..332f1a0c5c --- /dev/null +++ b/sled-agent/types/src/disk.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_common::api::internal::nexus::DiskRuntimeState; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Sent to a sled agent to establish the runtime state of a Disk +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct DiskEnsureBody { + /// Last runtime state of the Disk known to Nexus (used if the agent has + /// never seen this Disk before). + pub initial_runtime: DiskRuntimeState, + /// Requested runtime state of the Disk + pub target: DiskStateRequested, +} + +/// Used to request a Disk state change +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] +#[serde(rename_all = "lowercase", tag = "state", content = "instance")] +pub enum DiskStateRequested { + Detached, + Attached(Uuid), + Destroyed, + Faulted, +} + +impl DiskStateRequested { + /// Returns whether the requested state is attached to an Instance or not. + pub fn is_attached(&self) -> bool { + match self { + DiskStateRequested::Detached => false, + DiskStateRequested::Destroyed => false, + DiskStateRequested::Faulted => false, + + DiskStateRequested::Attached(_) => true, + } + } +} diff --git a/sled-agent/types/src/firewall_rules.rs b/sled-agent/types/src/firewall_rules.rs new file mode 100644 index 0000000000..d7cb22f976 --- /dev/null +++ b/sled-agent/types/src/firewall_rules.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_common::api::{ + external, internal::shared::ResolvedVpcFirewallRule, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Update firewall rules for a VPC +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct VpcFirewallRulesEnsureBody { + pub vni: external::Vni, + pub rules: Vec, +} diff --git a/sled-agent/types/src/instance.rs b/sled-agent/types/src/instance.rs new file mode 100644 index 0000000000..0753e273dc --- /dev/null +++ b/sled-agent/types/src/instance.rs @@ -0,0 +1,172 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common instance-related types. + +use std::{ + fmt, + net::{IpAddr, SocketAddr}, +}; + +use omicron_common::api::internal::{ + nexus::{ + InstanceProperties, InstanceRuntimeState, SledInstanceState, + VmmRuntimeState, + }, + shared::{ + DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, SourceNatConfig, + }, +}; +use omicron_uuid_kinds::PropolisUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// The body of a request to ensure that an instance and VMM are known to a sled +/// agent. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstanceEnsureBody { + /// A description of the instance's virtual hardware and the initial runtime + /// state this sled agent should store for this incarnation of the instance. + pub hardware: InstanceHardware, + + /// The instance runtime state for the instance being registered. + pub instance_runtime: InstanceRuntimeState, + + /// The initial VMM runtime state for the VMM being registered. + pub vmm_runtime: VmmRuntimeState, + + /// The ID of the VMM being registered. This may not be the active VMM ID in + /// the instance runtime state (e.g. if the new VMM is going to be a + /// migration target). 
+ pub propolis_id: PropolisUuid, + + /// The address at which this VMM should serve a Propolis server API. + pub propolis_addr: SocketAddr, + + /// Metadata used to track instance statistics. + pub metadata: InstanceMetadata, +} + +/// Describes the instance hardware. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct InstanceHardware { + pub properties: InstanceProperties, + pub nics: Vec, + pub source_nat: SourceNatConfig, + /// Zero or more external IP addresses (either floating or ephemeral), + /// provided to an instance to allow inbound connectivity. + pub ephemeral_ip: Option, + pub floating_ips: Vec, + pub firewall_rules: Vec, + pub dhcp_config: DhcpConfig, + // TODO: replace `propolis_client::*` with locally-modeled request type + pub disks: Vec, + pub cloud_init_bytes: Option, +} + +/// Metadata used to track statistics about an instance. +/// +// NOTE: The instance ID is not here, since it's already provided in other +// pieces of the instance-related requests. It is pulled from there when +// publishing metrics for the instance. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct InstanceMetadata { + pub silo_id: Uuid, + pub project_id: Uuid, +} + +/// The body of a request to move a previously-ensured instance into a specific +/// runtime state. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstancePutStateBody { + /// The state into which the instance should be driven. + pub state: InstanceStateRequested, +} + +/// The response sent from a request to move an instance into a specific runtime +/// state. +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct InstancePutStateResponse { + /// The current runtime state of the instance after handling the request to + /// change its state. If the instance's state did not change, this field is + /// `None`. + pub updated_runtime: Option, +} + +/// Requestable running state of an Instance. 
+/// +/// A subset of [`omicron_common::api::external::InstanceState`]. +#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceStateRequested { + /// Run this instance by migrating in from a previous running incarnation of + /// the instance. + MigrationTarget(InstanceMigrationTargetParams), + /// Start the instance if it is not already running. + Running, + /// Stop the instance. + Stopped, + /// Immediately reset the instance, as though it had stopped and immediately + /// began to run again. + Reboot, +} + +impl fmt::Display for InstanceStateRequested { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.label()) + } +} + +impl InstanceStateRequested { + fn label(&self) -> &str { + match self { + InstanceStateRequested::MigrationTarget(_) => "migrating in", + InstanceStateRequested::Running => "running", + InstanceStateRequested::Stopped => "stopped", + InstanceStateRequested::Reboot => "reboot", + } + } + + /// Returns true if the state represents a stopped Instance. + pub fn is_stopped(&self) -> bool { + match self { + InstanceStateRequested::MigrationTarget(_) => false, + InstanceStateRequested::Running => false, + InstanceStateRequested::Stopped => true, + InstanceStateRequested::Reboot => false, + } + } +} + +/// The response sent from a request to unregister an instance. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstanceUnregisterResponse { + /// The current state of the instance after handling the request to + /// unregister it. If the instance's state did not change, this field is + /// `None`. + pub updated_runtime: Option, +} + +/// Parameters used when directing Propolis to initialize itself via live +/// migration. +#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceMigrationTargetParams { + /// The Propolis ID of the migration source. 
+ pub src_propolis_id: Uuid, + + /// The address of the Propolis server that will serve as the migration + /// source. + pub src_propolis_addr: SocketAddr, +} + +/// Used to dynamically update external IPs attached to an instance. +#[derive( + Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, +)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceExternalIpBody { + Ephemeral(IpAddr), + Floating(IpAddr), +} diff --git a/sled-agent/types/src/lib.rs b/sled-agent/types/src/lib.rs index 12e8f049f9..47e1535ade 100644 --- a/sled-agent/types/src/lib.rs +++ b/sled-agent/types/src/lib.rs @@ -4,6 +4,14 @@ //! Common types for sled-agent. +pub mod boot_disk; +pub mod bootstore; +pub mod disk; pub mod early_networking; +pub mod firewall_rules; +pub mod instance; pub mod rack_init; pub mod rack_ops; +pub mod sled; +pub mod time_sync; +pub mod zone_bundle; diff --git a/sled-agent/types/src/sled.rs b/sled-agent/types/src/sled.rs new file mode 100644 index 0000000000..37a064bdc9 --- /dev/null +++ b/sled-agent/types/src/sled.rs @@ -0,0 +1,219 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types related to operating on sleds. + +use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; + +use async_trait::async_trait; +use omicron_common::{ + address::{self, Ipv6Subnet, SLED_PREFIX}, + ledger::Ledgerable, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sha3::{Digest, Sha3_256}; +use uuid::Uuid; + +/// A representation of a Baseboard ID as used in the inventory subsystem +/// This type is essentially the same as a `Baseboard` except it doesn't have a +/// revision or HW type (Gimlet, PC, Unknown). 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct BaseboardId { + /// Oxide Part Number + pub part_number: String, + /// Serial number (unique for a given part number) + pub serial_number: String, +} + +/// A request to Add a given sled after rack initialization has occurred +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct AddSledRequest { + pub sled_id: BaseboardId, + pub start_request: StartSledAgentRequest, +} + +/// Configuration information for launching a Sled Agent. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequest { + /// The current generation number of data as stored in CRDB. + /// + /// The initial generation is set during RSS time and then only mutated + /// by Nexus. For now, we don't actually anticipate mutating this data, + /// but we leave open the possibility. + pub generation: u64, + + // Which version of the data structure do we have. This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: StartSledAgentRequestBody, +} + +impl StartSledAgentRequest { + pub fn sled_address(&self) -> SocketAddrV6 { + address::get_sled_address(self.body.subnet) + } + + pub fn switch_zone_ip(&self) -> Ipv6Addr { + address::get_switch_zone_address(self.body.subnet) + } + + /// Compute the sha3_256 digest of `self.body.rack_id` to use as a `salt` + /// for disk encryption. We don't want to include other values that are + /// consistent across sleds as it would prevent us from moving drives + /// between sleds. 
+ pub fn hash_rack_id(&self) -> [u8; 32] { + // We know the unwrap succeeds as a Sha3_256 digest is 32 bytes + Sha3_256::digest(self.body.rack_id.as_bytes()) + .as_slice() + .try_into() + .unwrap() + } +} + +#[async_trait] +impl Ledgerable for StartSledAgentRequest { + fn is_newer_than(&self, other: &Self) -> bool { + self.generation > other.generation + } + + fn generation_bump(&mut self) { + // DO NOTHING! + // + // Generation bumps must only ever come from nexus and will be encoded + // in the struct itself + } + + // Attempt to deserialize the v1 or v0 version and return + // the v1 version. + fn deserialize( + s: &str, + ) -> Result { + // Try to deserialize the latest version of the data structure (v1). If + // that succeeds we are done. + if let Ok(val) = serde_json::from_str::(s) { + return Ok(val); + } + + // We don't have the latest version. Try to deserialize v0 and then + // convert it to the latest version. + let v0 = serde_json::from_str::(s)?.request; + Ok(v0.into()) + } +} + +/// This is the actual app level data of `StartSledAgentRequest` +/// +/// We nest it below the "header" of `generation` and `schema_version` so that +/// we can perform partial deserialization of `StartSledAgentRequest` to only read +/// the header and defer deserialization of the body once we know the schema +/// version. This is possible via the use of [`serde_json::value::RawValue`] in +/// future (post-v1) deserialization paths. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequestBody { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + + /// Use trust quorum for key generation + pub use_trust_quorum: bool, + + /// Is this node an LRTQ learner node? + /// + /// We only put the node into learner mode if `use_trust_quorum` is also + /// true. + pub is_lrtq_learner: bool, + + /// Portion of the IP space to be managed by the Sled Agent. 
+ pub subnet: Ipv6Subnet, +} + +/// The version of `StartSledAgentRequest` we originally shipped with. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequestV0 { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + + /// The external NTP servers to use + pub ntp_servers: Vec, + + /// The external DNS servers to use + pub dns_servers: Vec, + + /// Use trust quorum for key generation + pub use_trust_quorum: bool, + + // Note: The order of these fields is load bearing, because we serialize + // `SledAgentRequest`s as toml. `subnet` serializes as a TOML table, so it + // must come after non-table fields. + /// Portion of the IP space to be managed by the Sled Agent. + pub subnet: Ipv6Subnet, +} + +impl From for StartSledAgentRequest { + fn from(v0: StartSledAgentRequestV0) -> Self { + StartSledAgentRequest { + generation: 0, + schema_version: 1, + body: StartSledAgentRequestBody { + id: v0.id, + rack_id: v0.rack_id, + use_trust_quorum: v0.use_trust_quorum, + is_lrtq_learner: false, + subnet: v0.subnet, + }, + } + } +} + +// A wrapper around StartSledAgentRequestV0 that was used +// for the ledger format. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +struct PersistentSledAgentRequest { + request: StartSledAgentRequestV0, +} + +#[cfg(test)] +mod tests { + use std::net::Ipv6Addr; + + use super::*; + + #[test] + fn serialize_start_sled_agent_v0_deserialize_v1() { + let v0 = PersistentSledAgentRequest { + request: StartSledAgentRequestV0 { + id: Uuid::new_v4(), + rack_id: Uuid::new_v4(), + ntp_servers: vec![String::from("test.pool.example.com")], + dns_servers: vec!["1.1.1.1".parse().unwrap()], + use_trust_quorum: false, + subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST), + }, + }; + let serialized = serde_json::to_string(&v0).unwrap(); + let expected = StartSledAgentRequest { + generation: 0, + schema_version: 1, + body: StartSledAgentRequestBody { + id: v0.request.id, + rack_id: v0.request.rack_id, + use_trust_quorum: v0.request.use_trust_quorum, + is_lrtq_learner: false, + subnet: v0.request.subnet, + }, + }; + + let actual: StartSledAgentRequest = + Ledgerable::deserialize(&serialized).unwrap(); + assert_eq!(expected, actual); + } +} diff --git a/sled-agent/types/src/time_sync.rs b/sled-agent/types/src/time_sync.rs new file mode 100644 index 0000000000..7ac9ded636 --- /dev/null +++ b/sled-agent/types/src/time_sync.rs @@ -0,0 +1,30 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::IpAddr; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct TimeSync { + /// The synchronization state of the sled, true when the system clock + /// and the NTP clock are in sync (to within a small window). + pub sync: bool, + /// The NTP reference ID. + pub ref_id: u32, + /// The NTP reference IP address. + pub ip_addr: IpAddr, + /// The NTP stratum (our upstream's stratum plus one). 
+ pub stratum: u8, + /// The NTP reference time (i.e. what chrony thinks the current time is, not + /// necessarily the current system time). + pub ref_time: f64, + // This could be f32, but there is a problem with progenitor/typify + // where, although the f32 correctly becomes "float" (and not "double") in + // the API spec, that "float" gets converted back to f64 when generating + // the client. + /// The current offset between the NTP clock and system clock. + pub correction: f64, +} diff --git a/sled-agent/types/src/zone_bundle.rs b/sled-agent/types/src/zone_bundle.rs new file mode 100644 index 0000000000..f7a388771d --- /dev/null +++ b/sled-agent/types/src/zone_bundle.rs @@ -0,0 +1,529 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types related to zone bundles. + +use std::{cmp::Ordering, collections::HashSet, time::Duration}; + +use camino::Utf8PathBuf; +use chrono::{DateTime, Utc}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use uuid::Uuid; + +/// An identifier for a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleId { + /// The name of the zone this bundle is derived from. + pub zone_name: String, + /// The ID for this bundle itself. + pub bundle_id: Uuid, +} + +/// The reason or cause for a zone bundle, i.e., why it was created. +// +// NOTE: The ordering of the enum variants is important, and should not be +// changed without careful consideration. +// +// The ordering is used when deciding which bundles to remove automatically. In +// addition to time, the cause is used to sort bundles, so changing the variant +// order will change that priority. 
+#[derive( + Clone, + Copy, + Debug, + Default, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "snake_case")] +#[non_exhaustive] +pub enum ZoneBundleCause { + /// Some other, unspecified reason. + #[default] + Other, + /// A zone bundle taken when a sled agent finds a zone that it does not + /// expect to be running. + UnexpectedZone, + /// An instance zone was terminated. + TerminatedInstance, + /// Generated in response to an explicit request to the sled agent. + ExplicitRequest, +} + +/// Metadata about a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleMetadata { + /// Identifier for this zone bundle + pub id: ZoneBundleId, + /// The time at which this zone bundle was created. + pub time_created: DateTime, + /// A version number for this zone bundle. + pub version: u8, + /// The reason or cause a bundle was created. + pub cause: ZoneBundleCause, +} + +impl ZoneBundleMetadata { + pub const VERSION: u8 = 0; + + /// Create a new set of metadata for the provided zone. + pub fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { + Self { + id: ZoneBundleId { + zone_name: zone_name.to_string(), + bundle_id: Uuid::new_v4(), + }, + time_created: Utc::now(), + version: Self::VERSION, + cause, + } + } +} + +/// A dimension along which bundles can be sorted, to determine priority. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Serialize, + Ord, + PartialEq, + PartialOrd, +)] +#[serde(rename_all = "snake_case")] +pub enum PriorityDimension { + /// Sorting by time, with older bundles having lower priority. + Time, + /// Sorting by the cause for creating the bundle. + Cause, + // TODO-completeness: Support zone or zone type (e.g., service vs instance)? +} + +/// The priority order for bundles during cleanup. 
+/// +/// Bundles are sorted along the dimensions in [`PriorityDimension`], with each +/// dimension appearing exactly once. During cleanup, lesser-priority bundles +/// are pruned first, to maintain the dataset quota. Note that bundles are +/// sorted by each dimension in the order in which they appear, with each +/// dimension having higher priority than the next. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct PriorityOrder([PriorityDimension; PriorityOrder::EXPECTED_SIZE]); + +impl std::ops::Deref for PriorityOrder { + type Target = [PriorityDimension; PriorityOrder::EXPECTED_SIZE]; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Default for PriorityOrder { + fn default() -> Self { + Self::DEFAULT + } +} + +impl PriorityOrder { + // NOTE: Must match the number of variants in `PriorityDimension`. + const EXPECTED_SIZE: usize = 2; + const DEFAULT: Self = + Self([PriorityDimension::Cause, PriorityDimension::Time]); + + /// Construct a new priority order. + /// + /// This requires that each dimension appear exactly once. + pub fn new( + dims: &[PriorityDimension], + ) -> Result { + if dims.len() != Self::EXPECTED_SIZE { + return Err(PriorityOrderCreateError::WrongDimensionCount( + dims.len(), + )); + } + let mut seen = HashSet::new(); + for dim in dims.iter() { + if !seen.insert(dim) { + return Err(PriorityOrderCreateError::DuplicateFound(*dim)); + } + } + Ok(Self(dims.try_into().unwrap())) + } + + /// Get the priority order as a slice. + pub fn as_slice(&self) -> &[PriorityDimension] { + &self.0 + } + + /// Order zone bundle info according to the contained priority. + /// + /// We sort the info by each dimension, in the order in which it appears. + /// That means earlier dimensions have higher priority than later ones. 
+ pub fn compare_bundles( + &self, + lhs: &ZoneBundleInfo, + rhs: &ZoneBundleInfo, + ) -> Ordering { + for dim in self.0.iter() { + let ord = match dim { + PriorityDimension::Cause => { + lhs.metadata.cause.cmp(&rhs.metadata.cause) + } + PriorityDimension::Time => { + lhs.metadata.time_created.cmp(&rhs.metadata.time_created) + } + }; + if matches!(ord, Ordering::Equal) { + continue; + } + return ord; + } + Ordering::Equal + } +} + +/// A period on which bundles are automatically cleaned up. +#[derive( + Clone, Copy, Deserialize, JsonSchema, PartialEq, PartialOrd, Serialize, +)] +pub struct CleanupPeriod(Duration); + +impl Default for CleanupPeriod { + fn default() -> Self { + Self(Duration::from_secs(600)) + } +} + +impl CleanupPeriod { + /// The minimum supported cleanup period. + pub const MIN: Self = Self(Duration::from_secs(60)); + + /// The maximum supported cleanup period. + pub const MAX: Self = Self(Duration::from_secs(60 * 60 * 24)); + + /// Construct a new cleanup period, checking that it's valid. + pub fn new(duration: Duration) -> Result { + if duration >= Self::MIN.as_duration() + && duration <= Self::MAX.as_duration() + { + Ok(Self(duration)) + } else { + Err(CleanupPeriodCreateError::OutOfBounds(duration)) + } + } + + /// Return the period as a duration. + pub const fn as_duration(&self) -> Duration { + self.0 + } +} + +impl TryFrom for CleanupPeriod { + type Error = CleanupPeriodCreateError; + + fn try_from(duration: Duration) -> Result { + Self::new(duration) + } +} + +impl std::fmt::Debug for CleanupPeriod { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct ZoneBundleInfo { + /// The raw metadata for the bundle + pub metadata: ZoneBundleMetadata, + /// The full path to the bundle + pub path: Utf8PathBuf, + /// The number of bytes consumed on disk by the bundle + pub bytes: u64, +} + +/// The portion of a debug dataset used for zone bundles. 
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BundleUtilization { + /// The total dataset quota, in bytes. + pub dataset_quota: u64, + /// The total number of bytes available for zone bundles. + /// + /// This is `dataset_quota` multiplied by the context's storage limit. + pub bytes_available: u64, + /// Total bundle usage, in bytes. + pub bytes_used: u64, +} + +/// Context provided for the zone bundle cleanup task. +#[derive( + Clone, Copy, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize, +)] +pub struct CleanupContext { + /// The period on which automatic checks and cleanup is performed. + pub period: CleanupPeriod, + /// The limit on the dataset quota available for zone bundles. + pub storage_limit: StorageLimit, + /// The priority ordering for keeping old bundles. + pub priority: PriorityOrder, +} + +/// The count of bundles / bytes removed during a cleanup operation. +#[derive(Clone, Copy, Debug, Default, Deserialize, JsonSchema, Serialize)] +pub struct CleanupCount { + /// The number of bundles removed. + pub bundles: u64, + /// The number of bytes removed. + pub bytes: u64, +} + +/// The limit on space allowed for zone bundles, as a percentage of the overall +/// dataset's quota. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + JsonSchema, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct StorageLimit(u8); + +impl std::fmt::Display for StorageLimit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}%", self.as_u8()) + } +} + +impl Default for StorageLimit { + fn default() -> Self { + StorageLimit(25) + } +} + +impl StorageLimit { + /// Minimum percentage of dataset quota supported. + pub const MIN: Self = Self(0); + + /// Maximum percentage of dataset quota supported. + pub const MAX: Self = Self(50); + + /// Construct a new limit allowed for zone bundles. + /// + /// This should be expressed as a percentage, in the range (Self::MIN, + /// Self::MAX]. 
+ pub const fn new(percentage: u8) -> Result { + if percentage > Self::MIN.0 && percentage <= Self::MAX.0 { + Ok(Self(percentage)) + } else { + Err(StorageLimitCreateError::OutOfBounds(percentage)) + } + } + + /// Return the contained quota percentage. + pub const fn as_u8(&self) -> u8 { + self.0 + } + + // Compute the number of bytes available from a dataset quota, in bytes. + pub const fn bytes_available(&self, dataset_quota: u64) -> u64 { + (dataset_quota * self.as_u8() as u64) / 100 + } +} + +#[derive(Debug, Error)] +pub enum PriorityOrderCreateError { + #[error("expected exactly {n} dimensions, found {0}", n = PriorityOrder::EXPECTED_SIZE)] + WrongDimensionCount(usize), + #[error("duplicate element found in priority ordering: {0:?}")] + DuplicateFound(PriorityDimension), +} + +#[derive(Debug, Error)] +pub enum CleanupPeriodCreateError { + #[error( + "invalid cleanup period ({0:?}): must be \ + between {min:?} and {max:?}, inclusive", + min = CleanupPeriod::MIN, + max = CleanupPeriod::MAX, + )] + OutOfBounds(Duration), +} + +#[derive(Debug, Error)] +pub enum StorageLimitCreateError { + #[error("invalid storage limit ({0}): must be expressed as a percentage in ({min}, {max}]", + min = StorageLimit::MIN.0, + max = StorageLimit::MAX.0, + )] + OutOfBounds(u8), +} + +#[cfg(test)] +mod tests { + use chrono::TimeZone; + + use super::*; + + #[test] + fn test_sort_zone_bundle_cause() { + use ZoneBundleCause::*; + let mut original = + [ExplicitRequest, Other, TerminatedInstance, UnexpectedZone]; + let expected = + [Other, UnexpectedZone, TerminatedInstance, ExplicitRequest]; + original.sort(); + assert_eq!(original, expected); + } + + #[test] + fn test_priority_dimension() { + assert!(PriorityOrder::new(&[]).is_err()); + assert!(PriorityOrder::new(&[PriorityDimension::Cause]).is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Cause + ]) + .is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + 
PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_err()); + + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_ok()); + assert_eq!( + PriorityOrder::new(PriorityOrder::default().as_slice()).unwrap(), + PriorityOrder::default() + ); + } + + #[test] + fn test_storage_limit_bytes_available() { + let pct = StorageLimit(1); + assert_eq!(pct.bytes_available(100), 1); + assert_eq!(pct.bytes_available(1000), 10); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(100), 100); + assert_eq!(pct.bytes_available(1000), 1000); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(99), 99); + + let pct = StorageLimit(99); + assert_eq!(pct.bytes_available(1), 0); + + // Test non-power of 10. + let pct = StorageLimit(25); + assert_eq!(pct.bytes_available(32768), 8192); + } + + #[test] + fn test_compare_bundles() { + use PriorityDimension::*; + let time_first = PriorityOrder([Time, Cause]); + let cause_first = PriorityOrder([Cause, Time]); + + fn make_info( + year: i32, + month: u32, + day: u32, + cause: ZoneBundleCause, + ) -> ZoneBundleInfo { + ZoneBundleInfo { + metadata: ZoneBundleMetadata { + id: ZoneBundleId { + zone_name: String::from("oxz_whatever"), + bundle_id: uuid::Uuid::new_v4(), + }, + time_created: Utc + .with_ymd_and_hms(year, month, day, 0, 0, 0) + .single() + .unwrap(), + cause, + version: 0, + }, + path: Utf8PathBuf::from("/some/path"), + bytes: 0, + } + } + + let info = [ + make_info(2020, 1, 2, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 2, ZoneBundleCause::ExplicitRequest), + make_info(2020, 1, 1, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 1, ZoneBundleCause::ExplicitRequest), + ]; + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| time_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, old/explicit, new/terminated, new/explicit] + let expected = [ + info[2].clone(), + info[3].clone(), + info[0].clone(), 
+ info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by time-then-cause failed" + ); + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| cause_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, new/terminated, old/explicit, new/explicit] + let expected = [ + info[2].clone(), + info[0].clone(), + info[3].clone(), + info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by cause-then-time failed" + ); + } +} diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 8168f32cea..3cbf00530a 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -10,7 +10,7 @@ use crate::config::MountConfig; use crate::dataset::{DatasetName, CONFIG_DATASET}; use crate::disk::RawDisk; use crate::error::Error; -use crate::resources::{AllDisks, DisksManagementResult, StorageResources}; +use crate::resources::{AllDisks, StorageResources}; use camino::Utf8PathBuf; use debug_ignore::DebugIgnore; use futures::future::FutureExt; @@ -18,7 +18,8 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDisksConfig, + DiskIdentity, DiskVariant, DisksManagementResult, + OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; use slog::{info, o, warn, Logger}; @@ -826,10 +827,10 @@ mod tests { use crate::dataset::DatasetType; use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; - use crate::resources::DiskManagementError; use super::*; use camino_tempfile::tempdir_in; + use omicron_common::disk::DiskManagementError; use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; use sled_hardware::DiskFirmware; diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 98d6398d8b..425aafb12d 100644 --- a/sled-storage/src/resources.rs +++ 
b/sled-storage/src/resources.rs @@ -14,12 +14,10 @@ use illumos_utils::zpool::{PathInPool, ZpoolName}; use key_manager::StorageKeyRequester; use omicron_common::api::external::Generation; use omicron_common::disk::{ - DiskIdentity, DiskVariant, OmicronPhysicalDiskConfig, + DiskIdentity, DiskManagementError, DiskManagementStatus, DiskVariant, + DisksManagementResult, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, }; -use omicron_uuid_kinds::ZpoolUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use sled_hardware::DiskFirmware; use slog::{info, o, warn, Logger}; use std::collections::BTreeMap; @@ -32,76 +30,6 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum DiskManagementError { - #[error("Disk requested by control plane, but not found on device")] - NotFound, - - #[error("Expected zpool UUID of {expected}, but saw {observed}")] - ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, - - #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] - KeyManager(String), - - #[error("Other error starting disk management: {0}")] - Other(String), -} - -impl DiskManagementError { - fn retryable(&self) -> bool { - match self { - DiskManagementError::KeyManager(_) => true, - _ => false, - } - } -} - -/// Identifies how a single disk management operation may have succeeded or -/// failed. -#[derive(Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub struct DiskManagementStatus { - pub identity: DiskIdentity, - pub err: Option, -} - -/// The result from attempting to manage underlying disks. -/// -/// This is more complex than a simple "Error" type because it's possible -/// for some disks to be initialized correctly, while others can fail. 
-/// -/// This structure provides a mechanism for callers to learn about partial -/// failures, and handle them appropriately on a per-disk basis. -#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] -pub struct DisksManagementResult { - pub status: Vec, -} - -impl DisksManagementResult { - pub fn has_error(&self) -> bool { - for status in &self.status { - if status.err.is_some() { - return true; - } - } - false - } - - pub fn has_retryable_error(&self) -> bool { - for status in &self.status { - if let Some(err) = &status.err { - if err.retryable() { - return true; - } - } - } - false - } -} - // The Sled Agent is responsible for both observing disks and managing them at // the request of the broader control plane. This enum encompasses that duality, // by representing all disks that can exist, managed or not. diff --git a/wicketd/src/installinator_progress.rs b/wicketd/src/installinator_progress.rs index 7d076e7b0e..8f8465652e 100644 --- a/wicketd/src/installinator_progress.rs +++ b/wicketd/src/installinator_progress.rs @@ -295,10 +295,10 @@ mod tests { use installinator_common::{ InstallinatorCompletionMetadata, InstallinatorComponent, - InstallinatorSpec, InstallinatorStepId, M2Slot, StepEvent, - StepEventKind, StepInfo, StepInfoWithMetadata, StepOutcome, - WriteOutput, + InstallinatorSpec, InstallinatorStepId, StepEvent, StepEventKind, + StepInfo, StepInfoWithMetadata, StepOutcome, WriteOutput, }; + use omicron_common::disk::M2Slot; use omicron_test_utils::dev::test_setup_log; use schemars::JsonSchema; use update_engine::ExecutionId; diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index dee22f70c0..9980359253 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -35,9 +35,9 @@ use gateway_messages::ROT_PAGE_SIZE; use hubtools::RawHubrisArchive; use 
installinator_common::InstallinatorCompletionMetadata; use installinator_common::InstallinatorSpec; -use installinator_common::M2Slot; use installinator_common::WriteOutput; use omicron_common::api::external::SemverVersion; +use omicron_common::disk::M2Slot; use omicron_common::update::ArtifactHash; use slog::error; use slog::info;