From d2e9582c11121a856a6ee266a29902e7a773236b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Jun 2023 09:16:20 -0700 Subject: [PATCH 01/57] Refactor test setup as builder, pull DNS server out of simulated sled agent --- Cargo.lock | 2 + common/src/nexus_config.rs | 48 +- dev-tools/src/bin/omicron-dev.rs | 8 +- dns-server/Cargo.toml | 4 +- dns-server/src/lib.rs | 81 ++ internal-dns/src/config.rs | 1 + nexus/examples/config.toml | 5 +- nexus/src/app/background/init.rs | 2 +- nexus/src/context.rs | 26 +- nexus/src/lib.rs | 11 +- nexus/test-interface/src/lib.rs | 5 +- nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/lib.rs | 824 +++++++++++++----- nexus/tests/config.test.toml | 5 +- nexus/tests/integration_tests/certificates.rs | 2 +- nexus/tests/integration_tests/silos.rs | 5 +- sled-agent/src/bin/sled-agent-sim.rs | 8 +- sled-agent/src/services.rs | 8 +- sled-agent/src/sim/mod.rs | 2 +- sled-agent/src/sim/server.rs | 424 +++++---- sled-agent/src/sim/sled_agent.rs | 11 + sled-agent/src/sim/storage.rs | 14 + 22 files changed, 1003 insertions(+), 494 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b8f1a144b..abe093f967 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1885,6 +1885,7 @@ dependencies = [ "slog-term", "subprocess", "tempdir", + "tempfile", "thiserror", "tokio", "toml 0.7.4", @@ -4117,6 +4118,7 @@ dependencies = [ "chrono", "crucible-agent-client", "dns-server", + "dns-service-client 0.1.0", "dropshot", "headers", "http", diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index b5e87fdbf6..3c4a83fd1f 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -105,6 +105,21 @@ pub enum Database { }, } +/// The mechanism Nexus should use to contact the internal DNS servers. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InternalDns { + /// Nexus should infer the DNS server addresses from this subnet. 
+ /// + /// This is a more common usage for production. + FromSubnet { subnet: Ipv6Subnet }, + /// Nexus should use precisely the following address. + /// + /// This is less desirable in production, but can give value + /// in test scenarios. + FromAddress { address: SocketAddr }, +} + #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct DeploymentConfig { /// Uuid of the Nexus instance @@ -115,8 +130,9 @@ pub struct DeploymentConfig { pub dropshot_external: ConfigDropshotWithTls, /// Dropshot configuration for internal API server. pub dropshot_internal: ConfigDropshot, - /// Portion of the IP space to be managed by the Rack. - pub subnet: Ipv6Subnet, + /// Describes how Nexus should find internal DNS servers + /// for bootstrapping. + pub internal_dns: InternalDns, /// DB configuration. pub database: Database, } @@ -413,7 +429,7 @@ mod test { use crate::nexus_config::{ BackgroundTaskConfig, ConfigDropshotWithTls, Database, DeploymentConfig, DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, - LoadErrorKind, + InternalDns, LoadErrorKind, }; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -540,8 +556,9 @@ mod test { [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" + [deployment.internal_dns] + type = "from_subnet" + subnet.net = "::/56" [deployment.database] type = "from_dns" [dendrite] @@ -583,7 +600,11 @@ mod test { .unwrap(), ..Default::default() }, - subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + internal_dns: InternalDns::FromSubnet { + subnet: Ipv6Subnet::::new( + Ipv6Addr::LOCALHOST + ) + }, database: Database::FromDns, }, pkg: PackageConfig { @@ -657,8 +678,9 @@ mod test { [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" + [deployment.internal_dns] + type = "from_subnet" + subnet.net = "::/56" [deployment.database] type = "from_dns" [dendrite] @@ -710,8 +732,9 @@ 
mod test { [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" + [deployment.internal_dns] + type = "from_subnet" + subnet.net = "::/56" [deployment.database] type = "from_dns" "##, @@ -765,8 +788,9 @@ mod test { [deployment.dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 - [deployment.subnet] - net = "::/56" + [deployment.internal_dns] + type = "from_subnet" + subnet.net = "::/56" [deployment.database] type = "from_dns" "##, diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/src/bin/omicron-dev.rs index 84f446e98a..0589f12a1d 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/src/bin/omicron-dev.rs @@ -370,16 +370,16 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { ); println!( "omicron-dev: external DNS HTTP: http://{}", - cptestctx.external_dns_config_server.local_addr() + cptestctx.external_dns.dropshot_server.local_addr() ); println!( "omicron-dev: external DNS: {}", - cptestctx.external_dns_server.local_address() + cptestctx.external_dns.server.local_address() ); println!( "omicron-dev: e.g. 
`dig @{} -p {} {}.sys.{}`", - cptestctx.external_dns_server.local_address().ip(), - cptestctx.external_dns_server.local_address().port(), + cptestctx.external_dns.server.local_address().ip(), + cptestctx.external_dns.server.local_address().port(), cptestctx.silo_name, cptestctx.external_dns_zone_name, ); diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index b400a5f25a..28b209efe4 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -21,13 +21,15 @@ slog.workspace = true slog-term.workspace = true slog-async.workspace = true slog-envlogger.workspace = true +tempfile.workspace = true tempdir.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true +trust-dns-client.workspace = true trust-dns-proto.workspace = true +trust-dns-resolver.workspace = true trust-dns-server.workspace = true -trust-dns-client.workspace = true uuid.workspace = true [dev-dependencies] diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index 3c28369471..bc697d98c9 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -49,6 +49,11 @@ pub mod storage; use anyhow::{anyhow, Context}; use slog::o; +use trust_dns_resolver::config::NameServerConfig; +use trust_dns_resolver::config::Protocol; +use trust_dns_resolver::config::ResolverConfig; +use trust_dns_resolver::config::ResolverOpts; +use trust_dns_resolver::TokioAsyncResolver; /// Starts both the HTTP and DNS servers over a given store. pub async fn start_servers( @@ -86,3 +91,79 @@ pub async fn start_servers( Ok((dns_server, dropshot_server)) } + +/// An in-memory DNS server running on localhost. +/// +/// Intended to be used for testing only. 
+pub struct InMemoryServer { + /// Server storage dir + pub storage_dir: tempfile::TempDir, + /// DNS server + pub server: dns_server::ServerHandle, + /// Dropshot server + pub dropshot_server: dropshot::HttpServer, +} + +impl InMemoryServer { + pub async fn new(log: &slog::Logger) -> Result { + let storage_dir = tempfile::tempdir()?; + + let dns_log = log.new(o!("kind" => "dns")); + + let store = storage::Store::new( + log.new(o!("component" => "store")), + &storage::Config { + keep_old_generations: 3, + storage_path: storage_dir + .path() + .to_string_lossy() + .to_string() + .into(), + }, + ) + .context("initializing DNS storage")?; + + let (server, dropshot_server) = start_servers( + dns_log, + store, + &dns_server::Config { bind_address: "[::1]:0".parse().unwrap() }, + &dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + request_body_max_bytes: 4 * 1024 * 1024, + }, + ) + .await?; + Ok(Self { storage_dir, server, dropshot_server }) + } + + pub async fn initialize_with_config( + &self, + log: &slog::Logger, + dns_config: &dns_service_client::types::DnsConfigParams, + ) -> Result<(), anyhow::Error> { + let dns_config_client = dns_service_client::Client::new( + &format!("http://{}", self.dropshot_server.local_addr()), + log.clone(), + ); + dns_config_client + .dns_config_put(&dns_config) + .await + .context("initializing DNS")?; + Ok(()) + } + + pub async fn resolver(&self) -> Result { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { + socket_addr: *self.server.local_address(), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + let resolver = + TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) + .context("creating DNS resolver")?; + Ok(resolver) + } +} diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 5de19ae1cf..998a2c31b0 100644 --- a/internal-dns/src/config.rs +++ 
b/internal-dns/src/config.rs @@ -129,6 +129,7 @@ impl Host { /// /// This builder ensures that the constructed DNS data satisfies these /// assumptions. +#[derive(Clone)] pub struct DnsConfigBuilder { /// set of hosts of type "sled" that have been configured so far, mapping /// each sled's unique uuid to its sole IPv6 address on the control plane diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index a116b31a5e..fbcc4d4a8e 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -51,8 +51,9 @@ request_body_max_bytes = 1048576 bind_address = "[::1]:12221" request_body_max_bytes = 1048576 -[deployment.subnet] -net = "fd00:1122:3344:0100::/56" +[deployment.internal_dns] +type = "from_subnet" +subnet.net = "fd00:1122:3344:0100::/56" [deployment.database] # URL for connecting to the database diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 79de660083..00b6cbdecb 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -203,7 +203,7 @@ pub mod test { // the case because it was configured with version 1 when the simulated // sled agent started up. 
let initial_dns_dropshot_server = - &cptestctx.sled_agent.dns_dropshot_server; + &cptestctx.internal_dns.dropshot_server; let dns_config_client = dns_service_client::Client::new( &format!("http://{}", initial_dns_dropshot_server.local_addr()), cptestctx.logctx.log.clone(), diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 871f7ed2bb..08a99ea9a3 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -134,14 +134,24 @@ impl ServerContext { // nexus in dev for everyone // Set up DNS Client - let az_subnet = - Ipv6Subnet::::new(config.deployment.subnet.net().ip()); - info!(log, "Setting up resolver on subnet: {:?}", az_subnet); - let resolver = internal_dns::resolver::Resolver::new_from_subnet( - log.new(o!("component" => "DnsResolver")), - az_subnet, - ) - .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; + let resolver = match config.deployment.internal_dns { + nexus_config::InternalDns::FromSubnet { subnet } => { + let az_subnet = Ipv6Subnet::::new(subnet.net().ip()); + info!(log, "Setting up resolver on subnet: {:?}", az_subnet); + internal_dns::resolver::Resolver::new_from_subnet( + log.new(o!("component" => "DnsResolver")), + az_subnet, + ) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))? + } + nexus_config::InternalDns::FromAddress { address } => { + internal_dns::resolver::Resolver::new_from_addrs( + log.new(o!("component" => "DnsResolver")), + vec![address], + ) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))? 
+ } + }; // Set up DB pool let url = match &config.deployment.database { diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 9dbaebf4a6..6e84c5ccef 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,6 @@ pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; -use internal_dns::DnsConfigBuilder; use nexus_types::internal_api::params::ServiceKind; use omicron_common::address::IpRange; use slog::Logger; @@ -198,6 +197,8 @@ impl nexus_test_interface::NexusServer for Server { internal_server: InternalServer, config: &Config, services: Vec, + datasets: Vec, + internal_dns_zone_config: nexus_types::internal_api::params::DnsConfigParams, external_dns_zone_name: &str, recovery_silo: nexus_types::internal_api::params::RecoverySiloConfig, certs: Vec, @@ -239,10 +240,10 @@ impl nexus_test_interface::NexusServer for Server { config.deployment.rack_id, internal_api::params::RackInitializationRequest { services, - datasets: vec![], + datasets, internal_services_ip_pool_ranges, certs, - internal_dns_zone_config: DnsConfigBuilder::new().build(), + internal_dns_zone_config, external_dns_zone_name: external_dns_zone_name.to_owned(), recovery_silo, external_port_count: 1, @@ -264,10 +265,6 @@ impl nexus_test_interface::NexusServer for Server { self.apictx.nexus.get_internal_server_address().await.unwrap() } - async fn set_resolver(&self, resolver: internal_dns::resolver::Resolver) { - self.apictx.nexus.set_resolver(resolver).await - } - async fn upsert_crucible_dataset( &self, id: Uuid, diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 599c8baba6..2456f27684 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -46,10 +46,13 @@ pub trait NexusServer { log: &Logger, ) -> (Self::InternalServer, SocketAddr); + #[allow(clippy::too_many_arguments)] async fn start( internal_server: 
Self::InternalServer, config: &Config, services: Vec, + datasets: Vec, + internal_dns_config: nexus_types::internal_api::params::DnsConfigParams, external_dns_zone_name: &str, recovery_silo: nexus_types::internal_api::params::RecoverySiloConfig, tls_certificates: Vec, @@ -58,8 +61,6 @@ pub trait NexusServer { async fn get_http_server_external_address(&self) -> SocketAddr; async fn get_http_server_internal_address(&self) -> SocketAddr; - async fn set_resolver(&self, resolver: internal_dns::resolver::Resolver); - // Previously, as a dataset was created (within the sled agent), // we'd use an internal API from Nexus to record that the dataset // now exists. In other words, Sled Agent was in control, by telling diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index ce96b505be..db98a979c1 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -12,6 +12,7 @@ camino-tempfile.workspace = true chrono.workspace = true crucible-agent-client.workspace = true dns-server.workspace = true +dns-service-client.workspace = true dropshot.workspace = true headers.workspace = true http.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 29503c99f0..ecae9209c5 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -6,6 +6,7 @@ use anyhow::Context; use camino::Utf8Path; +use dns_service_client::types::DnsConfigParams; use dropshot::test_util::ClientTestContext; use dropshot::test_util::LogContext; use dropshot::ConfigDropshot; @@ -14,6 +15,9 @@ use dropshot::ConfigLoggingLevel; use nexus_test_interface::NexusServer; use nexus_types::external_api::params::UserId; use nexus_types::internal_api::params::Certificate; +use nexus_types::internal_api::params::DatasetCreateRequest; +use nexus_types::internal_api::params::DatasetKind; +use nexus_types::internal_api::params::DatasetPutRequest; use nexus_types::internal_api::params::RecoverySiloConfig; use 
nexus_types::internal_api::params::ServiceKind; use nexus_types::internal_api::params::ServiceNic; @@ -30,8 +34,7 @@ use omicron_test_utils::dev; use oximeter_collector::Oximeter; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; -use slog::o; -use slog::Logger; +use slog::{debug, o, Logger}; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; @@ -79,11 +82,10 @@ pub struct ControlPlaneTestContext { pub oximeter: Oximeter, pub producer: ProducerServer, pub dendrite: dev::dendrite::DendriteInstance, + pub external_dns_zone_name: String, - pub external_dns_server: dns_server::dns_server::ServerHandle, - pub external_dns_config_server: - dropshot::HttpServer, - pub external_dns_resolver: trust_dns_resolver::TokioAsyncResolver, + pub external_dns: dns_server::InMemoryServer, + pub internal_dns: dns_server::InMemoryServer, pub silo_name: Name, pub user_name: UserId, } @@ -138,128 +140,300 @@ pub async fn test_setup( .await } -pub async fn test_setup_with_config( - test_name: &str, - config: &mut omicron_common::nexus_config::Config, - sim_mode: sim::SimMode, - initial_cert: Option, -) -> ControlPlaneTestContext { - let start_time = chrono::Utc::now(); - let logctx = LogContext::new(test_name, &config.pkg.log); - let log = &logctx.log; +struct RackInitRequestBuilder { + services: Vec, + datasets: Vec, + internal_dns_config: internal_dns::DnsConfigBuilder, +} + +impl RackInitRequestBuilder { + fn new() -> Self { + Self { + services: vec![], + datasets: vec![], + internal_dns_config: internal_dns::DnsConfigBuilder::new(), + } + } + + // Keeps track of: + // - The "ServicePutRequest" (for handoff to Nexus) + // - The internal DNS configuration for this service + fn add_service( + &mut self, + address: SocketAddrV6, + kind: ServiceKind, + service_name: internal_dns::ServiceName, + sled_id: Uuid, + ) { + let zone_id = Uuid::new_v4(); + self.services.push(ServicePutRequest { + address, + 
kind, + service_id: Uuid::new_v4(), + sled_id, + zone_id: Some(zone_id), + }); + let zone = self + .internal_dns_config + .host_zone(zone_id, *address.ip()) + .expect("Failed to set up DNS for {kind}"); + self.internal_dns_config + .service_backend_zone(service_name, &zone, address.port()) + .expect("Failed to set up DNS for {kind}"); + } - // Start up CockroachDB. - let database = db::test_setup_database(log).await; + // Keeps track of: + // - The "DatasetPutRequest" (for handoff to Nexus) + // - The internal DNS configuration for this service + fn add_dataset( + &mut self, + zpool_id: Uuid, + dataset_id: Uuid, + address: SocketAddrV6, + kind: DatasetKind, + service_name: internal_dns::ServiceName, + ) { + self.datasets.push(DatasetCreateRequest { + zpool_id, + dataset_id, + request: DatasetPutRequest { address, kind }, + }); + let zone = self + .internal_dns_config + .host_zone(dataset_id, *address.ip()) + .expect("Failed to set up DNS for {kind}"); + self.internal_dns_config + .service_backend_zone(service_name, &zone, address.port()) + .expect("Failed to set up DNS for {kind}"); + } +} + +pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { + pub config: &'a mut omicron_common::nexus_config::Config, + rack_init_builder: RackInitRequestBuilder, + + pub start_time: chrono::DateTime, + pub logctx: LogContext, + + pub external_client: Option, + pub internal_client: Option, + + pub server: Option, + pub database: Option, + pub clickhouse: Option, + pub sled_agent_storage: Option, + pub sled_agent: Option, + pub oximeter: Option, + pub producer: Option, + pub dendrite: Option, + + // NOTE: Only exists after starting Nexus, until external Nexus is + // initialized. 
+ nexus_internal: Option<::InternalServer>, + nexus_internal_addr: Option, + + pub external_dns_zone_name: Option, + pub external_dns: Option, + pub internal_dns: Option, + + pub silo_name: Option, + pub user_name: Option, +} + +impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { + pub fn new( + test_name: &str, + config: &'a mut omicron_common::nexus_config::Config, + ) -> Self { + let start_time = chrono::Utc::now(); + let logctx = LogContext::new(test_name, &config.pkg.log); + + Self { + config, + rack_init_builder: RackInitRequestBuilder::new(), + start_time, + logctx, + external_client: None, + internal_client: None, + server: None, + database: None, + clickhouse: None, + sled_agent_storage: None, + sled_agent: None, + oximeter: None, + producer: None, + dendrite: None, + nexus_internal: None, + nexus_internal_addr: None, + external_dns_zone_name: None, + external_dns: None, + internal_dns: None, + silo_name: None, + user_name: None, + } + } + + pub async fn start_crdb(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting CRDB"); + + // Start up CockroachDB. + let database = db::test_setup_database(log).await; + + eprintln!("DB URL: {}", database.pg_config()); + let address = database + .pg_config() + .to_string() + .split("postgresql://root@") + .nth(1) + .expect("Malformed URL: Missing postgresql prefix") + .split('/') + .next() + .expect("Malformed URL: No slash after port") + .parse::() + .expect("Failed to parse port"); + + let zpool_id = Uuid::new_v4(); + let dataset_id = Uuid::new_v4(); + eprintln!("DB address: {}", address); + self.rack_init_builder.add_dataset( + zpool_id, + dataset_id, + address, + DatasetKind::Cockroach, + internal_dns::ServiceName::Cockroach, + ); + self.database = Some(database); + } // Start ClickHouse database server. 
- let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); + pub async fn start_clickhouse(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting Clickhouse"); + let clickhouse = + dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); + let port = clickhouse.port(); + + let zpool_id = Uuid::new_v4(); + let dataset_id = Uuid::new_v4(); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0); + self.rack_init_builder.add_dataset( + zpool_id, + dataset_id, + address, + DatasetKind::Clickhouse, + internal_dns::ServiceName::Clickhouse, + ); + self.clickhouse = Some(clickhouse); + + // NOTE: We could pass this port information via DNS, rather than + // requiring it to be known before Nexus starts. + self.config + .pkg + .timeseries_db + .address + .as_mut() + .expect("Tests expect to set a port of Clickhouse") + .set_port(port); + } - // Set up a stub instance of dendrite - let dendrite = dev::dendrite::DendriteInstance::start(0).await.unwrap(); + pub async fn start_dendrite(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting Dendrite"); + + // Set up a stub instance of dendrite + let dendrite = dev::dendrite::DendriteInstance::start(0).await.unwrap(); + let port = dendrite.port; + self.dendrite = Some(dendrite); + + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0); + + // Update the configuration options for Nexus, if it's launched later. + // + // NOTE: If dendrite is started after Nexus, this is ignored. + self.config.pkg.dendrite.address = Some(address.into()); + + let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + self.rack_init_builder.add_service( + address, + ServiceKind::Dendrite, + internal_dns::ServiceName::Dendrite, + sled_id, + ); + } - // Store actual address/port information for the databases after they start. 
- config.deployment.database = - nexus_config::Database::FromUrl { url: database.pg_config().clone() }; - config - .pkg - .timeseries_db - .address - .as_mut() - .expect("Tests expect to set a port of Clickhouse") - .set_port(clickhouse.port()); - config - .pkg - .dendrite - .address - .as_mut() - .expect("Tests expect an explicit dendrite address") - .set_port(dendrite.port); + pub async fn start_oximeter(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting Oximeter"); + + let nexus_internal_addr = self + .nexus_internal_addr + .expect("Must start Nexus internally before Oximeter"); + + let clickhouse = self + .clickhouse + .as_ref() + .expect("Must start Clickhouse before oximeter"); + + // Set up an Oximeter collector server + let collector_id = Uuid::parse_str(OXIMETER_UUID).unwrap(); + let oximeter = start_oximeter( + log.new(o!("component" => "oximeter")), + nexus_internal_addr, + clickhouse.port(), + collector_id, + ) + .await + .unwrap(); - // Begin starting Nexus. - let (nexus_internal, nexus_internal_addr) = - N::start_internal(&config, &logctx.log).await; - - // Set up a single sled agent. - let sa_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - let tempdir = camino_tempfile::tempdir().unwrap(); - let sled_agent = start_sled_agent( - logctx.log.new(o!( - "component" => "omicron_sled_agent::sim::Server", - "sled_id" => sa_id.to_string(), - )), - nexus_internal_addr, - sa_id, - tempdir.path(), - sim_mode, - ) - .await - .unwrap(); + self.oximeter = Some(oximeter); + } - // Set up an external DNS server. - let ( - external_dns_server, - external_dns_config_server, - external_dns_resolver, - ) = start_dns_server( - logctx.log.new(o!( - "component" => "external_dns_server", - )), - tempdir.path(), - ) - .await - .unwrap(); + pub async fn start_producer_server(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting test metric Producer Server"); - // Finish setting up Nexus by initializing the rack. 
We need to include - // information about the internal DNS server started within the simulated - // Sled Agent. - let dns_server_address_internal = - match sled_agent.dns_dropshot_server.local_addr() { - SocketAddr::V4(_) => panic!( - "expected internal DNS config (HTTP) server to have IPv6 address" - ), - SocketAddr::V6(addr) => addr, - }; - if let SocketAddr::V4(_) = sled_agent.dns_server.local_address() { - panic!("expected internal DNS server to have IPv6 address"); + let nexus_internal_addr = self + .nexus_internal_addr + .expect("Must start Nexus internally before producer server"); + + // Set up a test metric producer server + let producer_id = Uuid::parse_str(PRODUCER_UUID).unwrap(); + let producer = start_producer_server(nexus_internal_addr, producer_id) + .await + .unwrap(); + register_test_producer(&producer).unwrap(); + + self.producer = Some(producer); } - let dns_server_zone = Uuid::new_v4(); - let dns_service_internal = ServicePutRequest { - service_id: Uuid::new_v4(), - sled_id: sa_id, - zone_id: Some(dns_server_zone), - address: dns_server_address_internal, - kind: ServiceKind::InternalDns, - }; - let dns_server_address_external = - match external_dns_config_server.local_addr() { - SocketAddr::V4(_) => panic!( - "expected external DNS config (HTTP) server to have IPv6 address" - ), - SocketAddr::V6(addr) => addr, + + // Begin starting Nexus. 
+ pub async fn start_nexus_internal(&mut self) { + let log = &self.logctx.log; + debug!(log, "Starting Nexus (internal API)"); + + self.config.deployment.internal_dns = + nexus_config::InternalDns::FromAddress { + address: *self + .internal_dns + .as_ref() + .expect("Must initialize internal DNS server first") + .server + .local_address(), + }; + self.config.deployment.database = nexus_config::Database::FromUrl { + url: self + .database + .as_ref() + .expect("Must start CRDB before Nexus") + .pg_config() + .clone(), }; - let dns_service_external = ServicePutRequest { - service_id: Uuid::new_v4(), - sled_id: sa_id, - zone_id: Some(Uuid::new_v4()), - address: dns_server_address_external, - kind: ServiceKind::ExternalDns { - external_address: external_dns_server.local_address().ip(), - nic: ServiceNic { - id: Uuid::new_v4(), - name: "external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac: MacAddr::random_system(), - }, - }, - }; - let nexus_service = ServicePutRequest { - service_id: Uuid::new_v4(), - sled_id: sa_id, - zone_id: Some(Uuid::new_v4()), - address: SocketAddrV6::new( + + let (nexus_internal, nexus_internal_addr) = + N::start_internal(&self.config, &log).await; + + let address = SocketAddrV6::new( match nexus_internal_addr.ip() { IpAddr::V4(addr) => addr.to_ipv6_mapped(), IpAddr::V6(addr) => addr, @@ -267,118 +441,325 @@ pub async fn test_setup_with_config( nexus_internal_addr.port(), 0, 0, - ), - kind: ServiceKind::Nexus { - external_address: config - .deployment - .dropshot_external - .dropshot - .bind_address - .ip(), - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: NEXUS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac: MacAddr::random_system(), + ); + + let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + self.rack_init_builder.add_service( + address, + ServiceKind::Nexus { + 
external_address: self + .config + .deployment + .dropshot_external + .dropshot + .bind_address + .ip(), + nic: ServiceNic { + id: Uuid::new_v4(), + name: "nexus".parse().unwrap(), + ip: NEXUS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), + mac: MacAddr::random_system(), + }, }, - }, - }; - let external_dns_zone_name = - internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_string(); - let silo_name: Name = "test-suite-silo".parse().unwrap(); - let user_name = UserId::try_from("test-privileged".to_string()).unwrap(); - let user_password_hash = omicron_passwords::Hasher::default() - .create_password( - &omicron_passwords::Password::new(TEST_SUITE_PASSWORD).unwrap(), - ) - .unwrap() - .as_str() - .parse() - .unwrap(); - let recovery_silo = RecoverySiloConfig { - silo_name: silo_name.clone(), - user_name: user_name.clone(), - user_password_hash, - }; + internal_dns::ServiceName::Nexus, + sled_id, + ); - let tls_certificates = initial_cert.into_iter().collect(); + self.nexus_internal = Some(nexus_internal); + self.nexus_internal_addr = Some(nexus_internal_addr); + } - let server = N::start( - nexus_internal, - &config, - vec![dns_service_internal, dns_service_external, nexus_service], - &external_dns_zone_name, - recovery_silo, - tls_certificates, - ) - .await; - - let external_server_addr = server.get_http_server_external_address().await; - let internal_server_addr = server.get_http_server_internal_address().await; - - let testctx_external = ClientTestContext::new( - external_server_addr, - logctx.log.new(o!("component" => "external client test context")), - ); - let testctx_internal = ClientTestContext::new( - internal_server_addr, - logctx.log.new(o!("component" => "internal client test context")), - ); - - // Set Nexus' shared resolver to point to the simulated sled agent's - // internal DNS server - server - .set_resolver( - internal_dns::resolver::Resolver::new_from_addrs( - logctx.log.new(o!("component" => 
"DnsResolver")), - vec![*sled_agent.dns_server.local_address()], + pub async fn populate_internal_dns(&mut self) -> DnsConfigParams { + let log = &self.logctx.log; + debug!(log, "Populating Internal DNS"); + + // Populate the internal DNS system with all known DNS records + let internal_dns_address = self + .internal_dns + .as_ref() + .expect("Must start internal DNS server first") + .dropshot_server + .local_addr(); + let dns_config_client = dns_service_client::Client::new( + &format!("http://{}", internal_dns_address), + log.clone(), + ); + let dns_config = + self.rack_init_builder.internal_dns_config.clone().build(); + dns_config_client.dns_config_put(&dns_config).await.expect( + "Failed to send initial DNS records to internal DNS server", + ); + dns_config + } + + // Perform RSS handoff + pub async fn start_nexus_external( + &mut self, + dns_config: DnsConfigParams, + tls_certificates: Vec, + ) { + let log = &self.logctx.log; + debug!(log, "Starting Nexus (external API)"); + + // Create a recovery silo + let external_dns_zone_name = + internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_string(); + let silo_name: Name = "test-suite-silo".parse().unwrap(); + let user_name = + UserId::try_from("test-privileged".to_string()).unwrap(); + let user_password_hash = omicron_passwords::Hasher::default() + .create_password( + &omicron_passwords::Password::new(TEST_SUITE_PASSWORD).unwrap(), ) - .unwrap(), + .unwrap() + .as_str() + .parse() + .unwrap(); + let recovery_silo = RecoverySiloConfig { + silo_name: silo_name.clone(), + user_name: user_name.clone(), + user_password_hash, + }; + + // Handoff all known service information to Nexus + let server = N::start( + self.nexus_internal + .take() + .expect("Must launch internal nexus first"), + &self.config, + self.rack_init_builder.services.clone(), + // NOTE: We should probably hand off + // "self.rack_init_builder.datasets" here, but Nexus won't be happy + // if we pass it right now: + // + // - When we "call 
.add_dataset(...)", we need to keep track of + // which zpool the dataset is coming from. For these synthetic + // environments, we make this value up. + // - When we tell Nexus about datasets, we need to provide the + // parent zpool UUID, which must be known to Nexus's database. + // - The sled agent we're creating to run alongside this test DOES + // create synthetic zpools on boot, but + // (a) They're not the same zpools we're making up when we start + // Clickhouse / CRDB (we're basically making distinct calls to + // Uuid::new_v4()). + // (b) These sled-agent-created zpools are registered with Nexus + // asynchronously, and we're not making any effort (currently) to + // wait for them to be known to Nexus. + vec![], + dns_config, + &external_dns_zone_name, + recovery_silo, + tls_certificates, ) .await; - // Set up an Oximeter collector server - let collector_id = Uuid::parse_str(OXIMETER_UUID).unwrap(); - let oximeter = start_oximeter( - log.new(o!("component" => "oximeter")), - internal_server_addr, - clickhouse.port(), - collector_id, - ) - .await - .unwrap(); + let external_server_addr = + server.get_http_server_external_address().await; + let internal_server_addr = + server.get_http_server_internal_address().await; + let testctx_external = ClientTestContext::new( + external_server_addr, + self.logctx + .log + .new(o!("component" => "external client test context")), + ); + let testctx_internal = ClientTestContext::new( + internal_server_addr, + self.logctx + .log + .new(o!("component" => "internal client test context")), + ); + + self.external_dns_zone_name = Some(external_dns_zone_name); + self.external_client = Some(testctx_external); + self.internal_client = Some(testctx_internal); + self.silo_name = Some(silo_name); + self.user_name = Some(user_name); + self.server = Some(server); + } + + pub async fn start_sled(&mut self, sim_mode: sim::SimMode) { + let nexus_address = + self.nexus_internal_addr.expect("Must launch Nexus first"); + + // Set up a 
single sled agent. + let sa_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + let tempdir = camino_tempfile::tempdir().unwrap(); + let sled_agent = start_sled_agent( + self.logctx.log.new(o!( + "component" => "omicron_sled_agent::sim::Server", + "sled_id" => sa_id.to_string(), + )), + nexus_address, + sa_id, + tempdir.path(), + sim_mode, + ) + .await + .expect("Failed to start sled agent"); + + self.sled_agent = Some(sled_agent); + self.sled_agent_storage = Some(tempdir); + } + + // Set up the Crucible Pantry on an existing Sled Agent. + pub async fn start_crucible_pantry(&mut self) { + let sled_agent = self + .sled_agent + .as_mut() + .expect("Cannot start pantry without first starting sled agent"); + + let pantry = sled_agent.start_pantry().await; + let address = pantry.addr(); + + let SocketAddr::V6(address) = address else { + panic!("Expected IPv6 Pantry Address"); + }; + + let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + self.rack_init_builder.add_service( + address, + ServiceKind::CruciblePantry, + internal_dns::ServiceName::CruciblePantry, + sled_id, + ); + } + + // Set up an external DNS server. 
+ pub async fn start_external_dns(&mut self) { + let log = self.logctx.log.new(o!("component" => "external_dns_server")); + let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - // Set up a test metric producer server - let producer_id = Uuid::parse_str(PRODUCER_UUID).unwrap(); - let producer = - start_producer_server(internal_server_addr, producer_id).await.unwrap(); - register_test_producer(&producer).unwrap(); - - ControlPlaneTestContext { - start_time, - server, - external_client: testctx_external, - internal_client: testctx_internal, - database, - clickhouse, - sled_agent_storage: tempdir, - sled_agent, - oximeter, - producer, - logctx, - dendrite, - external_dns_zone_name, - external_dns_server, - external_dns_config_server, - external_dns_resolver, - silo_name, - user_name, + let dns = dns_server::InMemoryServer::new(&log).await.unwrap(); + + let SocketAddr::V6(dns_address) = *dns.server.local_address() else { + panic!("Unsupported IPv4 DNS address"); + }; + let SocketAddr::V6(dropshot_address) = dns.dropshot_server.local_addr() else { + panic!("Unsupported IPv4 Dropshot address"); + }; + + self.rack_init_builder.add_service( + dropshot_address, + ServiceKind::ExternalDns { + external_address: (*dns_address.ip()).into(), + nic: ServiceNic { + id: Uuid::new_v4(), + name: "external-dns".parse().unwrap(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), + mac: MacAddr::random_system(), + }, + }, + internal_dns::ServiceName::ExternalDns, + sled_id, + ); + self.external_dns = Some(dns); + } + + // Set up an internal DNS server. 
+ pub async fn start_internal_dns(&mut self) { + let log = self.logctx.log.new(o!("component" => "internal_dns_server")); + let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + let dns = dns_server::InMemoryServer::new(&log).await.unwrap(); + + let SocketAddr::V6(address) = dns.dropshot_server.local_addr() else { + panic!("Unsupported IPv4 DNS address"); + }; + self.rack_init_builder.add_service( + address, + ServiceKind::InternalDns, + internal_dns::ServiceName::InternalDns, + sled_id, + ); + + self.internal_dns = Some(dns); + } + + pub fn build(self) -> ControlPlaneTestContext { + ControlPlaneTestContext { + start_time: self.start_time, + server: self.server.unwrap(), + external_client: self.external_client.unwrap(), + internal_client: self.internal_client.unwrap(), + database: self.database.unwrap(), + clickhouse: self.clickhouse.unwrap(), + sled_agent_storage: self.sled_agent_storage.unwrap(), + sled_agent: self.sled_agent.unwrap(), + oximeter: self.oximeter.unwrap(), + producer: self.producer.unwrap(), + logctx: self.logctx, + dendrite: self.dendrite.unwrap(), + external_dns_zone_name: self.external_dns_zone_name.unwrap(), + external_dns: self.external_dns.unwrap(), + internal_dns: self.internal_dns.unwrap(), + silo_name: self.silo_name.unwrap(), + user_name: self.user_name.unwrap(), + } + } + + pub async fn teardown(self) { + if let Some(server) = self.server { + server.close().await; + } + if let Some(mut database) = self.database { + database.cleanup().await.unwrap(); + } + if let Some(mut clickhouse) = self.clickhouse { + clickhouse.cleanup().await.unwrap(); + } + if let Some(sled_agent) = self.sled_agent { + sled_agent.http_server.close().await.unwrap(); + } + if let Some(oximeter) = self.oximeter { + oximeter.close().await.unwrap(); + } + if let Some(producer) = self.producer { + producer.close().await.unwrap(); + } + if let Some(mut dendrite) = self.dendrite { + dendrite.cleanup().await.unwrap(); + } + self.logctx.cleanup_successful(); } } +pub 
async fn test_setup_with_config( + test_name: &str, + config: &mut omicron_common::nexus_config::Config, + sim_mode: sim::SimMode, + initial_cert: Option, +) -> ControlPlaneTestContext { + let mut builder = + ControlPlaneTestContextBuilder::::new(test_name, config); + + builder.start_crdb().await; + builder.start_clickhouse().await; + builder.start_dendrite().await; + builder.start_internal_dns().await; + builder.start_external_dns().await; + builder.start_nexus_internal().await; + builder.start_sled(sim_mode).await; + builder.start_crucible_pantry().await; + + // Give Nexus necessary information to find the Crucible Pantry + let dns_config = builder.populate_internal_dns().await; + + builder + .start_nexus_external(dns_config, initial_cert.into_iter().collect()) + .await; + + builder.start_oximeter().await; + builder.start_producer_server().await; + + builder.build() +} + pub async fn start_sled_agent( log: Logger, nexus_address: SocketAddr, @@ -409,9 +790,8 @@ pub async fn start_sled_agent( reservoir_ram: TEST_RESERVOIR_RAM, }, }; - - let (server, _rack_init_request) = - sim::Server::start(&config, &log, &sim::RssArgs::default()).await?; + let server = + sim::Server::start(&config, &log).await.map_err(|e| e.to_string())?; Ok(server) } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index cd17bdc175..3154957e58 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -51,8 +51,9 @@ request_body_max_bytes = 8388608 bind_address = "127.0.0.1:0" request_body_max_bytes = 8388608 -[deployment.subnet] -net = "fd00:1122:3344:0100::/56" +[deployment.internal_dns] +type = "from_subnet" +subnet.net = "fd00:1122:3344:0100::/56" # # NOTE: for the test suite, the database URL will be replaced with one diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index 7b07db27b7..a59de58466 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ 
b/nexus/tests/integration_tests/certificates.rs @@ -393,7 +393,7 @@ async fn test_silo_certificates() { // that was created when that Silo was created. We'll use this session to // create the other Silos and their users. let resolver = Arc::new( - CustomDnsResolver::new(*cptestctx.external_dns_server.local_address()) + CustomDnsResolver::new(*cptestctx.external_dns.server.local_address()) .unwrap(), ); let session_token = oxide_client::login( diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 4562f43b43..2b30f6bcf5 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -2143,7 +2143,10 @@ pub async fn verify_silo_dns_name( wait_for_condition( || async { let found = match cptestctx - .external_dns_resolver + .external_dns + .resolver() + .await + .expect("Failed to create external DNS resolver") .ipv4_lookup(&dns_name) .await { diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index 20418eb703..05f8519f9c 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -17,8 +17,8 @@ use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::sim::RssArgs; use omicron_sled_agent::sim::{ - run_server, Config, ConfigHardware, ConfigStorage, ConfigUpdates, - ConfigZpool, SimMode, + run_standalone_server, Config, ConfigHardware, ConfigStorage, + ConfigUpdates, ConfigZpool, SimMode, }; use std::net::SocketAddr; use std::net::SocketAddrV6; @@ -137,5 +137,7 @@ async fn do_run() -> Result<(), CmdError> { tls_certificate, }; - run_server(&config, &rss_args).await.map_err(CmdError::Failure) + run_standalone_server(&config, &rss_args) + .await + .map_err(|e| CmdError::Failure(e.to_string())) } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c2887fc01d..06db30bf52 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1340,9 +1340,11 @@ 
impl ServiceManager { // setup. request_body_max_bytes: 10 * 1024 * 1024, }, - subnet: Ipv6Subnet::::new( - sled_info.underlay_address, - ), + internal_dns: nexus_config::InternalDns::FromSubnet { + subnet: Ipv6Subnet::::new( + sled_info.underlay_address, + ), + }, database: nexus_config::Database::FromDns, }; diff --git a/sled-agent/src/sim/mod.rs b/sled-agent/src/sim/mod.rs index 531557a1c2..8a730d5988 100644 --- a/sled-agent/src/sim/mod.rs +++ b/sled-agent/src/sim/mod.rs @@ -18,5 +18,5 @@ mod storage; pub use crate::updates::ConfigUpdates; pub use config::{Config, ConfigHardware, ConfigStorage, ConfigZpool, SimMode}; -pub use server::{run_server, RssArgs, Server}; +pub use server::{run_standalone_server, RssArgs, Server}; pub use sled_agent::SledAgent; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 87387b2547..a8f366ae71 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -10,7 +10,7 @@ use super::sled_agent::SledAgent; use super::storage::PantryServer; use crate::nexus::d2n_params; use crate::nexus::NexusClient; -use anyhow::Context; +use anyhow::anyhow; use crucible_agent_client::types::State as RegionState; use internal_dns::ServiceName; use nexus_client::types as NexusTypes; @@ -32,28 +32,23 @@ use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot /// server wired up to the sled agent pub struct Server { + // Configuration used to start server + config: Config, + log: Logger, + /// underlying sled agent pub sled_agent: Arc, /// dropshot server for the API pub http_server: dropshot::HttpServer>, /// simulated pantry server - pub pantry_server: PantryServer, - /// real internal dns server storage dir - pub dns_server_storage_dir: tempfile::TempDir, - /// real internal dns server - pub dns_server: dns_server::dns_server::ServerHandle, - /// real internal dns dropshot server - pub dns_dropshot_server: - dropshot::HttpServer, + pub pantry_server: Option, } impl 
Server { - /// Start a SledAgent server pub async fn start( config: &Config, log: &Logger, - rss_args: &RssArgs, - ) -> Result<(Server, NexusTypes::RackInitializationRequest), String> { + ) -> Result { info!(log, "setting up sled agent server"); let client_log = log.new(o!("component" => "NexusClient")); @@ -61,7 +56,6 @@ impl Server { &format!("http://{}", config.nexus_address), client_log, )); - let sa_log = log.new(o!( "component" => "SledAgent", "server" => config.id.clone().to_string() @@ -81,7 +75,7 @@ impl Server { sled_agent.clone(), &dropshot_log, ) - .map_err(|error| format!("initializing server: {}", error))? + .map_err(|error| anyhow!("initializing server: {}", error))? .start(); // Notify the control plane that we're up, and continue trying this @@ -93,7 +87,7 @@ impl Server { let sa_address = http_server.local_addr(); let notify_nexus = || async { debug!(log, "contacting server nexus"); - (nexus_client + nexus_client .sled_agent_put( &config.id, &NexusTypes::SledAgentStartupInfo { @@ -120,7 +114,7 @@ impl Server { .unwrap(), }, ) - .await) + .await .map_err(BackoffError::transient) }; let log_notification_failure = |error, delay| { @@ -140,7 +134,7 @@ impl Server { // Crucible dataset for each. This emulates the setup we expect to have // on the physical rack. 
for zpool in &config.storage.zpools { - let zpool_id = uuid::Uuid::new_v4(); + let zpool_id = Uuid::new_v4(); let vendor = "synthetic-vendor".to_string(); let serial = format!("synthetic-serial-{zpool_id}"); let model = "synthetic-model".to_string(); @@ -155,7 +149,7 @@ impl Server { sled_agent .create_zpool(zpool_id, vendor, serial, model, zpool.size) .await; - let dataset_id = uuid::Uuid::new_v4(); + let dataset_id = Uuid::new_v4(); let address = sled_agent.create_crucible_dataset(zpool_id, dataset_id).await; @@ -177,204 +171,26 @@ impl Server { .await; } + Ok(Server { + config: config.clone(), + log: log.clone(), + sled_agent, + http_server, + pantry_server: None, + }) + } + + /// Starts the pantry service and add it to the DNS config builder + pub async fn start_pantry(&mut self) -> &PantryServer { // Create the simulated Pantry let pantry_server = PantryServer::new( - log.new(o!("kind" => "pantry")), - config.storage.ip, - sled_agent.clone(), + self.log.new(o!("kind" => "pantry")), + self.config.storage.ip, + self.sled_agent.clone(), ) .await; - - // Start the internal DNS server, insert the simulated Pantry DNS - // record - let dns_server_storage_dir = - tempfile::tempdir().map_err(|e| e.to_string())?; - - let dns_log = log.new(o!("kind" => "dns")); - - let store = dns_server::storage::Store::new( - log.new(o!("component" => "store")), - &dns_server::storage::Config { - keep_old_generations: 3, - storage_path: dns_server_storage_dir - .path() - .to_string_lossy() - .to_string() - .into(), - }, - ) - .context("initializing DNS storage") - .map_err(|e| e.to_string())?; - - let (dns_server, dns_dropshot_server) = dns_server::start_servers( - dns_log, - store, - &dns_server::dns_server::Config { - bind_address: "[::1]:0".parse().unwrap(), - }, - &dropshot::ConfigDropshot { - bind_address: "[::1]:0".parse().unwrap(), - ..Default::default() - }, - ) - .await - .map_err(|e| e.to_string())?; - - // Insert SRV and AAAA record for Crucible Pantry - let mut dns = 
internal_dns::DnsConfigBuilder::new(); - let pantry_zone_id = pantry_server.server.app_private().id; - let pantry_addr = match pantry_server.addr() { - SocketAddr::V6(v6) => v6, - SocketAddr::V4(_) => { - panic!("pantry address must be IPv6"); - } - }; - let pantry_zone = dns - .host_zone(pantry_zone_id, *pantry_addr.ip()) - .expect("failed to set up DNS"); - dns.service_backend_zone( - ServiceName::CruciblePantry, - &pantry_zone, - pantry_addr.port(), - ) - .expect("failed to set up DNS"); - - let dns_config = dns.build(); - let dns_config_client = dns_service_client::Client::new( - &format!("http://{}", dns_dropshot_server.local_addr()), - log.clone(), - ); - dns_config_client - .dns_config_put(&dns_config) - .await - .context("initializing DNS") - .map_err(|e| e.to_string())?; - - // Record the internal DNS server as though RSS had provisioned it so - // that Nexus knows about it. - if let SocketAddr::V4(_) = dns_server.local_address() { - panic!("expected internal DNS server to have IPv6 address"); - } - let http_bound = match dns_dropshot_server.local_addr() { - SocketAddr::V4(_) => panic!( - "expected internal DNS config (HTTP) server to have IPv6 address" - ), - SocketAddr::V6(a) => a, - }; - let mut services = vec![NexusTypes::ServicePutRequest { - address: http_bound.to_string(), - kind: NexusTypes::ServiceKind::InternalDns, - service_id: Uuid::new_v4(), - sled_id: config.id, - zone_id: Some(Uuid::new_v4()), - }]; - - let mut internal_services_ip_pool_ranges = vec![]; - if let Some(nexus_external_addr) = rss_args.nexus_external_addr { - let ip = nexus_external_addr.ip(); - - services.push(NexusTypes::ServicePutRequest { - address: config.nexus_address.to_string(), - kind: NexusTypes::ServiceKind::Nexus { - external_address: ip, - nic: NexusTypes::ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: NEXUS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac: MacAddr::random_system(), - }, - 
}, - service_id: Uuid::new_v4(), - sled_id: config.id, - zone_id: Some(Uuid::new_v4()), - }); - - internal_services_ip_pool_ranges.push(match ip { - IpAddr::V4(addr) => { - IpRange::V4(Ipv4Range { first: addr, last: addr }) - } - IpAddr::V6(addr) => { - IpRange::V6(Ipv6Range { first: addr, last: addr }) - } - }); - } - - if let Some(external_dns_internal_addr) = - rss_args.external_dns_internal_addr - { - let ip = *external_dns_internal_addr.ip(); - services.push(NexusTypes::ServicePutRequest { - address: external_dns_internal_addr.to_string(), - kind: NexusTypes::ServiceKind::ExternalDns { - external_address: ip.into(), - nic: NexusTypes::ServiceNic { - id: Uuid::new_v4(), - name: "external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac: MacAddr::random_system(), - }, - }, - service_id: Uuid::new_v4(), - sled_id: config.id, - zone_id: Some(Uuid::new_v4()), - }); - - internal_services_ip_pool_ranges - .push(IpRange::V6(Ipv6Range { first: ip, last: ip })); - } - - let recovery_silo = NexusTypes::RecoverySiloConfig { - silo_name: "demo-silo".parse().unwrap(), - user_name: "demo-privileged".parse().unwrap(), - // The following is a hash for the password "oxide". This is - // (obviously) only intended for transient deployments in - // development with no sensitive data or resources. You can change - // this value to any other supported hash. The only thing that - // needs to be changed with this hash are the instructions given to - // individuals running this program who then want to log in as this - // user. For more on what's supported, see the API docs for this - // type and the specific constraints in the omicron-passwords crate. 
- user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ - RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY" - .parse() - .unwrap(), - }; - - let certs = match &rss_args.tls_certificate { - Some(c) => vec![c.clone()], - None => vec![], - }; - - let rack_init_request = NexusTypes::RackInitializationRequest { - services, - datasets, - internal_services_ip_pool_ranges, - certs, - internal_dns_zone_config: d2n_params(&dns_config), - external_dns_zone_name: - internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_owned(), - recovery_silo, - external_port_count: 1, - rack_network_config: None, - }; - - Ok(( - Server { - sled_agent, - http_server, - pantry_server, - dns_server_storage_dir, - dns_server, - dns_dropshot_server, - }, - rack_init_request, - )) + self.pantry_server = Some(pantry_server); + self.pantry_server.as_ref().unwrap() } /// Wait for the given server to shut down @@ -382,8 +198,8 @@ impl Server { /// Note that this doesn't initiate a graceful shutdown, so if you call this /// immediately after calling `start()`, the program will block indefinitely /// or until something else initiates a graceful shutdown. 
- pub async fn wait_for_finish(self) -> Result<(), String> { - self.http_server.await + pub async fn wait_for_finish(self) -> Result<(), anyhow::Error> { + self.http_server.await.map_err(|err| anyhow!(err)) } } @@ -391,7 +207,7 @@ async fn handoff_to_nexus( log: &Logger, config: &Config, request: &NexusTypes::RackInitializationRequest, -) -> Result<(), String> { +) -> Result<(), anyhow::Error> { let nexus_client = NexusClient::new( &format!("http://{}", config.nexus_address), log.new(o!("component" => "NexusClient")), @@ -412,8 +228,7 @@ async fn handoff_to_nexus( notify_nexus, log_failure, ) - .await - .map_err(|e| e.to_string())?; + .await?; Ok(()) } @@ -431,30 +246,191 @@ pub struct RssArgs { pub tls_certificate: Option, } -/// Run an instance of the `Server` -pub async fn run_server( +/// Run an instance of the `Server` which is able to handoff to Nexus. +/// +/// This starts: +/// - A Sled Agent +/// - An Internal DNS server +/// - A Crucible Pantry +/// +/// And performs the following actions, similar to the Rack Setup Service: +/// - Populates the Internal DNS server with records +/// - Performs handoff to Nexus +pub async fn run_standalone_server( config: &Config, rss_args: &RssArgs, -) -> Result<(), String> { +) -> Result<(), anyhow::Error> { let (drain, registration) = slog_dtrace::with_drain( config .log .to_logger("sled-agent") - .map_err(|message| format!("initializing logger: {}", message))?, + .map_err(|message| anyhow!("initializing logger: {}", message))?, ); let log = slog::Logger::root(drain.fuse(), slog::o!()); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { let msg = format!("failed to register DTrace probes: {}", e); error!(log, "{}", msg); - return Err(msg); + return Err(anyhow!(msg)); } else { debug!(log, "registered DTrace probes"); } - let (server, rack_init_request) = - Server::start(config, &log, rss_args).await?; + // Start the sled agent + let mut server = Server::start(config, &log).await?; info!(log, "sled agent 
started successfully"); + // Start the Internal DNS server + let dns = dns_server::InMemoryServer::new(&log).await?; + let mut dns_config_builder = internal_dns::DnsConfigBuilder::new(); + + // Start the Crucible Pantry + let pantry_server = server.start_pantry().await; + + // Insert SRV and AAAA record for Crucible Pantry + let pantry_zone_id = pantry_server.server.app_private().id; + let pantry_addr = match pantry_server.addr() { + SocketAddr::V6(v6) => v6, + SocketAddr::V4(_) => { + panic!("pantry address must be IPv6"); + } + }; + let pantry_zone = dns_config_builder + .host_zone(pantry_zone_id, *pantry_addr.ip()) + .expect("failed to set up DNS"); + dns_config_builder + .service_backend_zone( + ServiceName::CruciblePantry, + &pantry_zone, + pantry_addr.port(), + ) + .expect("failed to set up DNS"); + + // Initialize the internal DNS entries + let dns_config = dns_config_builder.build(); + dns.initialize_with_config(&log, &dns_config).await?; + + // Record the internal DNS server as though RSS had provisioned it so + // that Nexus knows about it. 
+ let http_bound = match dns.dropshot_server.local_addr() { + SocketAddr::V4(_) => panic!("did not expect v4 address"), + SocketAddr::V6(a) => a, + }; + let mut services = vec![NexusTypes::ServicePutRequest { + address: http_bound.to_string(), + kind: NexusTypes::ServiceKind::InternalDns, + service_id: Uuid::new_v4(), + sled_id: config.id, + zone_id: Some(Uuid::new_v4()), + }]; + + let mut internal_services_ip_pool_ranges = vec![]; + if let Some(nexus_external_addr) = rss_args.nexus_external_addr { + let ip = nexus_external_addr.ip(); + + services.push(NexusTypes::ServicePutRequest { + address: config.nexus_address.to_string(), + kind: NexusTypes::ServiceKind::Nexus { + external_address: ip, + nic: NexusTypes::ServiceNic { + id: Uuid::new_v4(), + name: "nexus".parse().unwrap(), + ip: NEXUS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), + mac: MacAddr::random_system(), + }, + }, + service_id: Uuid::new_v4(), + sled_id: config.id, + zone_id: Some(Uuid::new_v4()), + }); + + internal_services_ip_pool_ranges.push(match ip { + IpAddr::V4(addr) => { + IpRange::V4(Ipv4Range { first: addr, last: addr }) + } + IpAddr::V6(addr) => { + IpRange::V6(Ipv6Range { first: addr, last: addr }) + } + }); + } + + if let Some(external_dns_internal_addr) = + rss_args.external_dns_internal_addr + { + let ip = *external_dns_internal_addr.ip(); + services.push(NexusTypes::ServicePutRequest { + address: external_dns_internal_addr.to_string(), + kind: NexusTypes::ServiceKind::ExternalDns { + external_address: ip.into(), + nic: NexusTypes::ServiceNic { + id: Uuid::new_v4(), + name: "external-dns".parse().unwrap(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), + mac: MacAddr::random_system(), + }, + }, + service_id: Uuid::new_v4(), + sled_id: config.id, + zone_id: Some(Uuid::new_v4()), + }); + } + + let recovery_silo = NexusTypes::RecoverySiloConfig { + silo_name: "demo-silo".parse().unwrap(), 
+ user_name: "demo-privileged".parse().unwrap(), + // The following is a hash for the password "oxide". This is + // (obviously) only intended for transient deployments in + // development with no sensitive data or resources. You can change + // this value to any other supported hash. The only thing that + // needs to be changed with this hash are the instructions given to + // individuals running this program who then want to log in as this + // user. For more on what's supported, see the API docs for this + // type and the specific constraints in the nexus-passwords crate. + user_password_hash: "$argon2id$v=19$m=98304,t=13,p=1$\ + RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY" + .parse() + .unwrap(), + }; + + let mut datasets = vec![]; + for zpool_id in server.sled_agent.get_zpools().await { + for (dataset_id, address) in + server.sled_agent.get_datasets(zpool_id).await + { + datasets.push(NexusTypes::DatasetCreateRequest { + zpool_id, + dataset_id, + request: NexusTypes::DatasetPutRequest { + address: address.to_string(), + kind: NexusTypes::DatasetKind::Crucible, + }, + }); + } + } + + let certs = match &rss_args.tls_certificate { + Some(c) => vec![c.clone()], + None => vec![], + }; + + let rack_init_request = NexusTypes::RackInitializationRequest { + services, + datasets, + internal_services_ip_pool_ranges, + certs, + internal_dns_zone_config: d2n_params(&dns_config), + external_dns_zone_name: internal_dns::names::DNS_ZONE_EXTERNAL_TESTING + .to_owned(), + recovery_silo, + external_port_count: 1, + rack_network_config: None, + }; + handoff_to_nexus(&log, &config, &rack_init_request).await?; info!(log, "Handoff to Nexus is complete"); diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index b8b50c0122..5f9ab3ca97 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -480,6 +480,17 @@ impl SledAgent { .await; } + pub async fn get_zpools(&self) -> Vec { + 
self.storage.lock().await.get_all_zpools() + } + + pub async fn get_datasets( + &self, + zpool_id: Uuid, + ) -> Vec<(Uuid, SocketAddr)> { + self.storage.lock().await.get_all_datasets(zpool_id) + } + /// Adds a Zpool to the simulated sled agent. pub async fn create_zpool( &self, diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index e8a668881a..d28437975d 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -557,6 +557,20 @@ impl Storage { dataset.address() } + pub fn get_all_zpools(&self) -> Vec { + self.zpools.keys().cloned().collect() + } + + pub fn get_all_datasets(&self, zpool_id: Uuid) -> Vec<(Uuid, SocketAddr)> { + let zpool = self.zpools.get(&zpool_id).expect("Zpool does not exist"); + + zpool + .datasets + .iter() + .map(|(id, server)| (*id, server.address())) + .collect() + } + pub async fn get_dataset( &self, zpool_id: Uuid, From c41ff8a4487c5396669d3d2ce32315227cb3b2f3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Jun 2023 09:19:17 -0700 Subject: [PATCH 02/57] [nexus] Resilience to arbitrary boot order --- nexus/src/app/mod.rs | 30 ++-- nexus/src/context.rs | 27 +++- .../tests/integration_tests/initialization.rs | 129 ++++++++++++++++++ nexus/tests/integration_tests/mod.rs | 1 + 4 files changed, 166 insertions(+), 21 deletions(-) create mode 100644 nexus/tests/integration_tests/initialization.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 60c91b77c5..4342e7f745 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -178,13 +178,22 @@ impl Nexus { { (dpd_address.ip().to_string(), dpd_address.port()) } else { - let addr = resolver - .lock() - .await - .lookup_socket_v6(ServiceName::Dendrite) - .await - .map_err(|e| format!("Cannot access Dendrite address: {e}"))?; - (addr.ip().to_string(), addr.port()) + loop { + match resolver + .lock() + .await + .lookup_socket_v6(ServiceName::Dendrite) + .await + .map_err(|e| format!("Cannot access Dendrite address: {e}")) + { + 
Ok(addr) => break (addr.ip().to_string(), addr.port()), + Err(e) => { + warn!(log, "Failed to access Dendrite address: {e}"); + tokio::time::sleep(std::time::Duration::from_secs(1)) + .await; + } + } + } }; let dpd_client = Arc::new(dpd_client::Client::new( &format!("http://[{dpd_host}]:{dpd_port}"), @@ -655,13 +664,6 @@ impl Nexus { db::lookup::LookupPath::new(opctx, &self.db_datastore) } - pub async fn set_resolver( - &self, - resolver: internal_dns::resolver::Resolver, - ) { - *self.resolver.lock().await = resolver; - } - pub async fn resolver(&self) -> internal_dns::resolver::Resolver { let resolver = self.resolver.lock().await; resolver.clone() diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 08a99ea9a3..7b60666463 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,7 +18,7 @@ use authn::external::HttpAuthnScheme; use chrono::Duration; use internal_dns::ServiceName; use nexus_db_queries::context::{OpContext, OpKind}; -use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX}; use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; @@ -158,14 +158,27 @@ impl ServerContext { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); - let address = resolver - .lookup_ipv6(ServiceName::Cockroach) - .await - .map_err(|e| format!("Failed to lookup IP: {}", e))?; + let address = loop { + match resolver + .lookup_socket_v6(ServiceName::Cockroach) + .await + { + Ok(address) => break address, + Err(e) => { + warn!( + log, + "Failed to lookup cockroach address: {e}" + ); + tokio::time::sleep(std::time::Duration::from_secs( + 1, + )) + .await; + } + } + }; info!(log, "DB address: {}", address); PostgresConfigWithUrl::from_str(&format!( - "postgresql://root@[{}]:{}/omicron?sslmode=disable", - address, COCKROACH_PORT + 
"postgresql://root@{address}/omicron?sslmode=disable", )) .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? } diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs new file mode 100644 index 0000000000..7aa4b54e9f --- /dev/null +++ b/nexus/tests/integration_tests/initialization.rs @@ -0,0 +1,129 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use nexus_test_interface::NexusServer; +use nexus_test_utils::{load_test_config, ControlPlaneTestContextBuilder}; + +#[tokio::test] +async fn test_nexus_boots_before_cockroach() { + let mut config = load_test_config(); + + let mut builder = + ControlPlaneTestContextBuilder::::new( + "test_nexus_boots_before_cockroach", + &mut config, + ); + + let log = builder.logctx.log.new(o!("component" => "test")); + + builder.start_dendrite().await; + builder.start_internal_dns().await; + builder.start_external_dns().await; + + // Start Nexus, referencing the internal DNS system. + // + // This call won't return successfully until we can... + // 1. Contact the internal DNS system to find Cockroach + // 2. 
Contact Cockroach to ensure the database has been populated + builder.config.deployment.database = + omicron_common::nexus_config::Database::FromDns; + builder.config.deployment.internal_dns = + omicron_common::nexus_config::InternalDns::FromAddress { + address: *builder + .internal_dns + .as_ref() + .expect("Must start Internal DNS before acquiring an address") + .server + .local_address(), + }; + let nexus_config = builder.config.clone(); + let nexus_log = log.clone(); + let nexus_handle = tokio::task::spawn(async move { + info!(nexus_log, "Test: Trying to start Nexus (internal)"); + omicron_nexus::Server::start_internal(&nexus_config, &nexus_log).await; + info!(nexus_log, "Test: Started Nexus (internal)"); + }); + + // Start Cockroach and populate the Internal DNS system with CRDB records. + // + // This is necessary for the prior call to "start Nexus" to succeed. + info!(log, "Starting CRDB"); + builder.start_crdb().await; + info!(log, "Started CRDB"); + + info!(log, "Populating internal DNS records"); + builder.populate_internal_dns().await; + info!(log, "Populated internal DNS records"); + + // Now that Cockroach has started, we expect the request to succeed. + nexus_handle.await.expect("Test: Task starting Nexus has failed"); + + builder.teardown().await; +} + +#[tokio::test] +async fn test_nexus_boots_before_dendrite() { + let mut config = load_test_config(); + + let mut builder = + ControlPlaneTestContextBuilder::::new( + "test_nexus_boots_before_dendrite", + &mut config, + ); + + let log = builder.logctx.log.new(o!("component" => "test")); + + builder.start_crdb().await; + builder.start_internal_dns().await; + builder.start_external_dns().await; + + // Start Nexus, referencing the internal DNS system. + // + // This call won't return successfully until we can... + // 1. Contact the internal DNS system to find Dendrite + // 2. 
Contact Dendrite + builder.config.deployment.database = + omicron_common::nexus_config::Database::FromUrl { + url: builder + .database + .as_ref() + .expect("Must start CRDB first") + .pg_config() + .clone(), + }; + builder.config.pkg.dendrite = + omicron_common::nexus_config::DpdConfig { address: None }; + builder.config.deployment.internal_dns = + omicron_common::nexus_config::InternalDns::FromAddress { + address: *builder + .internal_dns + .as_ref() + .expect("Must start Internal DNS before acquiring an address") + .server + .local_address(), + }; + let nexus_config = builder.config.clone(); + let nexus_log = log.clone(); + let nexus_handle = tokio::task::spawn(async move { + info!(nexus_log, "Test: Trying to start Nexus (internal)"); + omicron_nexus::Server::start_internal(&nexus_config, &nexus_log).await; + info!(nexus_log, "Test: Started Nexus (internal)"); + }); + + // Start Dendrite and populate the Internal DNS system. + // + // This is necessary for the prior call to "start Nexus" to succeed. + info!(log, "Starting Dendrite"); + builder.start_dendrite().await; + info!(log, "Started Dendrite"); + + info!(log, "Populating internal DNS records"); + builder.populate_internal_dns().await; + info!(log, "Populated internal DNS records"); + + // Now that Dendrite has started, we expect the request to succeed. 
+ nexus_handle.await.expect("Test: Task starting Nexus has failed"); + + builder.teardown().await; +} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index b0231d7480..b07878717b 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -13,6 +13,7 @@ mod console_api; mod device_auth; mod disks; mod images; +mod initialization; mod instances; mod ip_pools; mod loopback_address; From 4ba64cf84ea50ee7dbeeb90b86eb3482505b8e2d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Jun 2023 09:39:01 -0700 Subject: [PATCH 03/57] Handler Task Mode --- dns-server/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index bc697d98c9..a7d1edf54a 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -130,6 +130,7 @@ impl InMemoryServer { &dropshot::ConfigDropshot { bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 4 * 1024 * 1024, + default_handler_task_mode: dropshot::HandlerTaskMode::Detached, }, ) .await?; From 9e9a4eb78bfeb2941cebf86142d30a31dc8007f9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Jun 2023 10:56:21 -0700 Subject: [PATCH 04/57] Ipv6 CockroachDB addresses --- test-utils/src/dev/db.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index 7b425565d7..b390c5a3b6 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -55,7 +55,7 @@ const COCKROACHDB_VERSION: &str = /// arguments for the `cockroach start-single-node` command /// /// Without customizations, this will run `cockroach start-single-node --insecure -/// --listen-addr=127.0.0.1:0 --http-addr=:0`. +/// --listen-addr=[::1]:0 --http-addr=:0`. /// /// It's useful to support running this concurrently (as in the test suite). To /// support this, we allow CockroachDB to choose its listening ports. 
To figure @@ -163,7 +163,7 @@ impl CockroachStarterBuilder { /// Sets the listening port for the PostgreSQL and CockroachDB protocols /// - /// We always listen only on 127.0.0.1. + /// We always listen only on `[::1]`. pub fn listen_port(mut self, listen_port: u16) -> Self { self.listen_port = listen_port; self @@ -221,7 +221,7 @@ impl CockroachStarterBuilder { let listen_url_file = CockroachStarterBuilder::temp_path(&temp_dir, "listen-url"); - let listen_arg = format!("127.0.0.1:{}", self.listen_port); + let listen_arg = format!("[::1]:{}", self.listen_port); self.arg(&store_arg) .arg("--listen-addr") .arg(&listen_arg) @@ -880,7 +880,7 @@ fn make_pg_config( if let Host::Tcp(ip_host) = &hosts[0] { let url = format!( - "postgresql://{}@{}:{}/{}?sslmode=disable", + "postgresql://{}@[{}]:{}/{}?sslmode=disable", COCKROACHDB_USER, ip_host, ports[0], COCKROACHDB_DATABASE ); url.parse::().with_context(|| { @@ -1512,12 +1512,12 @@ mod test { // Success case for make_pg_config() #[test] fn test_make_pg_config_ok() { - let url = "postgresql://root@127.0.0.1:45913?sslmode=disable"; + let url = "postgresql://root@[::1]:45913?sslmode=disable"; let config = make_pg_config(url).expect("failed to parse basic case"); assert_eq!( config.to_string().as_str(), // TODO-security This user should become "omicron" - "postgresql://root@127.0.0.1:45913/omicron?sslmode=disable", + "postgresql://root@[::1]:45913/omicron?sslmode=disable", ); } @@ -1530,7 +1530,7 @@ mod test { // unexpected contents in initial listen URL (wrong db name) let error = make_pg_config( - "postgresql://root@127.0.0.1:45913/foobar?sslmode=disable", + "postgresql://root@[::1]:45913/foobar?sslmode=disable", ) .unwrap_err() .to_string(); @@ -1542,7 +1542,7 @@ mod test { // unexpected contents in initial listen URL (extra param) let error = make_pg_config( - "postgresql://root@127.0.0.1:45913/foobar?application_name=foo", + "postgresql://root@[::1]:45913/foobar?application_name=foo", ) .unwrap_err() .to_string(); From 
9d91fb24efc0ccbf550c2c9e73de18316f063a64 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 Jun 2023 14:30:00 -0700 Subject: [PATCH 05/57] [sled agent] Clarify LazyNexusClient constructor arguments, cache address --- sled-agent/src/instance.rs | 8 ++++-- sled-agent/src/instance_manager.rs | 16 +++++++---- sled-agent/src/nexus.rs | 44 +++++++++++++++++++++++++----- sled-agent/src/server.rs | 5 ++-- 4 files changed, 55 insertions(+), 18 deletions(-) diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 5648431f40..4233b6eeea 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1164,9 +1164,11 @@ mod test { 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, ); let port_manager = PortManager::new(log.new(slog::o!()), underlay_ip); - let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + let lazy_nexus_client = LazyNexusClient::new_from_subnet( + log.clone(), + std::net::Ipv6Addr::LOCALHOST, + ) + .unwrap(); let instance_manager = InstanceManager::new( log.clone(), lazy_nexus_client.clone(), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 3a9ce80c7e..f8bd065ce0 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -416,9 +416,11 @@ mod test { async fn ensure_instance() { let logctx = test_setup_log("ensure_instance"); let log = &logctx.log; - let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + let lazy_nexus_client = LazyNexusClient::new_from_subnet( + log.clone(), + std::net::Ipv6Addr::LOCALHOST, + ) + .unwrap(); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. 
@@ -532,9 +534,11 @@ mod test { async fn ensure_instance_state_repeatedly() { let logctx = test_setup_log("ensure_instance_repeatedly"); let log = &logctx.log; - let lazy_nexus_client = - LazyNexusClient::new(log.clone(), std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + let lazy_nexus_client = LazyNexusClient::new_from_subnet( + log.clone(), + std::net::Ipv6Addr::LOCALHOST, + ) + .unwrap(); // Instance Manager creation. diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 1c26cb7b2b..db66297395 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -9,18 +9,18 @@ pub use nexus_client::Client as NexusClient; use internal_dns::resolver::{ResolveError, Resolver}; use internal_dns::ServiceName; -use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; use std::future::Future; -use std::net::Ipv6Addr; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::pin::Pin; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tokio::sync::mpsc; use tokio::task::JoinHandle; struct Inner { log: Logger, resolver: Resolver, + address: Mutex>, } /// Wrapper around a [`NexusClient`] object, which allows deferring @@ -39,7 +39,10 @@ pub struct LazyNexusClient { } impl LazyNexusClient { - pub fn new(log: Logger, addr: Ipv6Addr) -> Result { + pub fn new_from_subnet( + log: Logger, + addr: Ipv6Addr, + ) -> Result { Ok(Self { inner: Arc::new(Inner { log: log.clone(), @@ -47,19 +50,46 @@ impl LazyNexusClient { log.new(o!("component" => "DnsResolver")), addr, )?, + address: Mutex::new(None), + }), + }) + } + + pub fn new_from_dns( + log: Logger, + dns_addrs: Vec, + ) -> Result { + Ok(Self { + inner: Arc::new(Inner { + log: log.clone(), + resolver: Resolver::new_from_addrs( + log.new(o!("component" => "DnsResolver")), + dns_addrs, + )?, + address: Mutex::new(None), }), }) } pub async fn get_ip(&self) -> Result { - self.inner.resolver.lookup_ipv6(ServiceName::Nexus).await + self.get_addr().await.map(|addr| *addr.ip()) + } + + pub async fn 
get_addr(&self) -> Result { + if let Some(addr) = self.inner.address.lock().unwrap().as_ref() { + return Ok(*addr); + } + let addr = + self.inner.resolver.lookup_socket_v6(ServiceName::Nexus).await?; + *self.inner.address.lock().unwrap() = Some(addr); + Ok(addr) } pub async fn get(&self) -> Result { - let address = self.get_ip().await?; + let address = self.get_addr().await?; Ok(NexusClient::new( - &format!("http://[{}]:{}", address, NEXUS_INTERNAL_PORT), + &format!("http://{address}"), self.inner.log.clone(), )) } diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 6b48251b79..e6002292e9 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -44,8 +44,9 @@ impl Server { let client_log = log.new(o!("component" => "NexusClient")); let addr = request.sled_address(); - let lazy_nexus_client = LazyNexusClient::new(client_log, *addr.ip()) - .map_err(|e| e.to_string())?; + let lazy_nexus_client = + LazyNexusClient::new_from_subnet(client_log, *addr.ip()) + .map_err(|e| e.to_string())?; let sled_agent = SledAgent::new( &config, From 65e047323ce4388e30ee87e9972ae6fa86acdc32 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Jun 2023 19:19:27 -0700 Subject: [PATCH 06/57] Update docs around how-to-run-simulated --- dev-tools/src/bin/omicron-dev.rs | 8 ++++++++ docs/how-to-run-simulated.adoc | 7 +++++-- nexus/examples/config.toml | 9 ++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/src/bin/omicron-dev.rs index 0589f12a1d..6f8687c672 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/src/bin/omicron-dev.rs @@ -364,6 +364,14 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { "omicron-dev: cockroachdb directory: {}", cptestctx.database.temp_dir().display() ); + println!( + "omicron-dev: internal DNS HTTP: http://{}", + cptestctx.internal_dns.dropshot_server.local_addr() + ); + println!( + "omicron-dev: internal DNS: {}", + 
cptestctx.internal_dns.server.local_address() + ); println!( "omicron-dev: external DNS name: {}", cptestctx.external_dns_zone_name, diff --git a/docs/how-to-run-simulated.adoc b/docs/how-to-run-simulated.adoc index 5af145bec1..5b0bc70475 100644 --- a/docs/how-to-run-simulated.adoc +++ b/docs/how-to-run-simulated.adoc @@ -76,10 +76,13 @@ omicron-dev: services are running. omicron-dev: nexus external API: 127.0.0.1:12220 omicron-dev: nexus internal API: [::1]:12221 omicron-dev: cockroachdb pid: 7180 -omicron-dev: cockroachdb: postgresql://root@127.0.0.1:54649/omicron?sslmode=disable +omicron-dev: cockroachdb: postgresql://root@[::]:54649/omicron?sslmode=disable omicron-dev: cockroachdb directory: /dangerzone/omicron_tmp/.tmpB8FNBT +omicron-dev: internal DNS HTTP: http://[::1]:33652 +omicron-dev: internal DNS: [::1]:37503 +omicron-dev: external DNS name: oxide-dev.test +omicron-dev: external DNS HTTP: http://[::1]:38374 omicron-dev: external DNS: [::1]:54342 -omicron-dev: external DNS HTTP: [::1]:38374 ---- + . If you use CTRL-C to shut this down, it will gracefully terminate CockroachDB and remove the temporary directory: diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index fbcc4d4a8e..626625ec2c 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -52,13 +52,16 @@ bind_address = "[::1]:12221" request_body_max_bytes = 1048576 [deployment.internal_dns] -type = "from_subnet" -subnet.net = "fd00:1122:3344:0100::/56" +# Example address. +# If you're using `omicron-dev run-all`, this value is overwritten +# by the address / port created after starting the Internal DNS server.
+type = "from_address" +address = "[::1]:5353" [deployment.database] # URL for connecting to the database type = "from_url" -url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" +url = "postgresql://root@[::1]:32221/omicron?sslmode=disable" # Tunable configuration parameters, for testing or experimentation [tunables] From 4046281baa2e950c98a1b1fa7101a04439fb33bb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Jun 2023 19:20:19 -0700 Subject: [PATCH 07/57] Updated comment --- nexus/src/context.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 08a99ea9a3..b50e58c5b8 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -137,7 +137,11 @@ impl ServerContext { let resolver = match config.deployment.internal_dns { nexus_config::InternalDns::FromSubnet { subnet } => { let az_subnet = Ipv6Subnet::::new(subnet.net().ip()); - info!(log, "Setting up resolver on subnet: {:?}", az_subnet); + info!( + log, + "Setting up resolver using DNS servers for subnet: {:?}", + az_subnet + ); internal_dns::resolver::Resolver::new_from_subnet( log.new(o!("component" => "DnsResolver")), az_subnet, From 93bcca6ee1495c1eaae3feedb0c7fc7ae8cf4884 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Jun 2023 19:21:55 -0700 Subject: [PATCH 08/57] Add logging --- nexus/src/context.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nexus/src/context.rs b/nexus/src/context.rs index b50e58c5b8..73ba6b8683 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -149,6 +149,11 @@ impl ServerContext { .map_err(|e| format!("Failed to create DNS resolver: {}", e))? 
} nexus_config::InternalDns::FromAddress { address } => { + info!( + log, + "Setting up resolver using DNS address: {:?}", address + ); + internal_dns::resolver::Resolver::new_from_addrs( log.new(o!("component" => "DnsResolver")), vec![address], From 6a6187ba8528b12992b8a148d3a267843570afe7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Jun 2023 19:30:11 -0700 Subject: [PATCH 09/57] Add note about test replacement --- nexus/tests/config.test.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 3154957e58..cf83cb656d 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -51,6 +51,10 @@ request_body_max_bytes = 8388608 bind_address = "127.0.0.1:0" request_body_max_bytes = 8388608 +# +# NOTE: for the test suite, the internal DNS address will be replaced with one +# that's started by the test runner. +# [deployment.internal_dns] type = "from_subnet" subnet.net = "fd00:1122:3344:0100::/56" From e340a0439337dc8cf59c2cc5e5f14707c7e34179 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 20 Jun 2023 19:32:33 -0700 Subject: [PATCH 10/57] Update comment about DNS w.r.t. 
pantry server --- sled-agent/src/sim/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index a8f366ae71..da4a044067 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -180,7 +180,7 @@ impl Server { }) } - /// Starts the pantry service and add it to the DNS config builder + /// Starts the pantry service pub async fn start_pantry(&mut self) -> &PantryServer { // Create the simulated Pantry let pantry_server = PantryServer::new( From f80788a0297434b9b5d0bee553d5877adeaacf8b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 22 Jun 2023 13:06:13 -0700 Subject: [PATCH 11/57] Review feedback --- dev-tools/src/bin/omicron-dev.rs | 8 ++++---- dns-server/src/lib.rs | 14 +++++++------- nexus/test-utils/src/lib.rs | 16 ++++++++-------- nexus/tests/integration_tests/certificates.rs | 6 ++++-- sled-agent/src/sim/server.rs | 5 ++++- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/src/bin/omicron-dev.rs index 6f8687c672..f2e3e6e9f2 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/src/bin/omicron-dev.rs @@ -370,7 +370,7 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { ); println!( "omicron-dev: internal DNS: {}", - cptestctx.internal_dns.server.local_address() + cptestctx.internal_dns.dns_server.local_address() ); println!( "omicron-dev: external DNS name: {}", @@ -382,12 +382,12 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { ); println!( "omicron-dev: external DNS: {}", - cptestctx.external_dns.server.local_address() + cptestctx.external_dns.dns_server.local_address() ); println!( "omicron-dev: e.g. 
`dig @{} -p {} {}.sys.{}`", - cptestctx.external_dns.server.local_address().ip(), - cptestctx.external_dns.server.local_address().port(), + cptestctx.external_dns.dns_server.local_address().ip(), + cptestctx.external_dns.dns_server.local_address().port(), cptestctx.silo_name, cptestctx.external_dns_zone_name, ); diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index a7d1edf54a..cde8b05e54 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -92,19 +92,19 @@ pub async fn start_servers( Ok((dns_server, dropshot_server)) } -/// An in-memory DNS server running on localhost. +/// A DNS server running on localhost, using a temporary directory for storage. /// /// Intended to be used for testing only. -pub struct InMemoryServer { +pub struct TransientServer { /// Server storage dir pub storage_dir: tempfile::TempDir, /// DNS server - pub server: dns_server::ServerHandle, + pub dns_server: dns_server::ServerHandle, /// Dropshot server pub dropshot_server: dropshot::HttpServer, } -impl InMemoryServer { +impl TransientServer { pub async fn new(log: &slog::Logger) -> Result { let storage_dir = tempfile::tempdir()?; @@ -123,7 +123,7 @@ impl InMemoryServer { ) .context("initializing DNS storage")?; - let (server, dropshot_server) = start_servers( + let (dns_server, dropshot_server) = start_servers( dns_log, store, &dns_server::Config { bind_address: "[::1]:0".parse().unwrap() }, @@ -134,7 +134,7 @@ impl InMemoryServer { }, ) .await?; - Ok(Self { storage_dir, server, dropshot_server }) + Ok(Self { storage_dir, dns_server, dropshot_server }) } pub async fn initialize_with_config( @@ -156,7 +156,7 @@ impl InMemoryServer { pub async fn resolver(&self) -> Result { let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *self.server.local_address(), + socket_addr: *self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git
a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 5d03bac53e..14203d90c3 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -85,8 +85,8 @@ pub struct ControlPlaneTestContext { pub dendrite: dev::dendrite::DendriteInstance, pub external_dns_zone_name: String, - pub external_dns: dns_server::InMemoryServer, - pub internal_dns: dns_server::InMemoryServer, + pub external_dns: dns_server::TransientServer, + pub internal_dns: dns_server::TransientServer, pub silo_name: Name, pub user_name: UserId, } @@ -234,8 +234,8 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { nexus_internal_addr: Option, pub external_dns_zone_name: Option, - pub external_dns: Option, - pub internal_dns: Option, + pub external_dns: Option, + pub internal_dns: Option, pub silo_name: Option, pub user_name: Option, @@ -419,7 +419,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .internal_dns .as_ref() .expect("Must initialize internal DNS server first") - .server + .dns_server .local_address(), }; self.config.deployment.database = nexus_config::Database::FromUrl { @@ -634,9 +634,9 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let log = self.logctx.log.new(o!("component" => "external_dns_server")); let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - let dns = dns_server::InMemoryServer::new(&log).await.unwrap(); + let dns = dns_server::TransientServer::new(&log).await.unwrap(); - let SocketAddr::V6(dns_address) = *dns.server.local_address() else { + let SocketAddr::V6(dns_address) = *dns.dns_server.local_address() else { panic!("Unsupported IPv4 DNS address"); }; let SocketAddr::V6(dropshot_address) = dns.dropshot_server.local_addr() else { @@ -667,7 +667,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { pub async fn start_internal_dns(&mut self) { let log = self.logctx.log.new(o!("component" => "internal_dns_server")); let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); 
- let dns = dns_server::InMemoryServer::new(&log).await.unwrap(); + let dns = dns_server::TransientServer::new(&log).await.unwrap(); let SocketAddr::V6(address) = dns.dropshot_server.local_addr() else { panic!("Unsupported IPv4 DNS address"); diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index a59de58466..c5d8c86f6a 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -393,8 +393,10 @@ async fn test_silo_certificates() { // that was created when that Silo was created. We'll use this session to // create the other Silos and their users. let resolver = Arc::new( - CustomDnsResolver::new(*cptestctx.external_dns.server.local_address()) - .unwrap(), + CustomDnsResolver::new( + *cptestctx.external_dns.dns_server.local_address(), + ) + .unwrap(), ); let session_token = oxide_client::login( silo1.reqwest_client().dns_resolver(resolver.clone()), diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index da4a044067..8dd994dd7a 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -280,7 +280,7 @@ pub async fn run_standalone_server( info!(log, "sled agent started successfully"); // Start the Internal DNS server - let dns = dns_server::InMemoryServer::new(&log).await?; + let dns = dns_server::TransientServer::new(&log).await?; let mut dns_config_builder = internal_dns::DnsConfigBuilder::new(); // Start the Crucible Pantry @@ -378,6 +378,9 @@ pub async fn run_standalone_server( sled_id: config.id, zone_id: Some(Uuid::new_v4()), }); + + internal_services_ip_pool_ranges + .push(IpRange::V6(Ipv6Range { first: ip, last: ip })); } let recovery_silo = NexusTypes::RecoverySiloConfig { From 8c50d38b84fd5dd50723407f8ed521ce02c8ce02 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 22 Jun 2023 13:22:11 -0700 Subject: [PATCH 12/57] server to dns_server --- nexus/tests/integration_tests/initialization.rs | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 7aa4b54e9f..1b2a7f64a2 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -34,7 +34,7 @@ async fn test_nexus_boots_before_cockroach() { .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") - .server + .dns_server .local_address(), }; let nexus_config = builder.config.clone(); @@ -100,7 +100,7 @@ async fn test_nexus_boots_before_dendrite() { .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") - .server + .dns_server .local_address(), }; let nexus_config = builder.config.clone(); From f06942e068230f82ea366f81eadb60cf75a0f3ae Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 23 Jun 2023 08:48:47 -0700 Subject: [PATCH 13/57] Attempting to pull DNS resolver into progenitor client, tbd --- Cargo.lock | 2 + internal-dns/Cargo.toml | 2 + internal-dns/src/names.rs | 4 ++ internal-dns/src/resolver.rs | 98 ++++++++++++++++++++++++++++-------- sled-agent/src/mocks/mod.rs | 1 + sled-agent/src/nexus.rs | 32 ++++++------ 6 files changed, 102 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0e5c232b4..328aca5961 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3363,8 +3363,10 @@ dependencies = [ "dropshot", "expectorate", "futures", + "hyper", "omicron-common 0.1.0", "omicron-test-utils", + "reqwest", "serde_json", "sled", "slog", diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index c6e8f7dff4..b9ba996d1b 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -10,7 +10,9 @@ assert_matches.workspace = true chrono.workspace = true dns-service-client.workspace = true futures.workspace = true +hyper.workspace = true omicron-common.workspace = true +reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true 
thiserror.workspace = true trust-dns-proto.workspace = true diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 9da4d05c55..05423f870c 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -84,4 +84,8 @@ impl ServiceName { } } } + + pub fn srv_name(&self) -> String { + format!("{}.{DNS_ZONE}", self.dns_name()) + } } diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index b0c5ad0617..275e08b4c1 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -2,12 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::DNS_ZONE; +use hyper::client::connect::dns::Name; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, info}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::sync::Arc; use trust_dns_proto::rr::record_type::RecordType; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, @@ -21,19 +22,36 @@ pub enum ResolveError { #[error(transparent)] Resolve(#[from] trust_dns_resolver::error::ResolveError), - #[error("Record not found for SRV key: {}", .0.dns_name())] - NotFound(crate::ServiceName), + #[error("Record not found for SRV key: {0}")] + NotFound(String), #[error("Record not found for {0}")] NotFoundByString(String), } +struct Inner { + log: slog::Logger, + resolver: TokioAsyncResolver, +} + /// A wrapper around a DNS resolver, providing a way to conveniently /// look up IP addresses of services based on their SRV keys. 
#[derive(Clone)] pub struct Resolver { - log: slog::Logger, - inner: Box, + inner: Arc, +} + +type BoxError = Box; + +impl reqwest::dns::Resolve for Resolver { + fn resolve(&self, name: Name) -> reqwest::dns::Resolving { + let this = self.clone(); + Box::pin(async move { + this.lookup_sockets_v6_raw(name.as_str()) + .await + .map_err(|err| -> BoxError { Box::new(err) } ) + }) + } } impl Resolver { @@ -53,10 +71,10 @@ impl Resolver { bind_addr: None, }); } - let inner = - Box::new(TokioAsyncResolver::tokio(rc, ResolverOpts::default())?); + let resolver = + TokioAsyncResolver::tokio(rc, ResolverOpts::default())?; - Ok(Self { inner, log }) + Ok(Self { inner: Arc::new(Inner { log, resolver })}) } /// Convenience wrapper for [`Resolver::new_from_addrs`] that determines @@ -111,13 +129,13 @@ impl Resolver { &self, srv: crate::ServiceName, ) -> Result { - let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); - debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name); - let response = self.inner.ipv6_lookup(&name).await?; + let name = srv.srv_name(); + debug!(self.inner.log, "lookup_ipv6 srv"; "dns_name" => &name); + let response = self.inner.resolver.ipv6_lookup(&name).await?; let address = response .iter() .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; + .ok_or_else(|| ResolveError::NotFound(name))?; Ok(*address) } @@ -127,20 +145,20 @@ impl Resolver { &self, srv: crate::ServiceName, ) -> Result { - let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); - debug!(self.log, "lookup_socket_v6 srv"; "dns_name" => &name); - let response = self.inner.lookup(&name, RecordType::SRV).await?; + let name = srv.srv_name(); + debug!(self.inner.log, "lookup_socket_v6 srv"; "dns_name" => &name); + let response = self.inner.resolver.lookup(&name, RecordType::SRV).await?; let rdata = response .iter() .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; + .ok_or_else(|| ResolveError::NotFound(name.to_string()))?; Ok(match rdata { 
trust_dns_proto::rr::record_data::RData::SRV(srv) => { let name = srv.target(); let response = - self.inner.ipv6_lookup(&name.to_string()).await?; + self.inner.resolver.ipv6_lookup(&name.to_string()).await?; let address = response.iter().next().ok_or_else(|| { ResolveError::NotFoundByString(name.to_string()) @@ -157,17 +175,55 @@ impl Resolver { }) } + pub async fn lookup_sockets_v6_raw( + &self, + name: &str, + ) -> Result + Send>, ResolveError> { + debug!(self.inner.log, "lookup_socket_v6 srv"; "dns_name" => &name); + let response = self.inner.resolver.lookup(name, RecordType::SRV).await?; + + let rdata = response + .into_iter() + .next() + .ok_or_else(|| ResolveError::NotFound(name.to_string()))?; + + Ok(match rdata { + trust_dns_proto::rr::record_data::RData::SRV(srv) => { + let name = srv.target(); + let port = srv.port(); + Box::new( + self + .inner + .resolver + .ipv6_lookup(&name.to_string()) + .await? + .into_iter() + .map(move |ip| { + SocketAddr::V6(SocketAddrV6::new(ip, port, 0, 0)) + }) + ) + } + + _ => { + return Err(ResolveError::Resolve( + "SRV query did not return SRV RData!".into(), + )); + } + }) + } + + pub async fn lookup_ip( &self, srv: crate::ServiceName, ) -> Result { - let name = format!("{}.{}", srv.dns_name(), DNS_ZONE); - debug!(self.log, "lookup srv"; "dns_name" => &name); - let response = self.inner.lookup_ip(&name).await?; + let name = srv.srv_name(); + debug!(self.inner.log, "lookup srv"; "dns_name" => &name); + let response = self.inner.resolver.lookup_ip(&name).await?; let address = response .iter() .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; + .ok_or_else(|| ResolveError::NotFound(name))?; Ok(address) } } diff --git a/sled-agent/src/mocks/mod.rs b/sled-agent/src/mocks/mod.rs index 8577dec9f3..87fd1cb532 100644 --- a/sled-agent/src/mocks/mod.rs +++ b/sled-agent/src/mocks/mod.rs @@ -22,6 +22,7 @@ type Result = std::result::Result< mock! 
{ pub NexusClient { pub fn new(server_addr: &str, log: Logger) -> Self; + pub fn new_with_client(server_addr: &str, client: reqwest::Client, log: Logger) -> Self; pub fn client(&self) -> reqwest::Client; pub fn baseurl(&self) -> &'static str; pub async fn sled_agent_put( diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index db66297395..73bdaa55a7 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -13,14 +13,13 @@ use slog::Logger; use std::future::Future; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::pin::Pin; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use tokio::sync::mpsc; use tokio::task::JoinHandle; struct Inner { log: Logger, - resolver: Resolver, - address: Mutex>, + resolver: Arc, } /// Wrapper around a [`NexusClient`] object, which allows deferring @@ -46,11 +45,10 @@ impl LazyNexusClient { Ok(Self { inner: Arc::new(Inner { log: log.clone(), - resolver: Resolver::new_from_ip( + resolver: Arc::new(Resolver::new_from_ip( log.new(o!("component" => "DnsResolver")), addr, - )?, - address: Mutex::new(None), + )?), }), }) } @@ -62,11 +60,10 @@ impl LazyNexusClient { Ok(Self { inner: Arc::new(Inner { log: log.clone(), - resolver: Resolver::new_from_addrs( + resolver: Arc::new(Resolver::new_from_addrs( log.new(o!("component" => "DnsResolver")), dns_addrs, - )?, - address: Mutex::new(None), + )?), }), }) } @@ -76,20 +73,23 @@ impl LazyNexusClient { } pub async fn get_addr(&self) -> Result { - if let Some(addr) = self.inner.address.lock().unwrap().as_ref() { - return Ok(*addr); - } let addr = self.inner.resolver.lookup_socket_v6(ServiceName::Nexus).await?; - *self.inner.address.lock().unwrap() = Some(addr); Ok(addr) } pub async fn get(&self) -> Result { - let address = self.get_addr().await?; + let dns_name = ServiceName::Nexus.srv_name(); + + let client = reqwest::ClientBuilder::new() + .dns_resolver(self.inner.resolver.clone()) + .build() + // TODO: + .expect("Failed to build client"); - Ok(NexusClient::new( - 
&format!("http://{address}"), + Ok(NexusClient::new_with_client( + &format!("http://{dns_name}"), + client, self.inner.log.clone(), )) } From e745c76b7387bd904fd2788ca3ec8060150f9fb7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 23 Jun 2023 09:57:14 -0700 Subject: [PATCH 14/57] Create sled agent config parameter to specify internal DNS --- dns-server/src/lib.rs | 10 +++++++++- docs/how-to-run-simulated.adoc | 2 +- nexus/examples/config.toml | 2 +- sled-agent/src/bin/sled-agent-sim.rs | 6 ++++++ sled-agent/src/sim/server.rs | 8 +++++++- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index cde8b05e54..d6dd75b4bd 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -49,6 +49,7 @@ pub mod storage; use anyhow::{anyhow, Context}; use slog::o; +use std::net::SocketAddr; use trust_dns_resolver::config::NameServerConfig; use trust_dns_resolver::config::Protocol; use trust_dns_resolver::config::ResolverConfig; @@ -106,6 +107,13 @@ pub struct TransientServer { impl TransientServer { pub async fn new(log: &slog::Logger) -> Result { + Self::new_with_address(log, "[::1]:0".parse().unwrap()).await + } + + pub async fn new_with_address( + log: &slog::Logger, + dns_bind_address: SocketAddr, + ) -> Result { let storage_dir = tempfile::tempdir()?; let dns_log = log.new(o!("kind" => "dns")); @@ -126,7 +134,7 @@ impl TransientServer { let (dns_server, dropshot_server) = start_servers( dns_log, store, - &dns_server::Config { bind_address: "[::1]:0".parse().unwrap() }, + &dns_server::Config { bind_address: dns_bind_address }, &dropshot::ConfigDropshot { bind_address: "[::1]:0".parse().unwrap(), request_body_max_bytes: 4 * 1024 * 1024, diff --git a/docs/how-to-run-simulated.adoc b/docs/how-to-run-simulated.adoc index 5b0bc70475..a2ff8d38b6 100644 --- a/docs/how-to-run-simulated.adoc +++ b/docs/how-to-run-simulated.adoc @@ -178,7 +178,7 @@ $ cargo run --bin=dns-server -- --config-file 
dns-server/examples/config.toml -- + [source,text] ---- -$ cargo run --bin=sled-agent-sim -- $(uuidgen) [::1]:12345 [::1]:12221 --rss-nexus-external-addr 127.0.0.1:12220 --rss-external-dns-internal-addr [::1]:5353 +$ cargo run --bin=sled-agent-sim -- $(uuidgen) [::1]:12345 [::1]:12221 --rss-nexus-external-addr 127.0.0.1:12220 --rss-external-dns-internal-addr [::1]:5353 --rss-internal-dns-dns-addr [::1]:3535 ---- . `oximeter` is similar to `nexus`, requiring a configuration file. You can use `oximeter/collector/config.toml`, and the whole thing can be run with: diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 626625ec2c..3da4f67082 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -56,7 +56,7 @@ request_body_max_bytes = 1048576 # If you're using `omicron-dev run-all`, this is value is overwritten # by the address / port created after starting the Internal DNS server. type = "from_address" -address = "[::1]:5353" +address = "[::1]:3535" [deployment.database] # URL for connecting to the database diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index b4903811c4..5cbfb48355 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -67,6 +67,11 @@ struct Args { /// Nexus to publish DNS names to external DNS. rss_external_dns_internal_addr: Option, + #[clap(long, name = "INTERNAL_DNS_INTERNAL_IP:PORT", action)] + /// If specified, the sled agent will create a DNS server exposing the + /// following socket address for the DNS interface. 
+ rss_internal_dns_dns_addr: Option, + #[clap(long, name = "TLS_CERT_PEM_FILE", action)] /// If this flag and TLS_KEY_PEM_FILE are specified, when the simulated sled /// agent initializes the rack, the specified certificate and private keys @@ -136,6 +141,7 @@ async fn do_run() -> Result<(), CmdError> { let rss_args = RssArgs { nexus_external_addr: args.rss_nexus_external_addr, external_dns_internal_addr: args.rss_external_dns_internal_addr, + internal_dns_dns_addr: args.rss_internal_dns_dns_addr, tls_certificate, }; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 8dd994dd7a..051d0ce3ef 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -241,6 +241,8 @@ pub struct RssArgs { /// Specify the (internal) address of an external DNS server so that Nexus /// will know about it and keep it up to date pub external_dns_internal_addr: Option, + /// Specify the (dns) address of an internal DNS server + pub internal_dns_dns_addr: Option, /// Specify a certificate and associated private key for the initial Silo's /// initial TLS certificates pub tls_certificate: Option, @@ -280,7 +282,11 @@ pub async fn run_standalone_server( info!(log, "sled agent started successfully"); // Start the Internal DNS server - let dns = dns_server::TransientServer::new(&log).await?; + let dns = if let Some(addr) = rss_args.internal_dns_dns_addr { + dns_server::TransientServer::new_with_address(&log, addr.into()).await? + } else { + dns_server::TransientServer::new(&log).await? 
+ }; let mut dns_config_builder = internal_dns::DnsConfigBuilder::new(); // Start the Crucible Pantry From 4ea32bce14ea2fb64613201c8c9b0908f29e847d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 25 Jun 2023 17:24:23 -0700 Subject: [PATCH 15/57] Test that we can use the internal DNS resolver with progenitor --- Cargo.lock | 2 + internal-dns/Cargo.toml | 2 + internal-dns/src/resolver.rs | 214 ++++++++++++++++++++- internal-dns/tests/output/test-server.json | 70 +++++++ 4 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 internal-dns/tests/output/test-server.json diff --git a/Cargo.lock b/Cargo.lock index fdbd53f2ea..722f6e7b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3366,7 +3366,9 @@ dependencies = [ "hyper", "omicron-common 0.1.0", "omicron-test-utils", + "progenitor", "reqwest", + "serde", "serde_json", "sled", "slog", diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index b9ba996d1b..8a44cdba88 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -24,6 +24,8 @@ dropshot.workspace = true dns-server.workspace = true expectorate.workspace = true omicron-test-utils.workspace = true +progenitor.workspace = true +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true sled.workspace = true tempfile.workspace = true diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 275e08b4c1..2567b805e7 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -236,7 +236,7 @@ mod test { use anyhow::Context; use assert_matches::assert_matches; use dns_service_client::types::DnsConfigParams; - use dropshot::HandlerTaskMode; + use dropshot::{endpoint, ApiDescription, HandlerTaskMode, HttpError, HttpResponseOk, RequestContext}; use omicron_test_utils::dev::test_setup_log; use slog::{o, Logger}; use std::collections::HashMap; @@ -571,4 +571,216 @@ mod test { dns_server.cleanup_successful(); logctx.cleanup_successful(); } + + // What follows is a "test 
endpoint" to validate that the integration of + // the DNS server, resolver, and progenitor all work together correctly. + + #[endpoint { + method = GET, + path = "/test", + }] + async fn test_endpoint( + rqctx: RequestContext, + ) -> Result, HttpError> { + Ok(HttpResponseOk(*rqctx.context())) + } + + fn api() -> ApiDescription { + let mut api = ApiDescription::new(); + api.register(test_endpoint).unwrap(); + api + } + + progenitor::generate_api!( + spec = "tests/output/test-server.json", + inner_type = slog::Logger, + pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { + slog::debug!(log, "client request"; + "method" => %request.method(), + "uri" => %request.url(), + "body" => ?&request.body(), + ); + }), + post_hook = (|log: &slog::Logger, result: &Result<_, _>| { + slog::debug!(log, "client response"; "result" => ?result); + }), + ); + + // Verify that we have an up-to-date representation + // of this server's API as JSON. + // + // We'll need this to be up-to-date to have a reliable + // Progenitor client. + fn expect_openapi_json_valid_for_test_server() { + let api = api(); + let openapi = api.openapi("Test Server", "v0.1.0"); + let mut output = std::io::Cursor::new(Vec::new()); + openapi.write(&mut output).unwrap(); + expectorate::assert_contents( + "tests/output/test-server.json", + std::str::from_utf8(&output.into_inner()).unwrap(), + ); + } + + fn start_test_server(log: slog::Logger, label: u32) -> dropshot::HttpServer { + let config_dropshot = dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + ..Default::default() + }; + dropshot::HttpServerStarter::new( + &config_dropshot, + api(), + label, + &log, + ).unwrap().start() + } + + #[tokio::test] + async fn resolver_can_be_used_with_progenitor_client() { + let logctx = test_setup_log("resolver_can_be_used_with_progenitor_client"); + + // Confirm that we can create a progenitor client for this server. 
+ expect_openapi_json_valid_for_test_server(); + + // Next, create a DNS server, and a corresponding resolver. + let dns_server = DnsServer::create(&logctx.log).await; + let resolver = Resolver::new_from_addrs( + logctx.log.clone(), + vec![dns_server.dns_server.local_address().clone()], + ).unwrap(); + + // Start a test server, but don't register it with the DNS server (yet). + let label = 1234; + let server = start_test_server(logctx.log.clone(), label); + let ip = match server.local_addr().ip() { + std::net::IpAddr::V6(ip) => ip, + _ => panic!("Expected IPv6"), + }; + let port = server.local_addr().port(); + + // Use the resolver -- referencing our DNS server -- in the construction + // of a progenitor client. + // + // We'll use the SRV record for Nexus, even though it's just our + // standalone test server. + let dns_name = crate::ServiceName::Nexus.srv_name(); + let reqwest_client = reqwest::ClientBuilder::new() + .dns_resolver(resolver.clone().into()) + .build() + .expect("Failed to build client"); + + // NOTE: We explicitly pass the port here, before DNS resolution, + // because the DNS support in reqwest does not actually use the ports + // returned by the resolver. + let client = Client::new_with_client( + &format!("http://{dns_name}:{port}"), + reqwest_client, + logctx.log.clone(), + ); + + // The DNS server is running, but has no records. Expect a failure. + let err = client.test_endpoint().await.unwrap_err(); + assert!( + err.to_string().contains("no record found"), + "Unexpected Error (expected 'no record found'): {err}", + ); + + // Add a record for the new service. + let mut dns_config = DnsConfigBuilder::new(); + let zone = dns_config.host_zone(Uuid::new_v4(), ip).unwrap(); + dns_config + .service_backend_zone(ServiceName::Nexus, &zone, port) + .unwrap(); + let dns_config = dns_config.build(); + dns_server.update(&dns_config).await.unwrap(); + + // Confirm that we can access this record manually. 
+ let found_ip = resolver + .lookup_ipv6(ServiceName::Nexus) + .await + .expect("Should have been able to look up IP address"); + assert_eq!(found_ip, ip); + + // Confirm that the progenitor client can access this record too. + let value = client.test_endpoint().await.unwrap(); + assert_eq!(value.into_inner(), label); + + server.close().await.expect("Failed to stop test server"); + dns_server.cleanup_successful(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn resolver_can_access_backup_dns_server() { + let logctx = test_setup_log("resolver_can_access_backup_dns_server"); + + // Confirm that we can create a progenitor client for this server. + expect_openapi_json_valid_for_test_server(); + + // Create DNS servers, and a corresponding resolver. + let dns_server1 = DnsServer::create(&logctx.log).await; + let dns_server2 = DnsServer::create(&logctx.log).await; + let resolver = Resolver::new_from_addrs( + logctx.log.clone(), + vec![ + dns_server1.dns_server.local_address().clone(), + dns_server2.dns_server.local_address().clone(), + ], + ).unwrap(); + + // Start a test server, but don't register it with the DNS server (yet). + let label = 1234; + let server = start_test_server(logctx.log.clone(), label); + let ip = match server.local_addr().ip() { + std::net::IpAddr::V6(ip) => ip, + _ => panic!("Expected IPv6"), + }; + let port = server.local_addr().port(); + + // Use the resolver -- referencing our DNS server -- in the construction + // of a progenitor client. + // + // We'll use the SRV record for Nexus, even though it's just our + // standalone test server. + let dns_name = crate::ServiceName::Nexus.srv_name(); + let reqwest_client = reqwest::ClientBuilder::new() + .dns_resolver(resolver.clone().into()) + .build() + .expect("Failed to build client"); + + // NOTE: We explicitly pass the port here, before DNS resolution, + // because the DNS support in reqwest does not actually use the ports + // returned by the resolver. 
+ let client = Client::new_with_client( + &format!("http://{dns_name}:{port}"), + reqwest_client, + logctx.log.clone(), + ); + + // The DNS server is running, but has no records. Expect a failure. + let err = client.test_endpoint().await.unwrap_err(); + assert!( + err.to_string().contains("no record found"), + "Unexpected Error (expected 'no record found'): {err}", + ); + + // Add a record for the new service, but only to the second DNS server. + let mut dns_config = DnsConfigBuilder::new(); + let zone = dns_config.host_zone(Uuid::new_v4(), ip).unwrap(); + dns_config + .service_backend_zone(ServiceName::Nexus, &zone, port) + .unwrap(); + let dns_config = dns_config.build(); + dns_server2.update(&dns_config).await.unwrap(); + + // Confirm that the progenitor client can access this record, + // even though the first DNS server doesn't know about it. + let value = client.test_endpoint().await.unwrap(); + assert_eq!(value.into_inner(), label); + + server.close().await.expect("Failed to stop test server"); + dns_server1.cleanup_successful(); + dns_server2.cleanup_successful(); + logctx.cleanup_successful(); + } } diff --git a/internal-dns/tests/output/test-server.json b/internal-dns/tests/output/test-server.json new file mode 100644 index 0000000000..5f4d6d155e --- /dev/null +++ b/internal-dns/tests/output/test-server.json @@ -0,0 +1,70 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Test Server", + "version": "v0.1.0" + }, + "paths": { + "/test": { + "get": { + "operationId": "test_endpoint", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "uint32", + "type": "integer", + "format": "uint32", + "minimum": 0 + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + 
"$ref": "#/components/schemas/Error" + } + } + } + } + }, + "schemas": { + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + } + } + } +} \ No newline at end of file From 05109dc8b49790753290c8e8683c0189e170b203 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 25 Jun 2023 20:30:23 -0700 Subject: [PATCH 16/57] Deprecate 'LazyNexusClient', relying on internal DNS resolver instead --- internal-dns/src/resolver.rs | 53 ++++++++------- sled-agent/src/instance.rs | 47 +++++++------ sled-agent/src/instance_manager.rs | 32 ++++----- sled-agent/src/mocks/mod.rs | 4 ++ sled-agent/src/nexus.rs | 102 +++++++++++------------------ sled-agent/src/server.rs | 14 ++-- sled-agent/src/sled_agent.rs | 27 ++++---- sled-agent/src/storage_manager.rs | 37 +++++------ 8 files changed, 149 insertions(+), 167 deletions(-) diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 2567b805e7..05e58e99fb 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -49,7 +49,7 @@ impl reqwest::dns::Resolve for Resolver { Box::pin(async move { this.lookup_sockets_v6_raw(name.as_str()) .await - .map_err(|err| -> BoxError { Box::new(err) } ) + .map_err(|err| -> BoxError { Box::new(err) }) }) } } @@ -71,10 +71,9 @@ impl Resolver { bind_addr: None, }); } - let resolver = - TokioAsyncResolver::tokio(rc, ResolverOpts::default())?; + let resolver = TokioAsyncResolver::tokio(rc, ResolverOpts::default())?; - Ok(Self { inner: Arc::new(Inner { log, resolver })}) + Ok(Self { inner: Arc::new(Inner { log, resolver }) }) } /// Convenience wrapper for [`Resolver::new_from_addrs`] that determines @@ -147,7 +146,8 @@ impl Resolver { ) -> Result { let name = srv.srv_name(); debug!(self.inner.log, "lookup_socket_v6 srv"; "dns_name" => &name); - 
let response = self.inner.resolver.lookup(&name, RecordType::SRV).await?; + let response = + self.inner.resolver.lookup(&name, RecordType::SRV).await?; let rdata = response .iter() @@ -180,7 +180,8 @@ impl Resolver { name: &str, ) -> Result + Send>, ResolveError> { debug!(self.inner.log, "lookup_socket_v6 srv"; "dns_name" => &name); - let response = self.inner.resolver.lookup(name, RecordType::SRV).await?; + let response = + self.inner.resolver.lookup(name, RecordType::SRV).await?; let rdata = response .into_iter() @@ -192,15 +193,14 @@ impl Resolver { let name = srv.target(); let port = srv.port(); Box::new( - self - .inner + self.inner .resolver .ipv6_lookup(&name.to_string()) .await? .into_iter() .map(move |ip| { SocketAddr::V6(SocketAddrV6::new(ip, port, 0, 0)) - }) + }), ) } @@ -212,7 +212,6 @@ impl Resolver { }) } - pub async fn lookup_ip( &self, srv: crate::ServiceName, @@ -236,7 +235,10 @@ mod test { use anyhow::Context; use assert_matches::assert_matches; use dns_service_client::types::DnsConfigParams; - use dropshot::{endpoint, ApiDescription, HandlerTaskMode, HttpError, HttpResponseOk, RequestContext}; + use dropshot::{ + endpoint, ApiDescription, HandlerTaskMode, HttpError, HttpResponseOk, + RequestContext, + }; use omicron_test_utils::dev::test_setup_log; use slog::{o, Logger}; use std::collections::HashMap; @@ -622,22 +624,23 @@ mod test { ); } - fn start_test_server(log: slog::Logger, label: u32) -> dropshot::HttpServer { + fn start_test_server( + log: slog::Logger, + label: u32, + ) -> dropshot::HttpServer { let config_dropshot = dropshot::ConfigDropshot { bind_address: "[::1]:0".parse().unwrap(), ..Default::default() }; - dropshot::HttpServerStarter::new( - &config_dropshot, - api(), - label, - &log, - ).unwrap().start() + dropshot::HttpServerStarter::new(&config_dropshot, api(), label, &log) + .unwrap() + .start() } #[tokio::test] async fn resolver_can_be_used_with_progenitor_client() { - let logctx = 
test_setup_log("resolver_can_be_used_with_progenitor_client"); + let logctx = + test_setup_log("resolver_can_be_used_with_progenitor_client"); // Confirm that we can create a progenitor client for this server. expect_openapi_json_valid_for_test_server(); @@ -646,8 +649,9 @@ mod test { let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![dns_server.dns_server.local_address().clone()], - ).unwrap(); + vec![*dns_server.dns_server.local_address()], + ) + .unwrap(); // Start a test server, but don't register it with the DNS server (yet). let label = 1234; @@ -723,10 +727,11 @@ mod test { let resolver = Resolver::new_from_addrs( logctx.log.clone(), vec![ - dns_server1.dns_server.local_address().clone(), - dns_server2.dns_server.local_address().clone(), + *dns_server1.dns_server.local_address(), + *dns_server2.dns_server.local_address(), ], - ).unwrap(); + ) + .unwrap(); // Start a test server, but don't register it with the DNS server (yet). let label = 1234; diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 4233b6eeea..e17f2514eb 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -9,7 +9,7 @@ use crate::common::instance::{ PublishedInstanceState, }; use crate::instance_manager::InstanceTicket; -use crate::nexus::LazyNexusClient; +use crate::nexus::NexusClientWithResolver; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, @@ -235,7 +235,7 @@ struct InstanceInner { running_state: Option, // Connection to Nexus - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, // Object representing membership in the "instance manager". instance_ticket: InstanceTicket, @@ -266,10 +266,8 @@ impl InstanceInner { "state" => ?state, ); - self.lazy_nexus_client - .get() - .await - .map_err(|e| backoff::BackoffError::transient(e.into()))? 
+ self.nexus_client + .client() .cpapi_instances_put(self.id(), &state.into()) .await .map_err(|err| -> backoff::BackoffError { @@ -574,7 +572,7 @@ mockall::mock! { initial: InstanceHardware, vnic_allocator: VnicAllocator, port_manager: PortManager, - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, ) -> Result; pub async fn current_state(&self) -> InstanceRuntimeState; pub async fn put_state( @@ -611,7 +609,7 @@ impl Instance { /// lengths, otherwise the UUID would be used instead). /// * `port_manager`: Handle to the object responsible for managing OPTE /// ports. - /// * `lazy_nexus_client`: Connection to Nexus, used for sending notifications. + /// * `nexus_client`: Connection to Nexus, used for sending notifications. // TODO: This arg list is getting a little long; can we clean this up? #[allow(clippy::too_many_arguments)] pub fn new( @@ -621,7 +619,7 @@ impl Instance { initial: InstanceHardware, vnic_allocator: VnicAllocator, port_manager: PortManager, - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, ) -> Result { info!(log, "Instance::new w/initial HW: {:?}", initial); let instance = InstanceInner { @@ -651,7 +649,7 @@ impl Instance { cloud_init_bytes: initial.cloud_init_bytes, state: InstanceStates::new(initial.runtime), running_state: None, - lazy_nexus_client, + nexus_client, instance_ticket: ticket, }; @@ -946,7 +944,20 @@ impl Instance { &format!("config/server_addr={}", server_addr), ])?; - let metric_addr = inner.lazy_nexus_client.get_ip().await.unwrap(); + // TODO: We should not be using the resolver here to lookup the Nexus IP + // address. It would be preferable for Propolis, and through Propolis, + // Oximeter, to access the Nexus internal interface using a progenitor + // resolver that relies on a DNS resolver. + // + // - With the current implementation: if Nexus' IP address changes, this + // breaks. 
+ // - With a DNS resolver: the metric producer would be able to continue + // sending requests to new servers as they arise. + let metric_addr = inner + .nexus_client + .resolver() + .lookup_ipv6(internal_dns::ServiceName::Nexus) + .await?; info!( inner.log, "Setting metric address property address [{}]:{}", @@ -1087,7 +1098,7 @@ impl Instance { mod test { use super::*; use crate::instance_manager::InstanceManager; - use crate::nexus::LazyNexusClient; + use crate::nexus::NexusClientWithResolver; use crate::params::InstanceStateRequested; use chrono::Utc; use illumos_utils::dladm::Etherstub; @@ -1164,14 +1175,12 @@ mod test { 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, ); let port_manager = PortManager::new(log.new(slog::o!()), underlay_ip); - let lazy_nexus_client = LazyNexusClient::new_from_subnet( - log.clone(), - std::net::Ipv6Addr::LOCALHOST, - ) - .unwrap(); + let nexus_client = + NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) + .unwrap(); let instance_manager = InstanceManager::new( log.clone(), - lazy_nexus_client.clone(), + nexus_client.clone(), Etherstub("mylink".to_string()), port_manager.clone(), ) @@ -1184,7 +1193,7 @@ mod test { new_initial_instance(), vnic_allocator, port_manager, - lazy_nexus_client, + nexus_client, ) .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index f8bd065ce0..eccf1daa0f 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -4,7 +4,7 @@ //! API for controlling multiple instances on a sled. 
-use crate::nexus::LazyNexusClient; +use crate::nexus::NexusClientWithResolver; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, @@ -45,7 +45,7 @@ pub enum Error { struct InstanceManagerInternal { log: Logger, - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, /// Last set size of the VMM reservoir (in bytes) reservoir_size: Mutex, @@ -69,14 +69,14 @@ impl InstanceManager { /// Initializes a new [`InstanceManager`] object. pub fn new( log: Logger, - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, ) -> Result { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { log: log.new(o!("component" => "InstanceManager")), - lazy_nexus_client, + nexus_client, // no reservoir size set on startup reservoir_size: Mutex::new(ByteCount::from_kibibytes_u32(0)), @@ -204,7 +204,7 @@ impl InstanceManager { initial_hardware, self.inner.vnic_allocator.clone(), self.inner.port_manager.clone(), - self.inner.lazy_nexus_client.clone(), + self.inner.nexus_client.clone(), )?; let instance_clone = instance.clone(); let _old = instances @@ -362,7 +362,7 @@ impl Drop for InstanceTicket { mod test { use super::*; use crate::instance::MockInstance; - use crate::nexus::LazyNexusClient; + use crate::nexus::NexusClientWithResolver; use crate::params::InstanceStateRequested; use chrono::Utc; use illumos_utils::dladm::Etherstub; @@ -416,11 +416,9 @@ mod test { async fn ensure_instance() { let logctx = test_setup_log("ensure_instance"); let log = &logctx.log; - let lazy_nexus_client = LazyNexusClient::new_from_subnet( - log.clone(), - std::net::Ipv6Addr::LOCALHOST, - ) - .unwrap(); + let nexus_client = + NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. 
@@ -439,7 +437,7 @@ mod test { ); let im = InstanceManager::new( log.clone(), - lazy_nexus_client, + nexus_client, Etherstub("mylink".to_string()), port_manager, ) @@ -534,11 +532,9 @@ mod test { async fn ensure_instance_state_repeatedly() { let logctx = test_setup_log("ensure_instance_repeatedly"); let log = &logctx.log; - let lazy_nexus_client = LazyNexusClient::new_from_subnet( - log.clone(), - std::net::Ipv6Addr::LOCALHOST, - ) - .unwrap(); + let nexus_client = + NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) + .unwrap(); // Instance Manager creation. @@ -556,7 +552,7 @@ mod test { ); let im = InstanceManager::new( log.clone(), - lazy_nexus_client, + nexus_client, Etherstub("mylink".to_string()), port_manager, ) diff --git a/sled-agent/src/mocks/mod.rs b/sled-agent/src/mocks/mod.rs index 87fd1cb532..617eeb2d01 100644 --- a/sled-agent/src/mocks/mod.rs +++ b/sled-agent/src/mocks/mod.rs @@ -66,4 +66,8 @@ mock! { request: &RackInitializationRequest, ) -> Result<()>; } + + impl Clone for NexusClient { + fn clone(&self) -> Self; + } } diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 73bdaa55a7..0729ba3f78 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -9,89 +9,63 @@ pub use nexus_client::Client as NexusClient; use internal_dns::resolver::{ResolveError, Resolver}; use internal_dns::ServiceName; +use omicron_common::address::NEXUS_INTERNAL_PORT; use slog::Logger; use std::future::Future; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::Ipv6Addr; use std::pin::Pin; use std::sync::Arc; use tokio::sync::mpsc; use tokio::task::JoinHandle; -struct Inner { - log: Logger, - resolver: Arc, -} - -/// Wrapper around a [`NexusClient`] object, which allows deferring -/// the DNS lookup until accessed. +/// A thin wrapper over a progenitor-generated NexusClient. 
/// -/// Without the assistance of OS-level DNS lookups, the [`NexusClient`] -/// interface requires knowledge of the target service IP address. -/// For some services, like Nexus, this can be painful, as the IP address -/// may not have even been allocated when the Sled Agent starts. -/// -/// This structure allows clients to access the client on-demand, performing -/// the DNS lookup only once it is actually needed. +/// Also attaches the "DNS resolver" for historical reasons. #[derive(Clone)] -pub struct LazyNexusClient { - inner: Arc, +pub struct NexusClientWithResolver { + client: NexusClient, + resolver: Arc, } -impl LazyNexusClient { - pub fn new_from_subnet( - log: Logger, - addr: Ipv6Addr, +impl NexusClientWithResolver { + pub fn new( + log: &Logger, + sled_agent_address: Ipv6Addr, ) -> Result { - Ok(Self { - inner: Arc::new(Inner { - log: log.clone(), - resolver: Arc::new(Resolver::new_from_ip( - log.new(o!("component" => "DnsResolver")), - addr, - )?), - }), - }) - } + let resolver = Arc::new(Resolver::new_from_ip( + log.new(o!("component" => "DnsResolver")), + sled_agent_address, + )?); - pub fn new_from_dns( - log: Logger, - dns_addrs: Vec, - ) -> Result { + let client = reqwest::ClientBuilder::new() + .dns_resolver(resolver.clone()) + .build() + .expect("Failed to build client"); + + let dns_name = ServiceName::Nexus.srv_name(); Ok(Self { - inner: Arc::new(Inner { - log: log.clone(), - resolver: Arc::new(Resolver::new_from_addrs( - log.new(o!("component" => "DnsResolver")), - dns_addrs, - )?), - }), + client: NexusClient::new_with_client( + &format!("http://{dns_name}:{NEXUS_INTERNAL_PORT}"), + client, + log.new(o!("component" => "NexusClient")), + ), + resolver, }) } - pub async fn get_ip(&self) -> Result { - self.get_addr().await.map(|addr| *addr.ip()) - } - - pub async fn get_addr(&self) -> Result { - let addr = - self.inner.resolver.lookup_socket_v6(ServiceName::Nexus).await?; - Ok(addr) + /// Access the progenitor-based Nexus Client. 
+ pub fn client(&self) -> &NexusClient { + &self.client } - pub async fn get(&self) -> Result { - let dns_name = ServiceName::Nexus.srv_name(); - - let client = reqwest::ClientBuilder::new() - .dns_resolver(self.inner.resolver.clone()) - .build() - // TODO: - .expect("Failed to build client"); - - Ok(NexusClient::new_with_client( - &format!("http://{dns_name}"), - client, - self.inner.log.clone(), - )) + /// Access the DNS resolver used by the Nexus Client. + /// + /// WARNING: If you're using this resolver to access an IP address of + /// another service, be aware that it might change if that service moves + /// around! Be cautious when accessing and persisting IP addresses of other + /// services. + pub fn resolver(&self) -> &Arc { + &self.resolver } } diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index e6002292e9..d9c968d108 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -8,7 +8,7 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::StartSledAgentRequest; -use crate::nexus::LazyNexusClient; +use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; use slog::Logger; @@ -41,17 +41,15 @@ impl Server { ) -> Result { info!(log, "setting up sled agent server"); - let client_log = log.new(o!("component" => "NexusClient")); - - let addr = request.sled_address(); - let lazy_nexus_client = - LazyNexusClient::new_from_subnet(client_log, *addr.ip()) + let sled_address = request.sled_address(); + let nexus_client = + NexusClientWithResolver::new(&log, *sled_address.ip()) .map_err(|e| e.to_string())?; let sled_agent = SledAgent::new( &config, log.clone(), - lazy_nexus_client.clone(), + nexus_client, request, services, storage, @@ -61,7 +59,7 @@ impl Server { let mut dropshot_config = dropshot::ConfigDropshot::default(); dropshot_config.request_body_max_bytes = 1024 * 1024; - 
dropshot_config.bind_address = SocketAddr::V6(addr); + dropshot_config.bind_address = SocketAddr::V6(sled_address); let dropshot_log = log.new(o!("component" => "dropshot (SledAgent)")); let http_server = dropshot::HttpServerStarter::new( &dropshot_config, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 0e63112966..9bb1c5b566 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -7,7 +7,7 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; use crate::instance_manager::InstanceManager; -use crate::nexus::{LazyNexusClient, NexusRequestQueue}; +use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, @@ -177,8 +177,8 @@ struct SledAgentInner { // Other Oxide-controlled services running on this Sled. services: ServiceManager, - // Lazily-acquired connection to Nexus. - lazy_nexus_client: LazyNexusClient, + // Connection to Nexus. + nexus_client: NexusClientWithResolver, // A serialized request queue for operations interacting with Nexus. 
nexus_request_queue: NexusRequestQueue, @@ -204,7 +204,7 @@ impl SledAgent { pub async fn new( config: &Config, log: Logger, - lazy_nexus_client: LazyNexusClient, + nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, storage: StorageManager, @@ -248,7 +248,7 @@ impl SledAgent { storage .setup_underlay_access(storage_manager::UnderlayAccess { - lazy_nexus_client: lazy_nexus_client.clone(), + nexus_client: nexus_client.clone(), sled_id: request.id, }) .await?; @@ -258,7 +258,7 @@ impl SledAgent { let instances = InstanceManager::new( parent_log.clone(), - lazy_nexus_client.clone(), + nexus_client.clone(), etherstub.clone(), port_manager.clone(), )?; @@ -307,7 +307,7 @@ impl SledAgent { updates, port_manager, services, - lazy_nexus_client, + nexus_client, // TODO(https://github.com/oxidecomputer/omicron/issues/1917): // Propagate usage of this request queue throughout the Sled Agent. @@ -431,7 +431,7 @@ impl SledAgent { // Sends a request to Nexus informing it that the current sled exists. 
fn notify_nexus_about_self(&self, log: &Logger) { let sled_id = self.inner.id; - let lazy_nexus_client = self.inner.lazy_nexus_client.clone(); + let nexus_client = self.inner.nexus_client.clone(); let sled_address = self.inner.sled_address(); let is_scrimlet = self.inner.hardware.is_scrimlet(); let baseboard = nexus_client::types::Baseboard::from( @@ -464,11 +464,8 @@ impl SledAgent { nexus_client::types::SledRole::Gimlet }; - let nexus_client = lazy_nexus_client - .get() - .await - .map_err(|err| BackoffError::transient(err.to_string()))?; nexus_client + .client() .sled_agent_put( &sled_id, &nexus_client::types::SledAgentStartupInfo { @@ -668,8 +665,10 @@ impl SledAgent { &self, artifact: UpdateArtifactId, ) -> Result<(), Error> { - let nexus_client = self.inner.lazy_nexus_client.get().await?; - self.inner.updates.download_artifact(artifact, &nexus_client).await?; + self.inner + .updates + .download_artifact(artifact, &self.inner.nexus_client.client()) + .await?; Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index c573dbe6a2..38eb3fd799 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -4,7 +4,7 @@ //! Management of sled-local storage. -use crate::nexus::LazyNexusClient; +use crate::nexus::NexusClientWithResolver; use crate::params::DatasetKind; use crate::storage::dataset::DatasetName; use camino::Utf8PathBuf; @@ -279,7 +279,7 @@ impl StorageResources { /// Describes the access to the underlay used by the StorageManager. 
pub struct UnderlayAccess { - pub lazy_nexus_client: LazyNexusClient, + pub nexus_client: NexusClientWithResolver, pub sled_id: Uuid, } @@ -374,15 +374,10 @@ impl StorageWorker { return Err(backoff::BackoffError::transient(Error::UnderlayNotInitialized.to_string())); }; let sled_id = underlay.sled_id; - let lazy_nexus_client = underlay.lazy_nexus_client.clone(); + let nexus_client = underlay.nexus_client.client().clone(); drop(underlay_guard); - lazy_nexus_client - .get() - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })? + nexus_client .zpool_put(&sled_id, &pool_id, &zpool_request) .await .map_err(|e| { @@ -693,13 +688,9 @@ impl StorageWorker { return Err(backoff::BackoffError::transient(Error::UnderlayNotInitialized.to_string())); }; let sled_id = underlay.sled_id; - let lazy_nexus_client = underlay.lazy_nexus_client.clone(); + let nexus_client = underlay.nexus_client.client().clone(); drop(underlay_guard); - let nexus = lazy_nexus_client.get().await.map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - match &disk { NotifyDiskRequest::Add { identity, variant } => { let request = PhysicalDiskPutRequest { @@ -712,9 +703,12 @@ impl StorageWorker { }, sled_id, }; - nexus.physical_disk_put(&request).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?; + nexus_client + .physical_disk_put(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; } NotifyDiskRequest::Remove(disk_identity) => { let request = PhysicalDiskDeleteRequest { @@ -723,9 +717,12 @@ impl StorageWorker { vendor: disk_identity.vendor.clone(), sled_id, }; - nexus.physical_disk_delete(&request).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?; + nexus_client + .physical_disk_delete(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; } } Ok(()) From 55b0178e31347a4863a074067a64f3470e67e5ec Mon Sep 17 00:00:00 2001 From: Sean Klein 
Date: Sun, 25 Jun 2023 20:46:44 -0700 Subject: [PATCH 17/57] clippy, comments --- internal-dns/src/names.rs | 1 + internal-dns/src/resolver.rs | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 05423f870c..3f663263e1 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -85,6 +85,7 @@ impl ServiceName { } } + /// Returns the full DNS name of this service pub fn srv_name(&self) -> String { format!("{}.{DNS_ZONE}", self.dns_name()) } diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 05e58e99fb..270e8caa83 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -43,6 +43,8 @@ pub struct Resolver { type BoxError = Box; +// By implementing this trait, [Resolver] can be used as an argument to +// [reqwest::ClientBuilder::dns_resolver]. impl reqwest::dns::Resolve for Resolver { fn resolve(&self, name: Name) -> reqwest::dns::Resolving { let this = self.clone(); @@ -175,11 +177,15 @@ impl Resolver { }) } - pub async fn lookup_sockets_v6_raw( + // Returns an iterator of SocketAddrs for the specified SRV name. + // + // Acts on a raw string for compatibility with the reqwest::dns::Resolve + // trait, rather than a strongly-typed service name. 
+ async fn lookup_sockets_v6_raw( &self, name: &str, ) -> Result + Send>, ResolveError> { - debug!(self.inner.log, "lookup_socket_v6 srv"; "dns_name" => &name); + debug!(self.inner.log, "lookup_sockets_v6_raw srv"; "dns_name" => &name); let response = self.inner.resolver.lookup(name, RecordType::SRV).await?; From fb74e5c9e7af41e804c383f7cae54567d05d430c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 25 Jun 2023 21:07:35 -0700 Subject: [PATCH 18/57] Comments --- internal-dns/src/resolver.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 270e8caa83..d7b3a865d2 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -57,6 +57,7 @@ impl reqwest::dns::Resolve for Resolver { } impl Resolver { + /// Construct a new DNS resolver from specific DNS server addresses. pub fn new_from_addrs( log: slog::Logger, dns_addrs: Vec, @@ -78,7 +79,7 @@ impl Resolver { Ok(Self { inner: Arc::new(Inner { log, resolver }) }) } - /// Convenience wrapper for [`Resolver::new_from_addrs`] that determines + /// Convenience wrapper for [`Resolver::new_from_subnet`] that determines /// the subnet based on a provided IP address and then uses the DNS /// resolvers for that subnet. pub fn new_from_ip( @@ -111,6 +112,14 @@ impl Resolver { .collect() } + /// Create a DNS resolver using the implied DNS servers within this subnet. + /// + /// The addresses of the DNS servers are inferred within an Availability + /// Zone's subnet: normally, each rack within an AZ (/48) gets a unique + /// subnet (/56), but the FIRST /56 is reserved for internal DNS servers. + /// + /// For more details on this "reserved" rack subnet, refer to + /// [omicron_common::address::ReservedRackSubnet]. 
pub fn new_from_subnet( log: slog::Logger, subnet: Ipv6Subnet, From 259f5ec1975cf1c28c8166d9d96ee37256250eaa Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 25 Jun 2023 23:45:56 -0700 Subject: [PATCH 19/57] Deal with mocks --- sled-agent/src/instance.rs | 8 ++++++++ sled-agent/src/instance_manager.rs | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index e17f2514eb..7aca7c1eb0 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1175,6 +1175,14 @@ mod test { 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, ); let port_manager = PortManager::new(log.new(slog::o!()), underlay_ip); + let nexus_client_ctx = + crate::mocks::MockNexusClient::new_with_client_context(); + nexus_client_ctx.expect().returning(|_, _, _| { + let mut mock = crate::mocks::MockNexusClient::default(); + mock.expect_clone() + .returning(|| crate::mocks::MockNexusClient::default()); + mock + }); let nexus_client = NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) .unwrap(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index eccf1daa0f..ae18378e24 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -416,6 +416,15 @@ mod test { async fn ensure_instance() { let logctx = test_setup_log("ensure_instance"); let log = &logctx.log; + + let nexus_client_ctx = + crate::mocks::MockNexusClient::new_with_client_context(); + nexus_client_ctx.expect().returning(|_, _, _| { + let mut mock = crate::mocks::MockNexusClient::default(); + mock.expect_clone() + .returning(|| crate::mocks::MockNexusClient::default()); + mock + }); let nexus_client = NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) .unwrap(); @@ -532,6 +541,15 @@ mod test { async fn ensure_instance_state_repeatedly() { let logctx = test_setup_log("ensure_instance_repeatedly"); let log = &logctx.log; + + let nexus_client_ctx = + 
crate::mocks::MockNexusClient::new_with_client_context(); + nexus_client_ctx.expect().returning(|_, _, _| { + let mut mock = crate::mocks::MockNexusClient::default(); + mock.expect_clone() + .returning(|| crate::mocks::MockNexusClient::default()); + mock + }); let nexus_client = NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) .unwrap(); From d8efb4a80021dbd812d5b150c4e81b49d99df192 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 26 Jun 2023 00:26:39 -0700 Subject: [PATCH 20/57] Avoid mocks in the update test, add a nexus fake --- Cargo.lock | 1 + sled-agent/Cargo.toml | 1 + sled-agent/src/fakes/mod.rs | 7 ++++ sled-agent/src/fakes/nexus.rs | 62 +++++++++++++++++++++++++++++++++++ sled-agent/src/lib.rs | 3 ++ sled-agent/src/nexus.rs | 3 -- sled-agent/src/updates.rs | 57 ++++++++++++++++++-------------- 7 files changed, 106 insertions(+), 28 deletions(-) create mode 100644 sled-agent/src/fakes/mod.rs create mode 100644 sled-agent/src/fakes/nexus.rs diff --git a/Cargo.lock b/Cargo.lock index 722f6e7b68..ec35990788 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4776,6 +4776,7 @@ dependencies = [ "flate2", "futures", "http", + "hyper", "illumos-utils", "internal-dns 0.1.0", "ipnetwork", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index a030eba5eb..6df9b25260 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -73,6 +73,7 @@ opte-ioctl.workspace = true assert_matches.workspace = true expectorate.workspace = true http.workspace = true +hyper.workspace = true mockall.workspace = true omicron-test-utils.workspace = true openapi-lint.workspace = true diff --git a/sled-agent/src/fakes/mod.rs b/sled-agent/src/fakes/mod.rs new file mode 100644 index 0000000000..a4b56c4f5c --- /dev/null +++ b/sled-agent/src/fakes/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fake server interfaces used for tests + +pub mod nexus; diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs new file mode 100644 index 0000000000..5a25893077 --- /dev/null +++ b/sled-agent/src/fakes/nexus.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A fake implementation of (some) of the internal Nexus interface +//! +//! This must be an exact subset of the Nexus internal interface +//! to operate correctly. + +use dropshot::{ + endpoint, ApiDescription, FreeformBody, HttpError, HttpResponseOk, Path, + RequestContext, +}; +use hyper::Body; +use omicron_common::api::external::Error; +use omicron_common::api::internal::nexus::UpdateArtifactId; + +pub trait FakeNexusServer: Send + Sync { + fn cpapi_artifact_download( + &self, + _artifact_id: UpdateArtifactId, + ) -> Result, Error> { + Err(Error::internal_error("Not implemented")) + } +} + +pub type ServerContext = Box; + +#[endpoint { + method = GET, + path = "/artifacts/{kind}/{name}/{version}", +}] +async fn cpapi_artifact_download( + request_context: RequestContext, + path_params: Path, +) -> Result, HttpError> { + let context = request_context.context(); + + Ok(HttpResponseOk( + Body::from(context.cpapi_artifact_download(path_params.into_inner())?) 
+ .into(), + )) +} + +fn api() -> ApiDescription { + let mut api = ApiDescription::new(); + api.register(cpapi_artifact_download).unwrap(); + api +} + +pub fn start_test_server( + log: slog::Logger, + label: ServerContext, +) -> dropshot::HttpServer { + let config_dropshot = dropshot::ConfigDropshot { + bind_address: "[::1]:0".parse().unwrap(), + ..Default::default() + }; + dropshot::HttpServerStarter::new(&config_dropshot, api(), label, &log) + .unwrap() + .start() +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index de940c67d5..d83434801d 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -38,6 +38,9 @@ mod updates; #[cfg(test)] mod mocks; +#[cfg(test)] +mod fakes; + #[macro_use] extern crate slog; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 0729ba3f78..f8ce9527e4 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -2,9 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-#[cfg(test)] -pub use crate::mocks::MockNexusClient as NexusClient; -#[cfg(not(test))] pub use nexus_client::Client as NexusClient; use internal_dns::resolver::{ResolveError, Resolver}; diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 6d837700e9..f6e98e57af 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -257,21 +257,22 @@ impl UpdateManager { #[cfg(test)] mod test { use super::*; - use crate::mocks::MockNexusClient; - use bytes::Bytes; + use crate::fakes::nexus::FakeNexusServer; use flate2::write::GzEncoder; - use http::StatusCode; - use progenitor::progenitor_client::{ByteStream, ResponseValue}; - use reqwest::{header::HeaderMap, Result}; + use nexus_client::Client as NexusClient; + use omicron_common::api::external::Error; + use omicron_common::api::internal::nexus::UpdateArtifactId; + use omicron_test_utils::dev::test_setup_log; use std::io::Write; use tar::Builder; #[tokio::test] - #[serial_test::serial] async fn test_write_artifact_to_filesystem() { + let logctx = test_setup_log("test_write_artifact_to_filesystem"); + let log = &logctx.log; // The (completely fabricated) artifact we'd like to download. let expected_name = "test_artifact"; - let expected_contents = "test_artifact contents"; + const EXPECTED_CONTENTS: &'static str = "test_artifact contents"; let artifact = UpdateArtifactId { name: expected_name.to_string(), version: "0.0.0".parse().unwrap(), @@ -286,23 +287,27 @@ mod test { let _ = tokio::fs::remove_file(&expected_path).await; // Let's pretend this is an artifact Nexus can actually give us. 
- let mut nexus_client = MockNexusClient::default(); - nexus_client.expect_cpapi_artifact_download().times(1).return_once( - move |kind, name, version| { - assert_eq!(name, "test_artifact"); - assert_eq!(version.to_string(), "0.0.0"); - assert_eq!(kind.to_string(), "control_plane"); - let response = ByteStream::new(Box::pin( - futures::stream::once(futures::future::ready(Result::Ok( - Bytes::from(expected_contents), - ))), - )); - Ok(ResponseValue::new( - response, - StatusCode::OK, - HeaderMap::default(), - )) - }, + struct NexusServer {} + impl FakeNexusServer for NexusServer { + fn cpapi_artifact_download( + &self, + artifact_id: UpdateArtifactId, + ) -> Result, Error> { + assert_eq!(artifact_id.name, "test_artifact"); + assert_eq!(artifact_id.version.to_string(), "0.0.0"); + assert_eq!(artifact_id.kind.to_string(), "control_plane"); + + Ok(EXPECTED_CONTENTS.as_bytes().to_vec()) + } + } + + let nexus_server = crate::fakes::nexus::start_test_server( + log.clone(), + Box::new(NexusServer {}), + ); + let nexus_client = NexusClient::new( + &format!("http://{}", nexus_server.local_addr().to_string()), + log.clone(), ); let config = @@ -314,7 +319,9 @@ mod test { // Confirm the download succeeded. 
assert!(expected_path.exists()); let contents = tokio::fs::read(&expected_path).await.unwrap(); - assert_eq!(std::str::from_utf8(&contents).unwrap(), expected_contents); + assert_eq!(std::str::from_utf8(&contents).unwrap(), EXPECTED_CONTENTS); + + logctx.cleanup_successful(); } #[tokio::test] From 97223aec9501bd9366cba042fe93f430338549e5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 26 Jun 2023 01:31:06 -0700 Subject: [PATCH 21/57] Continue to reduce usage of mocks to represent NexusClient --- sled-agent/src/fakes/nexus.rs | 41 +++++++++++++++++ sled-agent/src/instance.rs | 54 +++++++++++----------- sled-agent/src/instance_manager.rs | 65 +++++++++++++++++--------- sled-agent/src/lib.rs | 3 -- sled-agent/src/mocks/mod.rs | 73 ------------------------------ sled-agent/src/nexus.rs | 24 ++++++++-- 6 files changed, 131 insertions(+), 129 deletions(-) delete mode 100644 sled-agent/src/mocks/mod.rs diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs index 5a25893077..392f106558 100644 --- a/sled-agent/src/fakes/nexus.rs +++ b/sled-agent/src/fakes/nexus.rs @@ -12,9 +12,15 @@ use dropshot::{ RequestContext, }; use hyper::Body; +use internal_dns::ServiceName; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::UpdateArtifactId; +/// Implements a fake Nexus. +/// +/// - All methods should match Nexus' interface, if they exist. +/// - Not all methods should be called by all tests. By default, +/// each method, representing an endpoint, should return an error. pub trait FakeNexusServer: Send + Sync { fn cpapi_artifact_download( &self, @@ -24,6 +30,10 @@ pub trait FakeNexusServer: Send + Sync { } } +/// Describes the server context type. +/// +/// If you're writing a test, this is a type you should create when calling +/// [`start_test_server`]. pub type ServerContext = Box; #[endpoint { @@ -48,6 +58,9 @@ fn api() -> ApiDescription { api } +/// Creates a fake Nexus test server. 
+/// +/// Uses a [`ServerContext`] type to represent the faked Nexus server. pub fn start_test_server( log: slog::Logger, label: ServerContext, @@ -60,3 +73,31 @@ pub fn start_test_server( .unwrap() .start() } + +/// Creates a transient DNS server pointing to a fake Nexus dropshot server. +pub async fn start_dns_server( + log: &slog::Logger, + nexus: &dropshot::HttpServer, +) -> dns_server::TransientServer { + let dns = dns_server::TransientServer::new(log).await.unwrap(); + let mut dns_config_builder = internal_dns::DnsConfigBuilder::new(); + + let nexus_addr = match nexus.local_addr() { + std::net::SocketAddr::V6(addr) => addr, + _ => panic!("Expected IPv6 address"), + }; + + let nexus_zone = dns_config_builder + .host_zone(uuid::Uuid::new_v4(), *nexus_addr.ip()) + .expect("failed to set up DNS"); + dns_config_builder + .service_backend_zone( + ServiceName::Nexus, + &nexus_zone, + nexus_addr.port(), + ) + .expect("failed to set up DNS"); + let dns_config = dns_config_builder.build(); + dns.initialize_with_config(log, &dns_config).await.unwrap(); + dns +} diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 7aca7c1eb0..857f6efc50 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1097,6 +1097,7 @@ impl Instance { #[cfg(test)] mod test { use super::*; + use crate::fakes::nexus::FakeNexusServer; use crate::instance_manager::InstanceManager; use crate::nexus::NexusClientWithResolver; use crate::params::InstanceStateRequested; @@ -1152,20 +1153,7 @@ mod test { } } - // Due to the usage of global mocks, we use "serial_test" to avoid - // parellizing test invocations. - // - // From https://docs.rs/mockall/0.10.1/mockall/index.html#static-methods - // - // Mockall can also mock static methods. But be careful! The expectations - // are global. If you want to use a static method in multiple tests, you - // must provide your own synchronization. For ordinary methods, - // expectations are set on the mock object. 
But static methods don’t have - // any mock object. Instead, you must create a Context object just to set - // their expectations. - #[tokio::test] - #[serial_test::serial] async fn transition_before_start() { let logctx = test_setup_log("transition_before_start"); let log = &logctx.log; @@ -1175,20 +1163,33 @@ mod test { 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, ); let port_manager = PortManager::new(log.new(slog::o!()), underlay_ip); - let nexus_client_ctx = - crate::mocks::MockNexusClient::new_with_client_context(); - nexus_client_ctx.expect().returning(|_, _, _| { - let mut mock = crate::mocks::MockNexusClient::default(); - mock.expect_clone() - .returning(|| crate::mocks::MockNexusClient::default()); - mock - }); - let nexus_client = - NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + + // Create a fake Nexus Server (for notifications) and add it to a + // corresponding fake DNS server for discovery. + struct NexusServer {} + impl FakeNexusServer for NexusServer {} + let nexus_server = crate::fakes::nexus::start_test_server( + log.clone(), + Box::new(NexusServer {}), + ); + let dns = + crate::fakes::nexus::start_dns_server(log, &nexus_server).await; + let internal_resolver = + internal_dns::resolver::Resolver::new_from_addrs( + log.clone(), + vec![*dns.dns_server.local_address()], + ) + .unwrap(); + let nexus_client_with_resolver = + NexusClientWithResolver::new_from_resolver_with_port( + log, + internal_resolver, + nexus_server.local_addr().port(), + ); + let instance_manager = InstanceManager::new( log.clone(), - nexus_client.clone(), + nexus_client_with_resolver.clone(), Etherstub("mylink".to_string()), port_manager.clone(), ) @@ -1201,13 +1202,14 @@ mod test { new_initial_instance(), vnic_allocator, port_manager, - nexus_client, + nexus_client_with_resolver, ) .unwrap(); // Pick a state transition that requires the instance to have started. 
assert!(inst.put_state(InstanceStateRequested::Reboot).await.is_err()); + drop(dns); logctx.cleanup_successful(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index ae18378e24..43f475ea21 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -361,6 +361,7 @@ impl Drop for InstanceTicket { #[cfg(test)] mod test { use super::*; + use crate::fakes::nexus::FakeNexusServer; use crate::instance::MockInstance; use crate::nexus::NexusClientWithResolver; use crate::params::InstanceStateRequested; @@ -417,17 +418,27 @@ mod test { let logctx = test_setup_log("ensure_instance"); let log = &logctx.log; - let nexus_client_ctx = - crate::mocks::MockNexusClient::new_with_client_context(); - nexus_client_ctx.expect().returning(|_, _, _| { - let mut mock = crate::mocks::MockNexusClient::default(); - mock.expect_clone() - .returning(|| crate::mocks::MockNexusClient::default()); - mock - }); - let nexus_client = - NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + // Create a fake Nexus Server (for notifications) and add it to a + // corresponding fake DNS server for discovery. + struct NexusServer {} + impl FakeNexusServer for NexusServer {} + let nexus_server = crate::fakes::nexus::start_test_server( + log.clone(), + Box::new(NexusServer {}), + ); + let dns = + crate::fakes::nexus::start_dns_server(log, &nexus_server).await; + let internal_resolver = + internal_dns::resolver::Resolver::new_from_addrs( + log.clone(), + vec![*dns.dns_server.local_address()], + ) + .unwrap(); + let nexus_client = NexusClientWithResolver::new_from_resolver_with_port( + log, + internal_resolver, + nexus_server.local_addr().port(), + ); // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. 
@@ -542,17 +553,27 @@ mod test { let logctx = test_setup_log("ensure_instance_repeatedly"); let log = &logctx.log; - let nexus_client_ctx = - crate::mocks::MockNexusClient::new_with_client_context(); - nexus_client_ctx.expect().returning(|_, _, _| { - let mut mock = crate::mocks::MockNexusClient::default(); - mock.expect_clone() - .returning(|| crate::mocks::MockNexusClient::default()); - mock - }); - let nexus_client = - NexusClientWithResolver::new(&log, std::net::Ipv6Addr::LOCALHOST) - .unwrap(); + // Create a fake Nexus Server (for notifications) and add it to a + // corresponding fake DNS server for discovery. + struct NexusServer {} + impl FakeNexusServer for NexusServer {} + let nexus_server = crate::fakes::nexus::start_test_server( + log.clone(), + Box::new(NexusServer {}), + ); + let dns = + crate::fakes::nexus::start_dns_server(log, &nexus_server).await; + let internal_resolver = + internal_dns::resolver::Resolver::new_from_addrs( + log.clone(), + vec![*dns.dns_server.local_address()], + ) + .unwrap(); + let nexus_client = NexusClientWithResolver::new_from_resolver_with_port( + log, + internal_resolver, + nexus_server.local_addr().port(), + ); // Instance Manager creation. diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index d83434801d..63d4954034 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -35,9 +35,6 @@ pub(crate) mod storage; mod storage_manager; mod updates; -#[cfg(test)] -mod mocks; - #[cfg(test)] mod fakes; diff --git a/sled-agent/src/mocks/mod.rs b/sled-agent/src/mocks/mod.rs deleted file mode 100644 index 617eeb2d01..0000000000 --- a/sled-agent/src/mocks/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Mock structures for testing. 
- -use mockall::mock; -use nexus_client::types::{ - DiskRuntimeState, InstanceRuntimeState, KnownArtifactKind, - PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, - RackInitializationRequest, SemverVersion, SledAgentStartupInfo, - ZpoolPutRequest, ZpoolPutResponse, -}; -use slog::Logger; -use uuid::Uuid; - -type Result = std::result::Result< - progenitor::progenitor_client::ResponseValue, - progenitor::progenitor_client::Error, ->; - -mock! { - pub NexusClient { - pub fn new(server_addr: &str, log: Logger) -> Self; - pub fn new_with_client(server_addr: &str, client: reqwest::Client, log: Logger) -> Self; - pub fn client(&self) -> reqwest::Client; - pub fn baseurl(&self) -> &'static str; - pub async fn sled_agent_put( - &self, - id: &Uuid, - info: &SledAgentStartupInfo, - ) -> Result<()>; - pub async fn physical_disk_put( - &self, - request: &PhysicalDiskPutRequest, - ) -> Result<()>; - pub async fn physical_disk_delete( - &self, - request: &PhysicalDiskDeleteRequest, - ) -> Result<()>; - pub async fn cpapi_instances_put( - &self, - id: &Uuid, - new_runtime_state: &InstanceRuntimeState, - ) -> Result<()>; - pub async fn cpapi_disks_put( - &self, - disk_id: &Uuid, - new_runtime_state: &DiskRuntimeState, - ) -> Result<()>; - pub async fn cpapi_artifact_download( - &self, - kind: KnownArtifactKind, - name: &str, - version: &SemverVersion, - ) -> Result; - pub async fn zpool_put( - &self, - sled_id: &Uuid, - zpool_id: &Uuid, - info: &ZpoolPutRequest, - ) -> Result; - pub async fn rack_initialization_complete( - &self, - rack_id: &Uuid, - request: &RackInitializationRequest, - ) -> Result<()>; - } - - impl Clone for NexusClient { - fn clone(&self) -> Self; - } -} diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index f8ce9527e4..77dc6428fe 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -29,10 +29,24 @@ impl NexusClientWithResolver { log: &Logger, sled_agent_address: Ipv6Addr, ) -> Result { - let resolver = 
Arc::new(Resolver::new_from_ip( + let resolver = Resolver::new_from_ip( log.new(o!("component" => "DnsResolver")), sled_agent_address, - )?); + )?; + + Ok(Self::new_from_resolver_with_port( + log, + resolver, + NEXUS_INTERNAL_PORT, + )) + } + + pub fn new_from_resolver_with_port( + log: &Logger, + resolver: Resolver, + port: u16, + ) -> Self { + let resolver = Arc::new(resolver); let client = reqwest::ClientBuilder::new() .dns_resolver(resolver.clone()) @@ -40,14 +54,14 @@ impl NexusClientWithResolver { .expect("Failed to build client"); let dns_name = ServiceName::Nexus.srv_name(); - Ok(Self { + Self { client: NexusClient::new_with_client( - &format!("http://{dns_name}:{NEXUS_INTERNAL_PORT}"), + &format!("http://{dns_name}:{port}"), client, log.new(o!("component" => "NexusClient")), ), resolver, - }) + } } /// Access the progenitor-based Nexus Client. From d5fcafc52834cb74245c159c29209cc838d42663 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 27 Jun 2023 00:26:44 -0700 Subject: [PATCH 22/57] Add Executor trait, use it --- Cargo.lock | 3 + illumos-utils/Cargo.toml | 2 + illumos-utils/src/dladm.rs | 112 ++++++++---- illumos-utils/src/fstyp.rs | 11 +- illumos-utils/src/lib.rs | 90 +-------- illumos-utils/src/link.rs | 78 ++++++-- illumos-utils/src/process.rs | 201 +++++++++++++++++++++ illumos-utils/src/running_zone.rs | 125 ++++++++----- illumos-utils/src/zfs.rs | 75 +++++--- illumos-utils/src/zone.rs | 85 +++++---- illumos-utils/src/zpool.rs | 46 +++-- installinator/src/bootstrap.rs | 24 ++- installinator/src/dispatch.rs | 36 +++- installinator/src/hardware.rs | 5 +- installinator/src/main.rs | 4 +- installinator/src/write.rs | 23 ++- package/src/bin/omicron-package.rs | 16 +- sled-agent/src/bin/sled-agent.rs | 20 +- sled-agent/src/bootstrap/agent.rs | 113 +++++++----- sled-agent/src/bootstrap/hardware.rs | 5 +- sled-agent/src/bootstrap/server.rs | 6 +- sled-agent/src/config.rs | 10 +- sled-agent/src/instance.rs | 19 +- sled-agent/src/instance_manager.rs | 24 
++- sled-agent/src/server.rs | 3 + sled-agent/src/services.rs | 62 ++++--- sled-agent/src/sled_agent.rs | 13 +- sled-agent/src/storage_manager.rs | 35 +++- sled-hardware/src/cleanup.rs | 58 +++--- sled-hardware/src/disk.rs | 77 ++++---- sled-hardware/src/underlay.rs | 29 ++- wicketd/Cargo.toml | 1 + wicketd/tests/integration_tests/updates.rs | 4 +- 33 files changed, 957 insertions(+), 458 deletions(-) create mode 100644 illumos-utils/src/process.rs diff --git a/Cargo.lock b/Cargo.lock index ec35990788..bca81410d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3168,10 +3168,12 @@ dependencies = [ "cfg-if 1.0.0", "futures", "ipnetwork", + "itertools", "libc", "macaddr", "mockall", "omicron-common 0.1.0", + "omicron-test-utils", "opte-ioctl", "oxide-vpc", "schemars", @@ -9241,6 +9243,7 @@ dependencies = [ "hex", "http", "hyper", + "illumos-utils", "installinator", "installinator-artifact-client", "installinator-artifactd", diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 2ad76d579f..cd843ce19c 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -13,6 +13,7 @@ camino.workspace = true cfg-if.workspace = true futures.workspace = true ipnetwork.workspace = true +itertools.workspace = true libc.workspace = true macaddr.workspace = true omicron-common.workspace = true @@ -34,6 +35,7 @@ opte-ioctl.workspace = true [dev-dependencies] mockall.workspace = true +omicron-test-utils.workspace = true toml.workspace = true [features] diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index b40a4d4cd8..101e4a3ec1 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -5,8 +5,8 @@ //! Utilities for poking at data links. 
use crate::link::{Link, LinkKind}; +use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::zone::IPADM; -use crate::{execute, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; use omicron_common::vlan::VlanID; use serde::{Deserialize, Serialize}; @@ -171,21 +171,27 @@ pub struct Dladm {} #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Dladm { /// Creates an etherstub, or returns one which already exists. - pub fn ensure_etherstub(name: &str) -> Result { - if let Ok(stub) = Self::get_etherstub(name) { + pub fn ensure_etherstub( + executor: &BoxedExecutor, + name: &str, + ) -> Result { + if let Ok(stub) = Self::get_etherstub(executor, name) { return Ok(stub); } let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "create-etherstub", "-t", name]); - execute(cmd)?; + executor.execute(cmd)?; Ok(Etherstub(name.to_string())) } /// Finds an etherstub. - fn get_etherstub(name: &str) -> Result { + fn get_etherstub( + executor: &BoxedExecutor, + name: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-etherstub", name]); - execute(cmd)?; + executor.execute(cmd)?; Ok(Etherstub(name.to_string())) } @@ -194,6 +200,7 @@ impl Dladm { /// This VNIC is not tracked like [`crate::link::Link`], because /// it is expected to exist for the lifetime of the sled. 
pub fn ensure_etherstub_vnic( + executor: &BoxedExecutor, source: &Etherstub, ) -> Result { let (vnic_name, mtu) = match source.0.as_str() { @@ -201,80 +208,95 @@ impl Dladm { BOOTSTRAP_ETHERSTUB_NAME => (BOOTSTRAP_ETHERSTUB_VNIC_NAME, 1500), _ => unreachable!(), }; - if let Ok(vnic) = Self::get_etherstub_vnic(vnic_name) { + if let Ok(vnic) = Self::get_etherstub_vnic(executor, vnic_name) { return Ok(vnic); } - Self::create_vnic(source, vnic_name, None, None, mtu)?; + Self::create_vnic(executor, source, vnic_name, None, None, mtu)?; Ok(EtherstubVnic(vnic_name.to_string())) } - fn get_etherstub_vnic(name: &str) -> Result { + fn get_etherstub_vnic( + executor: &BoxedExecutor, + name: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", name]); - execute(cmd)?; + executor.execute(cmd)?; Ok(EtherstubVnic(name.to_string())) } // Return the name of the IP interface over the etherstub VNIC, if it // exists. fn get_etherstub_vnic_interface( + executor: &BoxedExecutor, name: &str, ) -> Result { let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[IPADM, "show-if", "-p", "-o", "IFNAME", name]); - execute(cmd)?; + executor.execute(cmd)?; Ok(name.to_string()) } /// Delete the VNIC over the inter-zone comms etherstub. - pub fn delete_etherstub_vnic(name: &str) -> Result<(), ExecutionError> { + pub fn delete_etherstub_vnic( + executor: &BoxedExecutor, + name: &str, + ) -> Result<(), ExecutionError> { // It's not clear why, but this requires deleting the _interface_ that's // over the VNIC first. Other VNICs don't require this for some reason. 
- if Self::get_etherstub_vnic_interface(name).is_ok() { + if Self::get_etherstub_vnic_interface(executor, name).is_ok() { let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[IPADM, "delete-if", name]); - execute(cmd)?; + executor.execute(cmd)?; } - if Self::get_etherstub_vnic(name).is_ok() { + if Self::get_etherstub_vnic(executor, name).is_ok() { let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[DLADM, "delete-vnic", name]); - execute(cmd)?; + executor.execute(cmd)?; } Ok(()) } /// Delete the inter-zone comms etherstub. - pub fn delete_etherstub(name: &str) -> Result<(), ExecutionError> { - if Self::get_etherstub(name).is_ok() { + pub fn delete_etherstub( + executor: &BoxedExecutor, + name: &str, + ) -> Result<(), ExecutionError> { + if Self::get_etherstub(executor, name).is_ok() { let mut cmd = std::process::Command::new(PFEXEC); let cmd = cmd.args(&[DLADM, "delete-etherstub", name]); - execute(cmd)?; + executor.execute(cmd)?; } Ok(()) } /// Verify that the given link exists - pub fn verify_link(link: &str) -> Result { + pub fn verify_link( + executor: &BoxedExecutor, + link: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-link", "-p", "-o", "LINK", link]); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; match String::from_utf8_lossy(&output.stdout) .lines() .next() .map(|s| s.trim()) { - Some(x) if x == link => Ok(Link::wrap_physical(link)), + Some(x) if x == link => Ok(Link::wrap_physical(executor, link)), _ => Err(FindPhysicalLinkError::NoPhysicalLinkFound), } } /// Returns the name of the first observed physical data link. - pub fn find_physical() -> Result { + pub fn find_physical( + executor: &BoxedExecutor, + ) -> Result { // TODO: This is arbitrary, but we're currently grabbing the first // physical device. Should we have a more sophisticated method for // selection? - Self::list_physical()? + Self::list_physical(executor)? 
.into_iter() .next() .ok_or_else(|| FindPhysicalLinkError::NoPhysicalLinkFound) @@ -283,10 +305,12 @@ impl Dladm { /// List the extant physical data links on the system. /// /// Note that this returns _all_ links. - pub fn list_physical() -> Result, FindPhysicalLinkError> { + pub fn list_physical( + executor: &BoxedExecutor, + ) -> Result, FindPhysicalLinkError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-phys", "-p", "-o", "LINK"]); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; std::str::from_utf8(&output.stdout) .map_err(FindPhysicalLinkError::NonUtf8Output) .map(|stdout| { @@ -298,7 +322,10 @@ impl Dladm { } /// Returns the MAC address of a physical link. - pub fn get_mac(link: &PhysicalLink) -> Result { + pub fn get_mac( + executor: &BoxedExecutor, + link: &PhysicalLink, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ DLADM, @@ -309,7 +336,7 @@ impl Dladm { "ADDRESS", &link.0, ]); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; let name = String::from_utf8_lossy(&output.stdout) .lines() .next() @@ -336,6 +363,7 @@ impl Dladm { /// * `mac`: An optional unicast MAC address for the newly created NIC. /// * `vlan`: An optional VLAN ID for VLAN tagging. pub fn create_vnic( + executor: &BoxedExecutor, source: &T, vnic_name: &str, mac: Option, @@ -367,7 +395,7 @@ impl Dladm { args.push(vnic_name.to_string()); let cmd = command.args(&args); - execute(cmd).map_err(|err| CreateVnicError { + executor.execute(cmd).map_err(|err| CreateVnicError { name: vnic_name.to_string(), link: source.name().to_string(), err, @@ -387,7 +415,7 @@ impl Dladm { &prop, vnic_name, ]); - execute(cmd).map_err(|err| CreateVnicError { + executor.execute(cmd).map_err(|err| CreateVnicError { name: vnic_name.to_string(), link: source.name().to_string(), err, @@ -397,10 +425,13 @@ impl Dladm { } /// Returns VNICs that may be managed by the Sled Agent. 
- pub fn get_vnics() -> Result, GetVnicError> { + pub fn get_vnics( + executor: &BoxedExecutor, + ) -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); - let output = execute(cmd).map_err(|err| GetVnicError { err })?; + let output = + executor.execute(cmd).map_err(|err| GetVnicError { err })?; let vnics = String::from_utf8_lossy(&output.stdout) .lines() @@ -417,16 +448,21 @@ impl Dladm { } /// Remove a vnic from the sled. - pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> { + pub fn delete_vnic( + executor: &BoxedExecutor, + name: &str, + ) -> Result<(), DeleteVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "delete-vnic", name]); - execute(cmd) + executor + .execute(cmd) .map_err(|err| DeleteVnicError { name: name.to_string(), err })?; Ok(()) } /// Get a link property value on a VNIC pub fn get_linkprop( + executor: &BoxedExecutor, vnic: &str, prop_name: &str, ) -> Result { @@ -441,7 +477,7 @@ impl Dladm { prop_name, vnic, ]); - let result = execute(cmd).map_err(|err| GetLinkpropError { + let result = executor.execute(cmd).map_err(|err| GetLinkpropError { link_name: vnic.to_string(), prop_name: prop_name.to_string(), err, @@ -450,6 +486,7 @@ impl Dladm { } /// Set a link property on a VNIC pub fn set_linkprop( + executor: &BoxedExecutor, vnic: &str, prop_name: &str, prop_value: &str, @@ -458,7 +495,7 @@ impl Dladm { let prop = format!("{}={}", prop_name, prop_value); let cmd = command.args(&[DLADM, "set-linkprop", "-t", "-p", &prop, vnic]); - execute(cmd).map_err(|err| SetLinkpropError { + executor.execute(cmd).map_err(|err| SetLinkpropError { link_name: vnic.to_string(), prop_name: prop_name.to_string(), prop_value: prop_value.to_string(), @@ -469,6 +506,7 @@ impl Dladm { /// Reset a link property on a VNIC pub fn reset_linkprop( + executor: &BoxedExecutor, vnic: &str, prop_name: &str, ) -> Result<(), 
ResetLinkpropError> { @@ -481,7 +519,7 @@ impl Dladm { prop_name, vnic, ]); - execute(cmd).map_err(|err| ResetLinkpropError { + executor.execute(cmd).map_err(|err| ResetLinkpropError { link_name: vnic.to_string(), prop_name: prop_name.to_string(), err, diff --git a/illumos-utils/src/fstyp.rs b/illumos-utils/src/fstyp.rs index dbbe3442dc..917bdc3a08 100644 --- a/illumos-utils/src/fstyp.rs +++ b/illumos-utils/src/fstyp.rs @@ -4,8 +4,8 @@ //! Helper for calling fstyp. +use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::zpool::ZpoolName; -use crate::{execute, PFEXEC}; use camino::Utf8Path; use std::str::FromStr; @@ -17,7 +17,7 @@ pub enum Error { NotValidUtf8(#[from] std::string::FromUtf8Error), #[error("fstyp execution error: {0}")] - Execution(#[from] crate::ExecutionError), + Execution(#[from] ExecutionError), #[error("Failed to find zpool name from fstyp")] NotFound, @@ -33,14 +33,17 @@ pub struct Fstyp {} impl Fstyp { /// Executes the 'fstyp' command and parses the name of a zpool from it, if /// one exists. - pub fn get_zpool(path: &Utf8Path) -> Result { + pub fn get_zpool( + executor: &BoxedExecutor, + path: &Utf8Path, + ) -> Result { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); let cmd = cmd.arg(FSTYP).arg("-a").arg(path); - let output = execute(cmd).map_err(Error::from)?; + let output = executor.execute(cmd).map_err(Error::from)?; let stdout = String::from_utf8(output.stdout)?; let mut seen_zfs_marker = false; diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 0001c2f8f4..3ef707395e 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -4,8 +4,6 @@ //! Wrappers around illumos-specific commands. 
-use cfg_if::cfg_if; - pub mod addrobj; pub mod destructor; pub mod dkio; @@ -13,96 +11,10 @@ pub mod dladm; pub mod fstyp; pub mod link; pub mod opte; +pub mod process; pub mod running_zone; pub mod svc; pub mod vmm_reservoir; pub mod zfs; pub mod zone; pub mod zpool; - -pub const PFEXEC: &str = "/usr/bin/pfexec"; - -#[derive(Debug)] -pub struct CommandFailureInfo { - command: String, - status: std::process::ExitStatus, - stdout: String, - stderr: String, -} - -impl std::fmt::Display for CommandFailureInfo { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "Command [{}] executed and failed with status: {}", - self.command, self.status - )?; - write!(f, " stdout: {}", self.stdout)?; - write!(f, " stderr: {}", self.stderr) - } -} - -#[derive(thiserror::Error, Debug)] -pub enum ExecutionError { - #[error("Failed to start execution of [{command}]: {err}")] - ExecutionStart { command: String, err: std::io::Error }, - - #[error("{0}")] - CommandFailure(Box), - - #[error("Failed to enter zone: {err}")] - ZoneEnter { err: std::io::Error }, - - #[error("Zone is not running")] - NotRunning, -} - -// We wrap this method in an inner module to make it possible to mock -// these free functions. -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] -mod inner { - use super::*; - - fn to_string(command: &mut std::process::Command) -> String { - command - .get_args() - .map(|s| s.to_string_lossy().into()) - .collect::>() - .join(" ") - } - - // Helper function for starting the process and checking the - // exit code result. 
- pub fn execute( - command: &mut std::process::Command, - ) -> Result { - let output = command.output().map_err(|err| { - ExecutionError::ExecutionStart { command: to_string(command), err } - })?; - - if !output.status.success() { - return Err(ExecutionError::CommandFailure(Box::new( - CommandFailureInfo { - command: command - .get_args() - .map(|s| s.to_string_lossy().into()) - .collect::>() - .join(" "), - status: output.status, - stdout: String::from_utf8_lossy(&output.stdout).to_string(), - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }, - ))); - } - - Ok(output) - } -} - -cfg_if! { - if #[cfg(any(test, feature = "testing"))] { - pub use mock_inner::*; - } else { - pub use inner::*; - } -} diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index 871ba55e75..15ed60663a 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -6,25 +6,22 @@ use crate::destructor::{Deletable, Destructor}; use crate::dladm::{ - CreateVnicError, DeleteVnicError, VnicSource, VNIC_PREFIX, + CreateVnicError, DeleteVnicError, Dladm, VnicSource, VNIC_PREFIX, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, }; +use crate::process::BoxedExecutor; use omicron_common::api::external::MacAddr; use std::sync::{ atomic::{AtomicU64, Ordering}, Arc, }; -#[cfg(not(any(test, feature = "testing")))] -use crate::dladm::Dladm; -#[cfg(any(test, feature = "testing"))] -use crate::dladm::MockDladm as Dladm; - /// A shareable wrapper around an atomic counter. /// May be used to allocate runtime-unique IDs for objects /// which have naming constraints - such as VNICs. #[derive(Clone)] pub struct VnicAllocator { + executor: BoxedExecutor, value: Arc, scope: String, data_link: DL, @@ -44,8 +41,13 @@ impl VnicAllocator
{ /// /// VnicAllocator::new("Storage") produces /// - oxControlStorage0 - pub fn new>(scope: S, data_link: DL) -> Self { + pub fn new>( + executor: &BoxedExecutor, + scope: S, + data_link: DL, + ) -> Self { Self { + executor: executor.clone(), value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), data_link, @@ -63,8 +65,16 @@ impl VnicAllocator
{ let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); debug_assert!(name.starts_with(VNIC_PREFIX_CONTROL)); - Dladm::create_vnic(&self.data_link, &name, mac, None, 9000)?; + Dladm::create_vnic( + &self.executor, + &self.data_link, + &name, + mac, + None, + 9000, + )?; Ok(Link { + executor: self.executor.clone(), name, deleted: false, kind: LinkKind::OxideControlVnic, @@ -79,6 +89,7 @@ impl VnicAllocator
{ ) -> Result { match LinkKind::from_name(name.as_ref()) { Some(kind) => Ok(Link { + executor: self.executor.clone(), name: name.as_ref().to_owned(), deleted: false, kind, @@ -90,6 +101,7 @@ impl VnicAllocator
{ fn new_superscope>(&self, scope: S) -> Self { Self { + executor: self.executor.clone(), value: self.value.clone(), scope: format!("{}{}", scope.as_ref(), self.scope), data_link: self.data_link.clone(), @@ -99,8 +111,16 @@ impl VnicAllocator
{ pub fn new_bootstrap(&self) -> Result { let name = self.next(); - Dladm::create_vnic(&self.data_link, &name, None, None, 1500)?; + Dladm::create_vnic( + &self.executor, + &self.data_link, + &name, + None, + None, + 1500, + )?; Ok(Link { + executor: self.executor.clone(), name, deleted: false, kind: LinkKind::OxideBootstrapVnic, @@ -156,6 +176,7 @@ pub struct InvalidLinkKind(String); /// another process in the global zone could also modify / destroy /// the VNIC while this object is alive. pub struct Link { + executor: BoxedExecutor, name: String, deleted: bool, kind: LinkKind, @@ -176,8 +197,12 @@ impl Link { /// Wraps a physical nic in a Link structure. /// /// It is the caller's responsibility to ensure this is a physical link. - pub fn wrap_physical>(name: S) -> Self { + pub fn wrap_physical>( + executor: &BoxedExecutor, + name: S, + ) -> Self { Link { + executor: executor.clone(), name: name.as_ref().to_owned(), deleted: false, kind: LinkKind::Physical, @@ -190,7 +215,7 @@ impl Link { if self.deleted || self.kind == LinkKind::Physical { Ok(()) } else { - Dladm::delete_vnic(&self.name)?; + Dladm::delete_vnic(&self.executor, &self.name)?; self.deleted = true; Ok(()) } @@ -208,8 +233,10 @@ impl Link { impl Drop for Link { fn drop(&mut self) { if let Some(destructor) = self.destructor.take() { - destructor - .enqueue_destroy(VnicDestruction { name: self.name.clone() }); + destructor.enqueue_destroy(VnicDestruction { + executor: self.executor.clone(), + name: self.name.clone(), + }); } } } @@ -217,12 +244,13 @@ impl Drop for Link { // Represents the request to destroy a VNIC struct VnicDestruction { name: String, + executor: BoxedExecutor, } #[async_trait::async_trait] impl Deletable for VnicDestruction { async fn delete(&self) -> Result<(), anyhow::Error> { - Dladm::delete_vnic(&self.name)?; + Dladm::delete_vnic(&self.executor, &self.name)?; Ok(()) } } @@ -231,22 +259,36 @@ impl Deletable for VnicDestruction { mod test { use super::*; use crate::dladm::Etherstub; 
+ use crate::process::FakeExecutor; + use omicron_test_utils::dev; #[tokio::test] async fn test_allocate() { - let allocator = - VnicAllocator::new("Foo", Etherstub("mystub".to_string())); + let logctx = dev::test_setup_log("test_allocate"); + let executor = FakeExecutor::new(logctx.log.clone()); + let allocator = VnicAllocator::new( + &executor.as_executor(), + "Foo", + Etherstub("mystub".to_string()), + ); assert_eq!("oxFoo0", allocator.next()); assert_eq!("oxFoo1", allocator.next()); assert_eq!("oxFoo2", allocator.next()); + logctx.cleanup_successful(); } #[tokio::test] async fn test_allocate_within_scopes() { - let allocator = - VnicAllocator::new("Foo", Etherstub("mystub".to_string())); + let logctx = dev::test_setup_log("test_allocate_within_scopes"); + let executor = FakeExecutor::new(logctx.log.clone()); + let allocator = VnicAllocator::new( + &executor.as_executor(), + "Foo", + Etherstub("mystub".to_string()), + ); assert_eq!("oxFoo0", allocator.next()); let allocator = allocator.new_superscope("Baz"); assert_eq!("oxBazFoo1", allocator.next()); + logctx.cleanup_successful(); } } diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs new file mode 100644 index 0000000000..f085c3a131 --- /dev/null +++ b/illumos-utils/src/process.rs @@ -0,0 +1,201 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A process executor + +use itertools::Itertools; +use slog::{debug, error, info, Logger}; +use std::os::unix::process::ExitStatusExt; +use std::process::{Command, ExitStatus, Output}; +use std::str::from_utf8; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, Mutex}; + +// NOTE: Is the "counter as ID" misleading? 
+// +// It's not actually possible to make an incrementing counter avoid the race of +// "log, do operation, log again" without making executing processes serialized +// (which seems bad). +// +// We could make this a UUID, but I don't like how hard-to-read those can be +// when trying to quickly parse logs. + +pub type BoxedExecutor = Arc; + +pub trait Executor: Send + Sync { + fn execute(&self, command: &mut Command) -> Result; +} + +fn log_command(log: &Logger, id: u64, command: &Command) { + info!( + log, + "{id} - Running Command: [{:?} {:?}]", + command.get_program(), + to_space_separated_string(command.get_args()), + ); + debug!( + log, + "{id} - Environment: [{:?}]", + to_space_separated_string(command.get_envs()), + ) +} + +fn log_output(log: &Logger, id: u64, output: &Output) { + info!( + log, + "{id} - {} (status code: {})", + if output.status.success() { "OK" } else { "ERROR" }, + output + .status + .code() + .map(|c| c.to_string()) + .unwrap_or_else(|| "none".to_string()), + ); + if !output.stdout.is_empty() { + debug!( + log, + "{id} - stdout: {}", + from_utf8(&output.stdout).unwrap_or(""), + ); + } + if !output.stderr.is_empty() { + debug!( + log, + "{id} - stderr: {}", + from_utf8(&output.stderr).unwrap_or(""), + ); + } +} + +pub struct FakeExecutor { + log: Logger, + counter: AtomicU64, + all_operations: Mutex>, +} + +impl FakeExecutor { + pub fn new(log: Logger) -> Arc { + Arc::new(Self { + log, + counter: AtomicU64::new(0), + all_operations: Mutex::new(vec![]), + }) + } + + pub fn as_executor(self: Arc) -> BoxedExecutor { + self + } +} + +impl Executor for FakeExecutor { + fn execute(&self, command: &mut Command) -> Result { + let id = self.counter.fetch_add(1, Ordering::SeqCst); + log_command(&self.log, id, command); + + // TODO: Environment variables? + let mut record = Command::new(command.get_program()); + record.args(command.get_args()); + self.all_operations.lock().unwrap().push(record); + + // TODO: Control failure of the command? 
+ let output = Output { + status: ExitStatus::from_raw(0), + stdout: vec![], + stderr: vec![], + }; + log_output(&self.log, id, &output); + Ok(output) + } +} + +pub struct RealExecutor { + log: slog::Logger, + counter: std::sync::atomic::AtomicU64, +} + +impl RealExecutor { + pub fn new(log: Logger) -> Arc { + Arc::new(Self { log, counter: AtomicU64::new(0) }) + } + + pub fn as_executor(self: Arc) -> BoxedExecutor { + self + } +} + +impl Executor for RealExecutor { + fn execute(&self, command: &mut Command) -> Result { + let id = self.counter.fetch_add(1, Ordering::SeqCst); + log_command(&self.log, id, command); + let output = command.output().map_err(|err| { + error!(self.log, "{id} - Could not start program!"); + ExecutionError::ExecutionStart { + command: to_space_separated_string(command.get_args()), + err, + } + })?; + log_output(&self.log, id, &output); + + if !output.status.success() { + return Err(ExecutionError::CommandFailure(Box::new( + FailureInfo { + command: command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" "), + status: output.status, + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }, + ))); + } + Ok(output) + } +} + +pub const PFEXEC: &str = "/usr/bin/pfexec"; + +#[derive(Debug)] +pub struct FailureInfo { + pub command: String, + pub status: std::process::ExitStatus, + pub stdout: String, + pub stderr: String, +} + +impl std::fmt::Display for FailureInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Command [{}] executed and failed with status: {}", + self.command, self.status + )?; + write!(f, " stdout: {}", self.stdout)?; + write!(f, " stderr: {}", self.stderr) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum ExecutionError { + #[error("Failed to start execution of [{command}]: {err}")] + ExecutionStart { command: String, err: std::io::Error }, + + #[error("{0}")] + CommandFailure(Box), 
+} + +// Formats each item with its `Debug` representation and joins the +// results with single spaces (used when logging and reporting commands). +fn to_space_separated_string(iter: T) -> String +where + T: IntoIterator, + I: std::fmt::Debug, +{ + Itertools::intersperse( + iter.into_iter().map(|arg| format!("{arg:?}")), + " ".into(), + ) + .collect::() +} diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 1d2fd89830..6f5b523dac 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -8,6 +8,7 @@ use crate::addrobj::AddrObject; use crate::dladm::Etherstub; use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; +use crate::process::{BoxedExecutor, ExecutionError}; use crate::svc::wait_for_service; use crate::zone::{AddressRequest, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; @@ -30,7 +31,7 @@ use crate::zone::Zones; pub struct RunCommandError { zone: String, #[source] - err: crate::ExecutionError, + err: ExecutionError, } /// Errors returned from [`RunningZone::boot`]. @@ -66,7 +67,7 @@ pub enum EnsureAddressError { GetAddressesError(#[from] crate::zone::GetAddressesError), #[error("Failed ensuring link-local address in {zone}: {err}")] - LinkLocal { zone: String, err: crate::ExecutionError }, + LinkLocal { zone: String, err: ExecutionError }, #[error("Failed to find non-link-local address in {zone}")] NoDhcpV6Addr { zone: String }, @@ -193,12 +194,12 @@ mod zenter { // Automatically detach inherited contracts. const CT_PR_REGENT: c_uint = 0x08; - pub fn new() -> Result { + pub fn new() -> Result { let path = CStr::from_bytes_with_nul(Self::TEMPLATE_PATH).unwrap(); let fd = unsafe { libc::open(path.as_ptr(), libc::O_RDWR) }; if fd < 0 { let err = std::io::Error::last_os_error(); - return Err(crate::ExecutionError::ZoneEnter { err }); + return Err(ExecutionError::ZoneEnter { err }); } // Initialize the contract template.
@@ -221,7 +222,7 @@ mod zenter { || unsafe { ct_tmpl_activate(fd) } != 0 { let err = std::io::Error::last_os_error(); - return Err(crate::ExecutionError::ZoneEnter { err }); + return Err(ExecutionError::ZoneEnter { err }); } Ok(Self { fd }) } @@ -276,7 +277,7 @@ impl RunningZone { let Some(id) = self.id else { return Err(RunCommandError { zone: self.name().to_string(), - err: crate::ExecutionError::NotRunning, + err: ExecutionError::NotRunning, }); }; let template = @@ -284,7 +285,7 @@ impl RunningZone { RunCommandError { zone: self.name().to_string(), err } })?); let tmpl = std::sync::Arc::clone(&template); - let mut command = std::process::Command::new(crate::PFEXEC); + let mut command = std::process::Command::new(crate::process::PFEXEC); command.env_clear(); unsafe { command.pre_exec(move || { @@ -305,9 +306,8 @@ impl RunningZone { // Capture the result, and be sure to clear the template for this // process itself before returning. - let res = crate::execute(command).map_err(|err| RunCommandError { - zone: self.name().to_string(), - err, + let res = self.inner.executor.execute(command).map_err(|err| { + RunCommandError { zone: self.name().to_string(), err } }); template.clear(); res.map(|output| String::from_utf8_lossy(&output.stdout).to_string()) @@ -321,12 +321,14 @@ impl RunningZone { S: AsRef, { // NOTE: This implementation is useless, and will never work. However, - // it must actually call `crate::execute()` for the testing purposes. + // it must actually call `execute()` for the testing purposes. // That's mocked by `mockall` to return known data, and so the command // that's actually run is irrelevant. 
let mut command = std::process::Command::new("echo"); let command = command.args(args); - crate::execute(command) + self.inner + .executor + .execute(command) .map_err(|err| RunCommandError { zone: self.name().to_string(), err, @@ -460,8 +462,12 @@ impl RunningZone { zone: self.inner.name.clone(), err, })?; - let network = - Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; + let network = Zones::ensure_address( + &self.inner.executor, + Some(&self.inner.name), + &addrobj, + addrtype, + )?; Ok(network) } @@ -487,8 +493,12 @@ impl RunningZone { err, } })?; - let _ = - Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; + let _ = Zones::ensure_address( + &self.inner.executor, + Some(&self.inner.name), + &addrobj, + addrtype, + )?; Ok(()) } @@ -515,8 +525,12 @@ impl RunningZone { })?; let zone = Some(self.inner.name.as_ref()); if let IpAddr::V4(gateway) = port.gateway().ip() { - let addr = - Zones::ensure_address(zone, &addrobj, AddressRequest::Dhcp)?; + let addr = Zones::ensure_address( + &self.inner.executor, + zone, + &addrobj, + AddressRequest::Dhcp, + )?; // TODO-remove(#2931): OPTE's DHCP "server" returns the list of routes // to add via option 121 (Classless Static Route). The illumos DHCP // client currently does not support this option, so we add the routes @@ -544,12 +558,15 @@ impl RunningZone { } else { // If the port is using IPv6 addressing we still want it to use // DHCP(v6) which requires first creating a link-local address. - Zones::ensure_has_link_local_v6_address(zone, &addrobj).map_err( - |err| EnsureAddressError::LinkLocal { - zone: self.inner.name.clone(), - err, - }, - )?; + Zones::ensure_has_link_local_v6_address( + &self.inner.executor, + zone, + &addrobj, + ) + .map_err(|err| EnsureAddressError::LinkLocal { + zone: self.inner.name.clone(), + err, + })?; // Unlike DHCPv4, there's no blocking `ipadm` call we can // make as it just happens in the background. 
So we just poll @@ -559,12 +576,16 @@ impl RunningZone { || async { // Grab all the address on the addrobj. There should // always be at least one (the link-local we added) - let addrs = Zones::get_all_addresses(zone, &addrobj) - .map_err(|e| { - backoff::BackoffError::permanent( - EnsureAddressError::from(e), - ) - })?; + let addrs = Zones::get_all_addresses( + &self.inner.executor, + zone, + &addrobj, + ) + .map_err(|e| { + backoff::BackoffError::permanent( + EnsureAddressError::from(e), + ) + })?; // Ipv6Addr::is_unicast_link_local is sadly not stable let is_ll = @@ -655,6 +676,7 @@ impl RunningZone { /// address on the zone. pub async fn get( log: &Logger, + executor: &BoxedExecutor, vnic_allocator: &VnicAllocator, zone_prefix: &str, addrtype: AddressRequest, @@ -679,22 +701,19 @@ impl RunningZone { } let zone_name = zone_info.name(); - let vnic_name = - Zones::get_control_interface(zone_name).map_err(|err| { - GetZoneError::ControlInterface { - name: zone_name.to_string(), - err, - } + let vnic_name = Zones::get_control_interface(executor, zone_name) + .map_err(|err| GetZoneError::ControlInterface { + name: zone_name.to_string(), + err, })?; let addrobj = AddrObject::new_control(&vnic_name).map_err(|err| { GetZoneError::AddrObject { name: zone_name.to_string(), err } })?; - Zones::ensure_address(Some(zone_name), &addrobj, addrtype).map_err( - |err| GetZoneError::EnsureAddress { + Zones::ensure_address(executor, Some(zone_name), &addrobj, addrtype) + .map_err(|err| GetZoneError::EnsureAddress { name: zone_name.to_string(), err, - }, - )?; + })?; let control_vnic = vnic_allocator .wrap_existing(vnic_name) @@ -703,16 +722,17 @@ impl RunningZone { // The bootstrap address for a running zone never changes, // so there's no need to call `Zones::ensure_address`. // Currently, only the switch zone has a bootstrap interface. 
- let bootstrap_vnic = Zones::get_bootstrap_interface(zone_name) - .map_err(|err| GetZoneError::BootstrapInterface { - name: zone_name.to_string(), - err, - })? - .map(|name| { - vnic_allocator - .wrap_existing(name) - .expect("Failed to wrap valid bootstrap VNIC") - }); + let bootstrap_vnic = + Zones::get_bootstrap_interface(executor, zone_name) + .map_err(|err| GetZoneError::BootstrapInterface { + name: zone_name.to_string(), + err, + })? + .map(|name| { + vnic_allocator + .wrap_existing(name) + .expect("Failed to wrap valid bootstrap VNIC") + }); Ok(Self { id: zone_info.id().map(|x| { @@ -720,6 +740,7 @@ impl RunningZone { }), inner: InstalledZone { log: log.new(o!("zone" => zone_name.to_string())), + executor: executor.clone(), zonepath: zone_info.path().to_path_buf().try_into()?, name: zone_name.to_string(), control_vnic, @@ -808,6 +829,8 @@ pub enum InstallZoneError { pub struct InstalledZone { log: Logger, + executor: BoxedExecutor, + // Filesystem path of the zone zonepath: Utf8PathBuf, @@ -863,6 +886,7 @@ impl InstalledZone { #[allow(clippy::too_many_arguments)] pub async fn install( log: &Logger, + executor: &BoxedExecutor, underlay_vnic_allocator: &VnicAllocator, zone_root_path: &Utf8Path, zone_image_paths: &[Utf8PathBuf], @@ -934,6 +958,7 @@ impl InstalledZone { Ok(InstalledZone { log: log.new(o!("zone" => full_zone_name.clone())), + executor: executor.clone(), zonepath: zone_root_path.join(&full_zone_name), name: full_zone_name, control_vnic, diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 5e7c4790fb..8152ca2cda 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -4,7 +4,7 @@ //! Utilities for poking at ZFS. 
-use crate::{execute, PFEXEC}; +use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::Utf8PathBuf; use omicron_common::disk::DiskIdentity; use std::fmt; @@ -20,7 +20,7 @@ pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; pub struct ListDatasetsError { name: String, #[source] - err: crate::ExecutionError, + err: ExecutionError, } /// Error returned by [`Zfs::destroy_dataset`]. @@ -29,13 +29,13 @@ pub struct ListDatasetsError { pub struct DestroyDatasetError { name: String, #[source] - err: crate::ExecutionError, + err: ExecutionError, } #[derive(thiserror::Error, Debug)] enum EnsureFilesystemErrorRaw { #[error("ZFS execution error: {0}")] - Execution(#[from] crate::ExecutionError), + Execution(#[from] ExecutionError), #[error("Filesystem does not exist, and formatting was not requested")] NotFoundNotFormatted, @@ -44,7 +44,7 @@ enum EnsureFilesystemErrorRaw { Output(String), #[error("Failed to mount encrypted filesystem: {0}")] - MountEncryptedFsFailed(crate::ExecutionError), + MountEncryptedFsFailed(ExecutionError), } /// Error returned by [`Zfs::ensure_filesystem`]. @@ -68,13 +68,13 @@ pub struct SetValueError { filesystem: String, name: String, value: String, - err: crate::ExecutionError, + err: ExecutionError, } #[derive(thiserror::Error, Debug)] enum GetValueErrorRaw { #[error(transparent)] - Execution(#[from] crate::ExecutionError), + Execution(#[from] ExecutionError), #[error("No value found with that name")] MissingValue, @@ -141,11 +141,15 @@ pub struct EncryptionDetails { #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zfs { /// Lists all datasets within a pool or existing dataset. 
- pub fn list_datasets(name: &str) -> Result, ListDatasetsError> { + pub fn list_datasets( + executor: &BoxedExecutor, + name: &str, + ) -> Result, ListDatasetsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd) + let output = executor + .execute(cmd) .map_err(|err| ListDatasetsError { name: name.to_string(), err })?; let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout @@ -160,10 +164,13 @@ impl Zfs { } /// Destroys a dataset. - pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> { + pub fn destroy_dataset( + executor: &BoxedExecutor, + name: &str, + ) -> Result<(), DestroyDatasetError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "destroy", "-r", name]); - execute(cmd).map_err(|err| DestroyDatasetError { + executor.execute(cmd).map_err(|err| DestroyDatasetError { name: name.to_string(), err, })?; @@ -172,13 +179,15 @@ impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists. pub fn ensure_filesystem( + executor: &BoxedExecutor, name: &str, mountpoint: Mountpoint, zoned: bool, do_format: bool, encryption_details: Option, ) -> Result<(), EnsureFilesystemError> { - let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; + let (exists, mounted) = + Self::dataset_exists(executor, name, &mountpoint)?; if exists { if encryption_details.is_none() { // If the dataset exists, we're done. 
Unencrypted datasets are @@ -190,7 +199,11 @@ impl Zfs { return Ok(()); } // We need to load the encryption key and mount the filesystem - return Self::mount_encrypted_dataset(name, &mountpoint); + return Self::mount_encrypted_dataset( + executor, + name, + &mountpoint, + ); } } @@ -223,7 +236,7 @@ impl Zfs { ]); } cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); - execute(cmd).map_err(|err| EnsureFilesystemError { + executor.execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), mountpoint, err: err.into(), @@ -232,12 +245,13 @@ impl Zfs { } fn mount_encrypted_dataset( + executor: &BoxedExecutor, name: &str, mountpoint: &Mountpoint, ) -> Result<(), EnsureFilesystemError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-l", name]); - execute(cmd).map_err(|err| EnsureFilesystemError { + executor.execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::MountEncryptedFsFailed(err), @@ -248,6 +262,7 @@ impl Zfs { // Return (true, mounted) if the dataset exists, (false, false) otherwise, // where mounted is if the dataset is mounted. fn dataset_exists( + executor: &BoxedExecutor, name: &str, mountpoint: &Mountpoint, ) -> Result<(bool, bool), EnsureFilesystemError> { @@ -259,7 +274,7 @@ impl Zfs { name, ]); // If the list command returns any valid output, validate it. 
- if let Ok(output) = execute(cmd) { + if let Ok(output) = executor.execute(cmd) { let stdout = String::from_utf8_lossy(&output.stdout); let values: Vec<&str> = stdout.trim().split('\t').collect(); if &values[..3] != &[name, "filesystem", &mountpoint.to_string()] { @@ -277,14 +292,21 @@ impl Zfs { } pub fn set_oxide_value( + executor: &BoxedExecutor, filesystem_name: &str, name: &str, value: &str, ) -> Result<(), SetValueError> { - Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value) + Zfs::set_value( + executor, + filesystem_name, + &format!("oxide:{}", name), + value, + ) } fn set_value( + executor: &BoxedExecutor, filesystem_name: &str, name: &str, value: &str, @@ -292,7 +314,7 @@ impl Zfs { let mut command = std::process::Command::new(PFEXEC); let value_arg = format!("{}={}", name, value); let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); - execute(cmd).map_err(|err| SetValueError { + executor.execute(cmd).map_err(|err| SetValueError { filesystem: filesystem_name.to_string(), name: name.to_string(), value: value.to_string(), @@ -302,20 +324,22 @@ impl Zfs { } pub fn get_oxide_value( + executor: &BoxedExecutor, filesystem_name: &str, name: &str, ) -> Result { - Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) + Zfs::get_value(executor, filesystem_name, &format!("oxide:{}", name)) } fn get_value( + executor: &BoxedExecutor, filesystem_name: &str, name: &str, ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd).map_err(|err| GetValueError { + let output = executor.execute(cmd).map_err(|err| GetValueError { filesystem: filesystem_name.to_string(), name: name.to_string(), err: err.into(), @@ -334,17 +358,19 @@ impl Zfs { } /// Returns all datasets managed by Omicron -pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { +pub fn get_all_omicron_datasets_for_delete( + executor: &BoxedExecutor, 
+) -> anyhow::Result> { let mut datasets = vec![]; // Collect all datasets within Oxide zpools. // // This includes cockroachdb, clickhouse, and crucible datasets. - let zpools = crate::zpool::Zpool::list()?; + let zpools = crate::zpool::Zpool::list(executor)?; for pool in &zpools { let internal = pool.kind() == crate::zpool::ZpoolKind::Internal; let pool = pool.to_string(); - for dataset in &Zfs::list_datasets(&pool)? { + for dataset in &Zfs::list_datasets(executor, &pool)? { // Avoid erasing crashdump datasets on internal pools if dataset == "crash" && internal { continue; @@ -356,7 +382,8 @@ pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { // Collect all datasets for ramdisk-based Oxide zones, // if any exist. - if let Ok(ramdisk_datasets) = Zfs::list_datasets(&ZONE_ZFS_RAMDISK_DATASET) + if let Ok(ramdisk_datasets) = + Zfs::list_datasets(executor, &ZONE_ZFS_RAMDISK_DATASET) { for dataset in &ramdisk_datasets { datasets.push(format!("{}/{dataset}", ZONE_ZFS_RAMDISK_DATASET)); diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 6b605eb571..608a9b422f 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -14,7 +14,7 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::addrobj::AddrObject; use crate::dladm::{EtherstubVnic, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL}; -use crate::{execute, PFEXEC}; +use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use omicron_common::address::SLED_PREFIX; const DLADM: &str = "/usr/sbin/dladm"; @@ -30,7 +30,7 @@ pub const PROPOLIS_ZONE_PREFIX: &str = "oxz_propolis-server_"; #[derive(thiserror::Error, Debug)] enum Error { #[error("Zone execution error: {0}")] - Execution(#[from] crate::ExecutionError), + Execution(#[from] ExecutionError), #[error(transparent)] AddrObject(#[from] crate::addrobj::ParseError), @@ -68,7 +68,7 @@ pub struct DeleteAddressError { zone: String, addrobj: AddrObject, #[source] - err: crate::ExecutionError, + err: ExecutionError, } /// Errors from 
[`Zones::get_control_interface`]. @@ -79,7 +79,7 @@ pub enum GetControlInterfaceError { Execution { zone: String, #[source] - err: crate::ExecutionError, + err: ExecutionError, }, #[error("VNIC starting with 'oxControl' not found in {zone}")] @@ -94,7 +94,7 @@ pub enum GetBootstrapInterfaceError { Execution { zone: String, #[source] - err: crate::ExecutionError, + err: ExecutionError, }, #[error("VNIC starting with 'oxBootstrap' not found in {zone}")] @@ -415,6 +415,7 @@ impl Zones { /// Returns the name of the VNIC used to communicate with the control plane. pub fn get_control_interface( + executor: &BoxedExecutor, zone: &str, ) -> Result { let mut command = std::process::Command::new(PFEXEC); @@ -427,7 +428,7 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd).map_err(|err| { + let output = executor.execute(cmd).map_err(|err| { GetControlInterfaceError::Execution { zone: zone.to_string(), err } })?; String::from_utf8_lossy(&output.stdout) @@ -446,6 +447,7 @@ impl Zones { /// Returns the name of the VNIC used to communicate with the bootstrap network. pub fn get_bootstrap_interface( + executor: &BoxedExecutor, zone: &str, ) -> Result, GetBootstrapInterfaceError> { let mut command = std::process::Command::new(PFEXEC); @@ -458,7 +460,7 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd).map_err(|err| { + let output = executor.execute(cmd).map_err(|err| { GetBootstrapInterfaceError::Execution { zone: zone.to_string(), err, @@ -493,12 +495,13 @@ impl Zones { /// If `None` is supplied, the address is queried from the Global Zone. 
#[allow(clippy::needless_lifetimes)] pub fn ensure_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, ) -> Result { |zone, addrobj, addrtype| -> Result { - match Self::get_address_impl(zone, addrobj) { + match Self::get_address_impl(executor, zone, addrobj) { Ok(addr) => { if let AddressRequest::Static(expected_addr) = addrtype { // If the address is static, we need to validate that it @@ -506,18 +509,20 @@ impl Zones { if addr != expected_addr { // If the address doesn't match, try removing the old // value before using the new one. - Self::delete_address(zone, addrobj) + Self::delete_address(executor, zone, addrobj) .map_err(|e| anyhow!(e))?; return Self::create_address( - zone, addrobj, addrtype, + executor, zone, addrobj, addrtype, ) .map_err(|e| anyhow!(e)); } } Ok(addr) } - Err(_) => Self::create_address(zone, addrobj, addrtype) - .map_err(|e| anyhow!(e)), + Err(_) => { + Self::create_address(executor, zone, addrobj, addrtype) + .map_err(|e| anyhow!(e)) + } } }(zone, addrobj, addrtype) .map_err(|err| EnsureAddressError { @@ -534,13 +539,16 @@ impl Zones { /// If `None` is supplied, the address is queried from the Global Zone. #[allow(clippy::needless_lifetimes)] pub fn get_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result { - Self::get_address_impl(zone, addrobj).map_err(|err| GetAddressError { - zone: zone.unwrap_or("global").to_string(), - name: addrobj.clone(), - err: anyhow!(err), + Self::get_address_impl(executor, zone, addrobj).map_err(|err| { + GetAddressError { + zone: zone.unwrap_or("global").to_string(), + name: addrobj.clone(), + err: anyhow!(err), + } }) } @@ -550,6 +558,7 @@ impl Zones { /// If `None` is supplied, the address is queried from the Global Zone. 
#[allow(clippy::needless_lifetimes)] fn get_address_impl<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result { @@ -564,7 +573,7 @@ impl Zones { args.extend(&[IPADM, "show-addr", "-p", "-o", "ADDR", &addrobj_str]); let cmd = command.args(args); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; String::from_utf8_lossy(&output.stdout) .lines() .find_map(|s| parse_ip_network(s).ok()) @@ -577,6 +586,7 @@ impl Zones { /// If `None` is supplied, the address is queried from the Global Zone. #[allow(clippy::needless_lifetimes)] pub fn get_all_addresses<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result, GetAddressesError> { @@ -591,11 +601,12 @@ impl Zones { args.extend(&[IPADM, "show-addr", "-p", "-o", "ADDR", &addrobj_str]); let cmd = command.args(args); - let output = execute(cmd).map_err(|err| GetAddressesError { - zone: zone.unwrap_or("global").to_string(), - name: addrobj.clone(), - err: err.into(), - })?; + let output = + executor.execute(cmd).map_err(|err| GetAddressesError { + zone: zone.unwrap_or("global").to_string(), + name: addrobj.clone(), + err: err.into(), + })?; Ok(String::from_utf8_lossy(&output.stdout) .lines() .filter_map(|s| s.parse().ok()) @@ -608,6 +619,7 @@ impl Zones { /// run the command in the Global zone. #[allow(clippy::needless_lifetimes)] fn has_link_local_v6_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result<(), Error> { @@ -622,7 +634,7 @@ impl Zones { let args = prefix.iter().chain(show_addr_args); let cmd = command.args(args); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; if let Some(_) = String::from_utf8_lossy(&output.stdout) .lines() .find(|s| s.trim() == "addrconf") @@ -637,10 +649,11 @@ impl Zones { // Does NOT check if the address already exists. 
#[allow(clippy::needless_lifetimes)] fn create_address_internal<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, - ) -> Result<(), crate::ExecutionError> { + ) -> Result<(), ExecutionError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -667,13 +680,14 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - execute(cmd)?; + executor.execute(cmd)?; Ok(()) } #[allow(clippy::needless_lifetimes)] pub fn delete_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, ) -> Result<(), DeleteAddressError> { @@ -689,7 +703,7 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - execute(cmd).map_err(|err| DeleteAddressError { + executor.execute(cmd).map_err(|err| DeleteAddressError { zone: zone.unwrap_or("global").to_string(), addrobj: addrobj.clone(), err, @@ -706,10 +720,13 @@ impl Zones { /// #[allow(clippy::needless_lifetimes)] pub fn ensure_has_link_local_v6_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, - ) -> Result<(), crate::ExecutionError> { - if let Ok(()) = Self::has_link_local_v6_address(zone, &addrobj) { + ) -> Result<(), ExecutionError> { + if let Ok(()) = + Self::has_link_local_v6_address(executor, zone, &addrobj) + { return Ok(()); } @@ -730,7 +747,7 @@ impl Zones { let args = prefix.iter().chain(create_addr_args); let cmd = command.args(args); - execute(cmd)?; + executor.execute(cmd)?; Ok(()) } @@ -740,6 +757,7 @@ impl Zones { // (which exists pre-RSS), but we should remove all uses of it other than // the bootstrap agent. 
pub fn ensure_has_global_zone_v6_address( + executor: &BoxedExecutor, link: EtherstubVnic, address: Ipv6Addr, name: &str, @@ -750,6 +768,7 @@ impl Zones { let gz_link_local_addrobj = AddrObject::link_local(&link.0) .map_err(|err| anyhow!(err))?; Self::ensure_has_link_local_v6_address( + executor, None, &gz_link_local_addrobj, ) @@ -762,6 +781,7 @@ impl Zones { // this sled itself are within the underlay or bootstrap prefix. // Anything else must be routed through Sidecar. Self::ensure_address( + executor, None, &gz_link_local_addrobj .on_same_interface(name) @@ -802,6 +822,7 @@ impl Zones { // Creates an IP address within a Zone. #[allow(clippy::needless_lifetimes)] fn create_address<'a>( + executor: &BoxedExecutor, zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, @@ -821,6 +842,7 @@ impl Zones { let link_local_addrobj = addrobj.link_local_on_same_interface()?; Self::ensure_has_link_local_v6_address( + executor, Some(zone), &link_local_addrobj, )?; @@ -830,9 +852,8 @@ impl Zones { }; // Actually perform address allocation. - Self::create_address_internal(zone, addrobj, addrtype)?; - - Self::get_address_impl(zone, addrobj) + Self::create_address_internal(executor, zone, addrobj, addrtype)?; + Self::get_address_impl(executor, zone, addrobj) } } diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 2560e68eba..fddc93dfdc 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -4,7 +4,7 @@ //! Utilities for managing Zpools. 
-use crate::{execute, ExecutionError, PFEXEC}; +use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -23,7 +23,7 @@ pub struct ParseError(String); #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Zpool execution error: {0}")] - Execution(#[from] crate::ExecutionError), + Execution(#[from] ExecutionError), #[error(transparent)] Parse(#[from] ParseError), @@ -167,14 +167,18 @@ pub struct Zpool {} #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zpool { - pub fn create(name: ZpoolName, vdev: &Utf8Path) -> Result<(), CreateError> { + pub fn create( + executor: &BoxedExecutor, + name: ZpoolName, + vdev: &Utf8Path, + ) -> Result<(), CreateError> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("create"); cmd.arg(&name.to_string()); cmd.arg(vdev); - execute(&mut cmd).map_err(Error::from)?; + executor.execute(&mut cmd).map_err(Error::from)?; // Ensure that this zpool has the encryption feature enabled let mut cmd = std::process::Command::new(PFEXEC); @@ -184,18 +188,21 @@ impl Zpool { .arg("set") .arg("feature@encryption=enabled") .arg(&name.to_string()); - execute(&mut cmd).map_err(Error::from)?; + executor.execute(&mut cmd).map_err(Error::from)?; Ok(()) } - pub fn import(name: ZpoolName) -> Result<(), Error> { + pub fn import( + executor: &BoxedExecutor, + name: ZpoolName, + ) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("import").arg("-f"); cmd.arg(&name.to_string()); - match execute(&mut cmd) { + match executor.execute(&mut cmd) { Ok(_) => Ok(()), Err(ExecutionError::CommandFailure(err_info)) => { // I'd really prefer to match on a specific error code, but the @@ -213,18 +220,24 @@ impl Zpool { } } - pub fn export(name: 
&ZpoolName) -> Result<(), Error> { + pub fn export( + executor: &BoxedExecutor, + name: &ZpoolName, + ) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); cmd.arg(ZPOOL).arg("export").arg(&name.to_string()); - execute(&mut cmd)?; + executor.execute(&mut cmd)?; Ok(()) } /// `zpool set failmode=continue ` - pub fn set_failmode_continue(name: &ZpoolName) -> Result<(), Error> { + pub fn set_failmode_continue( + executor: &BoxedExecutor, + name: &ZpoolName, + ) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); @@ -232,15 +245,15 @@ impl Zpool { .arg("set") .arg("failmode=continue") .arg(&name.to_string()); - execute(&mut cmd)?; + executor.execute(&mut cmd)?; Ok(()) } - pub fn list() -> Result, ListError> { + pub fn list(executor: &BoxedExecutor) -> Result, ListError> { let mut command = std::process::Command::new(ZPOOL); let cmd = command.args(&["list", "-Hpo", "name"]); - let output = execute(cmd).map_err(Error::from)?; + let output = executor.execute(cmd).map_err(Error::from)?; let stdout = String::from_utf8_lossy(&output.stdout); let zpool = stdout .lines() @@ -250,7 +263,10 @@ impl Zpool { } #[cfg_attr(test, allow(dead_code))] - pub fn get_info(name: &str) -> Result { + pub fn get_info( + executor: &BoxedExecutor, + name: &str, + ) -> Result { let mut command = std::process::Command::new(ZPOOL); let cmd = command.args(&[ "list", @@ -259,7 +275,7 @@ impl Zpool { name, ]); - let output = execute(cmd).map_err(|err| GetInfoError { + let output = executor.execute(cmd).map_err(|err| GetInfoError { name: name.to_string(), err: err.into(), })?; diff --git a/installinator/src/bootstrap.rs b/installinator/src/bootstrap.rs index a1642df22b..2a609396d7 100644 --- a/installinator/src/bootstrap.rs +++ b/installinator/src/bootstrap.rs @@ -13,6 +13,7 @@ use ddm_admin_client::Client as DdmAdminClient; use illumos_utils::addrobj::AddrObject; use 
illumos_utils::dladm; use illumos_utils::dladm::Dladm; +use illumos_utils::process::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::address::Ipv6Subnet; use sled_hardware::underlay; @@ -26,9 +27,12 @@ const MG_DDM_MANIFEST_PATH: &str = "/opt/oxide/mg-ddm/pkg/ddm/manifest.xml"; // TODO-cleanup The implementation of this function is heavily derived from // `sled_agent::bootstrap::server::Server::start()`; consider whether we could // find a way for them to share it. -pub(crate) async fn bootstrap_sled(log: Logger) -> Result<()> { +pub(crate) async fn bootstrap_sled( + log: Logger, + executor: &BoxedExecutor, +) -> Result<()> { // Find address objects to pass to maghemite. - let links = underlay::find_chelsio_links() + let links = underlay::find_chelsio_links(executor) .context("failed to find chelsio links")?; ensure!( !links.is_empty(), @@ -36,8 +40,10 @@ pub(crate) async fn bootstrap_sled(log: Logger) -> Result<()> { ); let mg_addr_objs = - underlay::ensure_links_have_global_zone_link_local_v6_addresses(&links) - .context("failed to create address objects for maghemite")?; + underlay::ensure_links_have_global_zone_link_local_v6_addresses( + executor, &links, + ) + .context("failed to create address objects for maghemite")?; info!(log, "Starting mg-ddm service"); { @@ -51,20 +57,22 @@ pub(crate) async fn bootstrap_sled(log: Logger) -> Result<()> { // Set up an interface for our bootstrap network. let bootstrap_etherstub = - Dladm::ensure_etherstub(dladm::BOOTSTRAP_ETHERSTUB_NAME) + Dladm::ensure_etherstub(executor, dladm::BOOTSTRAP_ETHERSTUB_NAME) .context("failed to ensure bootstrap etherstub existence")?; let bootstrap_etherstub_vnic = - Dladm::ensure_etherstub_vnic(&bootstrap_etherstub) + Dladm::ensure_etherstub_vnic(executor, &bootstrap_etherstub) .context("failed to ensure bootstrap etherstub vnic existence")?; // Use the mac address of the first link to derive our bootstrap address. 
- let ip = - BootstrapInterface::GlobalZone.ip(&links[0]).with_context(|| { + let ip = BootstrapInterface::GlobalZone + .ip(executor, &links[0]) + .with_context(|| { format!("failed to derive a bootstrap prefix from {:?}", links[0]) })?; Zones::ensure_has_global_zone_v6_address( + executor, bootstrap_etherstub_vnic, ip, "bootstrap6", diff --git a/installinator/src/dispatch.rs b/installinator/src/dispatch.rs index ae4762218e..4b42184596 100644 --- a/installinator/src/dispatch.rs +++ b/installinator/src/dispatch.rs @@ -8,6 +8,7 @@ use anyhow::{Context, Result}; use buf_list::Cursor; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Args, Parser, Subcommand}; +use illumos_utils::process::BoxedExecutor; use installinator_common::{ InstallinatorCompletionMetadata, InstallinatorComponent, InstallinatorSpec, InstallinatorStepId, StepContext, StepHandle, StepProgress, StepSuccess, @@ -38,13 +39,19 @@ pub struct InstallinatorApp { impl InstallinatorApp { /// Executes the app. - pub async fn exec(self, log: &slog::Logger) -> Result<()> { + pub async fn exec( + self, + log: &slog::Logger, + executor: &BoxedExecutor, + ) -> Result<()> { match self.subcommand { InstallinatorCommand::DebugDiscover(opts) => opts.exec(log).await, InstallinatorCommand::DebugHardwareScan(opts) => { - opts.exec(log).await + opts.exec(log, executor).await + } + InstallinatorCommand::Install(opts) => { + opts.exec(log, executor).await } - InstallinatorCommand::Install(opts) => opts.exec(log).await, } } @@ -113,11 +120,15 @@ struct DiscoverOpts { struct DebugHardwareScan {} impl DebugHardwareScan { - async fn exec(self, log: &slog::Logger) -> Result<()> { + async fn exec( + self, + log: &slog::Logger, + executor: &BoxedExecutor, + ) -> Result<()> { // Finding the write destination from the gimlet hardware logs details // about what it's doing sufficiently for this subcommand; just create a // write destination and then discard it. 
- _ = WriteDestination::from_hardware(log).await?; + _ = WriteDestination::from_hardware(log, executor).await?; Ok(()) } } @@ -160,9 +171,13 @@ struct InstallOpts { } impl InstallOpts { - async fn exec(self, log: &slog::Logger) -> Result<()> { + async fn exec( + self, + log: &slog::Logger, + executor: &BoxedExecutor, + ) -> Result<()> { if self.bootstrap_sled { - crate::bootstrap::bootstrap_sled(log.clone()).await?; + crate::bootstrap::bootstrap_sled(log.clone(), executor).await?; } let image_id = self.artifact_ids.resolve()?; @@ -252,7 +267,7 @@ impl InstallOpts { InstallinatorStepId::Scan, "Scanning hardware to find M.2 disks", move |cx| async move { - scan_hardware_with_retries(&cx, &log).await + scan_hardware_with_retries(&cx, &log, executor).await }, ) .register() @@ -309,7 +324,7 @@ impl InstallOpts { // TODO: verify artifact was correctly written out to disk. - let write_output = writer.write(&cx, log).await; + let write_output = writer.write(&cx, log, executor).await; let slots_not_written = write_output.slots_not_written(); let metadata = InstallinatorCompletionMetadata::Write { @@ -355,6 +370,7 @@ impl InstallOpts { async fn scan_hardware_with_retries( cx: &StepContext, log: &slog::Logger, + executor: &BoxedExecutor, ) -> Result> { // Scanning for our disks is inherently racy: we have to wait for the disks // to attach. 
This should take milliseconds in general; we'll set a hard cap @@ -365,7 +381,7 @@ async fn scan_hardware_with_retries( let mut retry = 0; let result = loop { let log = log.clone(); - let result = WriteDestination::from_hardware(&log).await; + let result = WriteDestination::from_hardware(&log, executor).await; match result { Ok(destination) => break Ok(destination), diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index ffa0b74739..a35871b624 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,6 +6,7 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; +use illumos_utils::process::BoxedExecutor; use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; @@ -18,7 +19,7 @@ pub struct Hardware { } impl Hardware { - pub async fn scan(log: &Logger) -> Result { + pub async fn scan(log: &Logger, executor: &BoxedExecutor) -> Result { let is_gimlet = sled_hardware::is_gimlet() .context("failed to detect whether host is a gimlet")?; ensure!(is_gimlet, "hardware scan only supported on gimlets"); @@ -47,7 +48,7 @@ impl Hardware { ); } DiskVariant::M2 => { - let disk = Disk::new(log, disk, None) + let disk = Disk::new(log, executor, disk, None) .await .context("failed to instantiate Disk handle for M.2")?; m2_disks.push(disk); diff --git a/installinator/src/main.rs b/installinator/src/main.rs index 1fb3d3f678..ca5ad539e7 100644 --- a/installinator/src/main.rs +++ b/installinator/src/main.rs @@ -5,12 +5,14 @@ use std::error::Error; use clap::Parser; +use illumos_utils::process::RealExecutor; use installinator::InstallinatorApp; #[tokio::main] async fn main() -> Result<(), Box> { let app = InstallinatorApp::parse(); let log = InstallinatorApp::setup_log("/tmp/installinator.log")?; - app.exec(&log).await?; + let executor = RealExecutor::new(log.clone()).as_executor(); + app.exec(&log, &executor).await?; Ok(()) } diff --git a/installinator/src/write.rs 
b/installinator/src/write.rs index f1b315f644..fbea8b1a8c 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -16,6 +16,7 @@ use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::{ dkio::{self, MediaInfoExtended}, + process::BoxedExecutor, zpool::{Zpool, ZpoolName}, }; use installinator_common::{ @@ -90,8 +91,11 @@ impl WriteDestination { Ok(Self { drives, is_host_phase_2_block_device: false }) } - pub(crate) async fn from_hardware(log: &Logger) -> Result { - let hardware = Hardware::scan(log).await?; + pub(crate) async fn from_hardware( + log: &Logger, + executor: &BoxedExecutor, + ) -> Result { + let hardware = Hardware::scan(log, executor).await?; // We want the `,raw`-suffixed path to the boot image partition, as that // allows us file-like access via the character device. @@ -219,6 +223,7 @@ impl<'a> ArtifactWriter<'a> { &mut self, cx: &StepContext, log: &Logger, + executor: &BoxedExecutor, ) -> WriteOutput { let mut control_plane_transport = FileTransport; if self.is_host_phase_2_block_device { @@ -226,6 +231,7 @@ impl<'a> ArtifactWriter<'a> { self.write_with_transport( cx, log, + executor, &mut host_transport, &mut control_plane_transport, ) @@ -235,6 +241,7 @@ impl<'a> ArtifactWriter<'a> { self.write_with_transport( cx, log, + executor, &mut host_transport, &mut control_plane_transport, ) @@ -246,6 +253,7 @@ impl<'a> ArtifactWriter<'a> { &mut self, cx: &StepContext, log: &Logger, + executor: &BoxedExecutor, host_phase_2_transport: &mut impl WriteTransport, control_plane_transport: &mut impl WriteTransport, ) -> WriteOutput { @@ -264,6 +272,7 @@ impl<'a> ArtifactWriter<'a> { // want each drive to track success and failure independently. 
let write_cx = SlotWriteContext { log: log.clone(), + executor: executor.clone(), artifacts: self.artifacts, slot: *drive, destinations, @@ -346,6 +355,7 @@ impl<'a> ArtifactWriter<'a> { struct SlotWriteContext<'a> { log: Logger, + executor: BoxedExecutor, artifacts: ArtifactsToWrite<'a>, slot: M2Slot, destinations: &'a ArtifactDestination, @@ -419,6 +429,7 @@ impl<'a> SlotWriteContext<'a> { self.artifacts .write_control_plane( &self.log, + &self.executor, self.slot, self.destinations, transport, @@ -470,6 +481,7 @@ impl ArtifactsToWrite<'_> { async fn write_control_plane( &self, log: &Logger, + executor: &BoxedExecutor, slot: M2Slot, destinations: &ArtifactDestination, transport: &mut impl WriteTransport, @@ -479,6 +491,7 @@ impl ArtifactsToWrite<'_> { // own step. let inner_cx = &ControlPlaneZoneWriteContext { slot, + executor: executor.clone(), clean_output_directory: destinations.clean_control_plane_dir, output_directory: &destinations.control_plane_dir, zones: self.control_plane_zones, @@ -512,6 +525,7 @@ impl ArtifactsToWrite<'_> { struct ControlPlaneZoneWriteContext<'a> { slot: M2Slot, + executor: BoxedExecutor, clean_output_directory: bool, output_directory: &'a Utf8Path, zones: &'a ControlPlaneZoneImages, @@ -610,7 +624,7 @@ impl ControlPlaneZoneWriteContext<'_> { std::mem::drop(output_directory); if let Some(zpool) = zpool { - Zpool::export(zpool)?; + Zpool::export(&self.executor, zpool)?; } StepSuccess::new(()).into() @@ -1039,6 +1053,8 @@ mod tests { let engine = UpdateEngine::new(&logctx.log, event_sender); let log = logctx.log.clone(); + let executor = illumos_utils::process::FakeExecutor::new(log.clone()) + .as_executor(); engine .new_step( InstallinatorComponent::Both, @@ -1049,6 +1065,7 @@ mod tests { .write_with_transport( &cx, &log, + &executor, &mut host_transport, &mut control_plane_transport, ) diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 29b0069240..6a04fb5f04 100644 --- 
a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -7,6 +7,7 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; +use illumos_utils::process::{BoxedExecutor, RealExecutor}; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::target::KnownTarget; @@ -565,8 +566,11 @@ async fn uninstall_all_omicron_zones() -> Result<()> { Ok(()) } -fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { - let datasets = match zfs::get_all_omicron_datasets_for_delete() { +fn uninstall_all_omicron_datasets( + executor: &BoxedExecutor, + config: &Config, +) -> Result<()> { + let datasets = match zfs::get_all_omicron_datasets_for_delete(executor) { Err(e) => { warn!(config.log, "Failed to get omicron datasets: {}", e); return Err(e); @@ -584,7 +588,7 @@ fn uninstall_all_omicron_datasets(config: &Config) -> Result<()> { ))?; for dataset in &datasets { info!(config.log, "Deleting dataset: {dataset}"); - zfs::Zfs::destroy_dataset(dataset)?; + zfs::Zfs::destroy_dataset(executor, dataset)?; } Ok(()) @@ -653,19 +657,21 @@ fn remove_all_except>( } async fn do_deactivate(config: &Config) -> Result<()> { + let executor = RealExecutor::new(config.log.clone()).as_executor(); info!(&config.log, "Removing all Omicron zones"); uninstall_all_omicron_zones().await?; info!(config.log, "Uninstalling all packages"); uninstall_all_packages(config); info!(config.log, "Removing networking resources"); - cleanup_networking_resources(&config.log).await?; + cleanup_networking_resources(&config.log, &executor).await?; Ok(()) } async fn do_uninstall(config: &Config) -> Result<()> { + let executor = RealExecutor::new(config.log.clone()).as_executor(); do_deactivate(config).await?; info!(config.log, "Removing datasets"); - uninstall_all_omicron_datasets(config)?; + uninstall_all_omicron_datasets(&executor, config)?; Ok(()) } diff --git 
a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index bea1cdc0c1..bb99fe4732 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -6,6 +6,7 @@ use camino::Utf8PathBuf; use clap::{Parser, Subcommand}; +use illumos_utils::process::RealExecutor; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::{ @@ -91,9 +92,15 @@ async fn do_run() -> Result<(), CmdError> { None }; + let log = config + .log + .to_logger("sled-agent") + .map_err(|e| CmdError::Failure(e.to_string()))?; + let executor = RealExecutor::new(log).as_executor(); + // Derive the bootstrap addresses from the data link's MAC address. let link = config - .get_link() + .get_link(&executor) .map_err(|e| CmdError::Failure(e.to_string()))?; // Configure and run the Bootstrap server. @@ -107,10 +114,13 @@ async fn do_run() -> Result<(), CmdError> { // TODO: It's a little silly to pass the config this way - namely, // that we construct the bootstrap config from `config`, but then // pass it separately just so the sled agent can ingest it later on. - let server = - bootstrap_server::Server::start(bootstrap_config, config) - .await - .map_err(CmdError::Failure)?; + let server = bootstrap_server::Server::start( + &executor, + bootstrap_config, + config, + ) + .await + .map_err(CmdError::Failure)?; // If requested, automatically supply the RSS configuration. 
// diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 4da8ae0d12..dc9ec7bd36 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -24,12 +24,12 @@ use ddm_admin_client::{Client as DdmAdminClient, DdmError}; use futures::stream::{self, StreamExt, TryStreamExt}; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm::{Dladm, Etherstub, EtherstubVnic, GetMacError}; +use illumos_utils::process::{BoxedExecutor, PFEXEC}; use illumos_utils::zfs::{ self, Mountpoint, Zfs, ZONE_ZFS_RAMDISK_DATASET, ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT, }; use illumos_utils::zone::Zones; -use illumos_utils::{execute, PFEXEC}; use key_manager::{KeyManager, StorageKeyRequester}; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Error as ExternalError; @@ -68,7 +68,7 @@ pub enum BootstrapError { Cleanup(anyhow::Error), #[error("Failed to enable routing: {0}")] - EnablingRouting(illumos_utils::ExecutionError), + EnablingRouting(illumos_utils::process::ExecutionError), #[error("Error contacting ddmd: {0}")] DdmError(#[from] DdmError), @@ -137,20 +137,26 @@ enum SledAgentState { After(SledServer), } -fn underlay_etherstub() -> Result { - Dladm::ensure_etherstub(illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME) - .map_err(|e| { - BootstrapError::SledError(format!( - "Can't access etherstub device: {}", - e - )) - }) +fn underlay_etherstub( + executor: &BoxedExecutor, +) -> Result { + Dladm::ensure_etherstub( + executor, + illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME, + ) + .map_err(|e| { + BootstrapError::SledError(format!( + "Can't access etherstub device: {}", + e + )) + }) } fn underlay_etherstub_vnic( + executor: &BoxedExecutor, underlay_etherstub: &Etherstub, ) -> Result { - Dladm::ensure_etherstub_vnic(&underlay_etherstub).map_err(|e| { + Dladm::ensure_etherstub_vnic(executor, &underlay_etherstub).map_err(|e| { BootstrapError::SledError(format!( "Can't access etherstub VNIC device: {}", 
e @@ -158,14 +164,19 @@ fn underlay_etherstub_vnic( }) } -fn bootstrap_etherstub() -> Result { - Dladm::ensure_etherstub(illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_NAME) - .map_err(|e| { - BootstrapError::SledError(format!( - "Can't access etherstub device: {}", - e - )) - }) +fn bootstrap_etherstub( + executor: &BoxedExecutor, +) -> Result { + Dladm::ensure_etherstub( + executor, + illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_NAME, + ) + .map_err(|e| { + BootstrapError::SledError(format!( + "Can't access etherstub device: {}", + e + )) + }) } /// The entity responsible for bootstrapping an Oxide rack. @@ -176,6 +187,9 @@ pub struct Agent { /// other launched components can set their own value. parent_log: Logger, + /// The sink for running "std::process::Command" + executor: BoxedExecutor, + /// Bootstrap network address. ip: Ipv6Addr, @@ -215,6 +229,7 @@ const SLED_AGENT_REQUEST_FILE: &str = "sled-agent-request.toml"; // known clean slate" is easier to work with. async fn cleanup_all_old_global_state( log: &Logger, + executor: &BoxedExecutor, ) -> Result<(), BootstrapError> { // Identify all existing zones which should be managed by the Sled // Agent. @@ -246,7 +261,7 @@ async fn cleanup_all_old_global_state( // Note that we don't currently delete the VNICs in any particular // order. That should be OK, since we're definitely deleting the guest // VNICs before the xde devices, which is the main constraint. 
- sled_hardware::cleanup::delete_omicron_vnics(&log) + sled_hardware::cleanup::delete_omicron_vnics(&log, executor) .await .map_err(|err| BootstrapError::Cleanup(err))?; @@ -274,6 +289,7 @@ async fn sled_config_paths(storage: &StorageResources) -> Vec { impl Agent { pub async fn new( log: Logger, + executor: &BoxedExecutor, config: Config, sled_config: SledConfig, ) -> Result { @@ -281,7 +297,7 @@ impl Agent { "component" => "BootstrapAgent", )); let link = config.link.clone(); - let ip = BootstrapInterface::GlobalZone.ip(&link)?; + let ip = BootstrapInterface::GlobalZone.ip(executor, &link)?; // We expect this directory to exist for Key Management // It's purposefully in the ramdisk and files only exist long enough @@ -300,18 +316,18 @@ impl Agent { err, })?; - let bootstrap_etherstub = bootstrap_etherstub()?; - let bootstrap_etherstub_vnic = Dladm::ensure_etherstub_vnic( - &bootstrap_etherstub, - ) - .map_err(|e| { - BootstrapError::SledError(format!( - "Can't access etherstub VNIC device: {}", - e - )) - })?; + let bootstrap_etherstub = bootstrap_etherstub(executor)?; + let bootstrap_etherstub_vnic = + Dladm::ensure_etherstub_vnic(executor, &bootstrap_etherstub) + .map_err(|e| { + BootstrapError::SledError(format!( + "Can't access etherstub VNIC device: {}", + e + )) + })?; Zones::ensure_has_global_zone_v6_address( + executor, bootstrap_etherstub_vnic.clone(), ip, "bootstrap6", @@ -319,6 +335,7 @@ impl Agent { .map_err(|err| BootstrapError::BootstrapAddress { err })?; let global_zone_bootstrap_link_local_address = Zones::get_address( + executor, None, // AddrObject::link_local() can only fail if the interface name is // malformed, but we just got it from `Dladm`, so we know it's @@ -351,6 +368,7 @@ impl Agent { let do_format = true; let encryption_details = None; Zfs::ensure_filesystem( + executor, ZONE_ZFS_RAMDISK_DATASET, Mountpoint::Path(Utf8PathBuf::from( ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT, @@ -364,7 +382,7 @@ impl Agent { // predictable state. 
// // This means all VNICs, zones, etc. - cleanup_all_old_global_state(&log).await?; + cleanup_all_old_global_state(&log, executor).await?; // Ipv6 forwarding must be enabled to route traffic between zones, // including the switch zone which we may launch below if we find we're @@ -379,7 +397,9 @@ impl Agent { "ipv6-forwarding", "-u", ]); - execute(cmd).map_err(|e| BootstrapError::EnablingRouting(e))?; + executor + .execute(cmd) + .map_err(|e| BootstrapError::EnablingRouting(e))?; // Spawn the `KeyManager` which is needed by the the StorageManager to // retrieve encryption keys. @@ -394,6 +414,7 @@ impl Agent { let hardware_monitor = Self::hardware_monitor( &ba_log, + executor, &config.link, &sled_config, global_zone_bootstrap_link_local_address, @@ -428,6 +449,7 @@ impl Agent { let make_agent = move |initialized| Agent { log: ba_log, parent_log: log, + executor: executor.clone(), ip, rss_access: RssAccess::new(initialized), sled_state: Mutex::new(SledAgentState::Before(Some( @@ -464,6 +486,7 @@ impl Agent { ) -> Result { Self::hardware_monitor( &self.log, + &self.executor, &self.config.link, &self.sled_config, self.global_zone_bootstrap_link_local_address, @@ -474,20 +497,22 @@ impl Agent { async fn hardware_monitor( log: &Logger, + executor: &BoxedExecutor, link: &illumos_utils::dladm::PhysicalLink, sled_config: &SledConfig, global_zone_bootstrap_link_local_address: Ipv6Addr, storage_key_requester: StorageKeyRequester, ) -> Result { - let underlay_etherstub = underlay_etherstub()?; + let underlay_etherstub = underlay_etherstub(executor)?; let underlay_etherstub_vnic = - underlay_etherstub_vnic(&underlay_etherstub)?; - let bootstrap_etherstub = bootstrap_etherstub()?; + underlay_etherstub_vnic(executor, &underlay_etherstub)?; + let bootstrap_etherstub = bootstrap_etherstub(executor)?; let switch_zone_bootstrap_address = - BootstrapInterface::SwitchZone.ip(&link)?; + BootstrapInterface::SwitchZone.ip(executor, &link)?; let hardware_monitor = HardwareMonitor::new( 
&log, + executor, &sled_config, global_zone_bootstrap_link_local_address, underlay_etherstub, @@ -585,6 +610,7 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), + &self.executor, request.clone(), services.clone(), storage.clone(), @@ -762,9 +788,12 @@ impl Agent { // these addresses would delete "cxgbe0/ll", and could render // the sled inaccessible via a local interface. - sled_hardware::cleanup::delete_underlay_addresses(&self.log) - .map_err(BootstrapError::Cleanup)?; - sled_hardware::cleanup::delete_omicron_vnics(&self.log) + sled_hardware::cleanup::delete_underlay_addresses( + &self.log, + &self.executor, + ) + .map_err(BootstrapError::Cleanup)?; + sled_hardware::cleanup::delete_omicron_vnics(&self.log, &self.executor) .await .map_err(BootstrapError::Cleanup)?; illumos_utils::opte::delete_all_xde_devices(&self.log)?; @@ -775,11 +804,11 @@ impl Agent { &self, _state: &tokio::sync::MutexGuard<'_, SledAgentState>, ) -> Result<(), BootstrapError> { - let datasets = zfs::get_all_omicron_datasets_for_delete() + let datasets = zfs::get_all_omicron_datasets_for_delete(&self.executor) .map_err(BootstrapError::ZfsDatasetsList)?; for dataset in &datasets { info!(self.log, "Removing dataset: {dataset}"); - zfs::Zfs::destroy_dataset(dataset)?; + zfs::Zfs::destroy_dataset(&self.executor, dataset)?; } Ok(()) diff --git a/sled-agent/src/bootstrap/hardware.rs b/sled-agent/src/bootstrap/hardware.rs index 4132edfa76..ca6dfffc63 100644 --- a/sled-agent/src/bootstrap/hardware.rs +++ b/sled-agent/src/bootstrap/hardware.rs @@ -8,6 +8,7 @@ use crate::config::{Config as SledConfig, SledMode as SledModeConfig}; use crate::services::ServiceManager; use crate::storage_manager::{StorageManager, StorageResources}; use illumos_utils::dladm::{Etherstub, EtherstubVnic}; +use illumos_utils::process::BoxedExecutor; use key_manager::StorageKeyRequester; use sled_hardware::{Baseboard, DendriteAsic, HardwareManager, SledMode}; use slog::Logger; @@ -151,6 
+152,7 @@ impl HardwareMonitor { #[allow(clippy::too_many_arguments)] pub async fn new( log: &Logger, + executor: &BoxedExecutor, sled_config: &SledConfig, global_zone_bootstrap_link_local_address: Ipv6Addr, underlay_etherstub: Etherstub, @@ -194,7 +196,7 @@ impl HardwareMonitor { .map_err(|e| Error::Hardware(e))?; let storage_manager = - StorageManager::new(&log, storage_key_requester).await; + StorageManager::new(&log, executor, storage_key_requester).await; // If our configuration asks for synthetic zpools, insert them now. if let Some(pools) = &sled_config.zpools { @@ -210,6 +212,7 @@ impl HardwareMonitor { let service_manager = ServiceManager::new( log.clone(), + executor, global_zone_bootstrap_link_local_address, underlay_etherstub.clone(), underlay_etherstub_vnic.clone(), diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index bd4f700497..641a12bf7f 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -14,6 +14,7 @@ use super::views::ResponseEnvelope; use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::maghemite; use crate::config::Config as SledConfig; +use illumos_utils::process::BoxedExecutor; use sled_hardware::underlay; use slog::Drain; use slog::Logger; @@ -36,6 +37,7 @@ pub struct Server { impl Server { pub async fn start( + executor: &BoxedExecutor, config: Config, sled_config: SledConfig, ) -> Result { @@ -54,7 +56,7 @@ impl Server { } // Find address objects to pass to maghemite. 
- let mg_addr_objs = underlay::find_nics().map_err(|err| { + let mg_addr_objs = underlay::find_nics(executor).map_err(|err| { format!("Failed to find address objects for maghemite: {err}") })?; if mg_addr_objs.is_empty() { @@ -70,7 +72,7 @@ impl Server { info!(log, "setting up bootstrap agent server"); let bootstrap_agent = - Agent::new(log.clone(), config.clone(), sled_config) + Agent::new(log.clone(), executor, config.clone(), sled_config) .await .map_err(|e| e.to_string())?; let bootstrap_agent = Arc::new(bootstrap_agent); diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 461b1bdcb7..647b353ec5 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -11,6 +11,7 @@ use illumos_utils::dladm::Dladm; use illumos_utils::dladm::FindPhysicalLinkError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; +use illumos_utils::process::BoxedExecutor; use illumos_utils::zpool::ZpoolName; use omicron_common::vlan::VlanID; use serde::Deserialize; @@ -115,12 +116,15 @@ impl Config { Ok(config) } - pub fn get_link(&self) -> Result { + pub fn get_link( + &self, + executor: &BoxedExecutor, + ) -> Result { if let Some(link) = self.data_link.as_ref() { Ok(link.clone()) } else { if is_gimlet().map_err(ConfigError::SystemDetection)? { - Dladm::list_physical() + Dladm::list_physical(executor) .map_err(ConfigError::FindLinks)? 
.into_iter() .find(|link| link.0.starts_with(CHELSIO_LINK_PREFIX)) @@ -130,7 +134,7 @@ impl Config { ) }) } else { - Dladm::find_physical().map_err(ConfigError::FindLinks) + Dladm::find_physical(executor).map_err(ConfigError::FindLinks) } } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 857f6efc50..f72d4c4fba 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -20,6 +20,7 @@ use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; +use illumos_utils::process::BoxedExecutor; use illumos_utils::running_zone::{ InstalledZone, RunCommandError, RunningZone, }; @@ -203,6 +204,8 @@ struct PropolisSetup { struct InstanceInner { log: Logger, + executor: BoxedExecutor, + // Properties visible to Propolis properties: propolis_client::api::InstanceProperties, @@ -567,6 +570,7 @@ mockall::mock! { #[allow(clippy::too_many_arguments)] pub fn new( log: Logger, + executor: &BoxedExecutor, id: Uuid, ticket: InstanceTicket, initial: InstanceHardware, @@ -614,6 +618,7 @@ impl Instance { #[allow(clippy::too_many_arguments)] pub fn new( log: Logger, + executor: &BoxedExecutor, id: Uuid, ticket: InstanceTicket, initial: InstanceHardware, @@ -624,6 +629,7 @@ impl Instance { info!(log, "Instance::new w/initial HW: {:?}", initial); let instance = InstanceInner { log: log.new(o!("instance_id" => id.to_string())), + executor: executor.clone(), // NOTE: Mostly lies. 
properties: propolis_client::api::InstanceProperties { id, @@ -854,6 +860,7 @@ impl Instance { let root = camino::Utf8Path::new(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT); let installed_zone = InstalledZone::install( &inner.log, + &inner.executor, &inner.vnic_allocator, &root, &["/opt/oxide".into()], @@ -1104,6 +1111,7 @@ mod test { use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::opte::PortManager; + use illumos_utils::process::FakeExecutor; use omicron_common::api::external::{ ByteCount, Generation, InstanceCpuCount, InstanceState, }; @@ -1157,8 +1165,13 @@ mod test { async fn transition_before_start() { let logctx = test_setup_log("transition_before_start"); let log = &logctx.log; - let vnic_allocator = - VnicAllocator::new("Test", Etherstub("mylink".to_string())); + + let executor = &FakeExecutor::new(log.clone()).as_executor(); + let vnic_allocator = VnicAllocator::new( + executor, + "Test", + Etherstub("mylink".to_string()), + ); let underlay_ip = std::net::Ipv6Addr::new( 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, ); @@ -1189,6 +1202,7 @@ mod test { let instance_manager = InstanceManager::new( log.clone(), + executor, nexus_client_with_resolver.clone(), Etherstub("mylink".to_string()), port_manager.clone(), @@ -1197,6 +1211,7 @@ mod test { let inst = Instance::new( log.clone(), + executor, test_uuid(), instance_manager.test_instance_ticket(test_uuid()), new_initial_instance(), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 43f475ea21..10de02cfb3 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -12,6 +12,7 @@ use crate::params::{ use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; +use illumos_utils::process::BoxedExecutor; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; @@ -45,6 +46,7 @@ pub 
enum Error { struct InstanceManagerInternal { log: Logger, + executor: BoxedExecutor, nexus_client: NexusClientWithResolver, /// Last set size of the VMM reservoir (in bytes) @@ -69,6 +71,7 @@ impl InstanceManager { /// Initializes a new [`InstanceManager`] object. pub fn new( log: Logger, + executor: &BoxedExecutor, nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, @@ -76,12 +79,15 @@ impl InstanceManager { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { log: log.new(o!("component" => "InstanceManager")), + executor: executor.clone(), nexus_client, // no reservoir size set on startup reservoir_size: Mutex::new(ByteCount::from_kibibytes_u32(0)), instances: Mutex::new(BTreeMap::new()), - vnic_allocator: VnicAllocator::new("Instance", etherstub), + vnic_allocator: VnicAllocator::new( + executor, "Instance", etherstub, + ), port_manager, }), }) @@ -199,6 +205,7 @@ impl InstanceManager { InstanceTicket::new(instance_id, self.inner.clone()); let instance = Instance::new( instance_log, + &self.inner.executor, instance_id, ticket, initial_hardware, @@ -367,6 +374,7 @@ mod test { use crate::params::InstanceStateRequested; use chrono::Utc; use illumos_utils::dladm::Etherstub; + use illumos_utils::process::FakeExecutor; use illumos_utils::{dladm::MockDladm, zone::MockZones}; use omicron_common::api::external::{ ByteCount, Generation, InstanceCpuCount, InstanceState, @@ -440,6 +448,8 @@ mod test { nexus_server.local_addr().port(), ); + let executor = FakeExecutor::new(log.clone()).as_executor(); + // Creation of the instance manager incurs some "global" system // checks: cleanup of existing zones + vnics. 
@@ -447,7 +457,7 @@ mod test { zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); let port_manager = PortManager::new( log.clone(), @@ -457,6 +467,7 @@ mod test { ); let im = InstanceManager::new( log.clone(), + &executor, nexus_client, Etherstub("mylink".to_string()), port_manager, @@ -483,7 +494,7 @@ mod test { // Expect one call to new() that produces an instance that expects to be // cloned once. The clone should expect to ask to be put into the // Running state. - instance_new_ctx.expect().return_once(move |_, _, t, _, _, _, _| { + instance_new_ctx.expect().return_once(move |_, _, _, t, _, _, _, _| { let mut inst = MockInstance::default(); // Move the instance ticket out to the test, since the mock instance @@ -575,13 +586,15 @@ mod test { nexus_server.local_addr().port(), ); + let executor = FakeExecutor::new(log.clone()).as_executor(); + // Instance Manager creation. 
let zones_get_ctx = MockZones::get_context(); zones_get_ctx.expect().return_once(|| Ok(vec![])); let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|| Ok(vec![])); + dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); let port_manager = PortManager::new( log.clone(), @@ -591,6 +604,7 @@ mod test { ); let im = InstanceManager::new( log.clone(), + &executor, nexus_client, Etherstub("mylink".to_string()), port_manager, @@ -601,7 +615,7 @@ mod test { let ticket_clone = ticket.clone(); let instance_new_ctx = MockInstance::new_context(); let mut seq = mockall::Sequence::new(); - instance_new_ctx.expect().return_once(move |_, _, t, _, _, _, _| { + instance_new_ctx.expect().return_once(move |_, _, _, t, _, _, _, _| { let mut inst = MockInstance::default(); let mut ticket_guard = ticket_clone.lock().unwrap(); *ticket_guard = Some(t); diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index d9c968d108..3725f7f2a2 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -11,6 +11,7 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; +use illumos_utils::process::BoxedExecutor; use slog::Logger; use std::net::SocketAddr; use uuid::Uuid; @@ -35,6 +36,7 @@ impl Server { pub async fn start( config: &Config, log: Logger, + executor: &BoxedExecutor, request: StartSledAgentRequest, services: ServiceManager, storage: StorageManager, @@ -49,6 +51,7 @@ impl Server { let sled_agent = SledAgent::new( &config, log.clone(), + executor, nexus_client, request, services, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 2fd7d03aa1..d8baf4a8a7 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -44,11 +44,11 @@ use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME; use illumos_utils::dladm::{Dladm, Etherstub, EtherstubVnic, PhysicalLink}; use 
illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{Port, PortManager, PortTicket}; +use illumos_utils::process::{BoxedExecutor, PFEXEC}; use illumos_utils::running_zone::{InstalledZone, RunningZone}; use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT; use illumos_utils::zone::AddressRequest; use illumos_utils::zone::Zones; -use illumos_utils::{execute, PFEXEC}; use internal_dns::resolver::Resolver; use itertools::Itertools; use omicron_common::address::Ipv6Subnet; @@ -171,7 +171,7 @@ pub enum Error { NtpZoneNotReady, #[error("Execution error: {0}")] - ExecutionError(#[from] illumos_utils::ExecutionError), + ExecutionError(#[from] illumos_utils::process::ExecutionError), #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns::resolver::ResolveError), @@ -297,6 +297,7 @@ enum SledLocalZone { /// Manages miscellaneous Sled-local services. pub struct ServiceManagerInner { log: Logger, + executor: BoxedExecutor, global_zone_bootstrap_link_local_address: Ipv6Addr, switch_zone: Mutex, sled_mode: SledMode, @@ -354,6 +355,7 @@ impl ServiceManager { #[allow(clippy::too_many_arguments)] pub async fn new( log: Logger, + executor: &BoxedExecutor, global_zone_bootstrap_link_local_address: Ipv6Addr, underlay_etherstub: Etherstub, underlay_vnic: EtherstubVnic, @@ -370,6 +372,7 @@ impl ServiceManager { let mgr = Self { inner: Arc::new(ServiceManagerInner { log: log.clone(), + executor: executor.clone(), global_zone_bootstrap_link_local_address, // TODO(https://github.com/oxidecomputer/omicron/issues/725): // Load the switch zone if it already exists? 
@@ -382,11 +385,13 @@ impl ServiceManager { zones: Mutex::new(vec![]), dataset_zones: Mutex::new(vec![]), underlay_vnic_allocator: VnicAllocator::new( + executor, "Service", underlay_etherstub, ), underlay_vnic, bootstrap_vnic_allocator: VnicAllocator::new( + executor, "Bootstrap", bootstrap_etherstub, ), @@ -662,7 +667,7 @@ impl ServiceManager { // The tfport service requires a MAC device to/from which sidecar // packets may be multiplexed. If the link isn't present, don't // bother trying to start the zone. - match Dladm::verify_link(pkt_source) { + match Dladm::verify_link(&self.inner.executor, pkt_source) { Ok(link) => { // It's important that tfpkt does **not** receive a // link local address! See: https://github.com/oxidecomputer/stlouis/issues/391 @@ -679,7 +684,10 @@ impl ServiceManager { // If on a non-gimlet, sled-agent can be configured to map // links into the switch zone. Validate those links here. for link in &self.inner.switch_zone_maghemite_links { - match Dladm::verify_link(&link.to_string()) { + match Dladm::verify_link( + &self.inner.executor, + &link.to_string(), + ) { Ok(link) => { // Link local addresses should be created in the // zone so that maghemite can listen on them. 
@@ -950,6 +958,7 @@ impl ServiceManager { let installed_zone = InstalledZone::install( &self.inner.log, + &self.inner.executor, &self.inner.underlay_vnic_allocator, &request.root, zone_image_paths.as_slice(), @@ -1178,6 +1187,7 @@ impl ServiceManager { IPV6_LINK_LOCAL_NAME ); Zones::ensure_has_link_local_v6_address( + &self.inner.executor, Some(running_zone.name()), &AddrObject::new(link.name(), IPV6_LINK_LOCAL_NAME) .unwrap(), @@ -1243,6 +1253,7 @@ impl ServiceManager { let addr_name = request.zone.zone_type.to_string().replace(&['-', '_'][..], ""); Zones::ensure_has_global_zone_v6_address( + &self.inner.executor, self.inner.underlay_vnic.clone(), addr, &addr_name, @@ -2009,7 +2020,7 @@ impl ServiceManager { &format!("{}", now.as_secs()), &file.as_str(), ]); - match execute(cmd) { + match self.inner.executor.execute(cmd) { Err(e) => { warn!(self.inner.log, "Updating {} failed: {}", &file, e); } @@ -2421,6 +2432,7 @@ mod test { Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, + process::FakeExecutor, svc, zone::MockZones, }; @@ -2429,7 +2441,6 @@ mod test { StorageKeyRequester, VersionedIkm, }; use std::net::Ipv6Addr; - use std::os::unix::process::ExitStatusExt; use uuid::Uuid; // Just placeholders. Not used. 
@@ -2443,7 +2454,7 @@ mod test { // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); create_vnic_ctx.expect().return_once( - |physical_link: &Etherstub, _, _, _, _| { + |_, physical_link: &Etherstub, _, _, _, _| { assert_eq!(&physical_link.0, &UNDERLAY_ETHERSTUB_NAME); Ok(()) }, @@ -2473,7 +2484,7 @@ mod test { // Ensure the address exists let ensure_address_ctx = MockZones::ensure_address_context(); - ensure_address_ctx.expect().return_once(|_, _, _| { + ensure_address_ctx.expect().return_once(|_, _, _, _| { Ok(ipnetwork::IpNetwork::new(IpAddr::V6(Ipv6Addr::LOCALHOST), 64) .unwrap()) }); @@ -2482,16 +2493,6 @@ mod test { let wait_ctx = svc::wait_for_service_context(); wait_ctx.expect().return_once(|_, _| Ok(())); - // Import the manifest, enable the service - let execute_ctx = illumos_utils::execute_context(); - execute_ctx.expect().times(..).returning(|_| { - Ok(std::process::Output { - status: std::process::ExitStatus::from_raw(0), - stdout: vec![], - stderr: vec![], - }) - }); - vec![ Box::new(create_vnic_ctx), Box::new(install_ctx), @@ -2499,7 +2500,6 @@ mod test { Box::new(id_ctx), Box::new(ensure_address_ctx), Box::new(wait_ctx), - Box::new(execute_ctx), ] } @@ -2555,7 +2555,7 @@ mod test { Ok(()) }); let delete_vnic_ctx = MockDladm::delete_vnic_context(); - delete_vnic_ctx.expect().returning(|_| Ok(())); + delete_vnic_ctx.expect().returning(|_, _| Ok(())); // Explicitly drop the service manager drop(mgr); @@ -2634,9 +2634,11 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; let storage_key_requester = spawn_key_manager(&log).await; + let executor = FakeExecutor::new(log.clone()).as_executor(); let mgr = ServiceManager::new( log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2646,7 +2648,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - 
StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); @@ -2681,9 +2683,11 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; let storage_key_requester = spawn_key_manager(&log).await; + let executor = FakeExecutor::new(log.clone()).as_executor(); let mgr = ServiceManager::new( log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2693,7 +2697,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); @@ -2729,11 +2733,13 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; let storage_key_requester = spawn_key_manager(&log).await; + let executor = FakeExecutor::new(log.clone()).as_executor(); // First, spin up a ServiceManager, create a new service, and tear it // down. 
let mgr = ServiceManager::new( logctx.log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2743,7 +2749,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); @@ -2772,6 +2778,7 @@ mod test { let _expectations = expect_new_service(); let mgr = ServiceManager::new( logctx.log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2781,7 +2788,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); @@ -2814,11 +2821,13 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; let storage_key_requester = spawn_key_manager(&log).await; + let executor = FakeExecutor::new(log.clone()).as_executor(); // First, spin up a ServiceManager, create a new service, and tear it // down. let mgr = ServiceManager::new( logctx.log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2828,7 +2837,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); @@ -2866,6 +2875,7 @@ mod test { // Observe that the old service is not re-initialized. 
let mgr = ServiceManager::new( logctx.log.clone(), + &executor, GLOBAL_ZONE_BOOTSTRAP_IP, Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), EtherstubVnic(UNDERLAY_ETHERSTUB_VNIC_NAME.to_string()), @@ -2875,7 +2885,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageManager::new(&log, storage_key_requester).await, + StorageManager::new(&log, &executor, storage_key_requester).await, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 9bb1c5b566..79e364b220 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -21,6 +21,7 @@ use camino::Utf8PathBuf; use dropshot::HttpError; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; +use illumos_utils::process::BoxedExecutor; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, }; @@ -50,7 +51,7 @@ pub enum Error { Config(#[from] crate::config::ConfigError), #[error("Failed to acquire etherstub: {0}")] - Etherstub(illumos_utils::ExecutionError), + Etherstub(illumos_utils::process::ExecutionError), #[error("Failed to acquire etherstub VNIC: {0}")] EtherstubVnic(illumos_utils::dladm::CreateVnicError), @@ -59,7 +60,7 @@ pub enum Error { Bootstrap(#[from] crate::bootstrap::agent::BootstrapError), #[error("Failed to remove Omicron address: {0}")] - DeleteAddress(#[from] illumos_utils::ExecutionError), + DeleteAddress(#[from] illumos_utils::process::ExecutionError), #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), @@ -204,6 +205,7 @@ impl SledAgent { pub async fn new( config: &Config, log: Logger, + executor: &BoxedExecutor, nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, @@ -221,15 +223,17 @@ impl SledAgent { info!(&log, "created sled agent"); let etherstub = Dladm::ensure_etherstub( + executor, 
illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME, ) .map_err(|e| Error::Etherstub(e))?; - let etherstub_vnic = Dladm::ensure_etherstub_vnic(ðerstub) + let etherstub_vnic = Dladm::ensure_etherstub_vnic(executor, ðerstub) .map_err(|e| Error::EtherstubVnic(e))?; // Ensure the global zone has a functioning IPv6 address. let sled_address = request.sled_address(); Zones::ensure_has_global_zone_v6_address( + executor, etherstub_vnic.clone(), *sled_address.ip(), "sled6", @@ -237,7 +241,7 @@ impl SledAgent { .map_err(|err| Error::SledSubnet { err })?; // Initialize the xde kernel driver with the underlay devices. - let underlay_nics = underlay::find_nics()?; + let underlay_nics = underlay::find_nics(executor)?; illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?; // Create the PortManager to manage all the OPTE ports on the sled. @@ -258,6 +262,7 @@ impl SledAgent { let instances = InstanceManager::new( parent_log.clone(), + executor, nexus_client.clone(), etherstub.clone(), port_manager.clone(), diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 38eb3fd799..f131dcf772 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -11,6 +11,7 @@ use camino::Utf8PathBuf; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; +use illumos_utils::process::BoxedExecutor; use illumos_utils::zpool::{ZpoolKind, ZpoolName}; use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; use key_manager::StorageKeyRequester; @@ -139,8 +140,12 @@ impl Pool { /// Queries for an existing Zpool by name. /// /// Returns Ok if the pool exists. 
- fn new(name: ZpoolName, parent: DiskIdentity) -> Result { - let info = Zpool::get_info(&name.to_string())?; + fn new( + executor: &BoxedExecutor, + name: ZpoolName, + parent: DiskIdentity, + ) -> Result { + let info = Zpool::get_info(executor, &name.to_string())?; Ok(Pool { name, info, parent }) } @@ -286,6 +291,7 @@ pub struct UnderlayAccess { // A worker that starts zones for pools as they are received. struct StorageWorker { log: Logger, + executor: BoxedExecutor, nexus_notifications: FuturesOrdered, rx: mpsc::Receiver, underlay: Arc>>, @@ -316,6 +322,7 @@ impl StorageWorker { let do_format = true; let encryption_details = None; Zfs::ensure_filesystem( + &self.executor, &dataset_name.full(), Mountpoint::Path(Utf8PathBuf::from("/data")), zoned, @@ -323,7 +330,9 @@ impl StorageWorker { encryption_details, )?; // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id_str) = + Zfs::get_oxide_value(&self.executor, &fs_name, "uuid") + { if let Ok(id) = id_str.parse::() { if id != dataset_id { return Err(Error::UuidMismatch { @@ -335,7 +344,12 @@ impl StorageWorker { return Ok(()); } } - Zfs::set_oxide_value(&fs_name, "uuid", &dataset_id.to_string())?; + Zfs::set_oxide_value( + &self.executor, + &fs_name, + "uuid", + &dataset_id.to_string(), + )?; Ok(()) } @@ -425,6 +439,7 @@ impl StorageWorker { for disk in unparsed_disks.into_iter() { match sled_hardware::Disk::new( &self.log, + &self.executor, disk, Some(&self.key_requester), ) @@ -524,6 +539,7 @@ impl StorageWorker { // Ensure the disk conforms to an expected partition layout. 
let disk = sled_hardware::Disk::new( &self.log, + &self.executor, disk, Some(&self.key_requester), ) @@ -556,6 +572,7 @@ impl StorageWorker { }; sled_hardware::Disk::ensure_zpool_ready( &self.log, + &self.executor, &zpool_name, &synthetic_id, Some(&self.key_requester), @@ -757,7 +774,7 @@ impl StorageWorker { pool_name: &ZpoolName, ) -> Result<(), Error> { let mut pools = resources.pools.lock().await; - let zpool = Pool::new(pool_name.clone(), parent)?; + let zpool = Pool::new(&self.executor, pool_name.clone(), parent)?; let pool = match pools.entry(pool_name.id()) { hash_map::Entry::Occupied(mut entry) => { @@ -894,7 +911,11 @@ pub struct StorageManager { impl StorageManager { /// Creates a new [`StorageManager`] which should manage local storage. - pub async fn new(log: &Logger, key_requester: StorageKeyRequester) -> Self { + pub async fn new( + log: &Logger, + executor: &BoxedExecutor, + key_requester: StorageKeyRequester, + ) -> Self { let log = log.new(o!("component" => "StorageManager")); let resources = StorageResources { disks: Arc::new(Mutex::new(HashMap::new())), @@ -902,6 +923,7 @@ impl StorageManager { }; let (tx, rx) = mpsc::channel(30); + let executor = executor.clone(); StorageManager { inner: Arc::new(StorageManagerInner { log: log.clone(), @@ -910,6 +932,7 @@ impl StorageManager { task: tokio::task::spawn(async move { let mut worker = StorageWorker { log, + executor, nexus_notifications: FuturesOrdered::new(), rx, underlay: Arc::new(Mutex::new(None)), diff --git a/sled-hardware/src/cleanup.rs b/sled-hardware/src/cleanup.rs index 1a7f8be2f7..1a1b3c97d9 100644 --- a/sled-hardware/src/cleanup.rs +++ b/sled-hardware/src/cleanup.rs @@ -13,31 +13,37 @@ use illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_VNIC_NAME; use illumos_utils::link::LinkKind; use illumos_utils::opte; +use illumos_utils::process::{BoxedExecutor, ExecutionError, PFEXEC}; use illumos_utils::zone::IPADM; -use illumos_utils::ExecutionError; 
-use illumos_utils::{execute, PFEXEC}; use slog::warn; use slog::Logger; use std::process::Command; -pub fn delete_underlay_addresses(log: &Logger) -> Result<(), Error> { +pub fn delete_underlay_addresses( + log: &Logger, + executor: &BoxedExecutor, +) -> Result<(), Error> { let underlay_prefix = format!("{}/", UNDERLAY_ETHERSTUB_VNIC_NAME); - delete_addresses_matching_prefixes(log, &[underlay_prefix]) + delete_addresses_matching_prefixes(log, executor, &[underlay_prefix]) } -pub fn delete_bootstrap_addresses(log: &Logger) -> Result<(), Error> { +pub fn delete_bootstrap_addresses( + log: &Logger, + executor: &BoxedExecutor, +) -> Result<(), Error> { let bootstrap_prefix = format!("{}/", BOOTSTRAP_ETHERSTUB_VNIC_NAME); - delete_addresses_matching_prefixes(log, &[bootstrap_prefix]) + delete_addresses_matching_prefixes(log, executor, &[bootstrap_prefix]) } fn delete_addresses_matching_prefixes( log: &Logger, + executor: &BoxedExecutor, prefixes: &[String], ) -> Result<(), Error> { use std::io::BufRead; let mut cmd = Command::new(PFEXEC); let cmd = cmd.args(&[IPADM, "show-addr", "-p", "-o", "ADDROBJ"]); - let output = execute(cmd)?; + let output = executor.execute(cmd)?; // `ipadm show-addr` can return multiple addresses with the same name, but // multiple values. 
Collecting to a set ensures that only a single name is @@ -57,34 +63,41 @@ fn delete_addresses_matching_prefixes( ); let mut cmd = Command::new(PFEXEC); let cmd = cmd.args(&[IPADM, "delete-addr", addrobj.as_str()]); - execute(cmd)?; + executor.execute(cmd)?; } } Ok(()) } /// Delete the etherstub and underlay VNIC used for interzone communication -pub fn delete_etherstub(log: &Logger) -> Result<(), ExecutionError> { +pub fn delete_etherstub( + log: &Logger, + executor: &BoxedExecutor, +) -> Result<(), ExecutionError> { warn!(log, "Deleting Omicron underlay VNIC"; "vnic_name" => UNDERLAY_ETHERSTUB_VNIC_NAME); - Dladm::delete_etherstub_vnic(UNDERLAY_ETHERSTUB_VNIC_NAME)?; + Dladm::delete_etherstub_vnic(executor, UNDERLAY_ETHERSTUB_VNIC_NAME)?; warn!(log, "Deleting Omicron underlay etherstub"; "stub_name" => UNDERLAY_ETHERSTUB_NAME); - Dladm::delete_etherstub(UNDERLAY_ETHERSTUB_NAME)?; + Dladm::delete_etherstub(executor, UNDERLAY_ETHERSTUB_NAME)?; warn!(log, "Deleting Omicron bootstrap VNIC"; "vnic_name" => BOOTSTRAP_ETHERSTUB_VNIC_NAME); - Dladm::delete_etherstub_vnic(BOOTSTRAP_ETHERSTUB_VNIC_NAME)?; + Dladm::delete_etherstub_vnic(executor, BOOTSTRAP_ETHERSTUB_VNIC_NAME)?; warn!(log, "Deleting Omicron bootstrap etherstub"; "stub_name" => BOOTSTRAP_ETHERSTUB_NAME); - Dladm::delete_etherstub(BOOTSTRAP_ETHERSTUB_NAME)?; + Dladm::delete_etherstub(executor, BOOTSTRAP_ETHERSTUB_NAME)?; Ok(()) } /// Delete all VNICs that can be managed by the control plane. /// /// These are currently those that match the prefix `ox` or `vopte`. 
-pub async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { - let vnics = Dladm::get_vnics()?; +pub async fn delete_omicron_vnics( + log: &Logger, + executor: &BoxedExecutor, +) -> Result<(), Error> { + let vnics = Dladm::get_vnics(executor)?; stream::iter(vnics) .zip(stream::iter(std::iter::repeat(log.clone()))) .map(Ok::<_, illumos_utils::dladm::DeleteVnicError>) .try_for_each_concurrent(None, |(vnic, log)| async { + let executor = executor.clone(); tokio::task::spawn_blocking(move || { warn!( log, @@ -92,7 +105,7 @@ pub async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { "vnic_name" => &vnic, "vnic_kind" => ?LinkKind::from_name(&vnic).unwrap(), ); - Dladm::delete_vnic(&vnic) + Dladm::delete_vnic(&executor, &vnic) }) .await .unwrap() @@ -101,11 +114,14 @@ pub async fn delete_omicron_vnics(log: &Logger) -> Result<(), Error> { Ok(()) } -pub async fn cleanup_networking_resources(log: &Logger) -> Result<(), Error> { - delete_underlay_addresses(log)?; - delete_bootstrap_addresses(log)?; - delete_omicron_vnics(log).await?; - delete_etherstub(log)?; +pub async fn cleanup_networking_resources( + log: &Logger, + executor: &BoxedExecutor, +) -> Result<(), Error> { + delete_underlay_addresses(log, executor)?; + delete_bootstrap_addresses(log, executor)?; + delete_omicron_vnics(log, executor).await?; + delete_etherstub(log, executor)?; opte::delete_all_xde_devices(log)?; Ok(()) diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 0e4bc5fbd2..95720628a5 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,6 +4,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; +use illumos_utils::process::BoxedExecutor; use illumos_utils::zfs::EncryptionDetails; use illumos_utils::zfs::Keypath; use illumos_utils::zfs::Mountpoint; @@ -252,6 +253,7 @@ impl Disk { /// `None` is for the M.2s touched by the Installinator. 
pub async fn new( log: &Logger, + executor: &BoxedExecutor, unparsed_disk: UnparsedDisk, key_requester: Option<&StorageKeyRequester>, ) -> Result { @@ -271,9 +273,11 @@ impl Disk { false, )?; - let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; + let zpool_name = + Self::ensure_zpool_exists(log, executor, variant, &zpool_path)?; Self::ensure_zpool_ready( log, + executor, &zpool_name, &unparsed_disk.identity, key_requester, @@ -293,14 +297,16 @@ impl Disk { pub async fn ensure_zpool_ready( log: &Logger, + executor: &BoxedExecutor, zpool_name: &ZpoolName, disk_identity: &DiskIdentity, key_requester: Option<&StorageKeyRequester>, ) -> Result<(), DiskError> { - Self::ensure_zpool_imported(log, &zpool_name)?; - Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; + Self::ensure_zpool_imported(log, executor, &zpool_name)?; + Self::ensure_zpool_failmode_is_continue(log, executor, &zpool_name)?; Self::ensure_zpool_has_datasets( log, + executor, &zpool_name, disk_identity, key_requester, @@ -311,10 +317,11 @@ impl Disk { fn ensure_zpool_exists( log: &Logger, + executor: &BoxedExecutor, variant: DiskVariant, zpool_path: &Utf8Path, ) -> Result { - let zpool_name = match Fstyp::get_zpool(&zpool_path) { + let zpool_name = match Fstyp::get_zpool(executor, &zpool_path) { Ok(zpool_name) => zpool_name, Err(_) => { // What happened here? 
@@ -338,11 +345,11 @@ impl Disk { DiskVariant::M2 => ZpoolName::new_internal(Uuid::new_v4()), DiskVariant::U2 => ZpoolName::new_external(Uuid::new_v4()), }; - Zpool::create(zpool_name.clone(), &zpool_path)?; + Zpool::create(executor, zpool_name.clone(), &zpool_path)?; zpool_name } }; - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(executor, zpool_name.clone()).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); DiskError::ZpoolImport(e) })?; @@ -352,9 +359,10 @@ impl Disk { fn ensure_zpool_imported( log: &Logger, + executor: &BoxedExecutor, zpool_name: &ZpoolName, ) -> Result<(), DiskError> { - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(executor, zpool_name.clone()).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); DiskError::ZpoolImport(e) })?; @@ -363,6 +371,7 @@ impl Disk { fn ensure_zpool_failmode_is_continue( log: &Logger, + executor: &BoxedExecutor, zpool_name: &ZpoolName, ) -> Result<(), DiskError> { // Ensure failmode is set to `continue`. See @@ -372,7 +381,7 @@ impl Disk { // actively harmful to try to wait for it to come back; we'll be waiting // forever and get stuck. We'd rather get the errors so we can deal with // them ourselves. - Zpool::set_failmode_continue(&zpool_name).map_err(|e| { + Zpool::set_failmode_continue(executor, &zpool_name).map_err(|e| { warn!( log, "Failed to set failmode=continue on zpool {zpool_name}: {e}" @@ -386,6 +395,7 @@ impl Disk { // contain. 
async fn ensure_zpool_has_datasets( log: &Logger, + executor: &BoxedExecutor, zpool_name: &ZpoolName, disk_identity: &DiskIdentity, key_requester: Option<&StorageKeyRequester>, @@ -409,31 +419,32 @@ impl Disk { let mountpoint = zpool_name.dataset_mountpoint(dataset); let keypath: Keypath = disk_identity.into(); - let epoch = - if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { - if let Ok(epoch) = epoch_str.parse::() { - epoch - } else { - return Err(DiskError::CannotParseEpochProperty( - dataset.to_string(), - )); - } + let epoch = if let Ok(epoch_str) = + Zfs::get_oxide_value(executor, dataset, "epoch") + { + if let Ok(epoch) = epoch_str.parse::() { + epoch } else { - // We got an error trying to call `Zfs::get_oxide_value` - // which indicates that the dataset doesn't exist or there - // was a problem running the command. - // - // Note that `Zfs::get_oxide_value` will succeed even if - // the epoch is missing. `epoch_str` will show up as a dash - // (`-`) and will not parse into a `u64`. So we don't have - // to worry about that case here as it is handled above. - // - // If the error indicated that the command failed for some - // other reason, but the dataset actually existed, we will - // try to create the dataset below and that will fail. So - // there is no harm in just loading the latest secret here. - key_requester.load_latest_secret().await? - }; + return Err(DiskError::CannotParseEpochProperty( + dataset.to_string(), + )); + } + } else { + // We got an error trying to call `Zfs::get_oxide_value` + // which indicates that the dataset doesn't exist or there + // was a problem running the command. + // + // Note that `Zfs::get_oxide_value` will succeed even if + // the epoch is missing. `epoch_str` will show up as a dash + // (`-`) and will not parse into a `u64`. So we don't have + // to worry about that case here as it is handled above. 
+ // + // If the error indicated that the command failed for some + // other reason, but the dataset actually existed, we will + // try to create the dataset below and that will fail. So + // there is no harm in just loading the latest secret here. + key_requester.load_latest_secret().await? + }; let key = key_requester.get_key(epoch, disk_identity.clone()).await?; @@ -453,6 +464,7 @@ impl Disk { "Ensuring encryted filesystem: {} for epoch {}", dataset, epoch ); let result = Zfs::ensure_filesystem( + executor, &format!("{}/{}", zpool_name, dataset), Mountpoint::Path(mountpoint), zoned, @@ -471,6 +483,7 @@ impl Disk { let mountpoint = zpool_name.dataset_mountpoint(dataset); let encryption_details = None; Zfs::ensure_filesystem( + executor, &format!("{}/{}", zpool_name, dataset), Mountpoint::Path(mountpoint), zoned, diff --git a/sled-hardware/src/underlay.rs b/sled-hardware/src/underlay.rs index fa0297108e..f2ddd5731f 100644 --- a/sled-hardware/src/underlay.rs +++ b/sled-hardware/src/underlay.rs @@ -14,6 +14,7 @@ use illumos_utils::dladm::GetLinkpropError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::SetLinkpropError; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; +use illumos_utils::process::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::api::external::MacAddr; use std::net::Ipv6Addr; @@ -36,7 +37,7 @@ pub enum Error { #[error( "Failed to create an IPv6 link-local address for underlay devices: {0}" )] - UnderlayDeviceAddress(#[from] illumos_utils::ExecutionError), + UnderlayDeviceAddress(#[from] illumos_utils::process::ExecutionError), #[error(transparent)] BadAddrObj(#[from] addrobj::ParseError), @@ -57,21 +58,25 @@ pub enum Error { /// Convenience function that calls /// `ensure_links_have_global_zone_link_local_v6_addresses()` with the links /// returned by `find_chelsio_links()`. 
-pub fn find_nics() -> Result, Error> { - let underlay_nics = find_chelsio_links()?; +pub fn find_nics(executor: &BoxedExecutor) -> Result, Error> { + let underlay_nics = find_chelsio_links(executor)?; // Before these links have any consumers (eg. IP interfaces), set the MTU. // If we have previously set the MTU, do not attempt to re-set. const MTU: &str = "9000"; for link in &underlay_nics { - let existing_mtu = Dladm::get_linkprop(&link.to_string(), "mtu")?; + let existing_mtu = + Dladm::get_linkprop(executor, &link.to_string(), "mtu")?; if existing_mtu != MTU { - Dladm::set_linkprop(&link.to_string(), "mtu", MTU)?; + Dladm::set_linkprop(executor, &link.to_string(), "mtu", MTU)?; } } - ensure_links_have_global_zone_link_local_v6_addresses(&underlay_nics) + ensure_links_have_global_zone_link_local_v6_addresses( + executor, + &underlay_nics, + ) } /// Return the Chelsio links on the system. @@ -79,9 +84,11 @@ pub fn find_nics() -> Result, Error> { /// For a real Gimlet, this should return the devices like `cxgbeN`. For a /// developer machine, or generally a non-Gimlet, this will return the /// VNICs we use to emulate those Chelsio links. -pub fn find_chelsio_links() -> Result, Error> { +pub fn find_chelsio_links( + executor: &BoxedExecutor, +) -> Result, Error> { if is_gimlet().map_err(Error::SystemDetection)? { - Dladm::list_physical().map_err(Error::FindLinks).map(|links| { + Dladm::list_physical(executor).map_err(Error::FindLinks).map(|links| { links .into_iter() .filter(|link| link.0.starts_with(CHELSIO_LINK_PREFIX)) @@ -98,13 +105,14 @@ pub fn find_chelsio_links() -> Result, Error> { /// Ensure each of the `PhysicalLink`s has a link local IPv6 address in the /// global zone. 
pub fn ensure_links_have_global_zone_link_local_v6_addresses( + executor: &BoxedExecutor, links: &[PhysicalLink], ) -> Result, Error> { let mut addr_objs = Vec::with_capacity(links.len()); for link in links { let addrobj = AddrObject::link_local(&link.0)?; - Zones::ensure_has_link_local_v6_address(None, &addrobj)?; + Zones::ensure_has_link_local_v6_address(executor, None, &addrobj)?; addr_objs.push(addrobj); } @@ -129,9 +137,10 @@ impl BootstrapInterface { // could be randomly generated when it no longer needs to be durable. pub fn ip( self, + executor: &BoxedExecutor, link: &PhysicalLink, ) -> Result { - let mac = Dladm::get_mac(link)?; + let mac = Dladm::get_mac(executor, link)?; Ok(mac_to_bootstrap_ip(mac, self.interface_id())) } } diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index fbdb3d9944..c987459418 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -20,6 +20,7 @@ gateway-messages.workspace = true hex.workspace = true http.workspace = true hyper.workspace = true +illumos-utils.workspace = true reqwest.workspace = true schemars.workspace = true serde.workspace = true diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index 3fc1288f74..e640ccadcc 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -11,6 +11,7 @@ use camino_tempfile::Utf8TempDir; use clap::Parser; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; +use illumos_utils::process::FakeExecutor; use installinator::HOST_PHASE_2_FILE_NAME; use omicron_common::{ api::internal::nexus::KnownArtifactKind, @@ -239,7 +240,8 @@ async fn test_installinator_fetch() { ]) .expect("installinator args parsed successfully"); - args.exec(&log.new(slog::o!("crate" => "installinator"))) + let executor = FakeExecutor::new(log.clone()).as_executor(); + args.exec(&log.new(slog::o!("crate" => "installinator")), &executor) .await .expect("installinator succeeded"); From 
96edc931b166f7e68ffbed82a9b481ba59370932 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 27 Jun 2023 00:49:43 -0700 Subject: [PATCH 23/57] Remove unused mocks --- illumos-utils/src/dladm.rs | 1 - illumos-utils/src/fstyp.rs | 1 - illumos-utils/src/zfs.rs | 1 - illumos-utils/src/zpool.rs | 1 - sled-agent/src/instance_manager.rs | 8 +------- sled-agent/src/services.rs | 14 +------------- sled-agent/src/sled_agent.rs | 5 +++-- sled-agent/src/storage_manager.rs | 9 ++------- 8 files changed, 7 insertions(+), 33 deletions(-) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 101e4a3ec1..5b0b076473 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -168,7 +168,6 @@ impl VnicSource for PhysicalLink { /// Wraps commands for interacting with data links. pub struct Dladm {} -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Dladm { /// Creates an etherstub, or returns one which already exists. pub fn ensure_etherstub( diff --git a/illumos-utils/src/fstyp.rs b/illumos-utils/src/fstyp.rs index 917bdc3a08..51e49b4bd2 100644 --- a/illumos-utils/src/fstyp.rs +++ b/illumos-utils/src/fstyp.rs @@ -29,7 +29,6 @@ pub enum Error { /// Wraps 'fstyp' command. pub struct Fstyp {} -#[cfg_attr(test, mockall::automock)] impl Fstyp { /// Executes the 'fstyp' command and parses the name of a zpool from it, if /// one exists. diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 8152ca2cda..6a4bc86cb6 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -138,7 +138,6 @@ pub struct EncryptionDetails { pub epoch: u64, } -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zfs { /// Lists all datasets within a pool or existing dataset. 
pub fn list_datasets( diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index fddc93dfdc..b14211a68c 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -165,7 +165,6 @@ impl FromStr for ZpoolInfo { /// Wraps commands for interacting with ZFS pools. pub struct Zpool {} -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zpool { pub fn create( executor: &BoxedExecutor, diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 10de02cfb3..e35849e8f8 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -375,7 +375,7 @@ mod test { use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::process::FakeExecutor; - use illumos_utils::{dladm::MockDladm, zone::MockZones}; + use illumos_utils::zone::MockZones; use omicron_common::api::external::{ ByteCount, Generation, InstanceCpuCount, InstanceState, }; @@ -456,9 +456,6 @@ mod test { let zones_get_ctx = MockZones::get_context(); zones_get_ctx.expect().return_once(|| Ok(vec![])); - let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); - let port_manager = PortManager::new( log.clone(), std::net::Ipv6Addr::new( @@ -593,9 +590,6 @@ mod test { let zones_get_ctx = MockZones::get_context(); zones_get_ctx.expect().return_once(|| Ok(vec![])); - let dladm_get_vnics_ctx = MockDladm::get_vnics_context(); - dladm_get_vnics_ctx.expect().return_once(|_| Ok(vec![])); - let port_manager = PortManager::new( log.clone(), std::net::Ipv6Addr::new( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d8baf4a8a7..02f9186b0d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2429,7 +2429,7 @@ mod test { use async_trait::async_trait; use illumos_utils::{ dladm::{ - Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, + Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, 
UNDERLAY_ETHERSTUB_VNIC_NAME, }, process::FakeExecutor, @@ -2451,14 +2451,6 @@ mod test { // Returns the expectations for a new service to be created. fn expect_new_service() -> Vec> { - // Create a VNIC - let create_vnic_ctx = MockDladm::create_vnic_context(); - create_vnic_ctx.expect().return_once( - |_, physical_link: &Etherstub, _, _, _, _| { - assert_eq!(&physical_link.0, &UNDERLAY_ETHERSTUB_NAME); - Ok(()) - }, - ); // Install the Omicron Zone let install_ctx = MockZones::install_omicron_zone_context(); install_ctx.expect().return_once(|_, _, name, _, _, _, _, _, _| { @@ -2494,7 +2486,6 @@ mod test { wait_ctx.expect().return_once(|_, _| Ok(())); vec![ - Box::new(create_vnic_ctx), Box::new(install_ctx), Box::new(boot_ctx), Box::new(id_ctx), @@ -2554,9 +2545,6 @@ mod test { assert_eq!(name, EXPECTED_ZONE_NAME); Ok(()) }); - let delete_vnic_ctx = MockDladm::delete_vnic_context(); - delete_vnic_ctx.expect().returning(|_, _| Ok(())); - // Explicitly drop the service manager drop(mgr); } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 79e364b220..6c88582346 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,6 +19,7 @@ use crate::storage_manager::{self, StorageManager}; use crate::updates::{ConfigUpdates, UpdateManager}; use camino::Utf8PathBuf; use dropshot::HttpError; +use illumos_utils::dladm::Dladm; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::process::BoxedExecutor; @@ -41,9 +42,9 @@ use std::sync::Arc; use uuid::Uuid; #[cfg(not(test))] -use illumos_utils::{dladm::Dladm, zone::Zones}; +use illumos_utils::zone::Zones; #[cfg(test)] -use illumos_utils::{dladm::MockDladm as Dladm, zone::MockZones as Zones}; +use illumos_utils::zone::MockZones as Zones; #[derive(thiserror::Error, Debug)] pub enum Error { diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index f131dcf772..171168e28d 100644 --- 
a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -12,8 +12,8 @@ use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; use illumos_utils::process::BoxedExecutor; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; -use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; +use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolKind, ZpoolName}; +use illumos_utils::zfs::{Mountpoint, Zfs}; use key_manager::StorageKeyRequester; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskKind; @@ -33,11 +33,6 @@ use tokio::sync::{mpsc, oneshot, Mutex}; use tokio::task::JoinHandle; use uuid::Uuid; -#[cfg(test)] -use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; -#[cfg(not(test))] -use illumos_utils::{zfs::Zfs, zpool::Zpool}; - #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] From b9ef2675d6c5ca46c98afe777577ee523238c85c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 27 Jun 2023 03:11:29 -0700 Subject: [PATCH 24/57] Add dladm tests --- illumos-utils/src/dladm.rs | 126 +++++++++++++++++ illumos-utils/src/process.rs | 217 +++++++++++++++++++++++++++--- sled-agent/src/services.rs | 4 +- sled-agent/src/sled_agent.rs | 4 +- sled-agent/src/storage_manager.rs | 2 +- 5 files changed, 329 insertions(+), 24 deletions(-) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 5b0b076473..b77ec3ff17 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -526,3 +526,129 @@ impl Dladm { Ok(()) } } + +#[cfg(test)] +mod test { + use super::*; + use crate::process::{FakeExecutor, Input, OutputExt, StaticHandler}; + use omicron_test_utils::dev; + use std::process::Output; + + #[test] + fn ensure_new_etherstub() { + let logctx = dev::test_setup_log("ensure_new_etherstub"); + + let mut handler = StaticHandler::new(); + handler.expect_fail(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); + handler + 
.expect_ok(format!("{PFEXEC} {DLADM} create-etherstub -t mystub1")); + + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let etherstub = + Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") + .expect("Failed to ensure etherstub"); + assert_eq!(etherstub.0, "mystub1"); + + logctx.cleanup_successful(); + } + + #[test] + fn ensure_existing_etherstub() { + let logctx = dev::test_setup_log("ensure_existing_etherstub"); + + let mut handler = StaticHandler::new(); + handler.expect_ok(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let etherstub = + Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") + .expect("Failed to ensure etherstub"); + assert_eq!(etherstub.0, "mystub1"); + + logctx.cleanup_successful(); + } + + #[test] + fn ensure_existing_etherstub_vnic() { + let logctx = dev::test_setup_log("ensure_existing_etherstub_vnic"); + + let mut handler = StaticHandler::new(); + handler.expect_ok(format!( + "{PFEXEC} {DLADM} show-etherstub {UNDERLAY_ETHERSTUB_NAME}" + )); + handler.expect_ok(format!( + "{PFEXEC} {DLADM} show-vnic {UNDERLAY_ETHERSTUB_VNIC_NAME}" + )); + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let executor = &executor.as_executor(); + let etherstub = + Dladm::ensure_etherstub(executor, UNDERLAY_ETHERSTUB_NAME) + .expect("Failed to ensure etherstub"); + let _vnic = Dladm::ensure_etherstub_vnic(executor, &etherstub) + .expect("Failed to ensure etherstub VNIC"); + + logctx.cleanup_successful(); + } + + #[test] + fn ensure_new_etherstub_vnic() { + let logctx = dev::test_setup_log("ensure_new_etherstub_vnic"); + + let mut handler = StaticHandler::new(); + handler.expect_ok(format!( + "{PFEXEC} {DLADM} show-etherstub {UNDERLAY_ETHERSTUB_NAME}" + )); + handler.expect_fail(format!( + "{PFEXEC} {DLADM} show-vnic {UNDERLAY_ETHERSTUB_VNIC_NAME}" 
+ )); + handler.expect_ok(format!( + "{PFEXEC} {DLADM} create-vnic -t -l {UNDERLAY_ETHERSTUB_NAME} \ + -p mtu=9000 {UNDERLAY_ETHERSTUB_VNIC_NAME}" + )); + handler.expect_ok(format!( + "{PFEXEC} {DLADM} set-linkprop -t -p mtu=9000 \ + {UNDERLAY_ETHERSTUB_VNIC_NAME}" + )); + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let executor = &executor.as_executor(); + let etherstub = + Dladm::ensure_etherstub(executor, UNDERLAY_ETHERSTUB_NAME) + .expect("Failed to ensure etherstub"); + let _vnic = Dladm::ensure_etherstub_vnic(executor, &etherstub) + .expect("Failed to ensure etherstub VNIC"); + + logctx.cleanup_successful(); + } + + #[test] + fn only_parse_oxide_vnics() { + let logctx = dev::test_setup_log("only_parse_oxide_vnics"); + + let mut handler = StaticHandler::new(); + handler.expect( + Input::shell(format!("{PFEXEC} {DLADM} show-vnic -p -o LINK")), + Output::success().set_stdout( + "oxVnic\nvopteVnic\nInvalid\noxBootstrapVnic\nInvalid", + ), + ); + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let executor = &executor.as_executor(); + let vnics = Dladm::get_vnics(executor).expect("Failed to get VNICs"); + + assert_eq!(vnics[0], "oxVnic"); + assert_eq!(vnics[1], "vopteVnic"); + assert_eq!(vnics[2], "oxBootstrapVnic"); + assert_eq!(vnics.len(), 3); + + logctx.cleanup_successful(); + } +} diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index f085c3a131..2b05a0a9ed 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -28,15 +28,10 @@ pub trait Executor: Send + Sync { } fn log_command(log: &Logger, id: u64, command: &Command) { - info!( - log, - "{id} - Running Command: [{:?} {:?}]", - command.get_program(), - to_space_separated_string(command.get_args()), - ); + info!(log, "{id} - Running Command: [{}]", Input::from(command),); debug!( log, - "{id} - Environment: [{:?}]", + "{id} - Environment: [{}]", 
to_space_separated_string(command.get_envs()), ) } @@ -68,10 +63,168 @@ fn log_output(log: &Logger, id: u64, output: &Output) { } } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Input { + pub program: String, + pub args: Vec, + pub envs: Vec<(String, String)>, +} + +impl Input { + pub fn new>(program: S, args: Vec) -> Self { + Self { + program: program.as_ref().to_string(), + args: args.into_iter().map(|s| s.as_ref().to_string()).collect(), + envs: vec![], + } + } + + pub fn shell>(input: S) -> Self { + let mut args = input.as_ref().split_whitespace(); + + Self::new( + args.next().expect("Needs at least a program"), + args.collect(), + ) + } +} + +impl std::fmt::Display for Input { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.program)?; + for arg in &self.args { + write!(f, " {}", arg)?; + } + Ok(()) + } +} + +impl From<&Command> for Input { + fn from(command: &Command) -> Self { + Self { + program: os_str_to_string(command.get_program()), + args: command + .get_args() + .into_iter() + .map(os_str_to_string) + .collect(), + envs: command + .get_envs() + .into_iter() + .map(|(k, v)| { + ( + os_str_to_string(k), + os_str_to_string(v.unwrap_or_default()), + ) + }) + .collect(), + } + } +} + +pub trait OutputExt: Sized { + fn success() -> Self; + fn silent_failure() -> Self; + fn set_stdout>(self, stdout: S) -> Self; + fn set_stderr>(self, stderr: S) -> Self; +} + +impl OutputExt for Output { + fn success() -> Self { + Output { + status: ExitStatus::from_raw(0), + stdout: vec![], + stderr: vec![], + } + } + + fn silent_failure() -> Self { + Output { + status: ExitStatus::from_raw(-1), + stdout: vec![], + stderr: vec![], + } + } + + fn set_stdout>(mut self, stdout: S) -> Self { + self.stdout = stdout.as_ref().as_bytes().to_vec(); + self + } + + fn set_stderr>(mut self, stderr: S) -> Self { + self.stderr = stderr.as_ref().as_bytes().to_vec(); + self + } +} + +#[derive(Clone)] +pub struct CompletedCommand { + pub 
input: Input, + pub output: Output, +} + +fn os_str_to_string(s: &std::ffi::OsStr) -> String { + s.to_string_lossy().to_string() +} + +impl CompletedCommand { + fn new(command: &Command, output: Output) -> Self { + Self { input: Input::from(command), output } + } +} + +pub struct StaticHandler { + expected: Vec<(Input, Output)>, + index: usize, +} + +impl StaticHandler { + pub fn new() -> Self { + Self { expected: Vec::new(), index: 0 } + } + + pub fn expect(&mut self, input: Input, output: Output) { + self.expected.push((input, output)); + } + + pub fn expect_ok>(&mut self, input: S) { + self.expect(Input::shell(input), Output::success()) + } + + pub fn expect_fail>(&mut self, input: S) { + self.expect(Input::shell(input), Output::silent_failure()) + } + + fn execute(&mut self, command: &Command) -> Output { + let input = Input::from(command); + let expected = &self + .expected + .get(self.index) + .expect(&format!("Unexpected command: {input}")); + self.index += 1; + assert_eq!(input, expected.0); + expected.1.clone() + } +} + +impl Drop for StaticHandler { + fn drop(&mut self) { + let expected = self.expected.len(); + let actual = self.index; + if actual < expected { + let next = &self.expected[actual].0; + assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {next}"); + } + } +} + +pub type ExecutorFn = Box Output + Send + Sync>; + pub struct FakeExecutor { log: Logger, counter: AtomicU64, - all_operations: Mutex>, + all_operations: Mutex>, + handler: Mutex, } impl FakeExecutor { @@ -80,12 +233,27 @@ impl FakeExecutor { log, counter: AtomicU64::new(0), all_operations: Mutex::new(vec![]), + handler: Mutex::new(Box::new(|_cmd| Output::success())), }) } + pub fn set_handler(&self, f: ExecutorFn) { + *self.handler.lock().unwrap() = f; + } + + pub fn set_static_handler(&self, mut handler: StaticHandler) { + self.set_handler(Box::new(move |cmd| -> Output { + handler.execute(cmd) + })); + } + pub fn as_executor(self: Arc) -> 
BoxedExecutor { self } + + pub fn all_operations(&self) -> Vec { + (*self.all_operations.lock().unwrap()).clone() + } } impl Executor for FakeExecutor { @@ -93,18 +261,29 @@ impl Executor for FakeExecutor { let id = self.counter.fetch_add(1, Ordering::SeqCst); log_command(&self.log, id, command); - // TODO: Environment variables? - let mut record = Command::new(command.get_program()); - record.args(command.get_args()); - self.all_operations.lock().unwrap().push(record); + // Call our handler function with the caller-provided function. + let output = self.handler.lock().unwrap()(command); - // TODO: Control failure of the command? - let output = Output { - status: ExitStatus::from_raw(0), - stdout: vec![], - stderr: vec![], - }; + // TODO: De-duplicate this with the RealExecutor + if !output.status.success() { + return Err(ExecutionError::CommandFailure(Box::new( + FailureInfo { + command: command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" "), + status: output.status, + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }, + ))); + } log_output(&self.log, id, &output); + self.all_operations + .lock() + .unwrap() + .push(CompletedCommand::new(command, output.clone())); Ok(output) } } @@ -131,7 +310,7 @@ impl Executor for RealExecutor { let output = command.output().map_err(|err| { error!(self.log, "{id} - Could not start program!"); ExecutionError::ExecutionStart { - command: to_space_separated_string(command.get_args()), + command: Input::from(&*command).to_string(), err, } })?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 02f9186b0d..44b66e1f35 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2429,8 +2429,8 @@ mod test { use async_trait::async_trait; use illumos_utils::{ dladm::{ - Etherstub, BOOTSTRAP_ETHERSTUB_NAME, - UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, + Etherstub, 
BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, + UNDERLAY_ETHERSTUB_VNIC_NAME, }, process::FakeExecutor, svc, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 6c88582346..8c4a5c2e2b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -41,10 +41,10 @@ use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; -#[cfg(not(test))] -use illumos_utils::zone::Zones; #[cfg(test)] use illumos_utils::zone::MockZones as Zones; +#[cfg(not(test))] +use illumos_utils::zone::Zones; #[derive(thiserror::Error, Debug)] pub enum Error { diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 171168e28d..37d9838968 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -12,8 +12,8 @@ use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; use illumos_utils::process::BoxedExecutor; -use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolKind, ZpoolName}; use illumos_utils::zfs::{Mountpoint, Zfs}; +use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; use nexus_client::types::PhysicalDiskDeleteRequest; use nexus_client::types::PhysicalDiskKind; From 2a495207756ae8e98eda44826b5670116c249236 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 27 Jun 2023 15:52:20 -0700 Subject: [PATCH 25/57] docs --- illumos-utils/src/process.rs | 51 ++++++++++++++++++------------ installinator/src/main.rs | 4 +-- package/src/bin/omicron-package.rs | 6 ++-- sled-agent/src/bin/sled-agent.rs | 4 +-- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index 2b05a0a9ed..2a3ecce0f9 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -12,17 +12,14 @@ use std::str::from_utf8; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -// NOTE: Is the "counter as ID" misleading? 
-// -// It's not actually possible to make an incrementing counter avoid the race of -// "log, do operation, log again" without making executing processes serialized -// (which seems bad). -// -// We could make this a UUID, but I don't like how hard-to-read those can be -// when trying to quickly parse logs. - +/// Describes the commonly-used "safe-to-reference" type describing the +/// Executor as a trait object. pub type BoxedExecutor = Arc; +/// Describes an "executor", which can run [Command]s and return a response. +/// +/// - In production, this is usually simply a [HostExecutor]. +/// - Under test, this can be customized, and a [FakeExecutor] may be used. pub trait Executor: Send + Sync { fn execute(&self, command: &mut Command) -> Result; } @@ -63,6 +60,7 @@ fn log_output(log: &Logger, id: u64, output: &Output) { } } +/// Wrapper around the input of a [std::process::Command] as strings. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Input { pub program: String, @@ -79,6 +77,8 @@ impl Input { } } + /// Short-hand for a whitespace-separated string, which can be provided + /// "like a shell command". pub fn shell>(input: S) -> Self { let mut args = input.as_ref().split_whitespace(); @@ -99,6 +99,10 @@ impl std::fmt::Display for Input { } } +fn os_str_to_string(s: &std::ffi::OsStr) -> String { + s.to_string_lossy().to_string() +} + impl From<&Command> for Input { fn from(command: &Command) -> Self { Self { @@ -122,9 +126,11 @@ impl From<&Command> for Input { } } +/// Convenience functions for usage in tests, to perform common operations +/// with minimal boilerplate. 
pub trait OutputExt: Sized { fn success() -> Self; - fn silent_failure() -> Self; + fn failure() -> Self; fn set_stdout>(self, stdout: S) -> Self; fn set_stderr>(self, stderr: S) -> Self; } @@ -138,7 +144,7 @@ impl OutputExt for Output { } } - fn silent_failure() -> Self { + fn failure() -> Self { Output { status: ExitStatus::from_raw(-1), stdout: vec![], @@ -157,22 +163,23 @@ impl OutputExt for Output { } } +/// Describes a fully-completed command. #[derive(Clone)] pub struct CompletedCommand { pub input: Input, pub output: Output, } -fn os_str_to_string(s: &std::ffi::OsStr) -> String { - s.to_string_lossy().to_string() -} - impl CompletedCommand { fn new(command: &Command, output: Output) -> Self { Self { input: Input::from(command), output } } } +/// A handler that may be used for setting inputs/outputs to the executor +/// when these commands are known ahead-of-time. +/// +/// See: [FakeExecutor::set_static_handler] for usage. pub struct StaticHandler { expected: Vec<(Input, Output)>, index: usize, @@ -192,7 +199,7 @@ impl StaticHandler { } pub fn expect_fail>(&mut self, input: S) { - self.expect(Input::shell(input), Output::silent_failure()) + self.expect(Input::shell(input), Output::failure()) } fn execute(&mut self, command: &Command) -> Output { @@ -220,6 +227,7 @@ impl Drop for StaticHandler { pub type ExecutorFn = Box Output + Send + Sync>; +/// An executor which can expect certain inputs, and respond with specific outputs. pub struct FakeExecutor { log: Logger, counter: AtomicU64, @@ -237,16 +245,19 @@ impl FakeExecutor { }) } + /// Set the request handler to an arbitrary function. pub fn set_handler(&self, f: ExecutorFn) { *self.handler.lock().unwrap() = f; } + /// Set the request handler to a static set of inputs and outputs. pub fn set_static_handler(&self, mut handler: StaticHandler) { self.set_handler(Box::new(move |cmd| -> Output { handler.execute(cmd) })); } + /// Perform some type coercion to access a commonly-used trait object. 
pub fn as_executor(self: Arc) -> BoxedExecutor { self } @@ -264,7 +275,7 @@ impl Executor for FakeExecutor { // Call our handler function with the caller-provided function. let output = self.handler.lock().unwrap()(command); - // TODO: De-duplicate this with the RealExecutor + // TODO: De-duplicate this with the HostExecutor if !output.status.success() { return Err(ExecutionError::CommandFailure(Box::new( FailureInfo { @@ -288,12 +299,12 @@ impl Executor for FakeExecutor { } } -pub struct RealExecutor { +pub struct HostExecutor { log: slog::Logger, counter: std::sync::atomic::AtomicU64, } -impl RealExecutor { +impl HostExecutor { pub fn new(log: Logger) -> Arc { Arc::new(Self { log, counter: AtomicU64::new(0) }) } @@ -303,7 +314,7 @@ impl RealExecutor { } } -impl Executor for RealExecutor { +impl Executor for HostExecutor { fn execute(&self, command: &mut Command) -> Result { let id = self.counter.fetch_add(1, Ordering::SeqCst); log_command(&self.log, id, command); diff --git a/installinator/src/main.rs b/installinator/src/main.rs index ca5ad539e7..601649470f 100644 --- a/installinator/src/main.rs +++ b/installinator/src/main.rs @@ -5,14 +5,14 @@ use std::error::Error; use clap::Parser; -use illumos_utils::process::RealExecutor; +use illumos_utils::process::HostExecutor; use installinator::InstallinatorApp; #[tokio::main] async fn main() -> Result<(), Box> { let app = InstallinatorApp::parse(); let log = InstallinatorApp::setup_log("/tmp/installinator.log")?; - let executor = RealExecutor::new(log.clone()).as_executor(); + let executor = HostExecutor::new(log.clone()).as_executor(); app.exec(&log, &executor).await?; Ok(()) } diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 6a04fb5f04..d3ed93696b 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -7,7 +7,7 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, 
TryStreamExt}; -use illumos_utils::process::{BoxedExecutor, RealExecutor}; +use illumos_utils::process::{BoxedExecutor, HostExecutor}; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::target::KnownTarget; @@ -657,7 +657,7 @@ fn remove_all_except>( } async fn do_deactivate(config: &Config) -> Result<()> { - let executor = RealExecutor::new(config.log.clone()).as_executor(); + let executor = HostExecutor::new(config.log.clone()).as_executor(); info!(&config.log, "Removing all Omicron zones"); uninstall_all_omicron_zones().await?; info!(config.log, "Uninstalling all packages"); @@ -668,7 +668,7 @@ async fn do_deactivate(config: &Config) -> Result<()> { } async fn do_uninstall(config: &Config) -> Result<()> { - let executor = RealExecutor::new(config.log.clone()).as_executor(); + let executor = HostExecutor::new(config.log.clone()).as_executor(); do_deactivate(config).await?; info!(config.log, "Removing datasets"); uninstall_all_omicron_datasets(&executor, config)?; diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index bb99fe4732..2bac505cff 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -6,7 +6,7 @@ use camino::Utf8PathBuf; use clap::{Parser, Subcommand}; -use illumos_utils::process::RealExecutor; +use illumos_utils::process::HostExecutor; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::{ @@ -96,7 +96,7 @@ async fn do_run() -> Result<(), CmdError> { .log .to_logger("sled-agent") .map_err(|e| CmdError::Failure(e.to_string()))?; - let executor = RealExecutor::new(log).as_executor(); + let executor = HostExecutor::new(log).as_executor(); // Derive the bootstrap addresses from the data link's MAC address. 
let link = config From 909d9ba71be189dc051711d15263c14c728b63e5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 4 Jul 2023 12:35:33 -0700 Subject: [PATCH 26/57] Okay now happy on non-illumos too --- illumos-utils/src/opte/port.rs | 1 + sled-hardware/src/non_illumos/mod.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 4897f478ea..f5776b6f00 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -14,6 +14,7 @@ use std::sync::Arc; #[derive(Debug)] struct PortInner { + #[allow(dead_code)] executor: DebugIgnore, // Name of the port as identified by OPTE name: String, diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 6e36330df0..d6de97e3f0 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,6 +4,7 @@ use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; use crate::{Baseboard, SledMode}; +use illumos_utils::process::BoxedExecutor; use slog::Logger; use std::collections::HashSet; use tokio::sync::broadcast; @@ -54,6 +55,7 @@ impl HardwareManager { pub fn ensure_partition_layout( _log: &Logger, + _executor: &BoxedExecutor, _paths: &DiskPaths, _variant: DiskVariant, ) -> Result, DiskError> { From b05c6aa1e8ca90bb79347e7bff5d9fb7a0d652e1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 4 Jul 2023 12:42:36 -0700 Subject: [PATCH 27/57] clippy --- illumos-utils/src/opte/non_illumos.rs | 2 +- illumos-utils/src/opte/port.rs | 1 + illumos-utils/src/process.rs | 9 ++------- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs index ccd4990d5f..1893c7f1aa 100644 --- a/illumos-utils/src/opte/non_illumos.rs +++ b/illumos-utils/src/opte/non_illumos.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Mock / dummy versions of the OPTE module, for non-illumos platforms +//! Stub versions of the OPTE module, for non-illumos platforms use slog::Logger; diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index f5776b6f00..0080c91b3f 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -89,6 +89,7 @@ pub struct Port { } impl Port { + #[allow(clippy::too_many_arguments)] pub fn new( executor: &BoxedExecutor, name: String, diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index cb44807e53..5e90e42e09 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -107,14 +107,9 @@ impl From<&Command> for Input { fn from(command: &Command) -> Self { Self { program: os_str_to_string(command.get_program()), - args: command - .get_args() - .into_iter() - .map(os_str_to_string) - .collect(), + args: command.get_args().map(os_str_to_string).collect(), envs: command .get_envs() - .into_iter() .map(|(k, v)| { ( os_str_to_string(k), @@ -207,7 +202,7 @@ impl StaticHandler { let expected = &self .expected .get(self.index) - .expect(&format!("Unexpected command: {input}")); + .unwrap_or_else(|| panic!("Unexpected command: {input}")); self.index += 1; assert_eq!(input, expected.0); expected.1.clone() From 9767d6717743ba745eb5e6a1ba9436843298d9b2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 8 Aug 2023 16:57:14 -0700 Subject: [PATCH 28/57] Add support for spawning fake processes too --- illumos-utils/src/process.rs | 226 +++++++++++++++++++++++++--------- sled-agent/src/swap_device.rs | 12 +- 2 files changed, 178 insertions(+), 60 deletions(-) diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index 4959e94474..13e55b1685 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -6,7 +6,8 @@ use itertools::Itertools; use 
slog::{debug, error, info, Logger}; -use std::io::Write; +use std::collections::VecDeque; +use std::io::{Read, Write}; use std::os::unix::process::ExitStatusExt; use std::process::{Command, ExitStatus, Output, Stdio}; use std::str::from_utf8; @@ -36,10 +37,16 @@ pub trait Executor: Send + Sync { pub type BoxedChild = Box; /// A child process spawned by the executor. -pub trait Child: Send + Sync { +pub trait Child: Send { /// Accesses the stdin of the spawned child, as a Writer. fn stdin(&mut self) -> Option>; + /// Accesses the stdout of the spawned child, as a Reader. + fn stdout(&mut self) -> Option>; + + /// Accesses the stderr of the spawned child, as a Reader. + fn stderr(&mut self) -> Option>; + /// Waits for the child to complete, and returns the output. fn wait(&mut self) -> Result; } @@ -59,6 +66,22 @@ impl Child for SpawnedChild { .map(|s| Box::new(s) as Box) } + fn stdout(&mut self) -> Option> { + self.child + .as_mut()? + .stdout + .take() + .map(|s| Box::new(s) as Box) + } + + fn stderr(&mut self) -> Option> { + self.child + .as_mut()? + .stderr + .take() + .map(|s| Box::new(s) as Box) + } + fn wait(&mut self) -> Result { let output = self.child.take().unwrap().wait_with_output().map_err(|err| { @@ -79,24 +102,111 @@ impl Child for SpawnedChild { } } +/// A queue of bytes that can selectively act as a reader or writer, +/// which can also be cloned. +/// +/// This is primarily used to emulate stdin / stdout / stderr. +#[derive(Clone)] +struct ByteQueue { + buf: Arc>>, +} + +impl ByteQueue { + fn new() -> Self { + Self { buf: Arc::new(Mutex::new(VecDeque::new())) } + } +} + +impl std::io::Write for ByteQueue { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buf.lock().unwrap().write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl std::io::Read for ByteQueue { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.buf.lock().unwrap().read(buf) + } +} + /// A child spawned by a [FakeExecutor]. 
pub struct FakeChild { - command_str: String, + id: u64, + command: Command, + executor: Arc, + stdin: ByteQueue, + stdout: ByteQueue, + stderr: ByteQueue, +} + +impl FakeChild { + fn new( + id: u64, + command: &Command, + executor: Arc, + ) -> Box { + // std::process::Command -- somewhat reasonably - doesn't implement Copy + // or Clone. However, we'd like to be able to reference it in the + // FakeChild, independently of where it was spawned. + // + // Manually copy the relevant pieces of the incoming command. + let mut copy_command = Command::new(command.get_program()); + copy_command.args(command.get_args()); + copy_command.envs(command.get_envs().filter_map(|(k, v)| { + if let Some(v) = v { + Some((k, v)) + } else { + None + } + })); + + Box::new(FakeChild { + id, + command: copy_command, + executor, + stdin: ByteQueue::new(), + stdout: ByteQueue::new(), + stderr: ByteQueue::new(), + }) + } + + fn command(&self) -> &Command { + &self.command + } } impl Child for FakeChild { fn stdin(&mut self) -> Option> { - // TODO: maybe a vecdeque? need to hook into the fakexecutor from which - // we spawned. - todo!(); + Some(Box::new(self.stdin.clone())) + } + + fn stdout(&mut self) -> Option> { + Some(Box::new(self.stdout.clone())) + } + + fn stderr(&mut self) -> Option> { + Some(Box::new(self.stderr.clone())) } fn wait(&mut self) -> Result { - todo!() + let executor = self.executor.clone(); + let output = executor.wait_handler.lock().unwrap()(self); + log_output(&self.executor.log, self.id, &output); + if !output.status.success() { + return Err(output_to_exec_error( + command_to_string(&self.command), + &output, + )); + } + Ok(output) } } -fn to_string(command: &std::process::Command) -> String { +pub fn command_to_string(command: &std::process::Command) -> String { command .get_args() .map(|s| s.to_string_lossy().into()) @@ -238,19 +348,6 @@ impl OutputExt for Output { } } -/// Describes a fully-completed command. 
-#[derive(Clone)] -pub struct CompletedCommand { - pub input: Input, - pub output: Output, -} - -impl CompletedCommand { - fn new(command: &Command, output: Output) -> Self { - Self { input: Input::from(command), output } - } -} - /// A handler that may be used for setting inputs/outputs to the executor /// when these commands are known ahead-of-time. /// @@ -284,7 +381,7 @@ impl StaticHandler { .get(self.index) .unwrap_or_else(|| panic!("Unexpected command: {input}")); self.index += 1; - assert_eq!(input, expected.0); + assert_eq!(input, expected.0, "Unexpected input command"); expected.1.clone() } } @@ -300,36 +397,52 @@ impl Drop for StaticHandler { } } -pub type ExecutorFn = dyn FnMut(&Command) -> Output + Send + Sync; -pub type BoxedExecutorFn = Box; +/// Handler called when spawning a fake child process +pub type SpawnFn = dyn FnMut(&mut FakeChild) + Send + Sync; +pub type BoxedSpawnFn = Box; -/// An executor which can expect certain inputs, and respond with specific outputs. -pub struct FakeExecutor { +/// Handler called when awaiting a fake child process +pub type WaitFn = dyn FnMut(&mut FakeChild) -> Output + Send + Sync; +pub type BoxedWaitFn = Box; + +struct FakeExecutorInner { log: Logger, counter: AtomicU64, - all_operations: Mutex>, - handler: Mutex, + spawn_handler: Mutex, + wait_handler: Mutex, +} + +/// An executor which can expect certain inputs, and respond with specific outputs. +pub struct FakeExecutor { + inner: Arc, } impl FakeExecutor { pub fn new(log: Logger) -> Arc { Arc::new(Self { - log, - counter: AtomicU64::new(0), - all_operations: Mutex::new(vec![]), - handler: Mutex::new(Box::new(|_cmd| Output::success())), + inner: Arc::new(FakeExecutorInner { + log, + counter: AtomicU64::new(0), + spawn_handler: Mutex::new(Box::new(|_cmd| ())), + wait_handler: Mutex::new(Box::new(|_cmd| Output::success())), + }), }) } + /// Set the spawn handler to an arbitrary function. 
+ pub fn set_spawn_handler(&self, f: BoxedSpawnFn) { + *self.inner.spawn_handler.lock().unwrap() = f; + } + /// Set the request handler to an arbitrary function. - pub fn set_handler(&self, f: BoxedExecutorFn) { - *self.handler.lock().unwrap() = f; + pub fn set_wait_handler(&self, f: BoxedWaitFn) { + *self.inner.wait_handler.lock().unwrap() = f; } /// Set the request handler to a static set of inputs and outputs. pub fn set_static_handler(&self, mut handler: StaticHandler) { - self.set_handler(Box::new(move |cmd| -> Output { - handler.execute(cmd) + self.set_wait_handler(Box::new(move |child| -> Output { + handler.execute(child.command()) })); } @@ -337,29 +450,27 @@ impl FakeExecutor { pub fn as_executor(self: Arc) -> BoxedExecutor { self } - - /// Returns the list of all commands that have executed on the executor. - pub fn all_operations(&self) -> Vec { - (*self.all_operations.lock().unwrap()).clone() - } } impl Executor for FakeExecutor { fn execute(&self, command: &mut Command) -> Result { - let id = self.counter.fetch_add(1, Ordering::SeqCst); - log_command(&self.log, id, command); + let id = self.inner.counter.fetch_add(1, Ordering::SeqCst); + log_command(&self.inner.log, id, command); - // Call our handler function with the caller-provided function. - let output = self.handler.lock().unwrap()(command); + let mut child = FakeChild::new(id, command, self.inner.clone()); - log_output(&self.log, id, &output); - self.all_operations - .lock() - .unwrap() - .push(CompletedCommand::new(command, output.clone())); + // Call our handler function with the caller-provided functions. + // + // This performs both the "spawn" and "wait" actions back-to-back. 
+ self.inner.spawn_handler.lock().unwrap()(&mut child); + let output = self.inner.wait_handler.lock().unwrap()(&mut child); + log_output(&self.inner.log, id, &output); if !output.status.success() { - return Err(output_to_exec_error(to_string(command), &output)); + return Err(output_to_exec_error( + command_to_string(command), + &output, + )); } Ok(output) } @@ -368,8 +479,10 @@ impl Executor for FakeExecutor { &self, command: &mut Command, ) -> Result { - let command_str = to_string(&command); - Ok(Box::new(FakeChild { command_str })) + let id = self.inner.counter.fetch_add(1, Ordering::SeqCst); + log_command(&self.inner.log, id, command); + + Ok(FakeChild::new(id, command, self.inner.clone())) } } @@ -402,7 +515,10 @@ impl Executor for HostExecutor { log_output(&self.log, id, &output); if !output.status.success() { - return Err(output_to_exec_error(to_string(&command), &output)); + return Err(output_to_exec_error( + command_to_string(&command), + &output, + )); } Ok(output) } @@ -411,7 +527,7 @@ impl Executor for HostExecutor { &self, command: &mut Command, ) -> Result { - let command_str = to_string(&command); + let command_str = command_to_string(&command); Ok(Box::new(SpawnedChild { child: Some( command diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 73262bc000..2d7958003f 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -4,7 +4,9 @@ //! Operations for creating a system swap device. 
-use illumos_utils::process::{BoxedExecutor, ExecutionError}; +use illumos_utils::process::{ + command_to_string, output_to_exec_error, BoxedExecutor, ExecutionError, +}; use std::io::Read; use zeroize::Zeroize; @@ -253,10 +255,10 @@ fn create_encrypted_swap_zvol( hdl.join().unwrap(); if !output.status.success() { - todo!(); - // return Err(SwapDeviceError::Zfs(output_to_exec_error( - // &command, &output, - // ))); + return Err(SwapDeviceError::Zfs(output_to_exec_error( + command_to_string(&command), + &output, + ))); } info!( From 091ac111e4892dbf37a82fd28ea7db72f6407000 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 9 Aug 2023 08:53:30 -0700 Subject: [PATCH 29/57] Update cargo.lock, appease clippy --- Cargo.lock | 2 +- sled-agent/src/sled_agent.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 35cbaa3b5e..4ee414929d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3392,7 +3392,7 @@ dependencies = [ "debug-ignore", "futures", "ipnetwork", - "itertools", + "itertools 0.10.5", "libc", "macaddr", "mockall", diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 032c9a828c..0a09d811c8 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -235,6 +235,7 @@ pub struct SledAgent { impl SledAgent { /// Initializes a new [`SledAgent`] object. 
+ #[allow(clippy::too_many_arguments)] pub async fn new( config: &Config, log: Logger, From 49a9b35529e0e7489d5bc1b11afd96900835d369 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 9 Aug 2023 14:14:46 -0700 Subject: [PATCH 30/57] More attempts to make compilation on illumos happy --- illumos-utils/src/process.rs | 13 ++++++++++++- illumos-utils/src/running_zone.rs | 2 +- sled-hardware/src/illumos/partitions.rs | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index 13e55b1685..70b7b5f330 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -47,6 +47,9 @@ pub trait Child: Send { /// Accesses the stderr of the spawned child, as a Reader. fn stderr(&mut self) -> Option>; + /// OS-assigned PID identifier for the child + fn id(&self) -> u32; + /// Waits for the child to complete, and returns the output. fn wait(&mut self) -> Result; } @@ -82,6 +85,10 @@ impl Child for SpawnedChild { .map(|s| Box::new(s) as Box) } + fn id(&self) -> u32 { + self.child.as_ref().expect("No child").id() + } + fn wait(&mut self) -> Result { let output = self.child.take().unwrap().wait_with_output().map_err(|err| { @@ -174,7 +181,7 @@ impl FakeChild { }) } - fn command(&self) -> &Command { + pub fn command(&self) -> &Command { &self.command } } @@ -192,6 +199,10 @@ impl Child for FakeChild { Some(Box::new(self.stderr.clone())) } + fn id(&self) -> u32 { + self.id.try_into().expect("u32 overflow") + } + fn wait(&mut self) -> Result { let executor = self.executor.clone(); let output = executor.wait_handler.lock().unwrap()(self); diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ed47573c67..8b48b37223 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -393,7 +393,7 @@ impl RunningZone { // executing. // unwrap() safety - child.id() returns u32 but pid_t is i32. // PID_MAX is 999999 so this will not overflow. 
- let child_pid: pid_t = spawn.child.id().try_into().unwrap(); + let child_pid: pid_t = spawn.id().try_into().unwrap(); let contract = zenter::get_contract(child_pid); // Capture the result, and be sure to clear the template for this diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 4801f23ca1..023cc8e243 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -220,8 +220,8 @@ mod test { let executor = FakeExecutor::new(log.clone()); let mut calls = 0; let mut zpool_name = None; - executor.set_handler(Box::new(move |cmd| -> Output { - let input = Input::from(cmd); + executor.set_wait_handler(Box::new(move |child| -> Output { + let input = Input::from(child.command()); assert_eq!(input.program, PFEXEC); match calls { From 837ec375f22e22d7f69393fc718a1a761b52a129 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 11 Aug 2023 12:33:52 -0700 Subject: [PATCH 31/57] Remove all usage of mocks --- Cargo.lock | 110 +---------- Cargo.toml | 6 +- illumos-utils/Cargo.toml | 8 - illumos-utils/src/process.rs | 61 ++++-- illumos-utils/src/running_zone.rs | 29 ++- illumos-utils/src/svc.rs | 105 +++++----- illumos-utils/src/zone.rs | 203 ++++++++++++++++--- package/src/bin/omicron-package.rs | 8 +- sled-agent/Cargo.toml | 3 - sled-agent/src/bootstrap/agent.rs | 8 +- sled-agent/src/instance.rs | 6 +- sled-agent/src/services.rs | 279 ++++++++++++++++++--------- sled-agent/src/sled_agent.rs | 12 +- sled-agent/src/storage/dump_setup.rs | 2 +- sled-hardware/Cargo.toml | 2 - 15 files changed, 506 insertions(+), 336 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ee414929d..f899539963 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,7 +279,7 @@ dependencies = [ "anstyle", "bstr 1.3.0", "doc-comment", - "predicates 3.0.3", + "predicates", "predicates-core", "predicates-tree", "wait-timeout", @@ -1707,19 +1707,6 @@ dependencies = [ "syn 2.0.28", ] -[[package]] -name = "dashmap" 
-version = "5.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" -dependencies = [ - "cfg-if 1.0.0", - "hashbrown 0.12.3", - "lock_api", - "once_cell", - "parking_lot_core 0.9.7", -] - [[package]] name = "data-encoding" version = "2.3.3" @@ -2098,12 +2085,6 @@ dependencies = [ "zerocopy 0.3.0", ] -[[package]] -name = "downcast" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" - [[package]] name = "dpd-client" version = "0.1.0" @@ -2579,12 +2560,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fragile" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" - [[package]] name = "fs-err" version = "2.9.0" @@ -3395,7 +3370,6 @@ dependencies = [ "itertools 0.10.5", "libc", "macaddr", - "mockall", "omicron-common 0.1.0", "omicron-test-utils", "opte-ioctl", @@ -4187,33 +4161,6 @@ dependencies = [ "windows-sys 0.45.0", ] -[[package]] -name = "mockall" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" -dependencies = [ - "cfg-if 1.0.0", - "downcast", - "fragile", - "lazy_static", - "mockall_derive", - "predicates 2.1.5", - "predicates-tree", -] - -[[package]] -name = "mockall_derive" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" -dependencies = [ - "cfg-if 1.0.0", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "native-tls" version = "0.2.11" @@ -5172,7 +5119,6 @@ dependencies = [ "semver 1.0.18", "serde", "serde_json", - "serial_test", "sha3", "sled-agent-client", "sled-hardware", @@ 
-6115,20 +6061,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -[[package]] -name = "predicates" -version = "2.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" -dependencies = [ - "difflib", - "float-cmp", - "itertools 0.10.5", - "normalize-line-endings", - "predicates-core", - "regex", -] - [[package]] name = "predicates" version = "3.0.3" @@ -7581,31 +7513,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "serial_test" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c789ec87f4687d022a2405cf46e0cd6284889f1839de292cadeb6c6019506f2" -dependencies = [ - "dashmap", - "futures", - "lazy_static", - "log", - "parking_lot 0.12.1", - "serial_test_derive", -] - -[[package]] -name = "serial_test_derive" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b64f9e531ce97c88b4778aad0ceee079216071cffec6ac9b904277f8f92e7fe3" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "sha-1" version = "0.10.1" @@ -7805,7 +7712,6 @@ dependencies = [ "rand 0.8.5", "schemars", "serde", - "serial_test", "slog", "thiserror", "tofino", @@ -7933,9 +7839,9 @@ checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" [[package]] name = "smf" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f19d427ae89311c2770c49fdcfa14627577c499311fe8f4cc8fcfde2dd3c4e2e" +checksum = "e6015a9bbf269b84c928dc68e11680bbdfa6f065f1c6d5383ec134f55bab188b" dependencies = [ "thiserror", ] @@ -8955,7 +8861,7 @@ dependencies = [ "humantime", "omicron-common 0.1.0", "omicron-test-utils", - "predicates 3.0.3", + "predicates", "slog", "slog-async", "slog-envlogger", @@ -10092,9 
+9998,9 @@ dependencies = [ [[package]] name = "zone" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0545a42fbd7a81245726d54a0146cb4fd93882ebb6da50d60acf2e37394f198" +checksum = "a62a428a79ea2224ce8ab05d6d8a21bdd7b4b68a8dbc1230511677a56e72ef22" dependencies = [ "itertools 0.10.5", "thiserror", @@ -10104,9 +10010,9 @@ dependencies = [ [[package]] name = "zone_cfg_derive" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef224b009d070d3b1adb9e375fcf8ec2f1948a412c3bbf8755c0ef4e3f91ef94" +checksum = "d5c4f01d3785e222d5aca11c9813e9c46b69abfe258756c99c9b628683626cc8" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index 2a61fb48cc..9998e6dbb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -214,7 +214,6 @@ libc = "0.2.147" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } mime_guess = "2.0.4" -mockall = "0.11" newtype_derive = "0.1.6" nexus-client = { path = "nexus-client" } nexus-db-model = { path = "nexus/db-model" } @@ -299,7 +298,6 @@ serde_path_to_error = "0.1.14" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" serde_with = "2.3.3" -serial_test = "0.10" sha2 = "0.10.7" sha3 = "0.10.8" shell-words = "1.1.0" @@ -313,7 +311,7 @@ slog-async = "2.7" slog-dtrace = "0.2" slog-envlogger = "2.2" slog-term = "2.9" -smf = "0.2" +smf = "0.2.2" snafu = "0.7" sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } @@ -364,7 +362,7 @@ wicket-common = { path = "wicket-common" } wicketd-client = { path = "wicketd-client" } zeroize = { version = "1.6.0", features = ["zeroize_derive", "std"] } zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] } -zone = { version = "0.2", default-features = false, features = ["async"] } +zone = { version = "0.3", 
default-features = false, features = ["async"] } [profile.dev] panic = "abort" diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index a69bbd731a..7999a0aa6c 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -29,19 +29,11 @@ tokio.workspace = true uuid.workspace = true zone.workspace = true -# only enabled via the `testing` feature -mockall = { workspace = true, optional = true } - [target.'cfg(target_os = "illumos")'.dependencies] opte-ioctl.workspace = true [dev-dependencies] -mockall.workspace = true omicron-test-utils.workspace = true regress.workspace = true serde_json.workspace = true toml.workspace = true - -[features] -# Enable to generate MockZones -testing = ["mockall"] diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index 70b7b5f330..f3838d2f88 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -9,7 +9,7 @@ use slog::{debug, error, info, Logger}; use std::collections::VecDeque; use std::io::{Read, Write}; use std::os::unix::process::ExitStatusExt; -use std::process::{Command, ExitStatus, Output, Stdio}; +use std::process::{Command, ExitStatus, Stdio}; use std::str::from_utf8; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; @@ -23,6 +23,13 @@ pub type BoxedExecutor = Arc; /// - In production, this is usually simply a [HostExecutor]. /// - Under test, this can be customized, and a [FakeExecutor] may be used. pub trait Executor: Send + Sync { + // TODO: Would be nice to have an async variant of this? + // - Is that possible? + // - Could it be additive? + // + // XXX: If we don't have that, I think we're regressing for the + // zone commands, which were previously async. + /// Executes a task, waiting for it to complete, and returning output. 
fn execute(&self, command: &mut Command) -> Result; @@ -322,6 +329,8 @@ impl From<&Command> for Input { } } +pub type Output = std::process::Output; + /// Convenience functions for usage in tests, to perform common operations /// with minimal boilerplate. pub trait OutputExt: Sized { @@ -359,12 +368,19 @@ impl OutputExt for Output { } } +type DynamicHandler = Box Output + Send + Sync>; + +enum HandledCommand { + Static { input: Input, output: Output }, + Dynamic { handler: DynamicHandler }, +} + /// A handler that may be used for setting inputs/outputs to the executor /// when these commands are known ahead-of-time. /// /// See: [FakeExecutor::set_static_handler] for usage. pub struct StaticHandler { - expected: Vec<(Input, Output)>, + expected: Vec, index: usize, } @@ -373,27 +389,44 @@ impl StaticHandler { Self { expected: Vec::new(), index: 0 } } + /// Expects a static "input" to exactly produce some "output". pub fn expect(&mut self, input: Input, output: Output) { - self.expected.push((input, output)); + self.expected.push(HandledCommand::Static { input, output }); } + /// A helper for [Self::expect] which quietly succeeds. pub fn expect_ok>(&mut self, input: S) { self.expect(Input::shell(input), Output::success()) } + /// A helper for [Self::expect] which quietly fails. pub fn expect_fail>(&mut self, input: S) { self.expect(Input::shell(input), Output::failure()) } + /// Expects a dynamic handler to be invoked to dynamically + /// determine the output of this call. 
+ pub fn expect_dynamic(&mut self, handler: DynamicHandler) { + self.expected.push(HandledCommand::Dynamic { handler }); + } + fn execute(&mut self, command: &Command) -> Output { - let input = Input::from(command); - let expected = &self + let observed_input = Input::from(command); + let expected = &mut self .expected - .get(self.index) - .unwrap_or_else(|| panic!("Unexpected command: {input}")); + .get_mut(self.index) + .unwrap_or_else(|| panic!("Unexpected command: {observed_input}")); self.index += 1; - assert_eq!(input, expected.0, "Unexpected input command"); - expected.1.clone() + + match expected { + HandledCommand::Static { input, output } => { + assert_eq!(&observed_input, input, "Unexpected input command"); + output.clone() + } + HandledCommand::Dynamic { ref mut handler } => { + handler(observed_input) + } + } } } @@ -402,8 +435,14 @@ impl Drop for StaticHandler { let expected = self.expected.len(); let actual = self.index; if actual < expected { - let next = &self.expected[actual].0; - assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {next}"); + let next = &self.expected[actual]; + let tip = match next { + HandledCommand::Static { input, .. } => input.to_string(), + HandledCommand::Dynamic { .. 
} => { + "".to_string() + } + }; + assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); } } } diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 8b48b37223..9450076cf4 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -10,7 +10,7 @@ use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; use crate::process::{BoxedExecutor, ExecutionError}; use crate::svc::wait_for_service; -use crate::zone::{AddressRequest, IPADM, ZONE_PREFIX}; +use crate::zone::{AddressRequest, Zones, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; use ipnetwork::IpNetwork; #[cfg(target_os = "illumos")] @@ -24,11 +24,6 @@ use slog::Logger; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use uuid::Uuid; -#[cfg(any(test, feature = "testing"))] -use crate::zone::MockZones as Zones; -#[cfg(not(any(test, feature = "testing")))] -use crate::zone::Zones; - /// Errors returned from methods for fetching SMF services and log files #[derive(thiserror::Error, Debug)] pub enum ServiceError { @@ -458,7 +453,7 @@ impl RunningZone { // Boot the zone. info!(zone.log, "Zone booting"); - Zones::boot(&zone.name).await?; + Zones::boot(&zone.executor, &zone.name).await?; // Wait until the zone reaches the 'single-user' SMF milestone. // At this point, we know that the dependent @@ -467,12 +462,12 @@ impl RunningZone { // services are up, so future requests to create network addresses // or manipulate services will work. 
let fmri = "svc:/milestone/single-user:default"; - wait_for_service(Some(&zone.name), fmri).await.map_err(|_| { - BootError::Timeout { + wait_for_service(&zone.executor, Some(&zone.name), fmri) + .await + .map_err(|_| BootError::Timeout { service: fmri.to_string(), zone: zone.name.to_string(), - } - })?; + })?; // If the zone is self-assembling, then SMF service(s) inside the zone // will be creating the listen address for the zone's service(s), @@ -482,7 +477,7 @@ impl RunningZone { // Use the zone ID in order to check if /var/svc/profile/site.xml // exists. - let id = Zones::id(&zone.name) + let id = Zones::id(&zone.executor, &zone.name) .await? .ok_or_else(|| BootError::NoZoneId { zone: zone.name.clone() })?; let site_profile_xml_exists = @@ -778,7 +773,7 @@ impl RunningZone { zone_prefix: &str, addrtype: AddressRequest, ) -> Result { - let zone_info = Zones::get() + let zone_info = Zones::get(executor) .await .map_err(|err| GetZoneError::GetZones { prefix: zone_prefix.to_string(), @@ -870,7 +865,7 @@ impl RunningZone { if let Some(_) = self.id.take() { let log = self.inner.log.clone(); let name = self.name().to_string(); - Zones::halt_and_remove_logged(&log, &name) + Zones::halt_and_remove_logged(&self.inner.executor, &log, &name) .await .map_err(|err| err.to_string())?; } @@ -1008,8 +1003,11 @@ impl Drop for RunningZone { if let Some(_) = self.id.take() { let log = self.inner.log.clone(); let name = self.name().to_string(); + let executor = self.inner.executor.clone(); tokio::task::spawn(async move { - match Zones::halt_and_remove_logged(&log, &name).await { + match Zones::halt_and_remove_logged(&executor, &log, &name) + .await + { Ok(()) => { info!(log, "Stopped and uninstalled zone") } @@ -1180,6 +1178,7 @@ impl InstalledZone { net_device_names.dedup(); Zones::install_omicron_zone( + executor, log, &zone_root_path, &full_zone_name, diff --git a/illumos-utils/src/svc.rs b/illumos-utils/src/svc.rs index b08679b720..2678befa5b 100644 --- 
a/illumos-utils/src/svc.rs +++ b/illumos-utils/src/svc.rs @@ -4,68 +4,61 @@ //! Utilities for accessing services. -use cfg_if::cfg_if; - +use crate::process::BoxedExecutor; use omicron_common::api::external::Error; use omicron_common::backoff; -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] -mod inner { - use super::*; - - // TODO(https://www.illumos.org/issues/13837): This is a hack; - // remove me when when fixed. Ideally, the ".synchronous()" argument - // to "svcadm enable" would wait for the service to be online, which - // would simplify all this stuff. - // - // Ideally, when "svccfg add" returns, these properties would be set, - // but unfortunately, they are not. This means that when we invoke - // "svcadm enable -s", it's possible for critical restarter - // properties to not exist when the command returns. - // - // We workaround this by querying for these properties in a loop. - pub async fn wait_for_service<'a, 'b>( - zone: Option<&'a str>, - fmri: &'b str, - ) -> Result<(), Error> { - let name = smf::PropertyName::new("restarter", "state").unwrap(); +// TODO(https://www.illumos.org/issues/13837): This is a hack; +// remove me when when fixed. Ideally, the ".synchronous()" argument +// to "svcadm enable" would wait for the service to be online, which +// would simplify all this stuff. +// +// Ideally, when "svccfg add" returns, these properties would be set, +// but unfortunately, they are not. This means that when we invoke +// "svcadm enable -s", it's possible for critical restarter +// properties to not exist when the command returns. +// +// We workaround this by querying for these properties in a loop. 
+pub async fn wait_for_service<'a, 'b>( + executor: &BoxedExecutor, + zone: Option<&'a str>, + fmri: &'b str, +) -> Result<(), Error> { + let name = smf::PropertyName::new("restarter", "state").unwrap(); - let log_notification_failure = |_error, _delay| {}; - backoff::retry_notify( - backoff::retry_policy_local(), - || async { - let mut p = smf::Properties::new(); - let properties = { - if let Some(zone) = zone { - p.zone(zone) - } else { - &mut p - } - }; - if let Ok(value) = properties.lookup().run(&name, &fmri) { - if value.value() - == &smf::PropertyValue::Astring("online".to_string()) - { - return Ok(()); - } + let log_notification_failure = |_error, _delay| {}; + backoff::retry_notify( + backoff::retry_policy_local(), + || async { + let mut p = smf::Properties::new(); + let properties = { + if let Some(zone) = zone { + p.zone(zone) + } else { + &mut p } + }; + let mut cmd = properties.lookup().as_command(&name, &fmri); + + let Ok(output) = executor.execute(&mut cmd) else { return Err(backoff::BackoffError::transient( - "Property not found", + "Failed to execute command", )); - }, - log_notification_failure, - ) - .await - .map_err(|e| Error::InternalError { - internal_message: format!("Failed to wait for service: {}", e), - }) - } -} + }; -cfg_if! 
{ - if #[cfg(any(test, feature = "testing"))] { - pub use mock_inner::*; - } else { - pub use inner::*; - } + if let Ok(value) = smf::PropertyLookup::parse_output(&output) { + if value.value() + == &smf::PropertyValue::Astring("online".to_string()) + { + return Ok(()); + } + } + return Err(backoff::BackoffError::transient("Property not found")); + }, + log_notification_failure, + ) + .await + .map_err(|e| Error::InternalError { + internal_message: format!("Failed to wait for service: {}", e), + }) } diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index f975dd1fe7..ec3c2b07a4 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -22,6 +22,8 @@ pub const IPADM: &str = "/usr/sbin/ipadm"; pub const SVCADM: &str = "/usr/sbin/svcadm"; pub const SVCCFG: &str = "/usr/sbin/svccfg"; pub const ZLOGIN: &str = "/usr/sbin/zlogin"; +pub const ZONEADM: &str = "/usr/sbin/zoneadm"; +pub const ZONECFG: &str = "/usr/sbin/zonecfg"; // TODO: These could become enums pub const ZONE_PREFIX: &str = "oxz_"; @@ -58,7 +60,7 @@ pub struct AdmError { op: Operation, zone: String, #[source] - err: zone::ZoneError, + err: Box, } /// Errors which may be encountered when deleting addresses. @@ -203,16 +205,16 @@ fn parse_ip_network(s: &str) -> Result { } } -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zones { /// Ensures a zone is halted before both uninstalling and deleting it. /// /// Returns the state the zone was in before it was removed, or None if the /// zone did not exist. pub async fn halt_and_remove( + executor: &BoxedExecutor, name: &str, ) -> Result, AdmError> { - match Self::find(name).await? { + match Self::find(executor, name).await? 
{ None => Ok(None), Some(zone) => { let state = zone.state(); @@ -231,7 +233,7 @@ impl Zones { AdmError { op: Operation::Halt, zone: name.to_string(), - err, + err: Box::new(err), } })?; } @@ -242,7 +244,7 @@ impl Zones { .map_err(|err| AdmError { op: Operation::Uninstall, zone: name.to_string(), - err, + err: Box::new(err), })?; } zone::Config::new(name) @@ -252,7 +254,7 @@ impl Zones { .map_err(|err| AdmError { op: Operation::Delete, zone: name.to_string(), - err, + err: Box::new(err), })?; Ok(Some(state)) } @@ -261,10 +263,11 @@ impl Zones { /// Halt and remove the zone, logging the state in which the zone was found. pub async fn halt_and_remove_logged( + executor: &BoxedExecutor, log: &Logger, name: &str, ) -> Result<(), AdmError> { - if let Some(state) = Self::halt_and_remove(name).await? { + if let Some(state) = Self::halt_and_remove(executor, name).await? { info!( log, "halt_and_remove_logged: Previous zone state: {:?}", state @@ -280,6 +283,7 @@ impl Zones { /// - Otherwise, the zone is deleted. #[allow(clippy::too_many_arguments)] pub async fn install_omicron_zone( + executor: &BoxedExecutor, log: &Logger, zone_root_path: &Utf8Path, zone_name: &str, @@ -290,7 +294,7 @@ impl Zones { links: Vec, limit_priv: Vec, ) -> Result<(), AdmError> { - if let Some(zone) = Self::find(zone_name).await? { + if let Some(zone) = Self::find(executor, zone_name).await? 
{ info!( log, "install_omicron_zone: Found zone: {} in state {:?}", @@ -307,7 +311,8 @@ impl Zones { "Invalid state; uninstalling and deleting zone {}", zone_name ); - Zones::halt_and_remove_logged(log, zone.name()).await?; + Zones::halt_and_remove_logged(executor, log, zone.name()) + .await?; } } @@ -344,34 +349,38 @@ impl Zones { ..Default::default() }); } - cfg.run().await.map_err(|err| AdmError { + executor.execute(&mut cfg.as_command()).map_err(|err| AdmError { op: Operation::Configure, zone: zone_name.to_string(), - err, + err: Box::new(err), })?; info!(log, "Installing Omicron zone: {}", zone_name); - zone::Adm::new(zone_name) - .install(&[ + executor + .execute(&mut zone::Adm::new(zone_name).install_command(&[ zone_image.as_ref(), "/opt/oxide/overlay.tar.gz".as_ref(), - ]) - .await + ])) .map_err(|err| AdmError { op: Operation::Install, zone: zone_name.to_string(), - err, + err: Box::new(err), })?; Ok(()) } /// Boots a zone (named `name`). - pub async fn boot(name: &str) -> Result<(), AdmError> { - zone::Adm::new(name).boot().await.map_err(|err| AdmError { + pub async fn boot( + executor: &BoxedExecutor, + name: &str, + ) -> Result<(), AdmError> { + let mut cmd = zone::Adm::new(name).boot_command(); + + executor.execute(&mut cmd).map_err(|err| AdmError { op: Operation::Boot, zone: name.to_string(), - err, + err: Box::new(err), })?; Ok(()) } @@ -379,14 +388,24 @@ impl Zones { /// Returns all zones that may be managed by the Sled Agent. /// /// These zones must have names starting with [`ZONE_PREFIX`]. - pub async fn get() -> Result, AdmError> { - Ok(zone::Adm::list() - .await - .map_err(|err| AdmError { - op: Operation::List, - zone: "".to_string(), - err, - })? 
+ pub async fn get( + executor: &BoxedExecutor, + ) -> Result, AdmError> { + let handle_err = |err| AdmError { + op: Operation::List, + zone: "".to_string(), + err, + }; + + let mut cmd = zone::Adm::list_command(); + let output = executor + .execute(&mut cmd) + .map_err(|err| handle_err(Box::new(err)))?; + + let zones = zone::Adm::parse_list_output(&output) + .map_err(|err| handle_err(Box::new(err)))?; + + Ok(zones .into_iter() .filter(|z| z.name().starts_with(ZONE_PREFIX)) .collect()) @@ -396,8 +415,14 @@ impl Zones { /// /// Can only return zones that start with [`ZONE_PREFIX`], as they /// are managed by the Sled Agent. - pub async fn find(name: &str) -> Result, AdmError> { - Ok(Self::get().await?.into_iter().find(|zone| zone.name() == name)) + pub async fn find( + executor: &BoxedExecutor, + name: &str, + ) -> Result, AdmError> { + Ok(Self::get(executor) + .await? + .into_iter() + .find(|zone| zone.name() == name)) } /// Return the ID for a _running_ zone with the specified name. @@ -407,10 +432,13 @@ impl Zones { // object. But that can't easily be done, because we need to supply // `mockall` with a value to return, and `zone::Zone` objects can't be // constructed since they have private fields. - pub async fn id(name: &str) -> Result, AdmError> { + pub async fn id( + executor: &BoxedExecutor, + name: &str, + ) -> Result, AdmError> { // Safety: illumos defines `zoneid_t` as a typedef for an integer, i.e., // an `i32`, so this unwrap should always be safe. 
- match Self::find(name).await?.map(|zn| zn.id()) { + match Self::find(executor, name).await?.map(|zn| zn.id()) { Some(Some(id)) => Ok(Some(id.try_into().unwrap())), Some(None) | None => Ok(None), } @@ -863,6 +891,119 @@ impl Zones { #[cfg(test)] mod tests { use super::*; + use crate::process::{FakeExecutor, Input, OutputExt, StaticHandler}; + use omicron_test_utils::dev; + use std::process::Output; + + #[tokio::test] + async fn install_new_zone_calls_config_then_install() { + let logctx = + dev::test_setup_log("install_new_zone_calls_config_then_install"); + + let zone_root_path = Utf8Path::new("/root"); + let zone_name = "oxz_myzone"; + let zone_image = Utf8Path::new("/image.tar.gz"); + + // When installing a new zone, we expect to see: + // - A request for the list of existing zones + // - A command to configure the zone + // - A command to install the zone + let mut handler = StaticHandler::new(); + handler.expect( + Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), + Output::success().set_stdout("0:global:running:/::ipkg:shared"), + ); + + handler.expect( + Input::shell(format!( + "{PFEXEC} {ZONECFG} -z {zone_name} \ + create -F -b ; \ + set brand=omicron1 ; \ + set zonepath={zone_root_path}/{zone_name} ; \ + set autoboot=false ; \ + set ip-type=exclusive" + )), + Output::success(), + ); + + handler.expect( + Input::shell(format!( + "{PFEXEC} {ZONEADM} -z {zone_name} \ + install {zone_image} /opt/oxide/overlay.tar.gz" + )), + Output::success(), + ); + + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let datasets = []; + let filesystems = []; + let devices = []; + let links = vec![]; + let limit_priv = vec![]; + + Zones::install_omicron_zone( + &executor.as_executor(), + &logctx.log, + &zone_root_path, + zone_name, + &zone_image, + &datasets, + &filesystems, + &devices, + links, + limit_priv, + ) + .await + .expect("Failed to install zone"); + + logctx.cleanup_successful(); + } + + #[tokio::test] + async 
fn install_existing_zone_queries_for_it() { + let logctx = + dev::test_setup_log("install_existing_zone_queries_for_it"); + + let zone_root_path = Utf8Path::new("/root"); + let zone_name = "oxz_myzone"; + let zone_image = Utf8Path::new("/image.tar.gz"); + + let mut handler = StaticHandler::new(); + handler.expect( + Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), + Output::success().set_stdout( + "0:global:running:/::ipkg:shared\n1:oxz_myzone:running:/root/oxz_myzone::omicron1:excl" + ) + ); + + let executor = FakeExecutor::new(logctx.log.clone()); + executor.set_static_handler(handler); + + let datasets = []; + let filesystems = []; + let devices = []; + let links = vec![]; + let limit_priv = vec![]; + + Zones::install_omicron_zone( + &executor.as_executor(), + &logctx.log, + &zone_root_path, + zone_name, + &zone_image, + &datasets, + &filesystems, + &devices, + links, + limit_priv, + ) + .await + .expect("Failed to install zone"); + + logctx.cleanup_successful(); + } #[test] fn test_parse_ip_network() { diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 495e5925e1..857db6afe5 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -587,12 +587,12 @@ async fn do_install( do_activate(config, install_dir) } -async fn uninstall_all_omicron_zones() -> Result<()> { +async fn uninstall_all_omicron_zones(executor: &BoxedExecutor) -> Result<()> { const CONCURRENCY_CAP: usize = 32; - futures::stream::iter(zone::Zones::get().await?) + futures::stream::iter(zone::Zones::get(executor).await?) 
.map(Ok::<_, anyhow::Error>) .try_for_each_concurrent(CONCURRENCY_CAP, |zone| async move { - zone::Zones::halt_and_remove(zone.name()).await?; + zone::Zones::halt_and_remove(executor, zone.name()).await?; Ok(()) }) .await?; @@ -692,7 +692,7 @@ fn remove_all_except>( async fn do_deactivate(config: &Config) -> Result<()> { let executor = HostExecutor::new(config.log.clone()).as_executor(); info!(&config.log, "Removing all Omicron zones"); - uninstall_all_omicron_zones().await?; + uninstall_all_omicron_zones(&executor).await?; info!(config.log, "Uninstalling all packages"); uninstall_all_packages(config); info!(config.log, "Removing networking resources"); diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index e9e1bd6cfe..375289135f 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -91,13 +91,10 @@ openapi-lint.workspace = true openapiv3.workspace = true pretty_assertions.workspace = true rcgen.workspace = true -serial_test.workspace = true subprocess.workspace = true slog-async.workspace = true slog-term.workspace = true -illumos-utils = { workspace = true, features = ["testing"] } - # # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 7e513ab68d..f98e5afb68 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -242,7 +242,7 @@ async fn cleanup_all_old_global_state( // Currently, we're removing these zones. In the future, we should // re-establish contact (i.e., if the Sled Agent crashed, but we wanted // to leave the running Zones intact). - let zones = Zones::get().await?; + let zones = Zones::get(executor).await?; stream::iter(zones) .zip(stream::iter(std::iter::repeat(log.clone()))) .map(Ok::<_, illumos_utils::zone::AdmError>) @@ -251,7 +251,7 @@ async fn cleanup_all_old_global_state( // the caller that this failed. 
.for_each_concurrent_then_try(None, |(zone, log)| async move { warn!(log, "Deleting existing zone"; "zone_name" => zone.name()); - Zones::halt_and_remove_logged(&log, zone.name()).await + Zones::halt_and_remove_logged(executor, &log, zone.name()).await }) .await?; @@ -760,14 +760,14 @@ impl Agent { _state: &tokio::sync::MutexGuard<'_, SledAgentState>, ) -> Result<(), BootstrapError> { const CONCURRENCY_CAP: usize = 32; - futures::stream::iter(Zones::get().await?) + futures::stream::iter(Zones::get(&self.executor).await?) .map(Ok::<_, anyhow::Error>) // Use for_each_concurrent_then_try to delete as much as possible. // We only return one error though -- hopefully that's enough to // signal to the caller that this failed. .for_each_concurrent_then_try(CONCURRENCY_CAP, |zone| async move { if zone.name() != "oxz_switch" { - Zones::halt_and_remove(zone.name()).await?; + Zones::halt_and_remove(&self.executor, zone.name()).await?; } Ok(()) }) diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 8aba42d56e..3d11447e42 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -577,7 +577,9 @@ impl InstanceInner { // `RunningZone::stop` in case we're called between creating the // zone and assigning `running_state`. warn!(self.log, "Halting and removing zone: {}", zname); - Zones::halt_and_remove_logged(&self.log, &zname).await.unwrap(); + Zones::halt_and_remove_logged(&self.executor, &self.log, &zname) + .await + .unwrap(); // Remove ourselves from the instance manager's map of instances. self.instance_ticket.terminate(); @@ -983,7 +985,7 @@ impl Instance { // but it helps distinguish "online in SMF" from "responding to HTTP // requests". 
let fmri = fmri_name(); - wait_for_service(Some(&zname), &fmri) + wait_for_service(&inner.executor, Some(&zname), &fmri) .await .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(inner.log, "Propolis SMF service is online"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0270b0a032..151fce9bbf 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2167,7 +2167,7 @@ impl ServiceManager { let name = zone.zone_name(); if existing_zones.contains_key(&name) { // Make sure the zone actually exists in the right state too - match Zones::find(&name).await { + match Zones::find(&self.inner.executor, &name).await { Ok(Some(zone)) if zone.state() == zone::State::Running => { info!(log, "skipping running zone"; "zone" => &name); continue; @@ -2859,9 +2859,8 @@ mod test { Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - process::FakeExecutor, - svc, - zone::MockZones, + process::{FakeExecutor, Input, Output, OutputExt, StaticHandler}, + zone::{ZONEADM, ZONECFG}, }; use key_manager::{ SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, @@ -2876,55 +2875,120 @@ mod test { const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_oximeter"; - // Returns the expectations for a new service to be created. - fn expect_new_service() -> Vec> { - // Install the Omicron Zone - let install_ctx = MockZones::install_omicron_zone_context(); - install_ctx.expect().return_once(|_, _, name, _, _, _, _, _, _| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(()) - }); + // Generate a static executor handler with the expected invocations (and + // responses) when generating a new service. 
+ fn expect_new_service( + handler: &mut StaticHandler, + config: &TestConfig, + zone_id: Uuid, + u2_mountpoint: &Utf8Path, + ) { + handler.expect( + Input::shell(format!("{PFEXEC} /usr/sbin/dladm create-vnic -t -l underlay_stub0 -p mtu=9000 oxControlService0")), + Output::success() + ); + handler.expect( + Input::shell(format!("{PFEXEC} /usr/sbin/dladm set-linkprop -t -p mtu=9000 oxControlService0")), + Output::success() + ); - // Boot the zone. - let boot_ctx = MockZones::boot_context(); - boot_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(()) - }); + handler.expect( + Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), + Output::success().set_stdout("0:global:running:/::ipkg:shared"), + ); - // After calling `MockZones::boot`, `RunningZone::boot` will then look - // up the zone ID for the booted zone. This goes through - // `MockZone::id` to find the zone and get its ID. - let id_ctx = MockZones::id_context(); - id_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(Some(1)) - }); + let zone_name = format!("{EXPECTED_ZONE_NAME_PREFIX}_{zone_id}"); + + let zonepath = format!("{u2_mountpoint}/{zone_name}"); + handler.expect( + Input::shell(format!( + "{PFEXEC} {ZONECFG} -z {zone_name} \ + create -F -b ; \ + set brand=omicron1 ; \ + set zonepath={zonepath} ; \ + set autoboot=false ; \ + set ip-type=exclusive ; \ + add net ; \ + set physical=oxControlService0 ; \ + end" + )), + Output::success(), + ); - // Ensure the address exists - let ensure_address_ctx = MockZones::ensure_address_context(); - ensure_address_ctx.expect().return_once(|_, _, _, _| { - Ok(ipnetwork::IpNetwork::new(IpAddr::V6(Ipv6Addr::LOCALHOST), 64) - .unwrap()) - }); + let zone_image = + format!("{}/oximeter.tar.gz", config.config_dir.path()); + handler.expect( + Input::shell(format!( + "{PFEXEC} {ZONEADM} -z {zone_name} \ + install {zone_image} /opt/oxide/overlay.tar.gz" + )), + Output::success(), + ); 
+ + handler.expect( + Input::shell(format!("{PFEXEC} {ZONEADM} -z {zone_name} boot")), + Output::success(), + ); + + handler.expect( + Input::shell( + format!("{PFEXEC} /usr/bin/svcprop -t -z {zone_name} -p restarter/state svc:/milestone/single-user:default") + ), + Output::success().set_stdout("restarter/state astring online"), + ); - // Wait for the networking service. - let wait_ctx = svc::wait_for_service_context(); - wait_ctx.expect().return_once(|_, _| Ok(())); - - vec![ - Box::new(install_ctx), - Box::new(boot_ctx), - Box::new(id_ctx), - Box::new(ensure_address_ctx), - Box::new(wait_ctx), - ] + handler.expect( + Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), + Output::success().set_stdout( + format!("0:global:running:/::ipkg:shared\n1:{zone_name}:running:{zonepath}::omicron1:excl") + ) + ); + + // TODO: The "echo" is a linux-only hack for commands which would + // typically run within a zone. + handler.expect( + Input::shell("echo /usr/sbin/ipadm create-if -t oxControlService0"), + Output::success(), + ); + handler.expect( + Input::shell("echo /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv4 oxControlService0"), + Output::success(), + ); + handler.expect( + Input::shell("echo /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv6 oxControlService0"), + Output::success(), + ); + handler.expect( + Input::shell("echo /usr/sbin/route add -inet6 default -inet6 ::1"), + Output::success(), + ); + handler.expect( + Input::shell("echo /usr/sbin/svccfg import /var/svc/manifest/site/oximeter/manifest.xml"), + Output::success(), + ); + handler.expect( + Input::shell(format!("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/id={zone_id}")), + Output::success(), + ); + handler.expect( + Input::shell(format!("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/address=[::1]:12223")), + Output::success(), + ); + handler.expect( + Input::shell(format!( + "echo /usr/sbin/svccfg -s svc:/oxide/oximeter:default refresh" + )), + Output::success(), + ); + 
handler.expect( + Input::shell(format!( + "echo /usr/sbin/svcadm enable -t svc:/oxide/oximeter:default" + )), + Output::success(), + ); } - // Prepare to call "ensure" for a new service, then actually call "ensure". async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { - let _expectations = expect_new_service(); - mgr.ensure_all_services_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { id, @@ -2948,8 +3012,6 @@ mod test { .unwrap(); } - // Prepare to call "ensure" for a service which already exists. We should - // return the service without actually installing a new zone. async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { mgr.ensure_all_services_persistent(ServiceEnsureBody { services: vec![ServiceZoneRequest { @@ -2974,20 +3036,6 @@ mod test { .unwrap(); } - // Prepare to drop the service manager. - // - // This will shut down all allocated zones, and delete their - // associated VNICs. - fn drop_service_manager(mgr: ServiceManager) { - let halt_ctx = MockZones::halt_and_remove_logged_context(); - halt_ctx.expect().returning(|_, name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(()) - }); - // Explicitly drop the service manager - drop(mgr); - } - struct TestConfig { config_dir: camino_tempfile::Utf8TempDir, } @@ -3046,13 +3094,23 @@ mod test { } #[tokio::test] - #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let executor = FakeExecutor::new(log.clone()).as_executor(); + + let storage = StorageResources::new_for_test(); + let u2_mountpoints = storage.all_u2_mountpoints(ZONE_DATASET).await; + assert_eq!(u2_mountpoints.len(), 1); + let u2_mountpoint = &u2_mountpoints[0]; + + let executor = FakeExecutor::new(log.clone()); + let id = Uuid::new_v4(); + let mut handler = StaticHandler::new(); + expect_new_service(&mut handler, &test_config, id, 
&u2_mountpoint); + executor.set_static_handler(handler); + let executor = executor.as_executor(); let mgr = ServiceManager::new( log.clone(), @@ -3066,7 +3124,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage, ) .await .unwrap(); @@ -3086,22 +3144,31 @@ mod test { ) .unwrap(); - let id = Uuid::new_v4(); ensure_new_service(&mgr, id).await; - drop_service_manager(mgr); + drop(mgr); logctx.cleanup_successful(); } #[tokio::test] - #[serial_test::serial] async fn test_ensure_service_which_already_exists() { let logctx = omicron_test_utils::dev::test_setup_log( "test_ensure_service_which_already_exists", ); let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let executor = FakeExecutor::new(log.clone()).as_executor(); + + let storage = StorageResources::new_for_test(); + let u2_mountpoints = storage.all_u2_mountpoints(ZONE_DATASET).await; + assert_eq!(u2_mountpoints.len(), 1); + let u2_mountpoint = &u2_mountpoints[0]; + + let executor = FakeExecutor::new(log.clone()); + let id = Uuid::new_v4(); + let mut handler = StaticHandler::new(); + expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); + executor.set_static_handler(handler); + let executor = executor.as_executor(); let mgr = ServiceManager::new( log.clone(), @@ -3115,7 +3182,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage, ) .await .unwrap(); @@ -3135,23 +3202,32 @@ mod test { ) .unwrap(); - let id = Uuid::new_v4(); ensure_new_service(&mgr, id).await; ensure_existing_service(&mgr, id).await; - drop_service_manager(mgr); + drop(mgr); logctx.cleanup_successful(); } #[tokio::test] - #[serial_test::serial] async fn test_services_are_recreated_on_reboot() { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); let log = logctx.log.clone(); let 
test_config = TestConfig::new().await; - let executor = FakeExecutor::new(log.clone()).as_executor(); + + let storage = StorageResources::new_for_test(); + let u2_mountpoints = storage.all_u2_mountpoints(ZONE_DATASET).await; + assert_eq!(u2_mountpoints.len(), 1); + let u2_mountpoint = &u2_mountpoints[0]; + + let executor = FakeExecutor::new(log.clone()); + let id = Uuid::new_v4(); + let mut handler = StaticHandler::new(); + expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); + executor.set_static_handler(handler); + let executor = executor.as_executor(); // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -3167,7 +3243,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage.clone(), ) .await .unwrap(); @@ -3187,13 +3263,32 @@ mod test { ) .unwrap(); - let id = Uuid::new_v4(); ensure_new_service(&mgr, id).await; - drop_service_manager(mgr); + drop(mgr); // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. - let _expectations = expect_new_service(); + let executor = FakeExecutor::new(log.clone()); + let mut handler = StaticHandler::new(); + + handler.expect_dynamic(Box::new(|input| -> Output { + assert_eq!(input.program, PFEXEC); + assert_eq!(input.args[0], "/usr/platform/oxide/bin/tmpx"); + // input.args[1] is the current time. + assert_eq!(input.args[2], "/var/adm/utmpx"); + Output::success() + })); + handler.expect_dynamic(Box::new(|input| -> Output { + assert_eq!(input.program, PFEXEC); + assert_eq!(input.args[0], "/usr/platform/oxide/bin/tmpx"); + // input.args[1] is the current time. 
+ assert_eq!(input.args[2], "/var/adm/wtmpx"); + Output::success() + })); + expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); + executor.set_static_handler(handler); + let executor = executor.as_executor(); + let mgr = ServiceManager::new( log.clone(), &executor, @@ -3206,7 +3301,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage, ) .await .unwrap(); @@ -3226,20 +3321,31 @@ mod test { ) .unwrap(); - drop_service_manager(mgr); + mgr.load_services().await.expect("Failed to load services"); + drop(mgr); logctx.cleanup_successful(); } #[tokio::test] - #[serial_test::serial] async fn test_services_do_not_persist_without_config() { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let executor = FakeExecutor::new(log.clone()).as_executor(); + + let storage = StorageResources::new_for_test(); + let u2_mountpoints = storage.all_u2_mountpoints(ZONE_DATASET).await; + assert_eq!(u2_mountpoints.len(), 1); + let u2_mountpoint = &u2_mountpoints[0]; + + let executor = FakeExecutor::new(log.clone()); + let id = Uuid::new_v4(); + let mut handler = StaticHandler::new(); + expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); + executor.set_static_handler(handler); + let executor = executor.as_executor(); // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -3255,7 +3361,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage.clone(), ) .await .unwrap(); @@ -3275,9 +3381,8 @@ mod test { ) .unwrap(); - let id = Uuid::new_v4(); ensure_new_service(&mgr, id).await; - drop_service_manager(mgr); + drop(mgr); // Next, delete the ledger. 
This means the service we just created will // not be remembered on the next initialization. @@ -3299,7 +3404,7 @@ mod test { SidecarRevision::Physical("rev-test".to_string()), SWITCH_ZONE_BOOTSTRAP_IP, vec![], - StorageResources::new_for_test(), + storage, ) .await .unwrap(); @@ -3319,7 +3424,7 @@ mod test { ) .unwrap(); - drop_service_manager(mgr); + drop(mgr); logctx.cleanup_successful(); } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 670fde98d4..36cdaca782 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -29,6 +29,7 @@ use illumos_utils::dladm::Dladm; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::process::BoxedExecutor; +use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; use omicron_common::address::{ @@ -52,11 +53,6 @@ use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; -#[cfg(test)] -use illumos_utils::zone::MockZones as Zones; -#[cfg(not(test))] -use illumos_utils::zone::Zones; - #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Configuration error: {0}")] @@ -193,6 +189,9 @@ struct SledAgentInner { // Logger used for generic sled agent operations, e.g., zone bundles. log: Logger, + // Sled Agent's interaction with the host system + executor: BoxedExecutor, + // Subnet of the Sled's underlay. // // The Sled Agent's address can be derived from this value. @@ -414,6 +413,7 @@ impl SledAgent { inner: Arc::new(SledAgentInner { id: request.id, log: log.clone(), + executor: executor.clone(), subnet: request.subnet, storage, instances, @@ -820,7 +820,7 @@ impl SledAgent { /// List the zones that the sled agent is currently managing. 
pub async fn zones_list(&self) -> Result, Error> { - Zones::get() + Zones::get(&self.inner.executor) .await .map(|zones| { zones diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index df54a03f4f..4565e18510 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -603,7 +603,7 @@ impl DumpSetupWorker { // its 'sync' and 'async' features simultaneously :( let rt = tokio::runtime::Runtime::new().map_err(ArchiveLogsError::Tokio)?; - let oxz_zones = rt.block_on(Zones::get())?; + let oxz_zones = rt.block_on(Zones::get(&self.executor))?; self.archive_logs_inner( debug_dir, PathBuf::from("/var/svc/log"), diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index c6bc09f41e..df1513be01 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -30,6 +30,4 @@ illumos-devinfo = { git = "https://github.com/oxidecomputer/illumos-devinfo", br libefi-illumos = { git = "https://github.com/oxidecomputer/libefi-illumos", branch = "master" } [dev-dependencies] -illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true -serial_test.workspace = true From dc3a6aa3df53de39035065db0be3941c56adb60b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 11 Aug 2023 12:35:23 -0700 Subject: [PATCH 32/57] Clippy for tests --- sled-agent/src/services.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 151fce9bbf..c283b91625 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2971,19 +2971,19 @@ mod test { Output::success(), ); handler.expect( - Input::shell(format!("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/address=[::1]:12223")), + Input::shell("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/address=[::1]:12223"), Output::success(), ); handler.expect( - Input::shell(format!( - "echo /usr/sbin/svccfg -s 
svc:/oxide/oximeter:default refresh" - )), + Input::shell( + "echo /usr/sbin/svccfg -s svc:/oxide/oximeter:default refresh", + ), Output::success(), ); handler.expect( - Input::shell(format!( - "echo /usr/sbin/svcadm enable -t svc:/oxide/oximeter:default" - )), + Input::shell( + "echo /usr/sbin/svcadm enable -t svc:/oxide/oximeter:default", + ), Output::success(), ); } From 629ed64a56cab7d3fc3e9d73bb2ee09f5c4e453d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 11 Aug 2023 13:38:36 -0700 Subject: [PATCH 33/57] Deduplicate --- illumos-utils/src/process.rs | 90 ++++++++++++++++++++++++++++-------- illumos-utils/src/zone.rs | 30 ++++++------ 2 files changed, 87 insertions(+), 33 deletions(-) diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/process.rs index f3838d2f88..c9b9886c0d 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/process.rs @@ -4,6 +4,7 @@ //! A process executor +use async_trait::async_trait; use itertools::Itertools; use slog::{debug, error, info, Logger}; use std::collections::VecDeque; @@ -22,13 +23,13 @@ pub type BoxedExecutor = Arc; /// /// - In production, this is usually simply a [HostExecutor]. /// - Under test, this can be customized, and a [FakeExecutor] may be used. +#[async_trait] pub trait Executor: Send + Sync { - // TODO: Would be nice to have an async variant of this? - // - Is that possible? - // - Could it be additive? - // - // XXX: If we don't have that, I think we're regressing for the - // zone commands, which were previously async. + /// Executes a task, waiting for it to complete, and returning output. + async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result; /// Executes a task, waiting for it to complete, and returning output. 
fn execute(&self, command: &mut Command) -> Result; @@ -500,10 +501,11 @@ impl FakeExecutor { pub fn as_executor(self: Arc) -> BoxedExecutor { self } -} -impl Executor for FakeExecutor { - fn execute(&self, command: &mut Command) -> Result { + fn execute_internal( + &self, + command: &Command, + ) -> Result { let id = self.inner.counter.fetch_add(1, Ordering::SeqCst); log_command(&self.inner.log, id, command); @@ -524,6 +526,25 @@ impl Executor for FakeExecutor { } Ok(output) } +} + +#[async_trait] +impl Executor for FakeExecutor { + // NOTE: We aren't actually performing any async operations -- it's up to + // the caller to control the (synchronous) handlers. + // + // However, this still provides testability, while letting the "real + // executor" make truly async calls while launching processes. + async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result { + self.execute_internal(command.as_std()) + } + + fn execute(&self, command: &mut Command) -> Result { + self.execute_internal(command) + } fn spawn( &self, @@ -549,12 +570,49 @@ impl HostExecutor { pub fn as_executor(self: Arc) -> BoxedExecutor { self } + + fn prepare(&self, command: &Command) -> u64 { + let id = self.counter.fetch_add(1, Ordering::SeqCst); + log_command(&self.log, id, command); + id + } + + fn finalize( + &self, + command: &Command, + id: u64, + output: Output, + ) -> Result { + log_output(&self.log, id, &output); + if !output.status.success() { + return Err(output_to_exec_error( + command_to_string(command), + &output, + )); + } + Ok(output) + } } +#[async_trait] impl Executor for HostExecutor { + async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result { + let id = self.prepare(command.as_std()); + let output = command.output().await.map_err(|err| { + error!(self.log, "{id} - Could not start program!"); + ExecutionError::ExecutionStart { + command: Input::from(command.as_std()).to_string(), + err, + } + })?; + 
self.finalize(command.as_std(), id, output) + } + fn execute(&self, command: &mut Command) -> Result { - let id = self.counter.fetch_add(1, Ordering::SeqCst); - log_command(&self.log, id, command); + let id = self.prepare(command); let output = command.output().map_err(|err| { error!(self.log, "{id} - Could not start program!"); ExecutionError::ExecutionStart { @@ -562,15 +620,7 @@ impl Executor for HostExecutor { err, } })?; - log_output(&self.log, id, &output); - - if !output.status.success() { - return Err(output_to_exec_error( - command_to_string(&command), - &output, - )); - } - Ok(output) + self.finalize(command, id, output) } fn spawn( diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index ec3c2b07a4..8f3d1b135f 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -349,7 +349,8 @@ impl Zones { ..Default::default() }); } - executor.execute(&mut cfg.as_command()).map_err(|err| AdmError { + let mut cmd = tokio::process::Command::from(cfg.as_command()); + executor.execute_async(&mut cmd).await.map_err(|err| AdmError { op: Operation::Configure, zone: zone_name.to_string(), err: Box::new(err), @@ -357,16 +358,17 @@ impl Zones { info!(log, "Installing Omicron zone: {}", zone_name); - executor - .execute(&mut zone::Adm::new(zone_name).install_command(&[ + let mut cmd = tokio::process::Command::from( + zone::Adm::new(zone_name).install_command(&[ zone_image.as_ref(), "/opt/oxide/overlay.tar.gz".as_ref(), - ])) - .map_err(|err| AdmError { - op: Operation::Install, - zone: zone_name.to_string(), - err: Box::new(err), - })?; + ]), + ); + executor.execute_async(&mut cmd).await.map_err(|err| AdmError { + op: Operation::Install, + zone: zone_name.to_string(), + err: Box::new(err), + })?; Ok(()) } @@ -375,9 +377,10 @@ impl Zones { executor: &BoxedExecutor, name: &str, ) -> Result<(), AdmError> { - let mut cmd = zone::Adm::new(name).boot_command(); + let mut cmd = + tokio::process::Command::from(zone::Adm::new(name).boot_command()); - 
executor.execute(&mut cmd).map_err(|err| AdmError { + executor.execute_async(&mut cmd).await.map_err(|err| AdmError { op: Operation::Boot, zone: name.to_string(), err: Box::new(err), @@ -397,9 +400,10 @@ impl Zones { err, }; - let mut cmd = zone::Adm::list_command(); + let mut cmd = tokio::process::Command::from(zone::Adm::list_command()); let output = executor - .execute(&mut cmd) + .execute_async(&mut cmd) + .await .map_err(|err| handle_err(Box::new(err)))?; let zones = zone::Adm::parse_list_output(&output) From 3420a9d5bb9951a381d3fff08ed8e538f936fed9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 14 Aug 2023 15:11:09 -0700 Subject: [PATCH 34/57] less echo --- illumos-utils/src/running_zone.rs | 8 +++----- sled-agent/src/services.rs | 27 +++++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 4095b1d663..c471ddf620 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -10,7 +10,7 @@ use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; use crate::process::{BoxedExecutor, ExecutionError}; use crate::svc::wait_for_service; -use crate::zone::{AddressRequest, Zones, IPADM, ZONE_PREFIX}; +use crate::zone::{AddressRequest, Zones, IPADM, ZLOGIN, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; use ipnetwork::IpNetwork; use omicron_common::backoff; @@ -474,10 +474,8 @@ impl RunningZone { { // NOTE: This implementation is useless, and will never work. However, // it must actually call `execute()` for the testing purposes. - // That's mocked by `mockall` to return known data, and so the command - // that's actually run is irrelevant. 
- let mut command = std::process::Command::new("echo"); - let command = command.args(args); + let mut command = std::process::Command::new(crate::process::PFEXEC); + let command = command.arg(ZLOGIN).arg(self.name()).args(args); self.inner .executor .execute(command) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c7dbeddc7b..b81e6b2958 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2874,7 +2874,7 @@ mod test { UNDERLAY_ETHERSTUB_VNIC_NAME, }, process::{FakeExecutor, Input, Output, OutputExt, StaticHandler}, - zone::{ZONEADM, ZONECFG}, + zone::{ZLOGIN, ZONEADM, ZONECFG}, }; use key_manager::{ SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, @@ -2958,45 +2958,48 @@ mod test { ) ); - // TODO: The "echo" is a linux-only hack for commands which would - // typically run within a zone. + let login = format!("{PFEXEC} {ZLOGIN} {zone_name}"); handler.expect( - Input::shell("echo /usr/sbin/ipadm create-if -t oxControlService0"), + Input::shell(format!( + "{login} /usr/sbin/ipadm create-if -t oxControlService0" + )), Output::success(), ); handler.expect( - Input::shell("echo /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv4 oxControlService0"), + Input::shell(format!("{login} /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv4 oxControlService0")), Output::success(), ); handler.expect( - Input::shell("echo /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv6 oxControlService0"), + Input::shell(format!("{login} /usr/sbin/ipadm set-ifprop -t -p mtu=9000 -m ipv6 oxControlService0")), Output::success(), ); handler.expect( - Input::shell("echo /usr/sbin/route add -inet6 default -inet6 ::1"), + Input::shell(format!( + "{login} /usr/sbin/route add -inet6 default -inet6 ::1" + )), Output::success(), ); handler.expect( - Input::shell("echo /usr/sbin/svccfg import /var/svc/manifest/site/oximeter/manifest.xml"), + Input::shell(format!("{login} /usr/sbin/svccfg import /var/svc/manifest/site/oximeter/manifest.xml")), 
Output::success(), ); handler.expect( - Input::shell(format!("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/id={zone_id}")), + Input::shell(format!("{login} /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/id={zone_id}")), Output::success(), ); handler.expect( - Input::shell("echo /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/address=[::1]:12223"), + Input::shell(format!("{login} /usr/sbin/svccfg -s svc:/oxide/oximeter setprop config/address=[::1]:12223")), Output::success(), ); handler.expect( Input::shell( - "echo /usr/sbin/svccfg -s svc:/oxide/oximeter:default refresh", + format!("{login} /usr/sbin/svccfg -s svc:/oxide/oximeter:default refresh"), ), Output::success(), ); handler.expect( Input::shell( - "echo /usr/sbin/svcadm enable -t svc:/oxide/oximeter:default", + format!("{login} /usr/sbin/svcadm enable -t svc:/oxide/oximeter:default"), ), Output::success(), ); From 673b0edef7e94cea1dbbdc5960af8ba47f59869d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 14 Aug 2023 16:15:22 -0700 Subject: [PATCH 35/57] Refactor emulated host --- illumos-utils/src/dladm.rs | 4 +- illumos-utils/src/dumpadm.rs | 2 +- illumos-utils/src/fstyp.rs | 2 +- illumos-utils/src/host/byte_queue.rs | 37 + illumos-utils/src/host/error.rs | 52 ++ .../src/{process.rs => host/executor.rs} | 708 +++++++----------- illumos-utils/src/host/input.rs | 66 ++ illumos-utils/src/host/mod.rs | 20 + illumos-utils/src/host/output.rs | 59 ++ illumos-utils/src/lib.rs | 2 +- illumos-utils/src/link.rs | 4 +- illumos-utils/src/opte/port.rs | 2 +- illumos-utils/src/opte/port_manager.rs | 2 +- illumos-utils/src/running_zone.rs | 6 +- illumos-utils/src/svc.rs | 2 +- illumos-utils/src/zfs.rs | 2 +- illumos-utils/src/zone.rs | 4 +- illumos-utils/src/zpool.rs | 2 +- installinator/src/bootstrap.rs | 2 +- installinator/src/dispatch.rs | 2 +- installinator/src/hardware.rs | 2 +- installinator/src/main.rs | 2 +- installinator/src/write.rs | 6 +- package/src/bin/omicron-package.rs | 2 +- 
sled-agent/src/bin/sled-agent.rs | 2 +- sled-agent/src/bootstrap/agent.rs | 4 +- sled-agent/src/bootstrap/hardware.rs | 2 +- sled-agent/src/bootstrap/server.rs | 2 +- sled-agent/src/config.rs | 2 +- sled-agent/src/instance.rs | 2 +- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/server.rs | 2 +- sled-agent/src/services.rs | 6 +- sled-agent/src/sled_agent.rs | 6 +- sled-agent/src/storage/dump_setup.rs | 2 +- sled-agent/src/storage_manager.rs | 2 +- sled-agent/src/swap_device.rs | 2 +- sled-hardware/src/cleanup.rs | 2 +- sled-hardware/src/disk.rs | 2 +- sled-hardware/src/illumos/partitions.rs | 4 +- sled-hardware/src/non_illumos/mod.rs | 2 +- sled-hardware/src/underlay.rs | 4 +- wicketd/tests/integration_tests/updates.rs | 2 +- 43 files changed, 549 insertions(+), 495 deletions(-) create mode 100644 illumos-utils/src/host/byte_queue.rs create mode 100644 illumos-utils/src/host/error.rs rename illumos-utils/src/{process.rs => host/executor.rs} (75%) create mode 100644 illumos-utils/src/host/input.rs create mode 100644 illumos-utils/src/host/mod.rs create mode 100644 illumos-utils/src/host/output.rs diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 62609b80c7..b27d349e1a 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -4,8 +4,8 @@ //! Utilities for poking at data links. 
+use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::link::{Link, LinkKind}; -use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::zone::IPADM; use omicron_common::api::external::MacAddr; use omicron_common::vlan::VlanID; @@ -560,7 +560,7 @@ impl Dladm { #[cfg(test)] mod test { use super::*; - use crate::process::{FakeExecutor, Input, OutputExt, StaticHandler}; + use crate::host::{FakeExecutor, Input, OutputExt, StaticHandler}; use omicron_test_utils::dev; use std::process::Output; diff --git a/illumos-utils/src/dumpadm.rs b/illumos-utils/src/dumpadm.rs index 7f40254884..89c4003e40 100644 --- a/illumos-utils/src/dumpadm.rs +++ b/illumos-utils/src/dumpadm.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::process::{BoxedExecutor, ExecutionError}; +use crate::host::{BoxedExecutor, ExecutionError}; use byteorder::{LittleEndian, ReadBytesExt}; use camino::Utf8PathBuf; diff --git a/illumos-utils/src/fstyp.rs b/illumos-utils/src/fstyp.rs index 51e49b4bd2..c16cba91fb 100644 --- a/illumos-utils/src/fstyp.rs +++ b/illumos-utils/src/fstyp.rs @@ -4,7 +4,7 @@ //! Helper for calling fstyp. -use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; +use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::zpool::ZpoolName; use camino::Utf8Path; use std::str::FromStr; diff --git a/illumos-utils/src/host/byte_queue.rs b/illumos-utils/src/host/byte_queue.rs new file mode 100644 index 0000000000..372ca20f99 --- /dev/null +++ b/illumos-utils/src/host/byte_queue.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::collections::VecDeque; +use std::sync::{Arc, Mutex}; + +/// A queue of bytes that can selectively act as a reader or writer, +/// which can also be cloned. +/// +/// This is primarily used to emulate stdin / stdout / stderr. +#[derive(Clone)] +pub(crate) struct ByteQueue { + buf: Arc>>, +} + +impl ByteQueue { + pub fn new() -> Self { + Self { buf: Arc::new(Mutex::new(VecDeque::new())) } + } +} + +impl std::io::Write for ByteQueue { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buf.lock().unwrap().write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl std::io::Read for ByteQueue { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.buf.lock().unwrap().read(buf) + } +} diff --git a/illumos-utils/src/host/error.rs b/illumos-utils/src/host/error.rs new file mode 100644 index 0000000000..ced633c103 --- /dev/null +++ b/illumos-utils/src/host/error.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use slog::error; + +#[derive(Debug)] +pub struct FailureInfo { + pub command: String, + pub status: std::process::ExitStatus, + pub stdout: String, + pub stderr: String, +} + +impl std::fmt::Display for FailureInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "Command [{}] executed and failed with status: {}", + self.command, self.status + )?; + write!(f, " stdout: {}", self.stdout)?; + write!(f, " stderr: {}", self.stderr) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum ExecutionError { + #[error("Failed to start execution of [{command}]: {err}")] + ExecutionStart { command: String, err: std::io::Error }, + + #[error("{0}")] + CommandFailure(Box), + + #[error("Failed to enter zone: {err}")] + ZoneEnter { err: std::io::Error }, + + #[error("Zone not running")] + NotRunning, +} + +pub fn output_to_exec_error( + command_str: String, + output: &std::process::Output, +) -> ExecutionError { + ExecutionError::CommandFailure(Box::new(FailureInfo { + command: command_str, + status: output.status, + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + })) +} diff --git a/illumos-utils/src/process.rs b/illumos-utils/src/host/executor.rs similarity index 75% rename from illumos-utils/src/process.rs rename to illumos-utils/src/host/executor.rs index c9b9886c0d..d34a22a668 100644 --- a/illumos-utils/src/process.rs +++ b/illumos-utils/src/host/executor.rs @@ -2,229 +2,20 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! 
A process executor +use crate::host::{ + byte_queue::ByteQueue, error::ExecutionError, input::Input, + output::output_to_exec_error, output::Output, output::OutputExt, +}; use async_trait::async_trait; use itertools::Itertools; use slog::{debug, error, info, Logger}; -use std::collections::VecDeque; use std::io::{Read, Write}; -use std::os::unix::process::ExitStatusExt; -use std::process::{Command, ExitStatus, Stdio}; +use std::process::{Command, Stdio}; use std::str::from_utf8; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -/// Describes the commonly-used "safe-to-reference" type describing the -/// Executor as a trait object. -pub type BoxedExecutor = Arc; - -/// Describes an "executor", which can run [Command]s and return a response. -/// -/// - In production, this is usually simply a [HostExecutor]. -/// - Under test, this can be customized, and a [FakeExecutor] may be used. -#[async_trait] -pub trait Executor: Send + Sync { - /// Executes a task, waiting for it to complete, and returning output. - async fn execute_async( - &self, - command: &mut tokio::process::Command, - ) -> Result; - - /// Executes a task, waiting for it to complete, and returning output. - fn execute(&self, command: &mut Command) -> Result; - - /// Spawns a task, without waiting for it to complete. - fn spawn( - &self, - command: &mut Command, - ) -> Result; -} - -/// A wrapper around a spawned [Child] process. -pub type BoxedChild = Box; - -/// A child process spawned by the executor. -pub trait Child: Send { - /// Accesses the stdin of the spawned child, as a Writer. - fn stdin(&mut self) -> Option>; - - /// Accesses the stdout of the spawned child, as a Reader. - fn stdout(&mut self) -> Option>; - - /// Accesses the stderr of the spawned child, as a Reader. - fn stderr(&mut self) -> Option>; - - /// OS-assigned PID identifier for the child - fn id(&self) -> u32; - - /// Waits for the child to complete, and returns the output. 
- fn wait(&mut self) -> Result; -} - -/// A real, host-controlled child process -pub struct SpawnedChild { - command_str: String, - child: Option, -} - -impl Child for SpawnedChild { - fn stdin(&mut self) -> Option> { - self.child - .as_mut()? - .stdin - .take() - .map(|s| Box::new(s) as Box) - } - - fn stdout(&mut self) -> Option> { - self.child - .as_mut()? - .stdout - .take() - .map(|s| Box::new(s) as Box) - } - - fn stderr(&mut self) -> Option> { - self.child - .as_mut()? - .stderr - .take() - .map(|s| Box::new(s) as Box) - } - - fn id(&self) -> u32 { - self.child.as_ref().expect("No child").id() - } - - fn wait(&mut self) -> Result { - let output = - self.child.take().unwrap().wait_with_output().map_err(|err| { - ExecutionError::ExecutionStart { - command: self.command_str.clone(), - err, - } - })?; - - if !output.status.success() { - return Err(output_to_exec_error( - self.command_str.clone(), - &output, - )); - } - - Ok(output) - } -} - -/// A queue of bytes that can selectively act as a reader or writer, -/// which can also be cloned. -/// -/// This is primarily used to emulate stdin / stdout / stderr. -#[derive(Clone)] -struct ByteQueue { - buf: Arc>>, -} - -impl ByteQueue { - fn new() -> Self { - Self { buf: Arc::new(Mutex::new(VecDeque::new())) } - } -} - -impl std::io::Write for ByteQueue { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.buf.lock().unwrap().write(buf) - } - - fn flush(&mut self) -> std::io::Result<()> { - Ok(()) - } -} - -impl std::io::Read for ByteQueue { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - self.buf.lock().unwrap().read(buf) - } -} - -/// A child spawned by a [FakeExecutor]. -pub struct FakeChild { - id: u64, - command: Command, - executor: Arc, - stdin: ByteQueue, - stdout: ByteQueue, - stderr: ByteQueue, -} - -impl FakeChild { - fn new( - id: u64, - command: &Command, - executor: Arc, - ) -> Box { - // std::process::Command -- somewhat reasonably - doesn't implement Copy - // or Clone. 
However, we'd like to be able to reference it in the - // FakeChild, independently of where it was spawned. - // - // Manually copy the relevant pieces of the incoming command. - let mut copy_command = Command::new(command.get_program()); - copy_command.args(command.get_args()); - copy_command.envs(command.get_envs().filter_map(|(k, v)| { - if let Some(v) = v { - Some((k, v)) - } else { - None - } - })); - - Box::new(FakeChild { - id, - command: copy_command, - executor, - stdin: ByteQueue::new(), - stdout: ByteQueue::new(), - stderr: ByteQueue::new(), - }) - } - - pub fn command(&self) -> &Command { - &self.command - } -} - -impl Child for FakeChild { - fn stdin(&mut self) -> Option> { - Some(Box::new(self.stdin.clone())) - } - - fn stdout(&mut self) -> Option> { - Some(Box::new(self.stdout.clone())) - } - - fn stderr(&mut self) -> Option> { - Some(Box::new(self.stderr.clone())) - } - - fn id(&self) -> u32 { - self.id.try_into().expect("u32 overflow") - } - - fn wait(&mut self) -> Result { - let executor = self.executor.clone(); - let output = executor.wait_handler.lock().unwrap()(self); - log_output(&self.executor.log, self.id, &output); - if !output.status.success() { - return Err(output_to_exec_error( - command_to_string(&self.command), - &output, - )); - } - Ok(output) - } -} - pub fn command_to_string(command: &std::process::Command) -> String { command .get_args() @@ -233,6 +24,18 @@ pub fn command_to_string(command: &std::process::Command) -> String { .join(" ") } +fn to_space_separated_string(iter: T) -> String +where + T: IntoIterator, + I: std::fmt::Debug, +{ + Itertools::intersperse( + iter.into_iter().map(|arg| format!("{arg:?}")), + " ".into(), + ) + .collect::() +} + fn log_command(log: &Logger, id: u64, command: &Command) { info!(log, "{id} - Running Command: [{}]", Input::from(command),); debug!( @@ -264,190 +67,37 @@ fn log_output(log: &Logger, id: u64, output: &Output) { debug!( log, "{id} - stderr: {}", - from_utf8(&output.stderr).unwrap_or(""), 
- ); - } -} - -/// Wrapper around the input of a [std::process::Command] as strings. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Input { - pub program: String, - pub args: Vec, - pub envs: Vec<(String, String)>, -} - -impl Input { - pub fn new>(program: S, args: Vec) -> Self { - Self { - program: program.as_ref().to_string(), - args: args.into_iter().map(|s| s.as_ref().to_string()).collect(), - envs: vec![], - } - } - - /// Short-hand for a whitespace-separated string, which can be provided - /// "like a shell command". - pub fn shell>(input: S) -> Self { - let mut args = input.as_ref().split_whitespace(); - - Self::new( - args.next().expect("Needs at least a program"), - args.collect(), - ) - } -} - -impl std::fmt::Display for Input { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.program)?; - for arg in &self.args { - write!(f, " {}", arg)?; - } - Ok(()) - } -} - -fn os_str_to_string(s: &std::ffi::OsStr) -> String { - s.to_string_lossy().to_string() -} - -impl From<&Command> for Input { - fn from(command: &Command) -> Self { - Self { - program: os_str_to_string(command.get_program()), - args: command.get_args().map(os_str_to_string).collect(), - envs: command - .get_envs() - .map(|(k, v)| { - ( - os_str_to_string(k), - os_str_to_string(v.unwrap_or_default()), - ) - }) - .collect(), - } - } -} - -pub type Output = std::process::Output; - -/// Convenience functions for usage in tests, to perform common operations -/// with minimal boilerplate. 
-pub trait OutputExt: Sized { - fn success() -> Self; - fn failure() -> Self; - fn set_stdout>(self, stdout: S) -> Self; - fn set_stderr>(self, stderr: S) -> Self; -} - -impl OutputExt for Output { - fn success() -> Self { - Output { - status: ExitStatus::from_raw(0), - stdout: vec![], - stderr: vec![], - } - } - - fn failure() -> Self { - Output { - status: ExitStatus::from_raw(-1), - stdout: vec![], - stderr: vec![], - } - } - - fn set_stdout>(mut self, stdout: S) -> Self { - self.stdout = stdout.as_ref().as_bytes().to_vec(); - self - } - - fn set_stderr>(mut self, stderr: S) -> Self { - self.stderr = stderr.as_ref().as_bytes().to_vec(); - self - } -} - -type DynamicHandler = Box Output + Send + Sync>; - -enum HandledCommand { - Static { input: Input, output: Output }, - Dynamic { handler: DynamicHandler }, -} - -/// A handler that may be used for setting inputs/outputs to the executor -/// when these commands are known ahead-of-time. -/// -/// See: [FakeExecutor::set_static_handler] for usage. -pub struct StaticHandler { - expected: Vec, - index: usize, -} - -impl StaticHandler { - pub fn new() -> Self { - Self { expected: Vec::new(), index: 0 } - } - - /// Expects a static "input" to exactly produce some "output". - pub fn expect(&mut self, input: Input, output: Output) { - self.expected.push(HandledCommand::Static { input, output }); - } - - /// A helper for [Self::expect] which quietly succeeds. - pub fn expect_ok>(&mut self, input: S) { - self.expect(Input::shell(input), Output::success()) - } - - /// A helper for [Self::expect] which quietly fails. - pub fn expect_fail>(&mut self, input: S) { - self.expect(Input::shell(input), Output::failure()) - } - - /// Expects a dynamic handler to be invoked to dynamically - /// determine the output of this call. 
- pub fn expect_dynamic(&mut self, handler: DynamicHandler) { - self.expected.push(HandledCommand::Dynamic { handler }); - } - - fn execute(&mut self, command: &Command) -> Output { - let observed_input = Input::from(command); - let expected = &mut self - .expected - .get_mut(self.index) - .unwrap_or_else(|| panic!("Unexpected command: {observed_input}")); - self.index += 1; - - match expected { - HandledCommand::Static { input, output } => { - assert_eq!(&observed_input, input, "Unexpected input command"); - output.clone() - } - HandledCommand::Dynamic { ref mut handler } => { - handler(observed_input) - } - } - } -} - -impl Drop for StaticHandler { - fn drop(&mut self) { - let expected = self.expected.len(); - let actual = self.index; - if actual < expected { - let next = &self.expected[actual]; - let tip = match next { - HandledCommand::Static { input, .. } => input.to_string(), - HandledCommand::Dynamic { .. } => { - "".to_string() - } - }; - assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); - } + from_utf8(&output.stderr).unwrap_or(""), + ); } } +/// Describes the commonly-used "safe-to-reference" type describing the +/// Executor as a trait object. +pub type BoxedExecutor = Arc; + +/// Describes an "executor", which can run [Command]s and return a response. +/// +/// - In production, this is usually simply a [HostExecutor]. +/// - Under test, this can be customized, and a [FakeExecutor] may be used. +#[async_trait] +pub trait Executor: Send + Sync { + /// Executes a task, waiting for it to complete, and returning output. + async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result; + + /// Executes a task, waiting for it to complete, and returning output. + fn execute(&self, command: &mut Command) -> Result; + + /// Spawns a task, without waiting for it to complete. 
+ fn spawn( + &self, + command: &mut Command, + ) -> Result; +} + /// Handler called when spawning a fake child process pub type SpawnFn = dyn FnMut(&mut FakeChild) + Send + Sync; pub type BoxedSpawnFn = Box; @@ -456,7 +106,7 @@ pub type BoxedSpawnFn = Box; pub type WaitFn = dyn FnMut(&mut FakeChild) -> Output + Send + Sync; pub type BoxedWaitFn = Box; -struct FakeExecutorInner { +pub(crate) struct FakeExecutorInner { log: Logger, counter: AtomicU64, spawn_handler: Mutex, @@ -491,6 +141,7 @@ impl FakeExecutor { } /// Set the request handler to a static set of inputs and outputs. + // TODO: Remove me, add a method to the StatiHandler itself. pub fn set_static_handler(&self, mut handler: StaticHandler) { self.set_wait_handler(Box::new(move |child| -> Output { handler.execute(child.command()) @@ -644,65 +295,234 @@ impl Executor for HostExecutor { } } -pub const PFEXEC: &str = "/usr/bin/pfexec"; +/// A wrapper around a spawned [Child] process. +pub type BoxedChild = Box; + +/// A child process spawned by the executor. +pub trait Child: Send { + /// Accesses the stdin of the spawned child, as a Writer. + fn stdin(&mut self) -> Option>; + + /// Accesses the stdout of the spawned child, as a Reader. + fn stdout(&mut self) -> Option>; + + /// Accesses the stderr of the spawned child, as a Reader. + fn stderr(&mut self) -> Option>; + + /// OS-assigned PID identifier for the child + fn id(&self) -> u32; + + /// Waits for the child to complete, and returns the output. + fn wait(&mut self) -> Result; +} + +/// A real, host-controlled child process +pub struct SpawnedChild { + command_str: String, + child: Option, +} + +impl Child for SpawnedChild { + fn stdin(&mut self) -> Option> { + self.child + .as_mut()? + .stdin + .take() + .map(|s| Box::new(s) as Box) + } + + fn stdout(&mut self) -> Option> { + self.child + .as_mut()? + .stdout + .take() + .map(|s| Box::new(s) as Box) + } + + fn stderr(&mut self) -> Option> { + self.child + .as_mut()? 
+ .stderr + .take() + .map(|s| Box::new(s) as Box) + } + + fn id(&self) -> u32 { + self.child.as_ref().expect("No child").id() + } + + fn wait(&mut self) -> Result { + let output = + self.child.take().unwrap().wait_with_output().map_err(|err| { + ExecutionError::ExecutionStart { + command: self.command_str.clone(), + err, + } + })?; + + if !output.status.success() { + return Err(output_to_exec_error( + self.command_str.clone(), + &output, + )); + } + + Ok(output) + } +} -#[derive(Debug)] -pub struct FailureInfo { - pub command: String, - pub status: std::process::ExitStatus, - pub stdout: String, - pub stderr: String, +/// A child spawned by a [FakeExecutor]. +pub struct FakeChild { + id: u64, + command: Command, + executor: Arc, + stdin: ByteQueue, + stdout: ByteQueue, + stderr: ByteQueue, } -impl std::fmt::Display for FailureInfo { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "Command [{}] executed and failed with status: {}", - self.command, self.status - )?; - write!(f, " stdout: {}", self.stdout)?; - write!(f, " stderr: {}", self.stderr) +impl FakeChild { + fn new( + id: u64, + command: &Command, + executor: Arc, + ) -> Box { + // std::process::Command -- somewhat reasonably - doesn't implement Copy + // or Clone. However, we'd like to be able to reference it in the + // FakeChild, independently of where it was spawned. + // + // Manually copy the relevant pieces of the incoming command. 
+ let mut copy_command = Command::new(command.get_program()); + copy_command.args(command.get_args()); + copy_command.envs(command.get_envs().filter_map(|(k, v)| { + if let Some(v) = v { + Some((k, v)) + } else { + None + } + })); + + Box::new(FakeChild { + id, + command: copy_command, + executor, + stdin: ByteQueue::new(), + stdout: ByteQueue::new(), + stderr: ByteQueue::new(), + }) + } + + pub fn command(&self) -> &Command { + &self.command } } -#[derive(thiserror::Error, Debug)] -pub enum ExecutionError { - #[error("Failed to start execution of [{command}]: {err}")] - ExecutionStart { command: String, err: std::io::Error }, +impl Child for FakeChild { + fn stdin(&mut self) -> Option> { + Some(Box::new(self.stdin.clone())) + } + + fn stdout(&mut self) -> Option> { + Some(Box::new(self.stdout.clone())) + } - #[error("{0}")] - CommandFailure(Box), + fn stderr(&mut self) -> Option> { + Some(Box::new(self.stderr.clone())) + } - #[error("Failed to enter zone: {err}")] - ZoneEnter { err: std::io::Error }, + fn id(&self) -> u32 { + self.id.try_into().expect("u32 overflow") + } - #[error("Zone not running")] - NotRunning, + fn wait(&mut self) -> Result { + let executor = self.executor.clone(); + let output = executor.wait_handler.lock().unwrap()(self); + log_output(&self.executor.log, self.id, &output); + if !output.status.success() { + return Err(output_to_exec_error( + command_to_string(&self.command), + &output, + )); + } + Ok(output) + } } -pub fn output_to_exec_error( - command_str: String, - output: &std::process::Output, -) -> ExecutionError { - ExecutionError::CommandFailure(Box::new(FailureInfo { - command: command_str, - status: output.status, - stdout: String::from_utf8_lossy(&output.stdout).to_string(), - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - })) +type DynamicHandler = Box Output + Send + Sync>; + +enum HandledCommand { + Static { input: Input, output: Output }, + Dynamic { handler: DynamicHandler }, } -// We wrap this method in an 
inner module to make it possible to mock -// these free functions. -fn to_space_separated_string(iter: T) -> String -where - T: IntoIterator, - I: std::fmt::Debug, -{ - Itertools::intersperse( - iter.into_iter().map(|arg| format!("{arg:?}")), - " ".into(), - ) - .collect::() +/// A handler that may be used for setting inputs/outputs to the executor +/// when these commands are known ahead-of-time. +/// +/// See: [FakeExecutor::set_static_handler] for usage. +pub struct StaticHandler { + expected: Vec, + index: usize, +} + +impl StaticHandler { + pub fn new() -> Self { + Self { expected: Vec::new(), index: 0 } + } + + /// Expects a static "input" to exactly produce some "output". + pub fn expect(&mut self, input: Input, output: Output) { + self.expected.push(HandledCommand::Static { input, output }); + } + + /// A helper for [Self::expect] which quietly succeeds. + pub fn expect_ok>(&mut self, input: S) { + self.expect(Input::shell(input), Output::success()) + } + + /// A helper for [Self::expect] which quietly fails. + pub fn expect_fail>(&mut self, input: S) { + self.expect(Input::shell(input), Output::failure()) + } + + /// Expects a dynamic handler to be invoked to dynamically + /// determine the output of this call. 
+ pub fn expect_dynamic(&mut self, handler: DynamicHandler) { + self.expected.push(HandledCommand::Dynamic { handler }); + } + + fn execute(&mut self, command: &Command) -> Output { + let observed_input = Input::from(command); + let expected = &mut self + .expected + .get_mut(self.index) + .unwrap_or_else(|| panic!("Unexpected command: {observed_input}")); + self.index += 1; + + match expected { + HandledCommand::Static { input, output } => { + assert_eq!(&observed_input, input, "Unexpected input command"); + output.clone() + } + HandledCommand::Dynamic { ref mut handler } => { + handler(observed_input) + } + } + } +} + +impl Drop for StaticHandler { + fn drop(&mut self) { + let expected = self.expected.len(); + let actual = self.index; + if actual < expected { + let next = &self.expected[actual]; + let tip = match next { + HandledCommand::Static { input, .. } => input.to_string(), + HandledCommand::Dynamic { .. } => { + "".to_string() + } + }; + assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); + } + } } diff --git a/illumos-utils/src/host/input.rs b/illumos-utils/src/host/input.rs new file mode 100644 index 0000000000..721dc1df23 --- /dev/null +++ b/illumos-utils/src/host/input.rs @@ -0,0 +1,66 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::process::Command; + +/// Wrapper around the input of a [std::process::Command] as strings. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Input { + pub program: String, + pub args: Vec, + pub envs: Vec<(String, String)>, +} + +impl Input { + pub fn new>(program: S, args: Vec) -> Self { + Self { + program: program.as_ref().to_string(), + args: args.into_iter().map(|s| s.as_ref().to_string()).collect(), + envs: vec![], + } + } + + /// Short-hand for a whitespace-separated string, which can be provided + /// "like a shell command". + pub fn shell>(input: S) -> Self { + let mut args = input.as_ref().split_whitespace(); + + Self::new( + args.next().expect("Needs at least a program"), + args.collect(), + ) + } +} + +impl std::fmt::Display for Input { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.program)?; + for arg in &self.args { + write!(f, " {}", arg)?; + } + Ok(()) + } +} + +fn os_str_to_string(s: &std::ffi::OsStr) -> String { + s.to_string_lossy().to_string() +} + +impl From<&Command> for Input { + fn from(command: &Command) -> Self { + Self { + program: os_str_to_string(command.get_program()), + args: command.get_args().map(os_str_to_string).collect(), + envs: command + .get_envs() + .map(|(k, v)| { + ( + os_str_to_string(k), + os_str_to_string(v.unwrap_or_default()), + ) + }) + .collect(), + } + } +} diff --git a/illumos-utils/src/host/mod.rs b/illumos-utils/src/host/mod.rs new file mode 100644 index 0000000000..87c0ae8e6c --- /dev/null +++ b/illumos-utils/src/host/mod.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Utilities to either access or emulate a host system + +mod byte_queue; +mod error; +mod executor; +mod input; +mod output; + +pub const PFEXEC: &str = "/usr/bin/pfexec"; + +pub use error::{output_to_exec_error, ExecutionError}; +pub use executor::{ + command_to_string, BoxedExecutor, FakeExecutor, HostExecutor, StaticHandler, +}; +pub use input::Input; +pub use output::{Output, OutputExt}; diff --git a/illumos-utils/src/host/output.rs b/illumos-utils/src/host/output.rs new file mode 100644 index 0000000000..3417eb2e84 --- /dev/null +++ b/illumos-utils/src/host/output.rs @@ -0,0 +1,59 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::host::error::{ExecutionError, FailureInfo}; + +use std::os::unix::process::ExitStatusExt; +use std::process::ExitStatus; + +pub type Output = std::process::Output; + +/// Convenience functions for usage in tests, to perform common operations +/// with minimal boilerplate. 
+pub trait OutputExt: Sized { + fn success() -> Self; + fn failure() -> Self; + fn set_stdout>(self, stdout: S) -> Self; + fn set_stderr>(self, stderr: S) -> Self; +} + +impl OutputExt for Output { + fn success() -> Self { + Output { + status: ExitStatus::from_raw(0), + stdout: vec![], + stderr: vec![], + } + } + + fn failure() -> Self { + Output { + status: ExitStatus::from_raw(-1), + stdout: vec![], + stderr: vec![], + } + } + + fn set_stdout>(mut self, stdout: S) -> Self { + self.stdout = stdout.as_ref().as_bytes().to_vec(); + self + } + + fn set_stderr>(mut self, stderr: S) -> Self { + self.stderr = stderr.as_ref().as_bytes().to_vec(); + self + } +} + +pub fn output_to_exec_error( + command_str: String, + output: &std::process::Output, +) -> ExecutionError { + ExecutionError::CommandFailure(Box::new(FailureInfo { + command: command_str, + status: output.status, + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + })) +} diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 1c2c1fb254..23cc379fef 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -11,10 +11,10 @@ pub mod dkio; pub mod dladm; pub mod dumpadm; pub mod fstyp; +pub mod host; pub mod libc; pub mod link; pub mod opte; -pub mod process; pub mod running_zone; pub mod svc; pub mod vmm_reservoir; diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index 15ed60663a..90f0dcc451 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -9,7 +9,7 @@ use crate::dladm::{ CreateVnicError, DeleteVnicError, Dladm, VnicSource, VNIC_PREFIX, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, }; -use crate::process::BoxedExecutor; +use crate::host::BoxedExecutor; use omicron_common::api::external::MacAddr; use std::sync::{ atomic::{AtomicU64, Ordering}, @@ -259,7 +259,7 @@ impl Deletable for VnicDestruction { mod test { use super::*; use crate::dladm::Etherstub; - 
use crate::process::FakeExecutor; + use crate::host::FakeExecutor; use omicron_test_utils::dev; #[tokio::test] diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 0080c91b3f..7b6efb9f25 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -4,9 +4,9 @@ //! A single port on the OPTE virtual switch. +use crate::host::BoxedExecutor; use crate::opte::Gateway; use crate::opte::Vni; -use crate::process::BoxedExecutor; use debug_ignore::DebugIgnore; use macaddr::MacAddr6; use std::net::IpAddr; diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 25db867f8b..190bf77e0a 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,6 +4,7 @@ //! Manager for all OPTE ports on a Helios system +use crate::host::BoxedExecutor; use crate::opte::default_boundary_services; use crate::opte::opte_firewall_rules; use crate::opte::params::SetVirtualNetworkInterfaceHost; @@ -12,7 +13,6 @@ use crate::opte::Error; use crate::opte::Gateway; use crate::opte::Port; use crate::opte::Vni; -use crate::process::BoxedExecutor; use debug_ignore::DebugIgnore; use ipnetwork::IpNetwork; use omicron_common::api::external; diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index c471ddf620..2cf3fa7bf8 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -6,9 +6,9 @@ use crate::addrobj::AddrObject; use crate::dladm::Etherstub; +use crate::host::{BoxedExecutor, ExecutionError}; use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; -use crate::process::{BoxedExecutor, ExecutionError}; use crate::svc::wait_for_service; use crate::zone::{AddressRequest, Zones, IPADM, ZLOGIN, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; @@ -436,7 +436,7 @@ impl RunningZone { RunCommandError { zone: self.name().to_string(), err } })?); let tmpl = std::sync::Arc::clone(&template); - 
let mut command = std::process::Command::new(crate::process::PFEXEC); + let mut command = std::process::Command::new(crate::host::PFEXEC); command.env_clear(); unsafe { command.pre_exec(move || { @@ -474,7 +474,7 @@ impl RunningZone { { // NOTE: This implementation is useless, and will never work. However, // it must actually call `execute()` for the testing purposes. - let mut command = std::process::Command::new(crate::process::PFEXEC); + let mut command = std::process::Command::new(crate::host::PFEXEC); let command = command.arg(ZLOGIN).arg(self.name()).args(args); self.inner .executor diff --git a/illumos-utils/src/svc.rs b/illumos-utils/src/svc.rs index 2678befa5b..3e3e69bd58 100644 --- a/illumos-utils/src/svc.rs +++ b/illumos-utils/src/svc.rs @@ -4,7 +4,7 @@ //! Utilities for accessing services. -use crate::process::BoxedExecutor; +use crate::host::BoxedExecutor; use omicron_common::api::external::Error; use omicron_common::backoff; diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index e92b43360b..f38582b16b 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -4,7 +4,7 @@ //! Utilities for poking at ZFS. 
-use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; +use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::Utf8PathBuf; use omicron_common::disk::DiskIdentity; use std::fmt; diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 8f3d1b135f..dc6c1e7539 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -14,7 +14,7 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::addrobj::AddrObject; use crate::dladm::{EtherstubVnic, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL}; -use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; +use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use omicron_common::address::SLED_PREFIX; const DLADM: &str = "/usr/sbin/dladm"; @@ -895,7 +895,7 @@ impl Zones { #[cfg(test)] mod tests { use super::*; - use crate::process::{FakeExecutor, Input, OutputExt, StaticHandler}; + use crate::host::{FakeExecutor, Input, OutputExt, StaticHandler}; use omicron_test_utils::dev; use std::process::Output; diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index af74708164..040c849b1e 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -4,7 +4,7 @@ //! Utilities for managing Zpools. 
-use crate::process::{BoxedExecutor, ExecutionError, PFEXEC}; +use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/installinator/src/bootstrap.rs b/installinator/src/bootstrap.rs index 0f0ae3d3aa..f91e3216d8 100644 --- a/installinator/src/bootstrap.rs +++ b/installinator/src/bootstrap.rs @@ -13,7 +13,7 @@ use ddm_admin_client::Client as DdmAdminClient; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm; use illumos_utils::dladm::Dladm; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::address::Ipv6Subnet; use sled_hardware::underlay; diff --git a/installinator/src/dispatch.rs b/installinator/src/dispatch.rs index d8140a7910..f2213ae7dd 100644 --- a/installinator/src/dispatch.rs +++ b/installinator/src/dispatch.rs @@ -8,7 +8,7 @@ use anyhow::{bail, Context, Result}; use buf_list::{BufList, Cursor}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Args, Parser, Subcommand}; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use installinator_common::{ InstallinatorCompletionMetadata, InstallinatorComponent, InstallinatorSpec, InstallinatorStepId, StepContext, StepHandle, StepProgress, StepSuccess, diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index a35871b624..5b467d5cae 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,7 +6,7 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; diff --git a/installinator/src/main.rs b/installinator/src/main.rs index 601649470f..bd4b4202b2 100644 --- a/installinator/src/main.rs +++ 
b/installinator/src/main.rs @@ -5,7 +5,7 @@ use std::error::Error; use clap::Parser; -use illumos_utils::process::HostExecutor; +use illumos_utils::host::HostExecutor; use installinator::InstallinatorApp; #[tokio::main] diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 38fa6c29d9..5cc0566d7a 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -17,7 +17,7 @@ use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::{ dkio::{self, MediaInfoExtended}, - process::BoxedExecutor, + host::BoxedExecutor, zpool::{Zpool, ZpoolName}, }; use installinator_common::{ @@ -1160,8 +1160,8 @@ mod tests { let engine = UpdateEngine::new(&logctx.log, event_sender); let log = logctx.log.clone(); - let executor = illumos_utils::process::FakeExecutor::new(log.clone()) - .as_executor(); + let executor = + illumos_utils::host::FakeExecutor::new(log.clone()).as_executor(); engine .new_step( InstallinatorComponent::Both, diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 857db6afe5..4c27282736 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -7,7 +7,7 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; -use illumos_utils::process::{BoxedExecutor, HostExecutor}; +use illumos_utils::host::{BoxedExecutor, HostExecutor}; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::target::KnownTarget; diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 24c374965b..6562b0c7d2 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -6,7 +6,7 @@ use camino::Utf8PathBuf; use clap::{Parser, Subcommand}; -use illumos_utils::process::HostExecutor; +use illumos_utils::host::HostExecutor; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use 
omicron_sled_agent::bootstrap::RssAccessError; diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index f98e5afb68..60d8c50c16 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -27,7 +27,7 @@ use ddm_admin_client::{Client as DdmAdminClient, DdmError}; use futures::stream::{self, StreamExt}; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm::{Dladm, Etherstub, EtherstubVnic, GetMacError}; -use illumos_utils::process::{BoxedExecutor, PFEXEC}; +use illumos_utils::host::{BoxedExecutor, PFEXEC}; use illumos_utils::zfs::{ self, Mountpoint, Zfs, ZONE_ZFS_RAMDISK_DATASET, ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT, @@ -63,7 +63,7 @@ pub enum BootstrapError { Cleanup(anyhow::Error), #[error("Failed to enable routing: {0}")] - EnablingRouting(illumos_utils::process::ExecutionError), + EnablingRouting(illumos_utils::host::ExecutionError), #[error("Error contacting ddmd: {0}")] DdmError(#[from] DdmError), diff --git a/sled-agent/src/bootstrap/hardware.rs b/sled-agent/src/bootstrap/hardware.rs index afe3674f30..22f037efc0 100644 --- a/sled-agent/src/bootstrap/hardware.rs +++ b/sled-agent/src/bootstrap/hardware.rs @@ -8,7 +8,7 @@ use crate::config::{Config as SledConfig, SledMode as SledModeConfig}; use crate::services::ServiceManager; use crate::storage_manager::{StorageManager, StorageResources}; use illumos_utils::dladm::{Etherstub, EtherstubVnic}; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use key_manager::StorageKeyRequester; use sled_hardware::{Baseboard, DendriteAsic, HardwareManager, SledMode}; use slog::Logger; diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 147dfb11c0..448953fa58 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -10,7 +10,7 @@ use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::maghemite; use 
crate::bootstrap::sprockets_server::SprocketsServer; use crate::config::Config as SledConfig; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use omicron_common::FileKv; use sled_hardware::underlay; use slog::Drain; diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 5d00e68bdf..c328c37d33 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -11,7 +11,7 @@ use illumos_utils::dladm::Dladm; use illumos_utils::dladm::FindPhysicalLinkError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zpool::ZpoolName; use omicron_common::vlan::VlanID; use serde::Deserialize; diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 3d11447e42..a9fb59a312 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -20,9 +20,9 @@ use anyhow::anyhow; use backoff::BackoffError; use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; +use illumos_utils::host::BoxedExecutor; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; -use illumos_utils::process::BoxedExecutor; use illumos_utils::running_zone::{InstalledZone, RunningZone}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index e67dac9836..1c7e9a4417 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -15,9 +15,9 @@ use crate::params::{ use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use illumos_utils::dladm::Etherstub; +use illumos_utils::host::BoxedExecutor; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; -use illumos_utils::process::BoxedExecutor; use illumos_utils::vmm_reservoir; use 
omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 595232948c..be3a4cb667 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,7 +12,7 @@ use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; use bootstore::schemes::v0 as bootstore; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b81e6b2958..0fa9a446d1 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -50,9 +50,9 @@ use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME; use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; +use illumos_utils::host::{BoxedExecutor, PFEXEC}; use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{Port, PortManager, PortTicket}; -use illumos_utils::process::{BoxedExecutor, PFEXEC}; use illumos_utils::running_zone::{ InstalledZone, RunCommandError, RunningZone, }; @@ -203,7 +203,7 @@ pub enum Error { NtpZoneNotReady, #[error("Execution error: {0}")] - ExecutionError(#[from] illumos_utils::process::ExecutionError), + ExecutionError(#[from] illumos_utils::host::ExecutionError), #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns::resolver::ResolveError), @@ -2873,7 +2873,7 @@ mod test { Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - process::{FakeExecutor, Input, Output, OutputExt, StaticHandler}, + host::{FakeExecutor, Input, Output, OutputExt, StaticHandler}, zone::{ZLOGIN, ZONEADM, ZONECFG}, }; use key_manager::{ diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 8ff4bdc225..a0188e8b0f 100644 --- 
a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -26,9 +26,9 @@ use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use dropshot::HttpError; use illumos_utils::dladm::Dladm; +use illumos_utils::host::BoxedExecutor; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; -use illumos_utils::process::BoxedExecutor; use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; @@ -62,7 +62,7 @@ pub enum Error { SwapDevice(#[from] crate::swap_device::SwapDeviceError), #[error("Failed to acquire etherstub: {0}")] - Etherstub(illumos_utils::process::ExecutionError), + Etherstub(illumos_utils::host::ExecutionError), #[error("Failed to acquire etherstub VNIC: {0}")] EtherstubVnic(illumos_utils::dladm::CreateVnicError), @@ -71,7 +71,7 @@ pub enum Error { Bootstrap(#[from] crate::bootstrap::agent::BootstrapError), #[error("Failed to remove Omicron address: {0}")] - DeleteAddress(#[from] illumos_utils::process::ExecutionError), + DeleteAddress(#[from] illumos_utils::host::ExecutionError), #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index 4565e18510..f6c3cd3909 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -2,7 +2,7 @@ use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; -use illumos_utils::process::{BoxedExecutor, ExecutionError}; +use illumos_utils::host::{BoxedExecutor, ExecutionError}; use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index e1975ca989..94003b64a0 100644 --- 
a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -15,7 +15,7 @@ use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; use illumos_utils::dumpadm::DumpHdrError; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 2d7958003f..fab171b989 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -4,7 +4,7 @@ //! Operations for creating a system swap device. -use illumos_utils::process::{ +use illumos_utils::host::{ command_to_string, output_to_exec_error, BoxedExecutor, ExecutionError, }; use std::io::Read; diff --git a/sled-hardware/src/cleanup.rs b/sled-hardware/src/cleanup.rs index 1a1b3c97d9..dee9be8ea9 100644 --- a/sled-hardware/src/cleanup.rs +++ b/sled-hardware/src/cleanup.rs @@ -11,9 +11,9 @@ use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_NAME; use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_VNIC_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_VNIC_NAME; +use illumos_utils::host::{BoxedExecutor, ExecutionError, PFEXEC}; use illumos_utils::link::LinkKind; use illumos_utils::opte; -use illumos_utils::process::{BoxedExecutor, ExecutionError, PFEXEC}; use illumos_utils::zone::IPADM; use slog::warn; use slog::Logger; diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 6e865de866..cfdd5596c1 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,7 +4,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs; use illumos_utils::zfs::DestroyDatasetErrorVariant; use illumos_utils::zfs::EncryptionDetails; diff 
--git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 023cc8e243..9a8e1b5049 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -7,7 +7,7 @@ use crate::illumos::gpt; use crate::{DiskError, DiskPaths, DiskVariant, Partition}; use camino::Utf8Path; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zpool::{Zpool, ZpoolName}; use slog::info; use slog::Logger; @@ -158,7 +158,7 @@ mod test { use super::*; use crate::DiskPaths; use camino::Utf8PathBuf; - use illumos_utils::process::{FakeExecutor, Input, OutputExt, PFEXEC}; + use illumos_utils::host::{FakeExecutor, Input, OutputExt, PFEXEC}; use illumos_utils::zpool::{ZpoolKind, ZPOOL}; use omicron_test_utils::dev::test_setup_log; use std::path::Path; diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index d6de97e3f0..5c14692111 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,7 +4,7 @@ use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; use crate::{Baseboard, SledMode}; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use slog::Logger; use std::collections::HashSet; use tokio::sync::broadcast; diff --git a/sled-hardware/src/underlay.rs b/sled-hardware/src/underlay.rs index 2c31fcff26..fd99418649 100644 --- a/sled-hardware/src/underlay.rs +++ b/sled-hardware/src/underlay.rs @@ -14,7 +14,7 @@ use illumos_utils::dladm::GetLinkpropError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::SetLinkpropError; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; -use illumos_utils::process::BoxedExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::api::external::MacAddr; use std::net::Ipv6Addr; @@ -30,7 +30,7 @@ pub enum Error { #[error( "Failed to create an IPv6 link-local 
address for underlay devices: {0}" )] - UnderlayDeviceAddress(#[from] illumos_utils::process::ExecutionError), + UnderlayDeviceAddress(#[from] illumos_utils::host::ExecutionError), #[error(transparent)] BadAddrObj(#[from] addrobj::ParseError), diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index bc35273b88..5df0b7f74d 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -11,7 +11,7 @@ use camino_tempfile::Utf8TempDir; use clap::Parser; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; -use illumos_utils::process::FakeExecutor; +use illumos_utils::host::FakeExecutor; use installinator::HOST_PHASE_2_FILE_NAME; use omicron_common::{ api::internal::nexus::KnownArtifactKind, From 544aaeffca0d11716ddceaa78cbc1db332d66a59 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 14 Aug 2023 17:40:24 -0700 Subject: [PATCH 36/57] Tests passing on illumos --- illumos-utils/src/host/executor.rs | 15 ++++++++++----- illumos-utils/src/running_zone.rs | 5 +++-- sled-agent/src/services.rs | 22 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index d34a22a668..4d232eab31 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -36,7 +36,7 @@ where .collect::() } -fn log_command(log: &Logger, id: u64, command: &Command) { +fn log_input(log: &Logger, id: u64, command: &Command) { info!(log, "{id} - Running Command: [{}]", Input::from(command),); debug!( log, @@ -158,7 +158,7 @@ impl FakeExecutor { command: &Command, ) -> Result { let id = self.inner.counter.fetch_add(1, Ordering::SeqCst); - log_command(&self.inner.log, id, command); + log_input(&self.inner.log, id, command); let mut child = FakeChild::new(id, command, self.inner.clone()); @@ -202,7 +202,7 @@ impl Executor for FakeExecutor { command: &mut 
Command, ) -> Result { let id = self.inner.counter.fetch_add(1, Ordering::SeqCst); - log_command(&self.inner.log, id, command); + log_input(&self.inner.log, id, command); Ok(FakeChild::new(id, command, self.inner.clone())) } @@ -224,7 +224,7 @@ impl HostExecutor { fn prepare(&self, command: &Command) -> u64 { let id = self.counter.fetch_add(1, Ordering::SeqCst); - log_command(&self.log, id, command); + log_input(&self.log, id, command); id } @@ -522,7 +522,12 @@ impl Drop for StaticHandler { "".to_string() } }; - assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); + let errmsg = format!("Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); + if !std::thread::panicking() { + assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); + } else { + eprintln!("{errmsg}"); + } } } } diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 2cf3fa7bf8..b25155b248 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -10,7 +10,7 @@ use crate::host::{BoxedExecutor, ExecutionError}; use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; use crate::svc::wait_for_service; -use crate::zone::{AddressRequest, Zones, IPADM, ZLOGIN, ZONE_PREFIX}; +use crate::zone::{AddressRequest, Zones, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; use ipnetwork::IpNetwork; use omicron_common::backoff; @@ -475,7 +475,8 @@ impl RunningZone { // NOTE: This implementation is useless, and will never work. However, // it must actually call `execute()` for the testing purposes. 
let mut command = std::process::Command::new(crate::host::PFEXEC); - let command = command.arg(ZLOGIN).arg(self.name()).args(args); + let command = + command.arg(crate::zone::ZLOGIN).arg(self.name()).args(args); self.inner .executor .execute(command) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0fa9a446d1..4690e77610 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1047,13 +1047,15 @@ impl ServiceManager { .map(|d| zone::Device { name: d.to_string() }) .collect(); - // Look for the image in the ramdisk first - let mut zone_image_paths = vec![Utf8PathBuf::from("/opt/oxide")]; + let mut zone_image_paths = vec![]; // Inject an image path if requested by a test. if let Some(path) = self.inner.image_directory_override.get() { zone_image_paths.push(path.clone()); }; + // Look for the image in the ramdisk next. + zone_image_paths.push(Utf8PathBuf::from("/opt/oxide")); + // If the boot disk exists, look for the image in the "install" dataset // there too. if let Some((_, boot_zpool)) = self.inner.storage.boot_disk().await { @@ -2958,7 +2960,21 @@ mod test { ) ); - let login = format!("{PFEXEC} {ZLOGIN} {zone_name}"); + // Refer to illumos-utils/src/running_zone.rs for the difference here. + // + // On illumos, we tend to avoid using zlogin, and instead use + // thread-level contracts with zenter::zone_enter to run commands within + // the context of zones. + // + // On non-illumos systems, we just pretend to zlogin, since the + // interface for doing so is simpler than the host API to access + // zenter. 
+ let login = if cfg!(target_os = "illumos") { + format!("{PFEXEC} ") + } else { + format!("{PFEXEC} {ZLOGIN} {zone_name} ") + }; + handler.expect( Input::shell(format!( "{login} /usr/sbin/ipadm create-if -t oxControlService0" From a57d4dbde33c525b859dd8e671db1308cc1a4581 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 15 Aug 2023 10:35:21 -0700 Subject: [PATCH 37/57] Flip disposition of registration --- illumos-utils/src/dladm.rs | 10 +++++----- illumos-utils/src/host/executor.rs | 15 +++++++-------- illumos-utils/src/zone.rs | 4 ++-- sled-agent/src/services.rs | 10 +++++----- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index b27d349e1a..bc5d426d73 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -574,7 +574,7 @@ mod test { .expect_ok(format!("{PFEXEC} {DLADM} create-etherstub -t mystub1")); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let etherstub = Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") @@ -591,7 +591,7 @@ mod test { let mut handler = StaticHandler::new(); handler.expect_ok(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let etherstub = Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") @@ -613,7 +613,7 @@ mod test { "{PFEXEC} {DLADM} show-vnic {UNDERLAY_ETHERSTUB_VNIC_NAME}" )); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let executor = &executor.as_executor(); let etherstub = @@ -645,7 +645,7 @@ mod test { {UNDERLAY_ETHERSTUB_VNIC_NAME}" )); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let executor = &executor.as_executor(); let etherstub = @@ -669,7 +669,7 @@ 
mod test { ), ); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let executor = &executor.as_executor(); let vnics = Dladm::get_vnics(executor).expect("Failed to get VNICs"); diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index 4d232eab31..05241a8181 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -140,14 +140,6 @@ impl FakeExecutor { *self.inner.wait_handler.lock().unwrap() = f; } - /// Set the request handler to a static set of inputs and outputs. - // TODO: Remove me, add a method to the StatiHandler itself. - pub fn set_static_handler(&self, mut handler: StaticHandler) { - self.set_wait_handler(Box::new(move |child| -> Output { - handler.execute(child.command()) - })); - } - /// Perform some type coercion to access a commonly-used trait object. pub fn as_executor(self: Arc) -> BoxedExecutor { self @@ -469,6 +461,13 @@ impl StaticHandler { Self { expected: Vec::new(), index: 0 } } + /// Convenience function to register the handler with a [FakeExecutor]. + pub fn register(mut self, executor: &FakeExecutor) { + executor.set_wait_handler(Box::new(move |child| -> Output { + self.execute(child.command()) + })); + } + /// Expects a static "input" to exactly produce some "output". 
pub fn expect(&mut self, input: Input, output: Output) { self.expected.push(HandledCommand::Static { input, output }); diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index dc6c1e7539..506bdcd676 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -939,7 +939,7 @@ mod tests { ); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let datasets = []; let filesystems = []; @@ -983,7 +983,7 @@ mod tests { ); let executor = FakeExecutor::new(logctx.log.clone()); - executor.set_static_handler(handler); + handler.register(&executor); let datasets = []; let filesystems = []; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4690e77610..a3bdd99dfb 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -3142,7 +3142,7 @@ mod test { let id = Uuid::new_v4(); let mut handler = StaticHandler::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - executor.set_static_handler(handler); + handler.register(&executor); let executor = executor.as_executor(); let mgr = ServiceManager::new( @@ -3200,7 +3200,7 @@ mod test { let id = Uuid::new_v4(); let mut handler = StaticHandler::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - executor.set_static_handler(handler); + handler.register(&executor); let executor = executor.as_executor(); let mgr = ServiceManager::new( @@ -3259,7 +3259,7 @@ mod test { let id = Uuid::new_v4(); let mut handler = StaticHandler::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - executor.set_static_handler(handler); + handler.register(&executor); let executor = executor.as_executor(); // First, spin up a ServiceManager, create a new service, and tear it @@ -3319,7 +3319,7 @@ mod test { Output::success() })); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - executor.set_static_handler(handler); + 
handler.register(&executor); let executor = executor.as_executor(); let mgr = ServiceManager::new( @@ -3377,7 +3377,7 @@ mod test { let id = Uuid::new_v4(); let mut handler = StaticHandler::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - executor.set_static_handler(handler); + handler.register(&executor); let executor = executor.as_executor(); // First, spin up a ServiceManager, create a new service, and tear it From 27378dde96d4e7866591d9df52891faee7e4cc97 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 15 Aug 2023 13:49:53 -0700 Subject: [PATCH 38/57] cleanup --- illumos-utils/src/host/executor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index e612742c2e..2b24e68bdc 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -523,7 +523,7 @@ impl Drop for StaticHandler { }; let errmsg = format!("Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); if !std::thread::panicking() { - assert!(false, "Only saw {actual} calls, expected {expected}\nNext would have been: {tip}"); + assert!(false, "{errmsg}"); } else { eprintln!("{errmsg}"); } From e6ee96f30081fd5d69f8600aafb46ee17631d3e1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 17 Aug 2023 17:33:45 -0700 Subject: [PATCH 39/57] Less boxy errors --- illumos-utils/src/zone.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index f1876d1369..2560af4ea6 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -53,6 +53,13 @@ pub enum Operation { Uninstall, } +#[derive(thiserror::Error, Debug)] +#[error("{0}")] +enum AdmErrorVariant { + Execution(#[from] ExecutionError), + Adm(#[from] zone::ZoneError), +} + /// Errors from issuing [`zone::Adm`] commands. 
#[derive(thiserror::Error, Debug)] #[error("Failed to execute zoneadm command '{op:?}' for zone '{zone}': {err}")] @@ -60,7 +67,7 @@ pub struct AdmError { op: Operation, zone: String, #[source] - err: Box, + err: AdmErrorVariant, } /// Errors which may be encountered when deleting addresses. @@ -233,7 +240,7 @@ impl Zones { AdmError { op: Operation::Halt, zone: name.to_string(), - err: Box::new(err), + err: err.into(), } })?; } @@ -244,7 +251,7 @@ impl Zones { .map_err(|err| AdmError { op: Operation::Uninstall, zone: name.to_string(), - err: Box::new(err), + err: err.into(), })?; } zone::Config::new(name) @@ -254,7 +261,7 @@ impl Zones { .map_err(|err| AdmError { op: Operation::Delete, zone: name.to_string(), - err: Box::new(err), + err: err.into(), })?; Ok(Some(state)) } @@ -353,7 +360,7 @@ impl Zones { executor.execute_async(&mut cmd).await.map_err(|err| AdmError { op: Operation::Configure, zone: zone_name.to_string(), - err: Box::new(err), + err: err.into(), })?; info!(log, "Installing Omicron zone: {}", zone_name); @@ -367,7 +374,7 @@ impl Zones { executor.execute_async(&mut cmd).await.map_err(|err| AdmError { op: Operation::Install, zone: zone_name.to_string(), - err: Box::new(err), + err: err.into(), })?; Ok(()) } @@ -383,7 +390,7 @@ impl Zones { executor.execute_async(&mut cmd).await.map_err(|err| AdmError { op: Operation::Boot, zone: name.to_string(), - err: Box::new(err), + err: err.into(), })?; Ok(()) } @@ -404,10 +411,10 @@ impl Zones { let output = executor .execute_async(&mut cmd) .await - .map_err(|err| handle_err(Box::new(err)))?; + .map_err(|err| handle_err(err.into()))?; let zones = zone::Adm::parse_list_output(&output) - .map_err(|err| handle_err(Box::new(err)))?; + .map_err(|err| handle_err(err.into()))?; Ok(zones .into_iter() From 2954fd273c07add95dd101bfeb8ea24129ac8576 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 17 Aug 2023 17:40:36 -0700 Subject: [PATCH 40/57] One less clone --- installinator/src/write.rs | 8 ++++---- 1 file changed, 
4 insertions(+), 4 deletions(-) diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 5cc0566d7a..90cf0bc489 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -274,7 +274,7 @@ impl<'a> ArtifactWriter<'a> { // want each drive to track success and failure independently. let write_cx = SlotWriteContext { log: log.clone(), - executor: executor.clone(), + executor, artifacts: self.artifacts, slot: *drive, destinations, @@ -357,7 +357,7 @@ impl<'a> ArtifactWriter<'a> { struct SlotWriteContext<'a> { log: Logger, - executor: BoxedExecutor, + executor: &'a BoxedExecutor, artifacts: ArtifactsToWrite<'a>, slot: M2Slot, destinations: &'a ArtifactDestination, @@ -570,7 +570,7 @@ impl ArtifactsToWrite<'_> { // own step. let inner_cx = &ControlPlaneZoneWriteContext { slot, - executor: executor.clone(), + executor, clean_output_directory: destinations.clean_control_plane_dir, output_directory: &destinations.control_plane_dir, zones: self.control_plane_zones, @@ -604,7 +604,7 @@ impl ArtifactsToWrite<'_> { struct ControlPlaneZoneWriteContext<'a> { slot: M2Slot, - executor: BoxedExecutor, + executor: &'a BoxedExecutor, clean_output_directory: bool, output_directory: &'a Utf8Path, zones: &'a ControlPlaneZoneImages, From 5bfc6eada7970a6f5715fb376fcc949aa4c6e8c2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 17 Aug 2023 17:49:09 -0700 Subject: [PATCH 41/57] Better slog citizen --- illumos-utils/src/host/executor.rs | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index 2b24e68bdc..f62d679617 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -37,20 +37,23 @@ where } fn log_input(log: &Logger, id: u64, command: &Command) { - info!(log, "{id} - Running Command: [{}]", Input::from(command),); + info!( + log, + "running command via executor"; "id" => id, "command" => 
%Input::from(command) + ); debug!( log, - "{id} - Environment: [{}]", - to_space_separated_string(command.get_envs()), - ) + "running command via executor"; "id" => id, "envs" => %to_space_separated_string(command.get_envs()) + ); } fn log_output(log: &Logger, id: u64, output: &Output) { info!( log, - "{id} - {} (status code: {})", - if output.status.success() { "OK" } else { "ERROR" }, - output + "finished running command via executor"; + "id" => id, + "result" => if output.status.success() { "OK" } else { "ERROR" }, + "status" => output .status .code() .map(|c| c.to_string()) @@ -59,15 +62,17 @@ fn log_output(log: &Logger, id: u64, output: &Output) { if !output.stdout.is_empty() { debug!( log, - "{id} - stdout: {}", - from_utf8(&output.stdout).unwrap_or(""), + "finished command stdout"; + "id" => id, + "stdout" => from_utf8(&output.stdout).unwrap_or(""), ); } if !output.stderr.is_empty() { debug!( log, - "{id} - stderr: {}", - from_utf8(&output.stderr).unwrap_or(""), + "finished command stderr"; + "id" => id, + "stderr" => from_utf8(&output.stderr).unwrap_or(""), ); } } @@ -245,7 +250,7 @@ impl Executor for HostExecutor { ) -> Result { let id = self.prepare(command.as_std()); let output = command.output().await.map_err(|err| { - error!(self.log, "{id} - Could not start program!"); + error!(self.log, "Could not start program asynchronously!"; "id" => id); ExecutionError::ExecutionStart { command: Input::from(command.as_std()).to_string(), err, @@ -257,7 +262,7 @@ impl Executor for HostExecutor { fn execute(&self, command: &mut Command) -> Result { let id = self.prepare(command); let output = command.output().map_err(|err| { - error!(self.log, "{id} - Could not start program!"); + error!(self.log, "Could not start program!"; "id" => id); ExecutionError::ExecutionStart { command: Input::from(&*command).to_string(), err, From 6d85d90fcb38ff9db1b9f3a8d814360eaa74608c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 10:24:47 -0700 Subject: [PATCH 42/57] 
fn wait consumes box --- illumos-utils/src/host/executor.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index f62d679617..53d02c49bd 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -310,7 +310,7 @@ pub trait Child: Send { fn id(&self) -> u32; /// Waits for the child to complete, and returns the output. - fn wait(&mut self) -> Result; + fn wait(self: Box) -> Result; } /// A real, host-controlled child process @@ -348,7 +348,7 @@ impl Child for SpawnedChild { self.child.as_ref().expect("No child").id() } - fn wait(&mut self) -> Result { + fn wait(mut self: Box) -> Result { let output = self.child.take().unwrap().wait_with_output().map_err(|err| { ExecutionError::ExecutionStart { @@ -431,9 +431,9 @@ impl Child for FakeChild { self.id.try_into().expect("u32 overflow") } - fn wait(&mut self) -> Result { + fn wait(mut self: Box) -> Result { let executor = self.executor.clone(); - let output = executor.wait_handler.lock().unwrap()(self); + let output = executor.wait_handler.lock().unwrap()(&mut self); log_output(&self.executor.log, self.id, &output); if !output.status.success() { return Err(output_to_exec_error( From eece3b1dd6e9393fa2e3fa2a6e72fed7f242fa36 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 10:26:54 -0700 Subject: [PATCH 43/57] take stdin --- illumos-utils/src/host/executor.rs | 18 +++++++++--------- sled-agent/src/swap_device.rs | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index 53d02c49bd..5b59256562 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -298,13 +298,13 @@ pub type BoxedChild = Box; /// A child process spawned by the executor. pub trait Child: Send { /// Accesses the stdin of the spawned child, as a Writer. 
- fn stdin(&mut self) -> Option>; + fn take_stdin(&mut self) -> Option>; /// Accesses the stdout of the spawned child, as a Reader. - fn stdout(&mut self) -> Option>; + fn take_stdout(&mut self) -> Option>; /// Accesses the stderr of the spawned child, as a Reader. - fn stderr(&mut self) -> Option>; + fn take_stderr(&mut self) -> Option>; /// OS-assigned PID identifier for the child fn id(&self) -> u32; @@ -320,7 +320,7 @@ pub struct SpawnedChild { } impl Child for SpawnedChild { - fn stdin(&mut self) -> Option> { + fn take_stdin(&mut self) -> Option> { self.child .as_mut()? .stdin @@ -328,7 +328,7 @@ impl Child for SpawnedChild { .map(|s| Box::new(s) as Box) } - fn stdout(&mut self) -> Option> { + fn take_stdout(&mut self) -> Option> { self.child .as_mut()? .stdout @@ -336,7 +336,7 @@ impl Child for SpawnedChild { .map(|s| Box::new(s) as Box) } - fn stderr(&mut self) -> Option> { + fn take_stderr(&mut self) -> Option> { self.child .as_mut()? .stderr @@ -415,15 +415,15 @@ impl FakeChild { } impl Child for FakeChild { - fn stdin(&mut self) -> Option> { + fn take_stdin(&mut self) -> Option> { Some(Box::new(self.stdin.clone())) } - fn stdout(&mut self) -> Option> { + fn take_stdout(&mut self) -> Option> { Some(Box::new(self.stdout.clone())) } - fn stderr(&mut self) -> Option> { + fn take_stderr(&mut self) -> Option> { Some(Box::new(self.stderr.clone())) } diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index fab171b989..22df0008e3 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -234,7 +234,7 @@ fn create_encrypted_swap_zvol( error: e.to_string(), })?; - let mut stdin = spawn.stdin().take().unwrap(); + let mut stdin = spawn.take_stdin().take().unwrap(); let child_log = log.clone(); let hdl = std::thread::spawn(move || { use std::io::Write; From be077b37241a8b0afb1110e0640b08927da242ed Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 11:49:55 -0700 Subject: [PATCH 44/57] CommandSequence --- 
illumos-utils/src/dladm.rs | 12 ++++++------ illumos-utils/src/host/executor.rs | 6 +++--- illumos-utils/src/host/mod.rs | 3 ++- illumos-utils/src/zone.rs | 6 +++--- sled-agent/src/services.rs | 14 +++++++------- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index bc5d426d73..274693a98d 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -560,7 +560,7 @@ impl Dladm { #[cfg(test)] mod test { use super::*; - use crate::host::{FakeExecutor, Input, OutputExt, StaticHandler}; + use crate::host::{CommandSequence, FakeExecutor, Input, OutputExt}; use omicron_test_utils::dev; use std::process::Output; @@ -568,7 +568,7 @@ mod test { fn ensure_new_etherstub() { let logctx = dev::test_setup_log("ensure_new_etherstub"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect_fail(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); handler .expect_ok(format!("{PFEXEC} {DLADM} create-etherstub -t mystub1")); @@ -588,7 +588,7 @@ mod test { fn ensure_existing_etherstub() { let logctx = dev::test_setup_log("ensure_existing_etherstub"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect_ok(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); let executor = FakeExecutor::new(logctx.log.clone()); handler.register(&executor); @@ -605,7 +605,7 @@ mod test { fn ensure_existing_etherstub_vnic() { let logctx = dev::test_setup_log("ensure_existing_etherstub_vnic"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect_ok(format!( "{PFEXEC} {DLADM} show-etherstub {UNDERLAY_ETHERSTUB_NAME}" )); @@ -629,7 +629,7 @@ mod test { fn ensure_new_etherstub_vnic() { let logctx = dev::test_setup_log("ensure_new_etherstub_vnic"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect_ok(format!( "{PFEXEC} {DLADM} 
show-etherstub {UNDERLAY_ETHERSTUB_NAME}" )); @@ -661,7 +661,7 @@ mod test { fn only_parse_oxide_vnics() { let logctx = dev::test_setup_log("only_parse_oxide_vnics"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect( Input::shell(format!("{PFEXEC} {DLADM} show-vnic -p -o LINK")), Output::success().set_stdout( diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index 5b59256562..f42d5a7058 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -456,12 +456,12 @@ enum HandledCommand { /// when these commands are known ahead-of-time. /// /// See: [Self::register] for integration with a [FakeExecutor]. -pub struct StaticHandler { +pub struct CommandSequence { expected: Vec, index: usize, } -impl StaticHandler { +impl CommandSequence { pub fn new() -> Self { Self { expected: Vec::new(), index: 0 } } @@ -514,7 +514,7 @@ impl StaticHandler { } } -impl Drop for StaticHandler { +impl Drop for CommandSequence { fn drop(&mut self) { let expected = self.expected.len(); let actual = self.index; diff --git a/illumos-utils/src/host/mod.rs b/illumos-utils/src/host/mod.rs index 87c0ae8e6c..5c0dfcd4f7 100644 --- a/illumos-utils/src/host/mod.rs +++ b/illumos-utils/src/host/mod.rs @@ -14,7 +14,8 @@ pub const PFEXEC: &str = "/usr/bin/pfexec"; pub use error::{output_to_exec_error, ExecutionError}; pub use executor::{ - command_to_string, BoxedExecutor, FakeExecutor, HostExecutor, StaticHandler, + command_to_string, BoxedExecutor, CommandSequence, FakeExecutor, + HostExecutor, }; pub use input::Input; pub use output::{Output, OutputExt}; diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 2560af4ea6..1510d4829f 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -902,7 +902,7 @@ impl Zones { #[cfg(test)] mod tests { use super::*; - use crate::host::{FakeExecutor, Input, OutputExt, StaticHandler}; + use 
crate::host::{CommandSequence, FakeExecutor, Input, OutputExt}; use omicron_test_utils::dev; use std::process::Output; @@ -919,7 +919,7 @@ mod tests { // - A request for the list of existing zones // - A command to configure the zone // - A command to install the zone - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect( Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), Output::success().set_stdout("0:global:running:/::ipkg:shared"), @@ -981,7 +981,7 @@ mod tests { let zone_name = "oxz_myzone"; let zone_image = Utf8Path::new("/image.tar.gz"); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect( Input::shell(format!("{PFEXEC} {ZONEADM} list -cip")), Output::success().set_stdout( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f34d22af34..4e63509d93 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2865,7 +2865,7 @@ mod test { Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - host::{FakeExecutor, Input, Output, OutputExt, StaticHandler}, + host::{CommandSequence, FakeExecutor, Input, Output, OutputExt}, zone::{ZLOGIN, ZONEADM, ZONECFG}, }; use key_manager::{ @@ -2899,7 +2899,7 @@ mod test { // Generate a static executor handler with the expected invocations (and // responses) when generating a new service. 
fn expect_new_service( - handler: &mut StaticHandler, + handler: &mut CommandSequence, config: &TestConfig, zone_id: Uuid, u2_mountpoint: &Utf8Path, @@ -3145,7 +3145,7 @@ mod test { let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); handler.register(&executor); let executor = executor.as_executor(); @@ -3201,7 +3201,7 @@ mod test { let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); handler.register(&executor); let executor = executor.as_executor(); @@ -3260,7 +3260,7 @@ mod test { let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); handler.register(&executor); let executor = executor.as_executor(); @@ -3303,7 +3303,7 @@ mod test { // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. 
let executor = FakeExecutor::new(log.clone()); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); handler.expect_dynamic(Box::new(|input| -> Output { assert_eq!(input.program, PFEXEC); @@ -3374,7 +3374,7 @@ mod test { let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); - let mut handler = StaticHandler::new(); + let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); handler.register(&executor); let executor = executor.as_executor(); From 2d281ee260f15e2e7fc407b287057ccde5de8b10 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 12:01:18 -0700 Subject: [PATCH 45/57] Into string --- illumos-utils/src/host/input.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/illumos-utils/src/host/input.rs b/illumos-utils/src/host/input.rs index 721dc1df23..d40d1dda69 100644 --- a/illumos-utils/src/host/input.rs +++ b/illumos-utils/src/host/input.rs @@ -13,10 +13,10 @@ pub struct Input { } impl Input { - pub fn new>(program: S, args: Vec) -> Self { + pub fn new>(program: S, args: Vec) -> Self { Self { - program: program.as_ref().to_string(), - args: args.into_iter().map(|s| s.as_ref().to_string()).collect(), + program: program.into(), + args: args.into_iter().map(|s| s.into()).collect(), envs: vec![], } } From 9acb7b8ff3a1ec988e22fd9a947470018acb4a71 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 12:18:26 -0700 Subject: [PATCH 46/57] output_to_exec error is now ExecutionError::from_output --- illumos-utils/src/host/error.rs | 22 ++++++++++++---------- illumos-utils/src/host/executor.rs | 27 +++++++++------------------ illumos-utils/src/host/mod.rs | 2 +- illumos-utils/src/host/output.rs | 14 -------------- sled-agent/src/swap_device.rs | 4 ++-- 5 files changed, 24 insertions(+), 45 deletions(-) diff --git a/illumos-utils/src/host/error.rs b/illumos-utils/src/host/error.rs index ced633c103..901481fee4 100644 --- 
a/illumos-utils/src/host/error.rs +++ b/illumos-utils/src/host/error.rs @@ -39,14 +39,16 @@ pub enum ExecutionError { NotRunning, } -pub fn output_to_exec_error( - command_str: String, - output: &std::process::Output, -) -> ExecutionError { - ExecutionError::CommandFailure(Box::new(FailureInfo { - command: command_str, - status: output.status, - stdout: String::from_utf8_lossy(&output.stdout).to_string(), - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - })) +impl ExecutionError { + pub fn from_output>( + command_str: S, + output: &std::process::Output, + ) -> Self { + Self::CommandFailure(Box::new(FailureInfo { + command: command_str.into(), + status: output.status, + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + })) + } } diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index f42d5a7058..f3a2e6c004 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -3,8 +3,11 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use crate::host::{ - byte_queue::ByteQueue, error::ExecutionError, input::Input, - output::output_to_exec_error, output::Output, output::OutputExt, + byte_queue::ByteQueue, + error::ExecutionError, + input::Input, + output::Output, + output::OutputExt, }; use async_trait::async_trait; @@ -167,10 +170,7 @@ impl FakeExecutor { log_output(&self.inner.log, id, &output); if !output.status.success() { - return Err(output_to_exec_error( - command_to_string(command), - &output, - )); + return Err(ExecutionError::from_output(command_to_string(command), &output)); } Ok(output) } @@ -233,10 +233,7 @@ impl HostExecutor { ) -> Result { log_output(&self.log, id, &output); if !output.status.success() { - return Err(output_to_exec_error( - command_to_string(command), - &output, - )); + return Err(ExecutionError::from_output(command_to_string(command), &output)); } Ok(output) } @@ -358,10 +355,7 @@ impl Child for SpawnedChild { })?; if !output.status.success() { - return Err(output_to_exec_error( - self.command_str.clone(), - &output, - )); + return Err(ExecutionError::from_output(self.command_str, &output)); } Ok(output) @@ -436,10 +430,7 @@ impl Child for FakeChild { let output = executor.wait_handler.lock().unwrap()(&mut self); log_output(&self.executor.log, self.id, &output); if !output.status.success() { - return Err(output_to_exec_error( - command_to_string(&self.command), - &output, - )); + return Err(ExecutionError::from_output(command_to_string(&self.command), &output)); } Ok(output) } diff --git a/illumos-utils/src/host/mod.rs b/illumos-utils/src/host/mod.rs index 5c0dfcd4f7..9d876b7b2e 100644 --- a/illumos-utils/src/host/mod.rs +++ b/illumos-utils/src/host/mod.rs @@ -12,7 +12,7 @@ mod output; pub const PFEXEC: &str = "/usr/bin/pfexec"; -pub use error::{output_to_exec_error, ExecutionError}; +pub use error::ExecutionError; pub use executor::{ command_to_string, BoxedExecutor, CommandSequence, FakeExecutor, HostExecutor, diff --git a/illumos-utils/src/host/output.rs 
b/illumos-utils/src/host/output.rs index 3417eb2e84..b7c5bd401e 100644 --- a/illumos-utils/src/host/output.rs +++ b/illumos-utils/src/host/output.rs @@ -2,8 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::host::error::{ExecutionError, FailureInfo}; - use std::os::unix::process::ExitStatusExt; use std::process::ExitStatus; @@ -45,15 +43,3 @@ impl OutputExt for Output { self } } - -pub fn output_to_exec_error( - command_str: String, - output: &std::process::Output, -) -> ExecutionError { - ExecutionError::CommandFailure(Box::new(FailureInfo { - command: command_str, - status: output.status, - stdout: String::from_utf8_lossy(&output.stdout).to_string(), - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - })) -} diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 22df0008e3..49b2925de1 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -5,7 +5,7 @@ //! Operations for creating a system swap device. 
use illumos_utils::host::{ - command_to_string, output_to_exec_error, BoxedExecutor, ExecutionError, + command_to_string, BoxedExecutor, ExecutionError, }; use std::io::Read; use zeroize::Zeroize; @@ -255,7 +255,7 @@ fn create_encrypted_swap_zvol( hdl.join().unwrap(); if !output.status.success() { - return Err(SwapDeviceError::Zfs(output_to_exec_error( + return Err(SwapDeviceError::Zfs(ExecutionError::from_output( command_to_string(&command), &output, ))); From 6c19b403fd4ca4e452fc0becf87112d268e0e70e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 14:56:26 -0700 Subject: [PATCH 47/57] Use shlex for strong quoting --- Cargo.lock | 1 + Cargo.toml | 1 + illumos-utils/Cargo.toml | 1 + illumos-utils/src/host/error.rs | 27 ++++++++++++++++++++++++--- illumos-utils/src/host/executor.rs | 18 +++++------------- illumos-utils/src/host/input.rs | 15 ++++++++------- illumos-utils/src/host/mod.rs | 3 +-- sled-agent/src/swap_device.rs | 7 ++----- 8 files changed, 43 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf3c530ffb..a4defa6d79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3387,6 +3387,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "shlex", "slog", "smf", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 84474979c8..ef0072ce82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -300,6 +300,7 @@ serde_with = "2.3.3" sha2 = "0.10.7" sha3 = "0.10.8" shell-words = "1.1.0" +shlex = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } sled = "0.34" diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 7999a0aa6c..1e9e2cf3f9 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -22,6 +22,7 @@ omicron-common.workspace = true oxide-vpc.workspace = true schemars.workspace = true serde.workspace = true +shlex.workspace = true slog.workspace = true smf.workspace = true thiserror.workspace = true diff --git a/illumos-utils/src/host/error.rs 
b/illumos-utils/src/host/error.rs index 901481fee4..8d956b7cd6 100644 --- a/illumos-utils/src/host/error.rs +++ b/illumos-utils/src/host/error.rs @@ -39,13 +39,34 @@ pub enum ExecutionError { NotRunning, } +/// Convenience trait for turning [std::process::Command] into a String. +pub trait AsCommandStr { + fn into_str(&self) -> String; +} + +impl AsCommandStr for String { + fn into_str(&self) -> String { + self.into() + } +} + +impl AsCommandStr for &std::process::Command { + fn into_str(&self) -> String { + shlex::join( + std::iter::once(self.get_program()) + .chain(self.get_args()) + .map(|s| s.to_str().expect("Invalid UTF-8")), + ) + } +} + impl ExecutionError { - pub fn from_output>( - command_str: S, + pub fn from_output( + command: S, output: &std::process::Output, ) -> Self { Self::CommandFailure(Box::new(FailureInfo { - command: command_str.into(), + command: command.into_str(), status: output.status, stdout: String::from_utf8_lossy(&output.stdout).to_string(), stderr: String::from_utf8_lossy(&output.stderr).to_string(), diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index f3a2e6c004..5f9d7ffeb4 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -4,7 +4,7 @@ use crate::host::{ byte_queue::ByteQueue, - error::ExecutionError, + error::{AsCommandStr, ExecutionError}, input::Input, output::Output, output::OutputExt, @@ -19,14 +19,6 @@ use std::str::from_utf8; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -pub fn command_to_string(command: &std::process::Command) -> String { - command - .get_args() - .map(|s| s.to_string_lossy().into()) - .collect::>() - .join(" ") -} - fn to_space_separated_string(iter: T) -> String where T: IntoIterator, @@ -170,7 +162,7 @@ impl FakeExecutor { log_output(&self.inner.log, id, &output); if !output.status.success() { - return Err(ExecutionError::from_output(command_to_string(command), &output)); + return 
Err(ExecutionError::from_output(command, &output)); } Ok(output) } @@ -233,7 +225,7 @@ impl HostExecutor { ) -> Result { log_output(&self.log, id, &output); if !output.status.success() { - return Err(ExecutionError::from_output(command_to_string(command), &output)); + return Err(ExecutionError::from_output(command, &output)); } Ok(output) } @@ -272,7 +264,7 @@ impl Executor for HostExecutor { &self, command: &mut Command, ) -> Result { - let command_str = command_to_string(&command); + let command_str = (&*command).into_str(); Ok(Box::new(SpawnedChild { child: Some( command @@ -430,7 +422,7 @@ impl Child for FakeChild { let output = executor.wait_handler.lock().unwrap()(&mut self); log_output(&self.executor.log, self.id, &output); if !output.status.success() { - return Err(ExecutionError::from_output(command_to_string(&self.command), &output)); + return Err(ExecutionError::from_output(&self.command, &output)); } Ok(output) } diff --git a/illumos-utils/src/host/input.rs b/illumos-utils/src/host/input.rs index d40d1dda69..1450ebd42c 100644 --- a/illumos-utils/src/host/input.rs +++ b/illumos-utils/src/host/input.rs @@ -24,20 +24,21 @@ impl Input { /// Short-hand for a whitespace-separated string, which can be provided /// "like a shell command". 
pub fn shell>(input: S) -> Self { - let mut args = input.as_ref().split_whitespace(); + let mut args = shlex::split(input.as_ref()).expect("Invalid input"); - Self::new( - args.next().expect("Needs at least a program"), - args.collect(), - ) + if args.is_empty() { + panic!("Empty input is invalid"); + } + + Self::new(args.remove(0), args) } } impl std::fmt::Display for Input { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.program)?; + write!(f, "{}", shlex::quote(&self.program))?; for arg in &self.args { - write!(f, " {}", arg)?; + write!(f, " {}", shlex::quote(arg))?; } Ok(()) } diff --git a/illumos-utils/src/host/mod.rs b/illumos-utils/src/host/mod.rs index 9d876b7b2e..b347787f42 100644 --- a/illumos-utils/src/host/mod.rs +++ b/illumos-utils/src/host/mod.rs @@ -14,8 +14,7 @@ pub const PFEXEC: &str = "/usr/bin/pfexec"; pub use error::ExecutionError; pub use executor::{ - command_to_string, BoxedExecutor, CommandSequence, FakeExecutor, - HostExecutor, + BoxedExecutor, CommandSequence, FakeExecutor, HostExecutor, }; pub use input::Input; pub use output::{Output, OutputExt}; diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 49b2925de1..3fa29933d9 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -4,9 +4,7 @@ //! Operations for creating a system swap device. 
-use illumos_utils::host::{ - command_to_string, BoxedExecutor, ExecutionError, -}; +use illumos_utils::host::{BoxedExecutor, ExecutionError}; use std::io::Read; use zeroize::Zeroize; @@ -256,8 +254,7 @@ fn create_encrypted_swap_zvol( if !output.status.success() { return Err(SwapDeviceError::Zfs(ExecutionError::from_output( - command_to_string(&command), - &output, + &command, &output, ))); } From cd332e6962f05714e5489b0f01fad275d6125af7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 15:07:39 -0700 Subject: [PATCH 48/57] less allocations while logging --- illumos-utils/src/host/executor.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor.rs index 5f9d7ffeb4..f561d4a658 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor.rs @@ -47,12 +47,8 @@ fn log_output(log: &Logger, id: u64, output: &Output) { log, "finished running command via executor"; "id" => id, - "result" => if output.status.success() { "OK" } else { "ERROR" }, - "status" => output - .status - .code() - .map(|c| c.to_string()) - .unwrap_or_else(|| "none".to_string()), + "succeeded" => output.status.success(), + "status" => output.status.code() ); if !output.stdout.is_empty() { debug!( From be2454bac04102ef0a597ff264488b2d6d3711e9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 16:14:31 -0700 Subject: [PATCH 49/57] modules, satiate clippy --- illumos-utils/src/dladm.rs | 3 +- illumos-utils/src/host/executor/executor.rs | 111 ++++++++ .../host/{executor.rs => executor/fake.rs} | 257 +----------------- illumos-utils/src/host/executor/mod.rs | 9 + illumos-utils/src/host/executor/real.rs | 159 +++++++++++ illumos-utils/src/host/mod.rs | 4 +- illumos-utils/src/link.rs | 2 +- illumos-utils/src/running_zone.rs | 28 +- illumos-utils/src/zone.rs | 5 +- installinator/src/main.rs | 2 +- installinator/src/write.rs | 3 +- package/src/bin/omicron-package.rs | 3 
+- sled-agent/src/bootstrap/pre_server.rs | 3 +- sled-agent/src/services.rs | 3 +- wicketd/tests/integration_tests/updates.rs | 2 +- 15 files changed, 326 insertions(+), 268 deletions(-) create mode 100644 illumos-utils/src/host/executor/executor.rs rename illumos-utils/src/host/{executor.rs => executor/fake.rs} (54%) create mode 100644 illumos-utils/src/host/executor/mod.rs create mode 100644 illumos-utils/src/host/executor/real.rs diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 274693a98d..9d43cc2318 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -560,7 +560,8 @@ impl Dladm { #[cfg(test)] mod test { use super::*; - use crate::host::{CommandSequence, FakeExecutor, Input, OutputExt}; + use crate::host::fake::{CommandSequence, FakeExecutor}; + use crate::host::{Input, OutputExt}; use omicron_test_utils::dev; use std::process::Output; diff --git a/illumos-utils/src/host/executor/executor.rs b/illumos-utils/src/host/executor/executor.rs new file mode 100644 index 0000000000..c02694541e --- /dev/null +++ b/illumos-utils/src/host/executor/executor.rs @@ -0,0 +1,111 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interfaces used to interact with the underlying host system. 
+ +use crate::host::{error::ExecutionError, input::Input, output::Output}; + +use async_trait::async_trait; +use itertools::Itertools; +use slog::{debug, info, Logger}; +use std::io::{Read, Write}; +use std::process::Command; +use std::str::from_utf8; +use std::sync::Arc; + +fn to_space_separated_string(iter: T) -> String +where + T: IntoIterator, + I: std::fmt::Debug, +{ + Itertools::intersperse( + iter.into_iter().map(|arg| format!("{arg:?}")), + " ".into(), + ) + .collect::() +} + +pub(super) fn log_input(log: &Logger, id: u64, command: &Command) { + info!( + log, + "running command via executor"; "id" => id, "command" => %Input::from(command) + ); + debug!( + log, + "running command via executor"; "id" => id, "envs" => %to_space_separated_string(command.get_envs()) + ); +} + +pub(super) fn log_output(log: &Logger, id: u64, output: &Output) { + info!( + log, + "finished running command via executor"; + "id" => id, + "succeeded" => output.status.success(), + "status" => output.status.code() + ); + if !output.stdout.is_empty() { + debug!( + log, + "finished command stdout"; + "id" => id, + "stdout" => from_utf8(&output.stdout).unwrap_or(""), + ); + } + if !output.stderr.is_empty() { + debug!( + log, + "finished command stderr"; + "id" => id, + "stderr" => from_utf8(&output.stderr).unwrap_or(""), + ); + } +} + +/// Describes the commonly-used "safe-to-reference" type describing the +/// Executor as a trait object. +pub type BoxedExecutor = Arc; + +/// Describes an "executor", which can run [Command]s and return a response. +/// +/// - In production, this is usually simply a [super::real::HostExecutor]. +/// - Under test, this can be customized, and a [super::fake::FakeExecutor] may be used. +#[async_trait] +pub trait Executor: Send + Sync { + /// Executes a task, waiting for it to complete, and returning output. 
+ async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result; + + /// Executes a task, waiting for it to complete, and returning output. + fn execute(&self, command: &mut Command) -> Result; + + /// Spawns a task, without waiting for it to complete. + fn spawn( + &self, + command: &mut Command, + ) -> Result; +} + +/// A wrapper around a spawned [Child] process. +pub type BoxedChild = Box; + +/// A child process spawned by the executor. +pub trait Child: Send { + /// Accesses the stdin of the spawned child, as a Writer. + fn take_stdin(&mut self) -> Option>; + + /// Accesses the stdout of the spawned child, as a Reader. + fn take_stdout(&mut self) -> Option>; + + /// Accesses the stderr of the spawned child, as a Reader. + fn take_stderr(&mut self) -> Option>; + + /// OS-assigned PID identifier for the child + fn id(&self) -> u32; + + /// Waits for the child to complete, and returns the output. + fn wait(self: Box) -> Result; +} diff --git a/illumos-utils/src/host/executor.rs b/illumos-utils/src/host/executor/fake.rs similarity index 54% rename from illumos-utils/src/host/executor.rs rename to illumos-utils/src/host/executor/fake.rs index f561d4a658..98a7645255 100644 --- a/illumos-utils/src/host/executor.rs +++ b/illumos-utils/src/host/executor/fake.rs @@ -2,105 +2,31 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! A "fake" [Executor] implementation, which can respond to host requests. 
+ +use super::executor::{ + log_input, log_output, BoxedChild, BoxedExecutor, Child, Executor, +}; + use crate::host::{ - byte_queue::ByteQueue, - error::{AsCommandStr, ExecutionError}, - input::Input, - output::Output, + byte_queue::ByteQueue, error::ExecutionError, input::Input, output::Output, output::OutputExt, }; use async_trait::async_trait; -use itertools::Itertools; -use slog::{debug, error, info, Logger}; +use slog::Logger; use std::io::{Read, Write}; -use std::process::{Command, Stdio}; -use std::str::from_utf8; +use std::process::Command; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -fn to_space_separated_string(iter: T) -> String -where - T: IntoIterator, - I: std::fmt::Debug, -{ - Itertools::intersperse( - iter.into_iter().map(|arg| format!("{arg:?}")), - " ".into(), - ) - .collect::() -} - -fn log_input(log: &Logger, id: u64, command: &Command) { - info!( - log, - "running command via executor"; "id" => id, "command" => %Input::from(command) - ); - debug!( - log, - "running command via executor"; "id" => id, "envs" => %to_space_separated_string(command.get_envs()) - ); -} - -fn log_output(log: &Logger, id: u64, output: &Output) { - info!( - log, - "finished running command via executor"; - "id" => id, - "succeeded" => output.status.success(), - "status" => output.status.code() - ); - if !output.stdout.is_empty() { - debug!( - log, - "finished command stdout"; - "id" => id, - "stdout" => from_utf8(&output.stdout).unwrap_or(""), - ); - } - if !output.stderr.is_empty() { - debug!( - log, - "finished command stderr"; - "id" => id, - "stderr" => from_utf8(&output.stderr).unwrap_or(""), - ); - } -} - -/// Describes the commonly-used "safe-to-reference" type describing the -/// Executor as a trait object. -pub type BoxedExecutor = Arc; - -/// Describes an "executor", which can run [Command]s and return a response. -/// -/// - In production, this is usually simply a [HostExecutor]. 
-/// - Under test, this can be customized, and a [FakeExecutor] may be used. -#[async_trait] -pub trait Executor: Send + Sync { - /// Executes a task, waiting for it to complete, and returning output. - async fn execute_async( - &self, - command: &mut tokio::process::Command, - ) -> Result; - - /// Executes a task, waiting for it to complete, and returning output. - fn execute(&self, command: &mut Command) -> Result; - - /// Spawns a task, without waiting for it to complete. - fn spawn( - &self, - command: &mut Command, - ) -> Result; -} - /// Handler called when spawning a fake child process -pub type SpawnFn = dyn FnMut(&mut FakeChild) + Send + Sync; -pub type BoxedSpawnFn = Box; +type SpawnFn = dyn FnMut(&mut FakeChild) + Send + Sync; +type BoxedSpawnFn = Box; /// Handler called when awaiting a fake child process -pub type WaitFn = dyn FnMut(&mut FakeChild) -> Output + Send + Sync; -pub type BoxedWaitFn = Box; +type WaitFn = dyn FnMut(&mut FakeChild) -> Output + Send + Sync; +type BoxedWaitFn = Box; pub(crate) struct FakeExecutorInner { log: Logger, @@ -193,163 +119,6 @@ impl Executor for FakeExecutor { } } -pub struct HostExecutor { - log: slog::Logger, - counter: std::sync::atomic::AtomicU64, -} - -impl HostExecutor { - pub fn new(log: Logger) -> Arc { - Arc::new(Self { log, counter: AtomicU64::new(0) }) - } - - pub fn as_executor(self: Arc) -> BoxedExecutor { - self - } - - fn prepare(&self, command: &Command) -> u64 { - let id = self.counter.fetch_add(1, Ordering::SeqCst); - log_input(&self.log, id, command); - id - } - - fn finalize( - &self, - command: &Command, - id: u64, - output: Output, - ) -> Result { - log_output(&self.log, id, &output); - if !output.status.success() { - return Err(ExecutionError::from_output(command, &output)); - } - Ok(output) - } -} - -#[async_trait] -impl Executor for HostExecutor { - async fn execute_async( - &self, - command: &mut tokio::process::Command, - ) -> Result { - let id = self.prepare(command.as_std()); - let output = 
command.output().await.map_err(|err| { - error!(self.log, "Could not start program asynchronously!"; "id" => id); - ExecutionError::ExecutionStart { - command: Input::from(command.as_std()).to_string(), - err, - } - })?; - self.finalize(command.as_std(), id, output) - } - - fn execute(&self, command: &mut Command) -> Result { - let id = self.prepare(command); - let output = command.output().map_err(|err| { - error!(self.log, "Could not start program!"; "id" => id); - ExecutionError::ExecutionStart { - command: Input::from(&*command).to_string(), - err, - } - })?; - self.finalize(command, id, output) - } - - fn spawn( - &self, - command: &mut Command, - ) -> Result { - let command_str = (&*command).into_str(); - Ok(Box::new(SpawnedChild { - child: Some( - command - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|err| ExecutionError::ExecutionStart { - command: command_str.clone(), - err, - })?, - ), - command_str, - })) - } -} - -/// A wrapper around a spawned [Child] process. -pub type BoxedChild = Box; - -/// A child process spawned by the executor. -pub trait Child: Send { - /// Accesses the stdin of the spawned child, as a Writer. - fn take_stdin(&mut self) -> Option>; - - /// Accesses the stdout of the spawned child, as a Reader. - fn take_stdout(&mut self) -> Option>; - - /// Accesses the stderr of the spawned child, as a Reader. - fn take_stderr(&mut self) -> Option>; - - /// OS-assigned PID identifier for the child - fn id(&self) -> u32; - - /// Waits for the child to complete, and returns the output. - fn wait(self: Box) -> Result; -} - -/// A real, host-controlled child process -pub struct SpawnedChild { - command_str: String, - child: Option, -} - -impl Child for SpawnedChild { - fn take_stdin(&mut self) -> Option> { - self.child - .as_mut()? - .stdin - .take() - .map(|s| Box::new(s) as Box) - } - - fn take_stdout(&mut self) -> Option> { - self.child - .as_mut()? 
- .stdout - .take() - .map(|s| Box::new(s) as Box) - } - - fn take_stderr(&mut self) -> Option> { - self.child - .as_mut()? - .stderr - .take() - .map(|s| Box::new(s) as Box) - } - - fn id(&self) -> u32 { - self.child.as_ref().expect("No child").id() - } - - fn wait(mut self: Box) -> Result { - let output = - self.child.take().unwrap().wait_with_output().map_err(|err| { - ExecutionError::ExecutionStart { - command: self.command_str.clone(), - err, - } - })?; - - if !output.status.success() { - return Err(ExecutionError::from_output(self.command_str, &output)); - } - - Ok(output) - } -} - /// A child spawned by a [FakeExecutor]. pub struct FakeChild { id: u64, diff --git a/illumos-utils/src/host/executor/mod.rs b/illumos-utils/src/host/executor/mod.rs new file mode 100644 index 0000000000..a96f5132fc --- /dev/null +++ b/illumos-utils/src/host/executor/mod.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod executor; +pub mod fake; +pub mod real; + +pub use executor::{BoxedChild, BoxedExecutor, Child, Executor}; diff --git a/illumos-utils/src/host/executor/real.rs b/illumos-utils/src/host/executor/real.rs new file mode 100644 index 0000000000..c2d610c341 --- /dev/null +++ b/illumos-utils/src/host/executor/real.rs @@ -0,0 +1,159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A "real" [Executor] implementation, which sends commands to the host. 
+ +use super::executor::{ + log_input, log_output, BoxedChild, BoxedExecutor, Child, Executor, +}; + +use crate::host::{ + error::{AsCommandStr, ExecutionError}, + input::Input, + output::Output, +}; + +use async_trait::async_trait; +use slog::{error, Logger}; +use std::io::{Read, Write}; +use std::process::{Command, Stdio}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + +/// Implements [Executor] by running commands against the host system. +pub struct HostExecutor { + log: slog::Logger, + counter: std::sync::atomic::AtomicU64, +} + +impl HostExecutor { + pub fn new(log: Logger) -> Arc { + Arc::new(Self { log, counter: AtomicU64::new(0) }) + } + + pub fn as_executor(self: Arc) -> BoxedExecutor { + self + } + + fn prepare(&self, command: &Command) -> u64 { + let id = self.counter.fetch_add(1, Ordering::SeqCst); + log_input(&self.log, id, command); + id + } + + fn finalize( + &self, + command: &Command, + id: u64, + output: Output, + ) -> Result { + log_output(&self.log, id, &output); + if !output.status.success() { + return Err(ExecutionError::from_output(command, &output)); + } + Ok(output) + } +} + +#[async_trait] +impl Executor for HostExecutor { + async fn execute_async( + &self, + command: &mut tokio::process::Command, + ) -> Result { + let id = self.prepare(command.as_std()); + let output = command.output().await.map_err(|err| { + error!(self.log, "Could not start program asynchronously!"; "id" => id); + ExecutionError::ExecutionStart { + command: Input::from(command.as_std()).to_string(), + err, + } + })?; + self.finalize(command.as_std(), id, output) + } + + fn execute(&self, command: &mut Command) -> Result { + let id = self.prepare(command); + let output = command.output().map_err(|err| { + error!(self.log, "Could not start program!"; "id" => id); + ExecutionError::ExecutionStart { + command: Input::from(&*command).to_string(), + err, + } + })?; + self.finalize(command, id, output) + } + + fn spawn( + &self, + command: &mut Command, 
+ ) -> Result { + let command_str = (&*command).into_str(); + Ok(Box::new(SpawnedChild { + child: Some( + command + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|err| ExecutionError::ExecutionStart { + command: command_str.clone(), + err, + })?, + ), + command_str, + })) + } +} + +/// A real, host-controlled child process +pub struct SpawnedChild { + command_str: String, + child: Option, +} + +impl Child for SpawnedChild { + fn take_stdin(&mut self) -> Option> { + self.child + .as_mut()? + .stdin + .take() + .map(|s| Box::new(s) as Box) + } + + fn take_stdout(&mut self) -> Option> { + self.child + .as_mut()? + .stdout + .take() + .map(|s| Box::new(s) as Box) + } + + fn take_stderr(&mut self) -> Option> { + self.child + .as_mut()? + .stderr + .take() + .map(|s| Box::new(s) as Box) + } + + fn id(&self) -> u32 { + self.child.as_ref().expect("No child").id() + } + + fn wait(mut self: Box) -> Result { + let output = + self.child.take().unwrap().wait_with_output().map_err(|err| { + ExecutionError::ExecutionStart { + command: self.command_str.clone(), + err, + } + })?; + + if !output.status.success() { + return Err(ExecutionError::from_output(self.command_str, &output)); + } + + Ok(output) + } +} diff --git a/illumos-utils/src/host/mod.rs b/illumos-utils/src/host/mod.rs index b347787f42..f86754f7ac 100644 --- a/illumos-utils/src/host/mod.rs +++ b/illumos-utils/src/host/mod.rs @@ -13,8 +13,6 @@ mod output; pub const PFEXEC: &str = "/usr/bin/pfexec"; pub use error::ExecutionError; -pub use executor::{ - BoxedExecutor, CommandSequence, FakeExecutor, HostExecutor, -}; +pub use executor::*; pub use input::Input; pub use output::{Output, OutputExt}; diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index 90f0dcc451..cd29c26943 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -259,7 +259,7 @@ impl Deletable for VnicDestruction { mod test { use super::*; use crate::dladm::Etherstub; - use 
crate::host::FakeExecutor; + use crate::host::fake::FakeExecutor; use omicron_test_utils::dev; #[tokio::test] diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 790797333b..8a56dfef92 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -1086,18 +1086,22 @@ pub enum InstallZoneError { err: crate::dladm::CreateVnicError, }, - #[error("Failed to install zone '{zone}' from '{image_path}': {err}")] - InstallZone { - zone: String, - image_path: Utf8PathBuf, - #[source] - err: crate::zone::AdmError, - }, + #[error(transparent)] + InstallZone(Box), #[error("Failed to find zone image '{image}' from {paths:?}")] ImageNotFound { image: String, paths: Vec }, } +#[derive(thiserror::Error, Debug)] +#[error("Failed to install zone '{zone}' from '{image_path}': {err}")] +pub struct InstallFailure { + zone: String, + image_path: Utf8PathBuf, + #[source] + err: crate::zone::AdmError, +} + pub struct InstalledZone { log: Logger, @@ -1231,10 +1235,12 @@ impl InstalledZone { limit_priv, ) .await - .map_err(|err| InstallZoneError::InstallZone { - zone: full_zone_name.to_string(), - image_path: zone_image_path.clone(), - err, + .map_err(|err| { + InstallZoneError::InstallZone(Box::new(InstallFailure { + zone: full_zone_name.to_string(), + image_path: zone_image_path.clone(), + err, + })) })?; Ok(InstalledZone { diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index 1510d4829f..e12d75a6ee 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -54,7 +54,7 @@ pub enum Operation { } #[derive(thiserror::Error, Debug)] -#[error("{0}")] +#[error(transparent)] enum AdmErrorVariant { Execution(#[from] ExecutionError), Adm(#[from] zone::ZoneError), @@ -902,7 +902,8 @@ impl Zones { #[cfg(test)] mod tests { use super::*; - use crate::host::{CommandSequence, FakeExecutor, Input, OutputExt}; + use crate::host::fake::{CommandSequence, FakeExecutor}; + use crate::host::{Input, OutputExt}; use 
omicron_test_utils::dev; use std::process::Output; diff --git a/installinator/src/main.rs b/installinator/src/main.rs index bd4b4202b2..677c82d369 100644 --- a/installinator/src/main.rs +++ b/installinator/src/main.rs @@ -5,7 +5,7 @@ use std::error::Error; use clap::Parser; -use illumos_utils::host::HostExecutor; +use illumos_utils::host::real::HostExecutor; use installinator::InstallinatorApp; #[tokio::main] diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 90cf0bc489..1980ce2af8 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -1161,7 +1161,8 @@ mod tests { let engine = UpdateEngine::new(&logctx.log, event_sender); let log = logctx.log.clone(); let executor = - illumos_utils::host::FakeExecutor::new(log.clone()).as_executor(); + illumos_utils::host::fake::FakeExecutor::new(log.clone()) + .as_executor(); engine .new_step( InstallinatorComponent::Both, diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 4c27282736..edc40c1443 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -7,7 +7,8 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; -use illumos_utils::host::{BoxedExecutor, HostExecutor}; +use illumos_utils::host::real::HostExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::target::KnownTarget; diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 5f4451ce87..51296b2e78 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -25,7 +25,8 @@ use futures::StreamExt; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm; use illumos_utils::dladm::Dladm; -use illumos_utils::host::{BoxedExecutor, HostExecutor}; +use 
illumos_utils::host::real::HostExecutor; +use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs; use illumos_utils::zfs::Zfs; use illumos_utils::zone; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4e63509d93..2a0aeeed7a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2865,7 +2865,8 @@ mod test { Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - host::{CommandSequence, FakeExecutor, Input, Output, OutputExt}, + host::fake::{CommandSequence, FakeExecutor}, + host::{Input, Output, OutputExt}, zone::{ZLOGIN, ZONEADM, ZONECFG}, }; use key_manager::{ diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index 41f8c09954..5035023efc 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -11,7 +11,7 @@ use camino_tempfile::Utf8TempDir; use clap::Parser; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; -use illumos_utils::host::FakeExecutor; +use illumos_utils::host::fake::FakeExecutor; use installinator::HOST_PHASE_2_FILE_NAME; use omicron_common::{ api::internal::nexus::KnownArtifactKind, From 846840cce172fde1731c89e63a0cfaf62463614a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 16:45:26 -0700 Subject: [PATCH 50/57] Builder for FakeExecutor --- illumos-utils/src/dladm.rs | 27 +++++----- illumos-utils/src/host/executor/fake.rs | 57 ++++++++++++++++++---- illumos-utils/src/link.rs | 6 +-- illumos-utils/src/zone.rs | 12 +++-- installinator/src/write.rs | 3 +- sled-agent/src/services.rs | 37 ++++++++------ wicketd/tests/integration_tests/updates.rs | 4 +- 7 files changed, 98 insertions(+), 48 deletions(-) diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index 9d43cc2318..d92be9f52e 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -560,7 +560,7 @@ impl Dladm { #[cfg(test)] 
mod test { use super::*; - use crate::host::fake::{CommandSequence, FakeExecutor}; + use crate::host::fake::{CommandSequence, FakeExecutorBuilder}; use crate::host::{Input, OutputExt}; use omicron_test_utils::dev; use std::process::Output; @@ -574,8 +574,9 @@ mod test { handler .expect_ok(format!("{PFEXEC} {DLADM} create-etherstub -t mystub1")); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let etherstub = Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") @@ -591,8 +592,9 @@ mod test { let mut handler = CommandSequence::new(); handler.expect_ok(format!("{PFEXEC} {DLADM} show-etherstub mystub1")); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let etherstub = Dladm::ensure_etherstub(&executor.as_executor(), "mystub1") @@ -613,8 +615,9 @@ mod test { handler.expect_ok(format!( "{PFEXEC} {DLADM} show-vnic {UNDERLAY_ETHERSTUB_VNIC_NAME}" )); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let executor = &executor.as_executor(); let etherstub = @@ -645,8 +648,9 @@ mod test { "{PFEXEC} {DLADM} set-linkprop -t -p mtu=9000 \ {UNDERLAY_ETHERSTUB_VNIC_NAME}" )); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let executor = &executor.as_executor(); let etherstub = @@ -669,8 +673,9 @@ mod test { "oxVnic\nvopteVnic\nInvalid\noxBootstrapVnic\nInvalid", ), ); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + 
.with_sequence(handler) + .build(); let executor = &executor.as_executor(); let vnics = Dladm::get_vnics(executor).expect("Failed to get VNICs"); diff --git a/illumos-utils/src/host/executor/fake.rs b/illumos-utils/src/host/executor/fake.rs index 98a7645255..5694c170ef 100644 --- a/illumos-utils/src/host/executor/fake.rs +++ b/illumos-utils/src/host/executor/fake.rs @@ -28,6 +28,46 @@ type BoxedSpawnFn = Box; type WaitFn = dyn FnMut(&mut FakeChild) -> Output + Send + Sync; type BoxedWaitFn = Box; +pub struct FakeExecutorBuilder { + log: Logger, + spawn_handler: Option, + wait_handler: Option, +} + +impl FakeExecutorBuilder { + pub fn new(log: Logger) -> Self { + Self { log, spawn_handler: None, wait_handler: None } + } + + pub fn spawn_handler(mut self, f: BoxedSpawnFn) -> Self { + self.spawn_handler = Some(f); + self + } + + pub fn wait_handler(mut self, f: BoxedWaitFn) -> Self { + self.wait_handler = Some(f); + self + } + + /// Convenience function to register the sequence with a [FakeExecutor]. 
+ pub fn with_sequence(mut self, mut sequence: CommandSequence) -> Self { + self.wait_handler = + Some(Box::new(move |child: &mut FakeChild| -> Output { + sequence.execute(child.command()) + })); + self + } + + pub fn build(self) -> Arc { + FakeExecutor::new( + self.log, + self.spawn_handler.unwrap_or_else(|| Box::new(|_cmd| ())), + self.wait_handler + .unwrap_or_else(|| Box::new(|_cmd| Output::success())), + ) + } +} + pub(crate) struct FakeExecutorInner { log: Logger, counter: AtomicU64, @@ -41,13 +81,17 @@ pub struct FakeExecutor { } impl FakeExecutor { - pub fn new(log: Logger) -> Arc { + pub fn new( + log: Logger, + s: BoxedSpawnFn, + w: BoxedWaitFn, + ) -> Arc { Arc::new(Self { inner: Arc::new(FakeExecutorInner { log, counter: AtomicU64::new(0), - spawn_handler: Mutex::new(Box::new(|_cmd| ())), - wait_handler: Mutex::new(Box::new(|_cmd| Output::success())), + spawn_handler: Mutex::new(s), + wait_handler: Mutex::new(w), }), }) } @@ -214,13 +258,6 @@ impl CommandSequence { Self { expected: Vec::new(), index: 0 } } - /// Convenience function to register the handler with a [FakeExecutor]. - pub fn register(mut self, executor: &FakeExecutor) { - executor.set_wait_handler(Box::new(move |child| -> Output { - self.execute(child.command()) - })); - } - /// Expects a static "input" to exactly produce some "output". 
pub fn expect(&mut self, input: Input, output: Output) { self.expected.push(HandledCommand::Static { input, output }); diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index cd29c26943..f2c2d78cda 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -259,13 +259,13 @@ impl Deletable for VnicDestruction { mod test { use super::*; use crate::dladm::Etherstub; - use crate::host::fake::FakeExecutor; + use crate::host::fake::FakeExecutorBuilder; use omicron_test_utils::dev; #[tokio::test] async fn test_allocate() { let logctx = dev::test_setup_log("test_allocate"); - let executor = FakeExecutor::new(logctx.log.clone()); + let executor = FakeExecutorBuilder::new(logctx.log.clone()).build(); let allocator = VnicAllocator::new( &executor.as_executor(), "Foo", @@ -280,7 +280,7 @@ mod test { #[tokio::test] async fn test_allocate_within_scopes() { let logctx = dev::test_setup_log("test_allocate_within_scopes"); - let executor = FakeExecutor::new(logctx.log.clone()); + let executor = FakeExecutorBuilder::new(logctx.log.clone()).build(); let allocator = VnicAllocator::new( &executor.as_executor(), "Foo", diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index e12d75a6ee..acd4f01bd1 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -902,7 +902,7 @@ impl Zones { #[cfg(test)] mod tests { use super::*; - use crate::host::fake::{CommandSequence, FakeExecutor}; + use crate::host::fake::{CommandSequence, FakeExecutorBuilder}; use crate::host::{Input, OutputExt}; use omicron_test_utils::dev; use std::process::Output; @@ -946,8 +946,9 @@ mod tests { Output::success(), ); - let executor = FakeExecutor::new(logctx.log.clone()); - handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let datasets = []; let filesystems = []; @@ -990,8 +991,9 @@ mod tests { ) ); - let executor = FakeExecutor::new(logctx.log.clone()); - 
handler.register(&executor); + let executor = FakeExecutorBuilder::new(logctx.log.clone()) + .with_sequence(handler) + .build(); let datasets = []; let filesystems = []; diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 1980ce2af8..af912ac968 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -1161,7 +1161,8 @@ mod tests { let engine = UpdateEngine::new(&logctx.log, event_sender); let log = logctx.log.clone(); let executor = - illumos_utils::host::fake::FakeExecutor::new(log.clone()) + illumos_utils::host::fake::FakeExecutorBuilder::new(log.clone()) + .build() .as_executor(); engine .new_step( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 2a0aeeed7a..4a8f82a8ad 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2865,7 +2865,7 @@ mod test { Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - host::fake::{CommandSequence, FakeExecutor}, + host::fake::{CommandSequence, FakeExecutorBuilder}, host::{Input, Output, OutputExt}, zone::{ZLOGIN, ZONEADM, ZONECFG}, }; @@ -3144,12 +3144,13 @@ mod test { assert_eq!(u2_mountpoints.len(), 1); let u2_mountpoint = &u2_mountpoints[0]; - let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - handler.register(&executor); - let executor = executor.as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()) + .with_sequence(handler) + .build() + .as_executor(); let zone_bundler = ZoneBundler::new(log.clone(), storage.clone(), Default::default()); @@ -3200,12 +3201,13 @@ mod test { assert_eq!(u2_mountpoints.len(), 1); let u2_mountpoint = &u2_mountpoints[0]; - let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - 
handler.register(&executor); - let executor = executor.as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()) + .with_sequence(handler) + .build() + .as_executor(); let zone_bundler = ZoneBundler::new(log.clone(), storage.clone(), Default::default()); @@ -3259,12 +3261,13 @@ mod test { assert_eq!(u2_mountpoints.len(), 1); let u2_mountpoint = &u2_mountpoints[0]; - let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - handler.register(&executor); - let executor = executor.as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()) + .with_sequence(handler) + .build() + .as_executor(); // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -3303,7 +3306,6 @@ mod test { // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. - let executor = FakeExecutor::new(log.clone()); let mut handler = CommandSequence::new(); handler.expect_dynamic(Box::new(|input| -> Output { @@ -3321,8 +3323,10 @@ mod test { Output::success() })); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - handler.register(&executor); - let executor = executor.as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()) + .with_sequence(handler) + .build() + .as_executor(); let mgr = ServiceManager::new( &log, @@ -3373,12 +3377,13 @@ mod test { assert_eq!(u2_mountpoints.len(), 1); let u2_mountpoint = &u2_mountpoints[0]; - let executor = FakeExecutor::new(log.clone()); let id = Uuid::new_v4(); let mut handler = CommandSequence::new(); expect_new_service(&mut handler, &test_config, id, &u2_mountpoint); - handler.register(&executor); - let executor = executor.as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()) + .with_sequence(handler) + .build() + .as_executor(); // First, spin up a ServiceManager, create a new 
service, and tear it // down. diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index 5035023efc..8c6032ff6f 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -11,7 +11,7 @@ use camino_tempfile::Utf8TempDir; use clap::Parser; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; -use illumos_utils::host::fake::FakeExecutor; +use illumos_utils::host::fake::FakeExecutorBuilder; use installinator::HOST_PHASE_2_FILE_NAME; use omicron_common::{ api::internal::nexus::KnownArtifactKind, @@ -245,7 +245,7 @@ async fn test_installinator_fetch() { ]) .expect("installinator args parsed successfully"); - let executor = FakeExecutor::new(log.clone()).as_executor(); + let executor = FakeExecutorBuilder::new(log.clone()).build().as_executor(); args.exec(&log.new(slog::o!("crate" => "installinator")), &executor) .await .expect("installinator succeeded"); From f22548ccd786003f767b018f287db17d530c12da Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 17:01:38 -0700 Subject: [PATCH 51/57] Fix helios tests --- illumos-utils/src/host/executor/fake.rs | 10 ---------- sled-hardware/src/illumos/partitions.rs | 23 +++++++++++++---------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/illumos-utils/src/host/executor/fake.rs b/illumos-utils/src/host/executor/fake.rs index 5694c170ef..ec5664c540 100644 --- a/illumos-utils/src/host/executor/fake.rs +++ b/illumos-utils/src/host/executor/fake.rs @@ -96,16 +96,6 @@ impl FakeExecutor { }) } - /// Set the spawn handler to an arbitrary function. - pub fn set_spawn_handler(&self, f: BoxedSpawnFn) { - *self.inner.spawn_handler.lock().unwrap() = f; - } - - /// Set the request handler to an arbitrary function. - pub fn set_wait_handler(&self, f: BoxedWaitFn) { - *self.inner.wait_handler.lock().unwrap() = f; - } - /// Perform some type coercion to access a commonly-used trait object. 
pub fn as_executor(self: Arc) -> BoxedExecutor { self diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 9a8e1b5049..77cdbaed07 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -158,7 +158,8 @@ mod test { use super::*; use crate::DiskPaths; use camino::Utf8PathBuf; - use illumos_utils::host::{FakeExecutor, Input, OutputExt, PFEXEC}; + use illumos_utils::host::fake::{FakeChild, FakeExecutorBuilder}; + use illumos_utils::host::{Input, OutputExt, PFEXEC}; use illumos_utils::zpool::{ZpoolKind, ZPOOL}; use omicron_test_utils::dev::test_setup_log; use std::path::Path; @@ -192,7 +193,7 @@ mod test { "ensure_partition_layout_u2_no_format_without_dev_path", ); let log = &logctx.log; - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = Utf8PathBuf::from("/devfs/path"); let result = internal_ensure_partition_layout::( @@ -217,10 +218,9 @@ mod test { let devfs_path = Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; - let executor = FakeExecutor::new(log.clone()); let mut calls = 0; let mut zpool_name = None; - executor.set_wait_handler(Box::new(move |child| -> Output { + let wait_handler = Box::new(move |child: &mut FakeChild| -> Output { let input = Input::from(child.command()); assert_eq!(input.program, PFEXEC); @@ -252,7 +252,10 @@ mod test { }; calls += 1; Output::success() - })); + }); + let executor = FakeExecutorBuilder::new(log.clone()) + .wait_handler(wait_handler) + .build(); let partitions = internal_ensure_partition_layout::( &log, @@ -275,7 +278,7 @@ mod test { fn ensure_partition_layout_m2_cannot_format() { let logctx = test_setup_log("ensure_partition_layout_m2_cannot_format"); let log = &logctx.log.clone(); - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = 
Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; @@ -314,7 +317,7 @@ mod test { let logctx = test_setup_log("ensure_partition_layout_u2_with_expected_format"); let log = &logctx.log; - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; @@ -358,7 +361,7 @@ mod test { let logctx = test_setup_log("ensure_partition_layout_m2_with_expected_format"); let log = &logctx.log; - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; @@ -398,7 +401,7 @@ mod test { let logctx = test_setup_log("ensure_partition_layout_m2_fails_with_empty_gpt"); let log = &logctx.log; - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; @@ -425,7 +428,7 @@ mod test { let logctx = test_setup_log("ensure_partition_layout_u2_fails_with_empty_gpt"); let log = &logctx.log; - let executor = FakeExecutor::new(log.clone()); + let executor = FakeExecutorBuilder::new(log.clone()).build(); let devfs_path = Utf8PathBuf::from("/devfs/path"); const DEV_PATH: &'static str = "/dev/path"; From ad7e3a4459cb98787de052178997968348a040c6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 18 Aug 2023 17:12:05 -0700 Subject: [PATCH 52/57] Fix docs --- illumos-utils/src/host/executor/fake.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/illumos-utils/src/host/executor/fake.rs b/illumos-utils/src/host/executor/fake.rs index ec5664c540..80dea1e9c5 100644 --- a/illumos-utils/src/host/executor/fake.rs +++ b/illumos-utils/src/host/executor/fake.rs @@ -237,7 +237,7 @@ enum HandledCommand { /// A handler that may be used 
for setting inputs/outputs to the executor /// when these commands are known ahead-of-time. /// -/// See: [Self::register] for integration with a [FakeExecutor]. +/// See: [FakeExecutorBuilder::with_sequence] for integration with a [FakeExecutor]. pub struct CommandSequence { expected: Vec, index: usize, From 5272ca9aa5e80450abe8cba64f43c1179ecaa2bb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 21 Aug 2023 16:06:58 -0700 Subject: [PATCH 53/57] Split into helios-fusion, helios-tokamak, helios-protostar crates --- Cargo.lock | 75 +++++++++++++++++++ Cargo.toml | 9 +++ helios/README.adoc | 13 ++++ helios/fusion/Cargo.toml | 14 ++++ .../src/host => helios/fusion/src}/error.rs | 0 .../fusion/src}/executor.rs | 6 +- .../src/host => helios/fusion/src}/input.rs | 0 .../host/mod.rs => helios/fusion/src/lib.rs | 13 ++-- .../src/host => helios/fusion/src}/output.rs | 0 helios/protostar/Cargo.toml | 29 +++++++ helios/protostar/README.adoc | 22 ++++++ .../real.rs => helios/protostar/src/lib.rs | 11 +-- helios/tokamak/Cargo.toml | 29 +++++++ helios/tokamak/README.adoc | 16 ++++ .../host => helios/tokamak/src}/byte_queue.rs | 2 +- .../fake.rs => helios/tokamak/src/executor.rs | 13 ++-- .../mod.rs => helios/tokamak/src/lib.rs | 5 +- illumos-utils/Cargo.toml | 2 + illumos-utils/src/dladm.rs | 6 +- illumos-utils/src/dumpadm.rs | 2 +- illumos-utils/src/fstyp.rs | 2 +- illumos-utils/src/lib.rs | 1 - illumos-utils/src/link.rs | 5 +- illumos-utils/src/opte/port.rs | 2 +- illumos-utils/src/opte/port_manager.rs | 2 +- illumos-utils/src/running_zone.rs | 6 +- illumos-utils/src/svc.rs | 2 +- illumos-utils/src/zfs.rs | 2 +- illumos-utils/src/zone.rs | 6 +- illumos-utils/src/zpool.rs | 2 +- installinator/Cargo.toml | 3 + installinator/src/bootstrap.rs | 2 +- installinator/src/dispatch.rs | 2 +- installinator/src/hardware.rs | 2 +- installinator/src/main.rs | 2 +- installinator/src/write.rs | 9 +-- package/Cargo.toml | 2 + package/src/bin/omicron-package.rs | 4 +- sled-agent/Cargo.toml 
| 3 + sled-agent/src/bootstrap/pre_server.rs | 7 +- sled-agent/src/bootstrap/server.rs | 6 +- sled-agent/src/config.rs | 2 +- sled-agent/src/instance.rs | 2 +- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/server.rs | 2 +- sled-agent/src/services.rs | 8 +- sled-agent/src/sled_agent.rs | 6 +- sled-agent/src/storage/dump_setup.rs | 2 +- sled-agent/src/storage_manager.rs | 2 +- sled-agent/src/swap_device.rs | 2 +- sled-hardware/Cargo.toml | 2 + sled-hardware/src/cleanup.rs | 2 +- sled-hardware/src/disk.rs | 2 +- sled-hardware/src/illumos/partitions.rs | 6 +- sled-hardware/src/non_illumos/mod.rs | 2 +- sled-hardware/src/underlay.rs | 4 +- wicketd/Cargo.toml | 1 + wicketd/tests/integration_tests/updates.rs | 2 +- 58 files changed, 298 insertions(+), 90 deletions(-) create mode 100644 helios/README.adoc create mode 100644 helios/fusion/Cargo.toml rename {illumos-utils/src/host => helios/fusion/src}/error.rs (100%) rename {illumos-utils/src/host/executor => helios/fusion/src}/executor.rs (94%) rename {illumos-utils/src/host => helios/fusion/src}/input.rs (100%) rename illumos-utils/src/host/mod.rs => helios/fusion/src/lib.rs (66%) rename {illumos-utils/src/host => helios/fusion/src}/output.rs (100%) create mode 100644 helios/protostar/Cargo.toml create mode 100644 helios/protostar/README.adoc rename illumos-utils/src/host/executor/real.rs => helios/protostar/src/lib.rs (95%) create mode 100644 helios/tokamak/Cargo.toml create mode 100644 helios/tokamak/README.adoc rename {illumos-utils/src/host => helios/tokamak/src}/byte_queue.rs (97%) rename illumos-utils/src/host/executor/fake.rs => helios/tokamak/src/executor.rs (98%) rename illumos-utils/src/host/executor/mod.rs => helios/tokamak/src/lib.rs (70%) diff --git a/Cargo.lock b/Cargo.lock index a4defa6d79..65942d9162 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3021,6 +3021,68 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "helios-fusion" +version = "0.1.0" +dependencies = [ + "async-trait", + "itertools 0.10.5", + "shlex", + "slog", + "thiserror", + "tokio", +] + +[[package]] +name = "helios-protostar" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "camino", + "cfg-if 1.0.0", + "futures", + "helios-fusion", + "itertools 0.10.5", + "libc", + "omicron-common 0.1.0", + "omicron-test-utils", + "schemars", + "serde", + "shlex", + "slog", + "smf", + "thiserror", + "tokio", + "uuid", + "zone", +] + +[[package]] +name = "helios-tokamak" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "camino", + "cfg-if 1.0.0", + "futures", + "helios-fusion", + "itertools 0.10.5", + "libc", + "omicron-common 0.1.0", + "omicron-test-utils", + "schemars", + "serde", + "shlex", + "slog", + "smf", + "thiserror", + "tokio", + "uuid", + "zone", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -3375,6 +3437,8 @@ dependencies = [ "cfg-if 1.0.0", "debug-ignore", "futures", + "helios-fusion", + "helios-tokamak", "ipnetwork", "itertools 0.10.5", "libc", @@ -3473,6 +3537,9 @@ dependencies = [ "ddm-admin-client", "display-error-chain", "futures", + "helios-fusion", + "helios-protostar", + "helios-tokamak", "hex", "hex-literal", "http", @@ -5023,6 +5090,8 @@ dependencies = [ "clap 4.3.21", "expectorate", "futures", + "helios-fusion", + "helios-protostar", "hex", "illumos-utils", "indicatif", @@ -5112,6 +5181,9 @@ dependencies = [ "futures", "gateway-client", "glob", + "helios-fusion", + "helios-protostar", + "helios-tokamak", "http", "hyper", "hyper-staticfile", @@ -7747,6 +7819,8 @@ dependencies = [ "camino", "cfg-if 1.0.0", "futures", + "helios-fusion", + "helios-tokamak", "illumos-devinfo", "illumos-utils", "key-manager", @@ -9702,6 +9776,7 @@ dependencies = [ "gateway-client", "gateway-messages", "gateway-test-utils", + "helios-tokamak", "hex", "http", "hubtools", diff --git 
a/Cargo.toml b/Cargo.toml index ef0072ce82..04e3db6e10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,9 @@ members = [ "sled-hardware", "sp-sim", "test-utils", + "helios/fusion", + "helios/protostar", + "helios/tokamak", "tufaceous-lib", "tufaceous", "update-engine", @@ -113,6 +116,9 @@ default-members = [ "sled-hardware", "sp-sim", "test-utils", + "helios/fusion", + "helios/protostar", + "helios/tokamak", "tufaceous", "tufaceous-lib", "update-engine", @@ -187,6 +193,9 @@ gateway-test-utils = { path = "gateway-test-utils" } glob = "0.3.1" headers = "0.3.8" heck = "0.4" +helios-fusion = { path = "helios/fusion" } +helios-protostar = { path = "helios/protostar" } +helios-tokamak = { path = "helios/tokamak" } hex = "0.4.3" hex-literal = "0.3.4" hkdf = "0.12.3" diff --git a/helios/README.adoc b/helios/README.adoc new file mode 100644 index 0000000000..3531f73705 --- /dev/null +++ b/helios/README.adoc @@ -0,0 +1,13 @@ +:showtitle: +:toc: left +:icons: font + += helios + +This directory describes various interfaces for acting upon a Helios system. + +* `fusion` describes an interface for interacting with the underlying OS. +* `tokamak` provides a fake implementation of `fusion` - not enough +to be virtualized, but enough to test code depending on `fusion` under test. +* `protostar` provides a real implementation of `fusion`, which should actually +make calls to the underlying OS. 
diff --git a/helios/fusion/Cargo.toml b/helios/fusion/Cargo.toml new file mode 100644 index 0000000000..bac3d9c7a2 --- /dev/null +++ b/helios/fusion/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "helios-fusion" +description = "Interface to access an underlying Helios system" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +async-trait.workspace = true +itertools.workspace = true +shlex.workspace = true +slog.workspace = true +thiserror.workspace = true +tokio.workspace = true diff --git a/illumos-utils/src/host/error.rs b/helios/fusion/src/error.rs similarity index 100% rename from illumos-utils/src/host/error.rs rename to helios/fusion/src/error.rs diff --git a/illumos-utils/src/host/executor/executor.rs b/helios/fusion/src/executor.rs similarity index 94% rename from illumos-utils/src/host/executor/executor.rs rename to helios/fusion/src/executor.rs index c02694541e..5a28f29519 100644 --- a/illumos-utils/src/host/executor/executor.rs +++ b/helios/fusion/src/executor.rs @@ -4,7 +4,7 @@ //! Interfaces used to interact with the underlying host system. 
-use crate::host::{error::ExecutionError, input::Input, output::Output}; +use crate::{error::ExecutionError, input::Input, output::Output}; use async_trait::async_trait; use itertools::Itertools; @@ -26,7 +26,7 @@ where .collect::() } -pub(super) fn log_input(log: &Logger, id: u64, command: &Command) { +pub fn log_input(log: &Logger, id: u64, command: &Command) { info!( log, "running command via executor"; "id" => id, "command" => %Input::from(command) @@ -37,7 +37,7 @@ pub(super) fn log_input(log: &Logger, id: u64, command: &Command) { ); } -pub(super) fn log_output(log: &Logger, id: u64, output: &Output) { +pub fn log_output(log: &Logger, id: u64, output: &Output) { info!( log, "finished running command via executor"; diff --git a/illumos-utils/src/host/input.rs b/helios/fusion/src/input.rs similarity index 100% rename from illumos-utils/src/host/input.rs rename to helios/fusion/src/input.rs diff --git a/illumos-utils/src/host/mod.rs b/helios/fusion/src/lib.rs similarity index 66% rename from illumos-utils/src/host/mod.rs rename to helios/fusion/src/lib.rs index f86754f7ac..3fc9d5a172 100644 --- a/illumos-utils/src/host/mod.rs +++ b/helios/fusion/src/lib.rs @@ -2,17 +2,16 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Utilities to either access or emulate a host system +//! Interfaces used to interact with the underlying host system. 
-mod byte_queue; mod error; mod executor; mod input; mod output; -pub const PFEXEC: &str = "/usr/bin/pfexec"; - -pub use error::ExecutionError; +pub use error::*; pub use executor::*; -pub use input::Input; -pub use output::{Output, OutputExt}; +pub use input::*; +pub use output::*; + +pub const PFEXEC: &str = "/usr/bin/pfexec"; diff --git a/illumos-utils/src/host/output.rs b/helios/fusion/src/output.rs similarity index 100% rename from illumos-utils/src/host/output.rs rename to helios/fusion/src/output.rs diff --git a/helios/protostar/Cargo.toml b/helios/protostar/Cargo.toml new file mode 100644 index 0000000000..a2fbdf7f82 --- /dev/null +++ b/helios/protostar/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "helios-protostar" +description = "Utilities to interact with a real Helios system" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +camino.workspace = true +cfg-if.workspace = true +futures.workspace = true +helios-fusion.workspace = true +itertools.workspace = true +libc.workspace = true +omicron-common.workspace = true +schemars.workspace = true +serde.workspace = true +shlex.workspace = true +slog.workspace = true +smf.workspace = true +thiserror.workspace = true +tokio.workspace = true +uuid.workspace = true +zone.workspace = true + +[dev-dependencies] +omicron-test-utils.workspace = true diff --git a/helios/protostar/README.adoc b/helios/protostar/README.adoc new file mode 100644 index 0000000000..591447e05c --- /dev/null +++ b/helios/protostar/README.adoc @@ -0,0 +1,22 @@ +:showtitle: +:toc: left +:icons: font + += protostar + +[TIP] +A protostar is one of the earliest stages in a star's lifecycle, which gathers +mass before eventually igniting and becoming a main-sequence star. When this +happens, a star generates energy via fusion. + +`protostar` is an implementation of the `fusion` interface which is designed to +execute on a real Helios system. 
This interface, while necessary, is notoriously +difficult to test. Although we encourage the addition of arbitrary host-interfacing +calls here, we recommend the following: + +1. Keeping them as simple as possible, with the understanding that code within +this crate can only sufficiently be tested via end-to-end tests. +2. When adding new interfaces to the host, access them via the `fusion` interface, +and provide a "fake" implementation in the `tokamak` crate. This will allow callers +integrating with host interfaces to write tests, and simulate the underlying system +behaviors. diff --git a/illumos-utils/src/host/executor/real.rs b/helios/protostar/src/lib.rs similarity index 95% rename from illumos-utils/src/host/executor/real.rs rename to helios/protostar/src/lib.rs index c2d610c341..5ad75e4f01 100644 --- a/illumos-utils/src/host/executor/real.rs +++ b/helios/protostar/src/lib.rs @@ -4,14 +4,9 @@ //! A "real" [Executor] implementation, which sends commands to the host. -use super::executor::{ - log_input, log_output, BoxedChild, BoxedExecutor, Child, Executor, -}; - -use crate::host::{ - error::{AsCommandStr, ExecutionError}, - input::Input, - output::Output, +use helios_fusion::{ + log_input, log_output, AsCommandStr, BoxedChild, BoxedExecutor, Child, + ExecutionError, Executor, Input, Output, }; use async_trait::async_trait; diff --git a/helios/tokamak/Cargo.toml b/helios/tokamak/Cargo.toml new file mode 100644 index 0000000000..c09c7e37c8 --- /dev/null +++ b/helios/tokamak/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "helios-tokamak" +description = "Utilities to create a fake Helios system" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +camino.workspace = true +cfg-if.workspace = true +futures.workspace = true +helios-fusion.workspace = true +itertools.workspace = true +libc.workspace = true +omicron-common.workspace = true +schemars.workspace = true +serde.workspace = 
true +shlex.workspace = true +slog.workspace = true +smf.workspace = true +thiserror.workspace = true +tokio.workspace = true +uuid.workspace = true +zone.workspace = true + +[dev-dependencies] +omicron-test-utils.workspace = true diff --git a/helios/tokamak/README.adoc b/helios/tokamak/README.adoc new file mode 100644 index 0000000000..54cde184c4 --- /dev/null +++ b/helios/tokamak/README.adoc @@ -0,0 +1,16 @@ +:showtitle: +:toc: left +:icons: font + += tokamak + +[TIP] +A tokamak is a magnetic confinement fusion device which can be used to +produce nuclear fusion reactions. In other words, it's one of our tools for +creating an "artificial sun". + +`tokamak` is a toolkit implementing the `fusion` interface which allows callers +to run an emulated Helios system. It does not intend to operate as a virtual +machine, nor run any real workloads: the purpose of this crate is to give +callers an opportunity to test very specific interfaces against the underlying +host, without actually requiring a host that has undergone that behavior. diff --git a/illumos-utils/src/host/byte_queue.rs b/helios/tokamak/src/byte_queue.rs similarity index 97% rename from illumos-utils/src/host/byte_queue.rs rename to helios/tokamak/src/byte_queue.rs index 372ca20f99..3ea1f686c3 100644 --- a/illumos-utils/src/host/byte_queue.rs +++ b/helios/tokamak/src/byte_queue.rs @@ -10,7 +10,7 @@ use std::sync::{Arc, Mutex}; /// /// This is primarily used to emulate stdin / stdout / stderr. #[derive(Clone)] -pub(crate) struct ByteQueue { +pub struct ByteQueue { buf: Arc>>, } diff --git a/illumos-utils/src/host/executor/fake.rs b/helios/tokamak/src/executor.rs similarity index 98% rename from illumos-utils/src/host/executor/fake.rs rename to helios/tokamak/src/executor.rs index 80dea1e9c5..57a7405ea2 100644 --- a/illumos-utils/src/host/executor/fake.rs +++ b/helios/tokamak/src/executor.rs @@ -4,16 +4,13 @@ //! A "fake" [Executor] implementation, which can respond to host requests.
-use super::executor::{ - log_input, log_output, BoxedChild, BoxedExecutor, Child, Executor, -}; - -use crate::host::{ - byte_queue::ByteQueue, error::ExecutionError, input::Input, output::Output, - output::OutputExt, -}; +use crate::byte_queue::ByteQueue; use async_trait::async_trait; +use helios_fusion::{ + log_input, log_output, BoxedChild, BoxedExecutor, Child, ExecutionError, + Executor, Input, Output, OutputExt, +}; use slog::Logger; use std::io::{Read, Write}; use std::process::Command; diff --git a/illumos-utils/src/host/executor/mod.rs b/helios/tokamak/src/lib.rs similarity index 70% rename from illumos-utils/src/host/executor/mod.rs rename to helios/tokamak/src/lib.rs index a96f5132fc..c6f48aadf3 100644 --- a/illumos-utils/src/host/executor/mod.rs +++ b/helios/tokamak/src/lib.rs @@ -2,8 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +mod byte_queue; mod executor; -pub mod fake; -pub mod real; -pub use executor::{BoxedChild, BoxedExecutor, Child, Executor}; +pub use executor::*; diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 1e9e2cf3f9..2d85c8a5fd 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -14,6 +14,7 @@ camino.workspace = true cfg-if.workspace = true debug-ignore.workspace = true futures.workspace = true +helios-fusion.workspace = true ipnetwork.workspace = true itertools.workspace = true libc.workspace = true @@ -34,6 +35,7 @@ zone.workspace = true opte-ioctl.workspace = true [dev-dependencies] +helios-tokamak.workspace = true omicron-test-utils.workspace = true regress.workspace = true serde_json.workspace = true diff --git a/illumos-utils/src/dladm.rs b/illumos-utils/src/dladm.rs index d92be9f52e..e42add426a 100644 --- a/illumos-utils/src/dladm.rs +++ b/illumos-utils/src/dladm.rs @@ -4,9 +4,9 @@ //! Utilities for poking at data links. 
-use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::link::{Link, LinkKind}; use crate::zone::IPADM; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; use omicron_common::vlan::VlanID; use serde::{Deserialize, Serialize}; @@ -560,8 +560,8 @@ impl Dladm { #[cfg(test)] mod test { use super::*; - use crate::host::fake::{CommandSequence, FakeExecutorBuilder}; - use crate::host::{Input, OutputExt}; + use helios_fusion::{Input, OutputExt}; + use helios_tokamak::{CommandSequence, FakeExecutorBuilder}; use omicron_test_utils::dev; use std::process::Output; diff --git a/illumos-utils/src/dumpadm.rs b/illumos-utils/src/dumpadm.rs index 89c4003e40..fb882f72e1 100644 --- a/illumos-utils/src/dumpadm.rs +++ b/illumos-utils/src/dumpadm.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::host::{BoxedExecutor, ExecutionError}; +use helios_fusion::{BoxedExecutor, ExecutionError}; use byteorder::{LittleEndian, ReadBytesExt}; use camino::Utf8PathBuf; diff --git a/illumos-utils/src/fstyp.rs b/illumos-utils/src/fstyp.rs index c16cba91fb..41d0317f1d 100644 --- a/illumos-utils/src/fstyp.rs +++ b/illumos-utils/src/fstyp.rs @@ -4,9 +4,9 @@ //! Helper for calling fstyp. 
-use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use crate::zpool::ZpoolName; use camino::Utf8Path; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use std::str::FromStr; const FSTYP: &str = "/usr/sbin/fstyp"; diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 23cc379fef..d1375224cc 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -11,7 +11,6 @@ pub mod dkio; pub mod dladm; pub mod dumpadm; pub mod fstyp; -pub mod host; pub mod libc; pub mod link; pub mod opte; diff --git a/illumos-utils/src/link.rs b/illumos-utils/src/link.rs index f2c2d78cda..04d13e36ce 100644 --- a/illumos-utils/src/link.rs +++ b/illumos-utils/src/link.rs @@ -9,7 +9,7 @@ use crate::dladm::{ CreateVnicError, DeleteVnicError, Dladm, VnicSource, VNIC_PREFIX, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST, }; -use crate::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use omicron_common::api::external::MacAddr; use std::sync::{ atomic::{AtomicU64, Ordering}, @@ -259,7 +259,8 @@ impl Deletable for VnicDestruction { mod test { use super::*; use crate::dladm::Etherstub; - use crate::host::fake::FakeExecutorBuilder; + use helios_tokamak::FakeExecutorBuilder; + use omicron_test_utils::dev; #[tokio::test] diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 7b6efb9f25..b1c10d448b 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -4,10 +4,10 @@ //! A single port on the OPTE virtual switch. -use crate::host::BoxedExecutor; use crate::opte::Gateway; use crate::opte::Vni; use debug_ignore::DebugIgnore; +use helios_fusion::BoxedExecutor; use macaddr::MacAddr6; use std::net::IpAddr; use std::sync::Arc; diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 190bf77e0a..4a39f2cc3e 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,7 +4,6 @@ //! 
Manager for all OPTE ports on a Helios system -use crate::host::BoxedExecutor; use crate::opte::default_boundary_services; use crate::opte::opte_firewall_rules; use crate::opte::params::SetVirtualNetworkInterfaceHost; @@ -14,6 +13,7 @@ use crate::opte::Gateway; use crate::opte::Port; use crate::opte::Vni; use debug_ignore::DebugIgnore; +use helios_fusion::BoxedExecutor; use ipnetwork::IpNetwork; use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 8a56dfef92..1e91aaadb5 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -6,12 +6,12 @@ use crate::addrobj::AddrObject; use crate::dladm::Etherstub; -use crate::host::{BoxedExecutor, ExecutionError}; use crate::link::{Link, VnicAllocator}; use crate::opte::{Port, PortTicket}; use crate::svc::wait_for_service; use crate::zone::{AddressRequest, Zones, IPADM, ZONE_PREFIX}; use camino::{Utf8Path, Utf8PathBuf}; +use helios_fusion::{BoxedExecutor, ExecutionError}; use ipnetwork::IpNetwork; use omicron_common::backoff; use slog::{error, info, o, warn, Logger}; @@ -436,7 +436,7 @@ impl RunningZone { RunCommandError { zone: self.name().to_string(), err } })?); let tmpl = std::sync::Arc::clone(&template); - let mut command = std::process::Command::new(crate::host::PFEXEC); + let mut command = std::process::Command::new(helios_fusion::PFEXEC); command.env_clear(); unsafe { command.pre_exec(move || { @@ -474,7 +474,7 @@ impl RunningZone { { // NOTE: This implementation is useless, and will never work. However, // it must actually call `execute()` for the testing purposes. 
- let mut command = std::process::Command::new(crate::host::PFEXEC); + let mut command = std::process::Command::new(helios_fusion::PFEXEC); let command = command.arg(crate::zone::ZLOGIN).arg(self.name()).args(args); self.inner diff --git a/illumos-utils/src/svc.rs b/illumos-utils/src/svc.rs index 3e3e69bd58..234b705044 100644 --- a/illumos-utils/src/svc.rs +++ b/illumos-utils/src/svc.rs @@ -4,7 +4,7 @@ //! Utilities for accessing services. -use crate::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use omicron_common::api::external::Error; use omicron_common::backoff; diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index f38582b16b..980709597e 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -4,8 +4,8 @@ //! Utilities for poking at ZFS. -use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::Utf8PathBuf; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use omicron_common::disk::DiskIdentity; use std::fmt; diff --git a/illumos-utils/src/zone.rs b/illumos-utils/src/zone.rs index acd4f01bd1..209c403fd2 100644 --- a/illumos-utils/src/zone.rs +++ b/illumos-utils/src/zone.rs @@ -14,7 +14,7 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::addrobj::AddrObject; use crate::dladm::{EtherstubVnic, VNIC_PREFIX_BOOTSTRAP, VNIC_PREFIX_CONTROL}; -use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use omicron_common::address::SLED_PREFIX; const DLADM: &str = "/usr/sbin/dladm"; @@ -902,8 +902,8 @@ impl Zones { #[cfg(test)] mod tests { use super::*; - use crate::host::fake::{CommandSequence, FakeExecutorBuilder}; - use crate::host::{Input, OutputExt}; + use helios_fusion::{Input, OutputExt}; + use helios_tokamak::{CommandSequence, FakeExecutorBuilder}; use omicron_test_utils::dev; use std::process::Output; diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 040c849b1e..d96efc60d7 100644 --- a/illumos-utils/src/zpool.rs 
+++ b/illumos-utils/src/zpool.rs @@ -4,8 +4,8 @@ //! Utilities for managing Zpools. -use crate::host::{BoxedExecutor, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index c0e7625e6e..b8ea604086 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -15,6 +15,8 @@ clap.workspace = true ddm-admin-client.workspace = true display-error-chain.workspace = true futures.workspace = true +helios-fusion.workspace = true +helios-protostar.workspace = true hex.workspace = true http.workspace = true illumos-utils.workspace = true @@ -45,6 +47,7 @@ uuid.workspace = true [dev-dependencies] omicron-test-utils.workspace = true +helios-tokamak.workspace = true hex-literal.workspace = true partial-io.workspace = true proptest.workspace = true diff --git a/installinator/src/bootstrap.rs b/installinator/src/bootstrap.rs index f91e3216d8..017e5d0b50 100644 --- a/installinator/src/bootstrap.rs +++ b/installinator/src/bootstrap.rs @@ -10,10 +10,10 @@ use anyhow::ensure; use anyhow::Context; use anyhow::Result; use ddm_admin_client::Client as DdmAdminClient; +use helios_fusion::BoxedExecutor; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm; use illumos_utils::dladm::Dladm; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::address::Ipv6Subnet; use sled_hardware::underlay; diff --git a/installinator/src/dispatch.rs b/installinator/src/dispatch.rs index f2213ae7dd..435ae2ebb8 100644 --- a/installinator/src/dispatch.rs +++ b/installinator/src/dispatch.rs @@ -8,7 +8,7 @@ use anyhow::{bail, Context, Result}; use buf_list::{BufList, Cursor}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Args, Parser, Subcommand}; -use illumos_utils::host::BoxedExecutor; +use 
helios_fusion::BoxedExecutor; use installinator_common::{ InstallinatorCompletionMetadata, InstallinatorComponent, InstallinatorSpec, InstallinatorStepId, StepContext, StepHandle, StepProgress, StepSuccess, diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index 5b467d5cae..ec4260e490 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,7 +6,7 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; -use illumos_utils::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; diff --git a/installinator/src/main.rs b/installinator/src/main.rs index 677c82d369..006cf7457d 100644 --- a/installinator/src/main.rs +++ b/installinator/src/main.rs @@ -5,7 +5,7 @@ use std::error::Error; use clap::Parser; -use illumos_utils::host::real::HostExecutor; +use helios_protostar::HostExecutor; use installinator::InstallinatorApp; #[tokio::main] diff --git a/installinator/src/write.rs b/installinator/src/write.rs index af912ac968..b5cd56bffa 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -15,9 +15,9 @@ use async_trait::async_trait; use buf_list::BufList; use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; +use helios_fusion::BoxedExecutor; use illumos_utils::{ dkio::{self, MediaInfoExtended}, - host::BoxedExecutor, zpool::{Zpool, ZpoolName}, }; use installinator_common::{ @@ -1160,10 +1160,9 @@ mod tests { let engine = UpdateEngine::new(&logctx.log, event_sender); let log = logctx.log.clone(); - let executor = - illumos_utils::host::fake::FakeExecutorBuilder::new(log.clone()) - .build() - .as_executor(); + let executor = helios_tokamak::FakeExecutorBuilder::new(log.clone()) + .build() + .as_executor(); engine .new_step( InstallinatorComponent::Both, diff --git a/package/Cargo.toml b/package/Cargo.toml index 7c786b77ef..f6e4abdf19 100644 --- a/package/Cargo.toml +++ 
b/package/Cargo.toml @@ -9,6 +9,8 @@ license = "MPL-2.0" anyhow.workspace = true clap.workspace = true futures.workspace = true +helios-fusion.workspace = true +helios-protostar.workspace = true hex.workspace = true illumos-utils.workspace = true indicatif.workspace = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index edc40c1443..65aa251381 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -7,8 +7,8 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; -use illumos_utils::host::real::HostExecutor; -use illumos_utils::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; +use helios_protostar::HostExecutor; use illumos_utils::{zfs, zone}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use omicron_package::target::KnownTarget; diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index d882821f3c..ab362d92ac 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -31,6 +31,8 @@ dropshot.workspace = true flate2.workspace = true futures.workspace = true glob.workspace = true +helios-fusion.workspace = true +helios-protostar.workspace = true http.workspace = true hyper-staticfile.workspace = true gateway-client.workspace = true @@ -84,6 +86,7 @@ opte-ioctl.workspace = true [dev-dependencies] assert_matches.workspace = true expectorate.workspace = true +helios-tokamak.workspace = true http.workspace = true hyper.workspace = true omicron-test-utils.workspace = true diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 51296b2e78..91a482c855 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -22,11 +22,11 @@ use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; use futures::stream; use futures::StreamExt; +use helios_fusion::BoxedExecutor; +use 
helios_protostar::HostExecutor; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm; use illumos_utils::dladm::Dladm; -use illumos_utils::host::real::HostExecutor; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs; use illumos_utils::zfs::Zfs; use illumos_utils::zone; @@ -548,8 +548,7 @@ impl BootstrapNetworking { async fn enable_ipv6_forwarding( executor: &BoxedExecutor, ) -> Result<(), StartError> { - let mut command = - std::process::Command::new(illumos_utils::host::PFEXEC); + let mut command = std::process::Command::new(helios_fusion::PFEXEC); command.args(&[ "/usr/sbin/routeadm", // Needed to access all zones, which are on the underlay. diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index fd3e7763d2..51d56044bd 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -35,8 +35,8 @@ use ddm_admin_client::DdmError; use dropshot::HttpServer; use futures::Future; use futures::StreamExt; +use helios_fusion::BoxedExecutor; use illumos_utils::dladm; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs; use illumos_utils::zone; use illumos_utils::zone::Zones; @@ -101,7 +101,7 @@ pub enum StartError { EnsureEtherstubError { name: &'static str, #[source] - err: illumos_utils::host::ExecutionError, + err: helios_fusion::ExecutionError, }, #[error(transparent)] @@ -132,7 +132,7 @@ pub enum StartError { DeleteXdeDevices(#[source] illumos_utils::opte::Error), #[error("Failed to enable ipv6-forwarding")] - EnableIpv6Forwarding(#[from] illumos_utils::host::ExecutionError), + EnableIpv6Forwarding(#[from] helios_fusion::ExecutionError), #[error("Incorrect binary packaging: {0}")] IncorrectBuildPackaging(&'static str), diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index c328c37d33..100ea33a99 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -7,11 +7,11 @@ use crate::updates::ConfigUpdates; use camino::{Utf8Path, Utf8PathBuf}; 
use dropshot::ConfigLogging; +use helios_fusion::BoxedExecutor; use illumos_utils::dladm::Dladm; use illumos_utils::dladm::FindPhysicalLinkError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zpool::ZpoolName; use omicron_common::vlan::VlanID; use serde::Deserialize; diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index d59db9a90b..6b91c707da 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -23,8 +23,8 @@ use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use backoff::BackoffError; use futures::lock::{Mutex, MutexGuard}; +use helios_fusion::BoxedExecutor; use illumos_utils::dladm::Etherstub; -use illumos_utils::host::BoxedExecutor; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::{InstalledZone, RunningZone}; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index a3c86af9e5..57a3fc6878 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -15,8 +15,8 @@ use crate::params::{ use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; +use helios_fusion::BoxedExecutor; use illumos_utils::dladm::Etherstub; -use illumos_utils::host::BoxedExecutor; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; use illumos_utils::vmm_reservoir; diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index fcec9fd2d1..a0fc027900 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,7 +12,7 @@ use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; use bootstore::schemes::v0 as bootstore; -use illumos_utils::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use internal_dns::resolver::Resolver; use 
slog::Logger; use std::net::SocketAddr; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 4a8f82a8ad..e74a3d7f18 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -46,12 +46,12 @@ use camino::{Utf8Path, Utf8PathBuf}; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; use dpd_client::{types as DpdTypes, Client as DpdClient, Error as DpdError}; use dropshot::HandlerTaskMode; +use helios_fusion::{BoxedExecutor, PFEXEC}; use illumos_utils::addrobj::AddrObject; use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME; use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; -use illumos_utils::host::{BoxedExecutor, PFEXEC}; use illumos_utils::link::{Link, VnicAllocator}; use illumos_utils::opte::{Port, PortManager, PortTicket}; use illumos_utils::running_zone::{ @@ -204,7 +204,7 @@ pub enum Error { NtpZoneNotReady, #[error("Execution error: {0}")] - ExecutionError(#[from] illumos_utils::host::ExecutionError), + ExecutionError(#[from] helios_fusion::ExecutionError), #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns::resolver::ResolveError), @@ -2860,13 +2860,13 @@ mod test { use super::*; use crate::params::{ServiceZoneService, ZoneType}; use async_trait::async_trait; + use helios_fusion::{Input, Output, OutputExt}; + use helios_tokamak::{CommandSequence, FakeExecutorBuilder}; use illumos_utils::{ dladm::{ Etherstub, BOOTSTRAP_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_NAME, UNDERLAY_ETHERSTUB_VNIC_NAME, }, - host::fake::{CommandSequence, FakeExecutorBuilder}, - host::{Input, Output, OutputExt}, zone::{ZLOGIN, ZONEADM, ZONECFG}, }; use key_manager::{ diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5fa4b77b96..2700d44b46 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -25,8 +25,8 @@ use crate::zone_bundle::BundleError; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use 
dropshot::HttpError; +use helios_fusion::BoxedExecutor; use illumos_utils::dladm::Dladm; -use illumos_utils::host::BoxedExecutor; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use illumos_utils::opte::PortManager; use illumos_utils::zone::Zones; @@ -62,7 +62,7 @@ pub enum Error { SwapDevice(#[from] crate::swap_device::SwapDeviceError), #[error("Failed to acquire etherstub: {0}")] - Etherstub(illumos_utils::host::ExecutionError), + Etherstub(helios_fusion::ExecutionError), #[error("Failed to acquire etherstub VNIC: {0}")] EtherstubVnic(illumos_utils::dladm::CreateVnicError), @@ -71,7 +71,7 @@ pub enum Error { Bootstrap(#[from] crate::bootstrap::BootstrapError), #[error("Failed to remove Omicron address: {0}")] - DeleteAddress(#[from] illumos_utils::host::ExecutionError), + DeleteAddress(#[from] helios_fusion::ExecutionError), #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index f6c3cd3909..278f393125 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -1,8 +1,8 @@ use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; +use helios_fusion::{BoxedExecutor, ExecutionError}; use illumos_utils::dumpadm::DumpAdmError; -use illumos_utils::host::{BoxedExecutor, ExecutionError}; use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 40eecc4981..274aaf2eeb 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -13,8 +13,8 @@ use derive_more::From; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; +use helios_fusion::BoxedExecutor; use illumos_utils::dumpadm::DumpHdrError; -use 
illumos_utils::host::BoxedExecutor; use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 3fa29933d9..ac56422d40 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -4,7 +4,7 @@ //! Operations for creating a system swap device. -use illumos_utils::host::{BoxedExecutor, ExecutionError}; +use helios_fusion::{BoxedExecutor, ExecutionError}; use std::io::Read; use zeroize::Zeroize; diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index df1513be01..ab6c2a0574 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -10,6 +10,7 @@ anyhow.workspace = true camino.workspace = true cfg-if.workspace = true futures.workspace = true +helios-fusion.workspace = true illumos-utils.workspace = true key-manager.workspace = true libc.workspace = true @@ -30,4 +31,5 @@ illumos-devinfo = { git = "https://github.com/oxidecomputer/illumos-devinfo", br libefi-illumos = { git = "https://github.com/oxidecomputer/libefi-illumos", branch = "master" } [dev-dependencies] +helios-tokamak.workspace = true omicron-test-utils.workspace = true diff --git a/sled-hardware/src/cleanup.rs b/sled-hardware/src/cleanup.rs index dee9be8ea9..401d7c6972 100644 --- a/sled-hardware/src/cleanup.rs +++ b/sled-hardware/src/cleanup.rs @@ -6,12 +6,12 @@ use anyhow::Error; use futures::stream::{self, StreamExt, TryStreamExt}; +use helios_fusion::{BoxedExecutor, ExecutionError, PFEXEC}; use illumos_utils::dladm::Dladm; use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_NAME; use illumos_utils::dladm::BOOTSTRAP_ETHERSTUB_VNIC_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_NAME; use illumos_utils::dladm::UNDERLAY_ETHERSTUB_VNIC_NAME; -use illumos_utils::host::{BoxedExecutor, ExecutionError, PFEXEC}; use illumos_utils::link::LinkKind; use illumos_utils::opte; use 
illumos_utils::zone::IPADM; diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index cfdd5596c1..b10dfc97e0 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -3,8 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use camino::{Utf8Path, Utf8PathBuf}; +use helios_fusion::BoxedExecutor; use illumos_utils::fstyp::Fstyp; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zfs; use illumos_utils::zfs::DestroyDatasetErrorVariant; use illumos_utils::zfs::EncryptionDetails; diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 77cdbaed07..3b248ff050 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -7,7 +7,7 @@ use crate::illumos::gpt; use crate::{DiskError, DiskPaths, DiskVariant, Partition}; use camino::Utf8Path; -use illumos_utils::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use illumos_utils::zpool::{Zpool, ZpoolName}; use slog::info; use slog::Logger; @@ -158,8 +158,8 @@ mod test { use super::*; use crate::DiskPaths; use camino::Utf8PathBuf; - use illumos_utils::host::fake::{FakeChild, FakeExecutorBuilder}; - use illumos_utils::host::{Input, OutputExt, PFEXEC}; + use helios_fusion::{Input, OutputExt, PFEXEC}; + use helios_tokamak::{FakeChild, FakeExecutorBuilder}; use illumos_utils::zpool::{ZpoolKind, ZPOOL}; use omicron_test_utils::dev::test_setup_log; use std::path::Path; diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 5c14692111..6f0f8d4c66 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,7 +4,7 @@ use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; use crate::{Baseboard, SledMode}; -use illumos_utils::host::BoxedExecutor; +use helios_fusion::BoxedExecutor; use slog::Logger; use std::collections::HashSet; use tokio::sync::broadcast; diff --git 
a/sled-hardware/src/underlay.rs b/sled-hardware/src/underlay.rs index fd99418649..ccaf277adb 100644 --- a/sled-hardware/src/underlay.rs +++ b/sled-hardware/src/underlay.rs @@ -5,6 +5,7 @@ //! Finding the underlay network physical links and address objects. use crate::is_gimlet; +use helios_fusion::BoxedExecutor; use illumos_utils::addrobj; use illumos_utils::addrobj::AddrObject; use illumos_utils::dladm; @@ -14,7 +15,6 @@ use illumos_utils::dladm::GetLinkpropError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::SetLinkpropError; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; -use illumos_utils::host::BoxedExecutor; use illumos_utils::zone::Zones; use omicron_common::api::external::MacAddr; use std::net::Ipv6Addr; @@ -30,7 +30,7 @@ pub enum Error { #[error( "Failed to create an IPv6 link-local address for underlay devices: {0}" )] - UnderlayDeviceAddress(#[from] illumos_utils::host::ExecutionError), + UnderlayDeviceAddress(#[from] helios_fusion::ExecutionError), #[error(transparent)] BadAddrObj(#[from] addrobj::ParseError), diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 449299f422..618dae8433 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -59,6 +59,7 @@ expectorate.workspace = true flate2.workspace = true fs-err.workspace = true gateway-test-utils.workspace = true +helios-tokamak.workspace = true http.workspace = true installinator.workspace = true installinator-artifact-client.workspace = true diff --git a/wicketd/tests/integration_tests/updates.rs b/wicketd/tests/integration_tests/updates.rs index 8c6032ff6f..016883b9b6 100644 --- a/wicketd/tests/integration_tests/updates.rs +++ b/wicketd/tests/integration_tests/updates.rs @@ -11,7 +11,7 @@ use camino_tempfile::Utf8TempDir; use clap::Parser; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; -use illumos_utils::host::fake::FakeExecutorBuilder; +use helios_tokamak::FakeExecutorBuilder; use installinator::HOST_PHASE_2_FILE_NAME; use 
omicron_common::{ api::internal::nexus::KnownArtifactKind, From fa07b40a9ef68455198cc3ecb15163df180e4326 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 21 Aug 2023 16:11:16 -0700 Subject: [PATCH 54/57] SharedByteQueue --- helios/tokamak/src/executor.rs | 14 +++++++------- helios/tokamak/src/lib.rs | 2 +- .../src/{byte_queue.rs => shared_byte_queue.rs} | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) rename helios/tokamak/src/{byte_queue.rs => shared_byte_queue.rs} (86%) diff --git a/helios/tokamak/src/executor.rs b/helios/tokamak/src/executor.rs index 57a7405ea2..888d900a22 100644 --- a/helios/tokamak/src/executor.rs +++ b/helios/tokamak/src/executor.rs @@ -4,7 +4,7 @@ //! A "fake" [Executor] implementation, which can respond to host requests. -use crate::byte_queue::ByteQueue; +use crate::shared_byte_queue::SharedByteQueue; use async_trait::async_trait; use helios_fusion::{ @@ -155,9 +155,9 @@ pub struct FakeChild { id: u64, command: Command, executor: Arc, - stdin: ByteQueue, - stdout: ByteQueue, - stderr: ByteQueue, + stdin: SharedByteQueue, + stdout: SharedByteQueue, + stderr: SharedByteQueue, } impl FakeChild { @@ -185,9 +185,9 @@ impl FakeChild { id, command: copy_command, executor, - stdin: ByteQueue::new(), - stdout: ByteQueue::new(), - stderr: ByteQueue::new(), + stdin: SharedByteQueue::new(), + stdout: SharedByteQueue::new(), + stderr: SharedByteQueue::new(), }) } diff --git a/helios/tokamak/src/lib.rs b/helios/tokamak/src/lib.rs index c6f48aadf3..352d2763c7 100644 --- a/helios/tokamak/src/lib.rs +++ b/helios/tokamak/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-mod byte_queue; mod executor; +mod shared_byte_queue; pub use executor::*; diff --git a/helios/tokamak/src/byte_queue.rs b/helios/tokamak/src/shared_byte_queue.rs similarity index 86% rename from helios/tokamak/src/byte_queue.rs rename to helios/tokamak/src/shared_byte_queue.rs index 3ea1f686c3..52e91c266b 100644 --- a/helios/tokamak/src/byte_queue.rs +++ b/helios/tokamak/src/shared_byte_queue.rs @@ -10,17 +10,17 @@ use std::sync::{Arc, Mutex}; /// /// This is primarily used to emulate stdin / stdout / stderr. #[derive(Clone)] -pub struct ByteQueue { +pub struct SharedByteQueue { buf: Arc>>, } -impl ByteQueue { +impl SharedByteQueue { pub fn new() -> Self { Self { buf: Arc::new(Mutex::new(VecDeque::new())) } } } -impl std::io::Write for ByteQueue { +impl std::io::Write for SharedByteQueue { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.buf.lock().unwrap().write(buf) } @@ -30,7 +30,7 @@ impl std::io::Write for ByteQueue { } } -impl std::io::Read for ByteQueue { +impl std::io::Read for SharedByteQueue { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { self.buf.lock().unwrap().read(buf) } From b016f0d407298d8a053c544c66a167ce8209c5a3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 24 Aug 2023 12:09:26 -0700 Subject: [PATCH 55/57] patch docs --- helios/fusion/src/executor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helios/fusion/src/executor.rs b/helios/fusion/src/executor.rs index 5a28f29519..8fd000f964 100644 --- a/helios/fusion/src/executor.rs +++ b/helios/fusion/src/executor.rs @@ -69,8 +69,8 @@ pub type BoxedExecutor = Arc; /// Describes an "executor", which can run [Command]s and return a response. /// -/// - In production, this is usually simply a [super::real::HostExecutor]. -/// - Under test, this can be customized, and a [super::fake::FakeExecutor] may be used. +/// - In production, this is usually `helios_protostar`'s executor. 
+/// - Under test, this can be customized, and `helios_tokamak`'s executor may be used. #[async_trait] pub trait Executor: Send + Sync { /// Executes a task, waiting for it to complete, and returning output. From 679040939fa79529363a2cb051dfd6dac36628d0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 29 Sep 2023 15:29:31 -0700 Subject: [PATCH 56/57] hakari --- Cargo.lock | 1 - workspace-hack/Cargo.toml | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ee44ee4636..c0ed8629bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5424,7 +5424,6 @@ dependencies = [ "semver 1.0.19", "serde", "sha2", - "signature 2.1.0", "similar", "slog", "spin 0.9.8", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index d3e00b1831..a6a0d8e7d2 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -18,7 +18,7 @@ anyhow = { version = "1", features = ["backtrace"] } bit-set = { version = "0.5" } bit-vec = { version = "0.6" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1" } -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["serde"] } +bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["serde", "std"] } bitvec = { version = "1" } bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1" } @@ -80,7 +80,6 @@ schemars = { version = "0.8", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1", features = ["serde"] } serde = { version = "1", features = ["alloc", "derive", "rc"] } sha2 = { version = "0.10", features = ["oid"] } -signature = { version = "2", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2", features = ["inline", "unicode"] } slog = { version = "2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", 
"release_max_level_trace"] } spin = { version = "0.9" } @@ -111,7 +110,7 @@ anyhow = { version = "1", features = ["backtrace"] } bit-set = { version = "0.5" } bit-vec = { version = "0.6" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1" } -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["serde"] } +bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["serde", "std"] } bitvec = { version = "1" } bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1" } @@ -174,7 +173,6 @@ schemars = { version = "0.8", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1", features = ["serde"] } serde = { version = "1", features = ["alloc", "derive", "rc"] } sha2 = { version = "0.10", features = ["oid"] } -signature = { version = "2", default-features = false, features = ["digest", "rand_core", "std"] } similar = { version = "2", features = ["inline", "unicode"] } slog = { version = "2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } spin = { version = "0.9" } @@ -203,56 +201,48 @@ zeroize = { version = "1", features = ["std", "zeroize_derive"] } zip = { version = "0.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = 
["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } rustix = { version = "0.38", features = ["fs", "termios"] } 
[target.x86_64-unknown-illumos.build-dependencies] -bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24" } mio = { version = "0.8", features = ["net", "os-ext"] } once_cell = { version = "1", features = ["unstable"] } From 108f4d6b2295adbeff63081eee838f11bc5e0e16 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 29 Sep 2023 15:33:17 -0700 Subject: [PATCH 57/57] more hakari --- Cargo.lock | 3 +++ helios/fusion/Cargo.toml | 1 + helios/protostar/Cargo.toml | 1 + helios/tokamak/Cargo.toml | 1 + 4 files changed, 6 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index c0ed8629bf..27bb636eb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3029,6 +3029,7 @@ version = "0.1.0" dependencies = [ "async-trait", "itertools 0.11.0", + "omicron-workspace-hack 0.1.0", "shlex", "slog", "thiserror", @@ -3049,6 +3050,7 @@ dependencies = [ "libc", "omicron-common 0.1.0", "omicron-test-utils", + "omicron-workspace-hack 0.1.0", "schemars", "serde", "shlex", @@ -3074,6 +3076,7 @@ dependencies = [ "libc", "omicron-common 0.1.0", "omicron-test-utils", + "omicron-workspace-hack 0.1.0", "schemars", "serde", "shlex", diff --git a/helios/fusion/Cargo.toml b/helios/fusion/Cargo.toml index bac3d9c7a2..d189f9fc0a 100644 --- a/helios/fusion/Cargo.toml +++ b/helios/fusion/Cargo.toml @@ -12,3 +12,4 @@ shlex.workspace = true slog.workspace = true thiserror.workspace = true tokio.workspace = true +omicron-workspace-hack = { version = "0.1", path = "../../workspace-hack" } diff --git a/helios/protostar/Cargo.toml b/helios/protostar/Cargo.toml index a2fbdf7f82..f20cf7177c 100644 --- a/helios/protostar/Cargo.toml +++ b/helios/protostar/Cargo.toml @@ -24,6 +24,7 @@ thiserror.workspace = true tokio.workspace = true uuid.workspace = true zone.workspace = true +omicron-workspace-hack = { version = "0.1", path = "../../workspace-hack" } [dev-dependencies] omicron-test-utils.workspace = true diff --git 
a/helios/tokamak/Cargo.toml b/helios/tokamak/Cargo.toml index c09c7e37c8..0ab1004964 100644 --- a/helios/tokamak/Cargo.toml +++ b/helios/tokamak/Cargo.toml @@ -24,6 +24,7 @@ thiserror.workspace = true tokio.workspace = true uuid.workspace = true zone.workspace = true +omicron-workspace-hack = { version = "0.1", path = "../../workspace-hack" } [dev-dependencies] omicron-test-utils.workspace = true