From 1eaad083ca8a4363f783f550266084d84ba8c866 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Sat, 9 Mar 2024 19:20:01 -0800 Subject: [PATCH] test environment could be more realistic (#5239) --- Cargo.lock | 2 + dev-tools/omdb/tests/env.out | 3 + dev-tools/omdb/tests/successes.out | 13 +- dev-tools/omicron-dev/src/bin/omicron-dev.rs | 10 +- .../tests/output/collector_basic.txt | 4 +- .../output/collector_sled_agent_errors.txt | 2 +- .../app/background/inventory_collection.rs | 4 +- nexus/src/lib.rs | 22 +- nexus/test-interface/src/lib.rs | 1 + nexus/test-utils/Cargo.toml | 2 + nexus/test-utils/src/lib.rs | 376 +++++++++++++++--- .../tests/integration_tests/initialization.rs | 29 +- nexus/tests/integration_tests/sleds.rs | 14 +- sled-agent/src/sim/sled_agent.rs | 2 +- 14 files changed, 383 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0cfc7b4500..db48902f75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4648,6 +4648,7 @@ dependencies = [ "headers", "http 0.2.12", "hyper 0.14.28", + "illumos-utils", "internal-dns", "nexus-config", "nexus-db-queries", @@ -4664,6 +4665,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "sled-agent-client", "slog", "tokio", "tokio-util", diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 3e6e89d508..ef8cf1631e 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -3,6 +3,7 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID +sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: @@ -268,6 +269,7 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID +sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: @@ -281,6 +283,7 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID +sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index fe590acf55..2da6e4dceb 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -77,12 +77,10 @@ termination: Exited(0) stdout: SERVICE INSTANCE_ID ADDR SLED_SERIAL CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 -Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-039be560 Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 --------------------------------------------- stderr: @@ -93,17 +91,19 @@ EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] termination: Exited(0) --------------------------------------------- stdout: +sled: sim-039be560 (id REDACTED_UUID_REDACTED_UUID_REDACTED) + + SERVICE INSTANCE_ID ADDR + Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) SERVICE INSTANCE_ID ADDR CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT --------------------------------------------- stderr: @@ -115,6 +115,7 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID +sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index 5e0c6486d6..705049bdb1 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -543,10 +543,12 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { cptestctx.silo_name, cptestctx.external_dns_zone_name, ); - println!( - "omicron-dev: management gateway: http://{}", - cptestctx.gateway.client.bind_address, - ); + for (location, gateway) in &cptestctx.gateway { + println!( + "omicron-dev: management gateway: http://{} ({})", + gateway.client.bind_address, location, + ); + } println!("omicron-dev: silo name: {}", cptestctx.silo_name,); println!( "omicron-dev: privileged user name: {}", diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 4a05f09e1c..0fc1c552ab 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -71,12 +71,12 @@ rot pages found: CfpaScratch baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" sled agents found: - sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Gimlet) + sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" }) zone generation: Generation(3) zones found: zone 8b88a56f-3eb6-4d80-ba42-75d867bc427d type oximeter - sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Gimlet) + sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) zone generation: Generation(3) zones found: diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index aaa31fd1bb..7b9bbce84e 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -70,7 +70,7 @@ rot pages found: CfpaScratch baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" sled agents found: - sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Gimlet) + sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Scrimlet) baseboard Some(BaseboardId { part_number: "sim-gimlet", serial_number: "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" }) zone generation: Generation(3) zones found: diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 27f08ec738..0666c136fc 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -314,9 +314,9 @@ mod test { page_size: NonZeroU32::new(3).unwrap(), }; - // There will be one sled agent set up as part of the test context. + // There will be two sled agents set up as part of the test context. let found_urls = db_enum.list_sled_agents().await.unwrap(); - assert_eq!(found_urls.len(), 1); + assert_eq!(found_urls.len(), 2); // Insert some sleds. let rack_id = Uuid::new_v4(); diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 771a78f0b1..c0fba31afb 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -27,6 +27,7 @@ use dropshot::ConfigDropshot; use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use nexus_config::NexusConfig; +use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::internal_api::params::ServiceKind; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::{ @@ -237,6 +238,7 @@ impl nexus_test_interface::NexusServer for Server { external_dns_zone_name: &str, recovery_silo: nexus_types::internal_api::params::RecoverySiloConfig, certs: Vec, + disable_sled_id: Uuid, ) -> Self { // Perform the "handoff from RSS". // @@ -302,7 +304,25 @@ impl nexus_test_interface::NexusServer for Server { .expect("Could not initialize rack"); // Start the Nexus external API. - Server::start(internal_server).await.unwrap() + let rv = Server::start(internal_server).await.unwrap(); + + // Historically, tests have assumed that there's only one provisionable + // sled, and that's convenient for a lot of purposes. Mark our second + // sled non-provisionable. + let nexus = &rv.apictx().nexus; + nexus + .sled_set_provision_policy( + &opctx, + &nexus_db_queries::db::lookup::LookupPath::new( + &opctx, + nexus.datastore(), + ) + .sled_id(disable_sled_id), + SledProvisionPolicy::NonProvisionable, + ) + .await + .unwrap(); + rv } async fn get_http_server_external_address(&self) -> SocketAddr { diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 0f53ac6445..10bc9e63f0 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -56,6 +56,7 @@ pub trait NexusServer: Send + Sync + 'static { external_dns_zone_name: &str, recovery_silo: nexus_types::internal_api::params::RecoverySiloConfig, tls_certificates: Vec, + disable_sled_id: Uuid, ) -> Self; async fn get_http_server_external_address(&self) -> SocketAddr; diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index e612547fa8..861527108b 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -20,6 +20,7 @@ gateway-test-utils.workspace = true headers.workspace = true http.workspace = true hyper.workspace = true +illumos-utils.workspace = true internal-dns.workspace = true nexus-config.workspace = true nexus-db-queries.workspace = true @@ -35,6 +36,7 @@ oximeter-producer.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true +sled-agent-client.workspace = true slog.workspace = true tokio.workspace = true tokio-util.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 4ef77b3352..9681d9ff97 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -33,12 +33,20 @@ use nexus_types::internal_api::params::RecoverySiloConfig; use nexus_types::internal_api::params::ServiceKind; use nexus_types::internal_api::params::ServiceNic; use nexus_types::internal_api::params::ServicePutRequest; +use nexus_types::inventory::OmicronZoneConfig; +use nexus_types::inventory::OmicronZoneDataset; +use nexus_types::inventory::OmicronZoneType; +use nexus_types::inventory::OmicronZonesConfig; use omicron_common::address::DNS_OPTE_IPV4_SUBNET; use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; +use omicron_common::api::external::Generation; use omicron_common::api::external::MacAddr; +use omicron_common::api::external::Vni; use omicron_common::api::external::{IdentityMetadata, Name}; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::ProducerKind; +use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::api::internal::shared::SwitchLocation; use omicron_sled_agent::sim; use omicron_test_utils::dev; @@ -65,6 +73,7 @@ pub mod http_testing; pub mod resource_helpers; pub const SLED_AGENT_UUID: &str = "b6d65341-167c-41df-9b5c-41cded99c229"; +pub const SLED_AGENT2_UUID: &str = "039be560-54cc-49e3-88df-1a29dadbf913"; pub const RACK_UUID: &str = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc"; pub const SWITCH_UUID: &str = "dae4e1f1-410e-4314-bff1-fec0504be07e"; pub const OXIMETER_UUID: &str = "39e6175b-4df2-4730-b11d-cbc1e60a2e78"; @@ -88,9 +97,11 @@ pub struct ControlPlaneTestContext { pub logctx: LogContext, pub sled_agent_storage: camino_tempfile::Utf8TempDir, pub sled_agent: sim::Server, + pub sled_agent2_storage: camino_tempfile::Utf8TempDir, + pub sled_agent2: sim::Server, pub oximeter: Oximeter, pub producer: ProducerServer, - pub gateway: GatewayTestContext, + pub gateway: HashMap, pub dendrite: HashMap, pub mgd: HashMap, pub external_dns_zone_name: String, @@ -110,9 +121,12 @@ impl ControlPlaneTestContext { self.database.cleanup().await.unwrap(); self.clickhouse.cleanup().await.unwrap(); self.sled_agent.http_server.close().await.unwrap(); + self.sled_agent2.http_server.close().await.unwrap(); self.oximeter.close().await.unwrap(); self.producer.close().await.unwrap(); - self.gateway.teardown().await; + for (_, gateway) in self.gateway { + gateway.teardown().await; + } for (_, mut dendrite) in self.dendrite { dendrite.cleanup().await.unwrap(); } @@ -179,18 +193,18 @@ impl RackInitRequestBuilder { // Keeps track of: // - The "ServicePutRequest" (for handoff to Nexus) // - The internal DNS configuration for this service - fn add_service( + fn add_service_with_id( &mut self, + zone_id: Uuid, address: SocketAddrV6, kind: ServiceKind, service_name: internal_dns::ServiceName, sled_id: Uuid, ) { - let zone_id = Uuid::new_v4(); self.services.push(ServicePutRequest { address, kind, - service_id: Uuid::new_v4(), + service_id: zone_id, sled_id, zone_id: Some(zone_id), }); @@ -203,6 +217,22 @@ impl RackInitRequestBuilder { .expect("Failed to set up DNS for {kind}"); } + fn add_service_without_dns( + &mut self, + zone_id: Uuid, + address: SocketAddrV6, + kind: ServiceKind, + sled_id: Uuid, + ) { + self.services.push(ServicePutRequest { + address, + kind, + service_id: zone_id, + sled_id, + zone_id: Some(zone_id), + }); + } + // Keeps track of: // - The "DatasetPutRequest" (for handoff to Nexus) // - The internal DNS configuration for this service @@ -245,9 +275,11 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub clickhouse: Option, pub sled_agent_storage: Option, pub sled_agent: Option, + pub sled_agent2_storage: Option, + pub sled_agent2: Option, pub oximeter: Option, pub producer: Option, - pub gateway: Option, + pub gateway: HashMap, pub dendrite: HashMap, pub mgd: HashMap, @@ -260,6 +292,8 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub external_dns: Option, pub internal_dns: Option, dns_config: Option, + omicron_zones: Vec, + omicron_zones2: Vec, pub silo_name: Option, pub user_name: Option, @@ -289,9 +323,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { clickhouse: None, sled_agent_storage: None, sled_agent: None, + sled_agent2_storage: None, + sled_agent2: None, oximeter: None, producer: None, - gateway: None, + gateway: HashMap::new(), dendrite: HashMap::new(), mgd: HashMap::new(), nexus_internal: None, @@ -300,6 +336,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { external_dns: None, internal_dns: None, dns_config: None, + omicron_zones: Vec::new(), + omicron_zones2: Vec::new(), silo_name: None, user_name: None, } @@ -380,6 +418,18 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { DatasetKind::Cockroach, internal_dns::ServiceName::Cockroach, ); + let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id) + .to_string() + .parse() + .unwrap(); + self.omicron_zones.push(OmicronZoneConfig { + id: dataset_id, + underlay_address: *address.ip(), + zone_type: OmicronZoneType::CockroachDb { + address: address.to_string(), + dataset: OmicronZoneDataset { pool_name }, + }, + }); self.database = Some(database); } @@ -416,37 +466,40 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .as_mut() .expect("Tests expect to set a port of Clickhouse") .set_port(port); + + let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id) + .to_string() + .parse() + .unwrap(); + self.omicron_zones.push(OmicronZoneConfig { + id: dataset_id, + underlay_address: *address.ip(), + zone_type: OmicronZoneType::Clickhouse { + address: address.to_string(), + dataset: OmicronZoneDataset { pool_name }, + }, + }); } - pub async fn start_gateway(&mut self) { - // For now, this MGS is not configured to match up in any way with - // either the simulated sled agent or the Dendrite instances. It's - // useful for testing stuff unrelated to that. But at some point we - // will probably want the reported data to match up better. + pub async fn start_gateway( + &mut self, + switch_location: SwitchLocation, + port: Option, + ) { debug!(&self.logctx.log, "Starting Management Gateway"); - let gateway = gateway_test_utils::setup::test_setup( + let (mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + let mgs_addr = + port.map(|port| SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0)); + let gateway = gateway_test_utils::setup::test_setup_with_config( self.test_name, gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + mgs_addr, ) .await; - let fake_mgs_zone_id = Uuid::new_v4(); - let SocketAddr::V6(v6addr) = gateway.client.bind_address else { - panic!("MGS unexpectedly listening on IPv4?"); - }; - let zone = self - .rack_init_builder - .internal_dns_config - .host_zone(fake_mgs_zone_id, *v6addr.ip()) - .expect("Failed to add DNS for MGS zone"); - self.rack_init_builder - .internal_dns_config - .service_backend_zone( - internal_dns::ServiceName::ManagementGatewayService, - &zone, - v6addr.port(), - ) - .expect("Failed to add DNS for MGS service"); - self.gateway = Some(gateway); + self.gateway.insert(switch_location, gateway); } pub async fn start_dendrite(&mut self, switch_location: SwitchLocation) { @@ -466,11 +519,16 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let config = DpdConfig { address: std::net::SocketAddr::V6(address) }; self.config.pkg.dendrite.insert(switch_location, config); - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - self.rack_init_builder.add_service( + let sled_id = Uuid::parse_str(match switch_location { + SwitchLocation::Switch0 => SLED_AGENT_UUID, + SwitchLocation::Switch1 => SLED_AGENT2_UUID, + }) + .unwrap(); + + self.rack_init_builder.add_service_without_dns( + sled_id, address, ServiceKind::Dendrite, - internal_dns::ServiceName::Dendrite, sled_id, ); } @@ -490,15 +548,46 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let config = MgdConfig { address: std::net::SocketAddr::V6(address) }; self.config.pkg.mgd.insert(switch_location, config); - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - self.rack_init_builder.add_service( + let sled_id = Uuid::parse_str(match switch_location { + SwitchLocation::Switch0 => SLED_AGENT_UUID, + SwitchLocation::Switch1 => SLED_AGENT2_UUID, + }) + .unwrap(); + + self.rack_init_builder.add_service_without_dns( + sled_id, address, ServiceKind::Mgd, - internal_dns::ServiceName::Mgd, sled_id, ); } + pub async fn record_switch_dns(&mut self) { + let log = &self.logctx.log; + debug!(log, "Recording DNS for the switch zones"); + for (sled_id, switch_location) in &[ + (SLED_AGENT_UUID, SwitchLocation::Switch0), + (SLED_AGENT2_UUID, SwitchLocation::Switch1), + ] { + let id = sled_id.parse().unwrap(); + self.rack_init_builder + .internal_dns_config + .host_zone_switch( + id, + Ipv6Addr::LOCALHOST, + self.dendrite.get(switch_location).unwrap().port, + self.gateway + .get(switch_location) + .unwrap() + .client + .bind_address + .port(), + self.mgd.get(switch_location).unwrap().port, + ) + .unwrap(); + } + } + pub async fn start_oximeter(&mut self) { let log = &self.logctx.log; debug!(log, "Starting Oximeter"); @@ -585,16 +674,14 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .mac_addrs .next() .expect("ran out of MAC addresses"); - self.rack_init_builder.add_service( + let external_address = + self.config.deployment.dropshot_external.dropshot.bind_address.ip(); + let nexus_id = self.config.deployment.id; + self.rack_init_builder.add_service_with_id( + nexus_id, address, ServiceKind::Nexus { - external_address: self - .config - .deployment - .dropshot_external - .dropshot - .bind_address - .ip(), + external_address, nic: ServiceNic { id: Uuid::new_v4(), name: "nexus".parse().unwrap(), @@ -610,6 +697,32 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { sled_id, ); + self.omicron_zones.push(OmicronZoneConfig { + id: nexus_id, + underlay_address: *address.ip(), + zone_type: OmicronZoneType::Nexus { + external_dns_servers: self + .config + .deployment + .external_dns_servers + .clone(), + external_ip: external_address, + external_tls: self.config.deployment.dropshot_external.tls, + internal_address: address.to_string(), + nic: NetworkInterface { + id: Uuid::new_v4(), + ip: external_address, + kind: NetworkInterfaceKind::Service { id: nexus_id }, + mac, + name: format!("nexus-{}", nexus_id).parse().unwrap(), + primary: true, + slot: 0, + subnet: (*NEXUS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + }, + }, + }); + self.nexus_internal = Some(nexus_internal); self.nexus_internal_addr = Some(nexus_internal_addr); } @@ -701,6 +814,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { &external_dns_zone_name, recovery_silo, tls_certificates, + SLED_AGENT2_UUID.parse().unwrap(), ) .await; @@ -729,12 +843,22 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.server = Some(server); } - pub async fn start_sled(&mut self, sim_mode: sim::SimMode) { + pub async fn start_sled( + &mut self, + switch_location: SwitchLocation, + sim_mode: sim::SimMode, + ) { let nexus_address = self.nexus_internal_addr.expect("Must launch Nexus first"); // Set up a single sled agent. - let sa_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); + let sa_id: Uuid = if switch_location == SwitchLocation::Switch0 { + SLED_AGENT_UUID + } else { + SLED_AGENT2_UUID + } + .parse() + .unwrap(); let tempdir = camino_tempfile::tempdir().unwrap(); let sled_agent = start_sled_agent( self.logctx.log.new(o!( @@ -749,8 +873,40 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .await .expect("Failed to start sled agent"); - self.sled_agent = Some(sled_agent); - self.sled_agent_storage = Some(tempdir); + if switch_location == SwitchLocation::Switch0 { + self.sled_agent = Some(sled_agent); + self.sled_agent_storage = Some(tempdir); + } else { + self.sled_agent2 = Some(sled_agent); + self.sled_agent2_storage = Some(tempdir); + } + } + + pub async fn configure_sled_agent( + &mut self, + switch_location: SwitchLocation, + ) { + let (field, zones) = if switch_location == SwitchLocation::Switch0 { + (&self.sled_agent, &self.omicron_zones) + } else { + (&self.sled_agent2, &self.omicron_zones2) + }; + + // Tell our Sled Agent to report the zones that we configured. + let Some(sled_agent) = field else { + panic!("expected sled agent has not been created"); + }; + let client = sled_agent_client::Client::new( + &format!("http://{}", sled_agent.http_server.local_addr()), + self.logctx.log.clone(), + ); + client + .omicron_zones_put(&OmicronZonesConfig { + zones: zones.clone(), + generation: Generation::new().next(), + }) + .await + .expect("Failed to configure sled agent with our zones"); } // Set up the Crucible Pantry on an existing Sled Agent. @@ -768,12 +924,21 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { }; let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); - self.rack_init_builder.add_service( + let zone_id = Uuid::new_v4(); + self.rack_init_builder.add_service_with_id( + zone_id, address, ServiceKind::CruciblePantry, internal_dns::ServiceName::CruciblePantry, sled_id, ); + self.omicron_zones.push(OmicronZoneConfig { + id: zone_id, + underlay_address: *address.ip(), + zone_type: OmicronZoneType::CruciblePantry { + address: address.to_string(), + }, + }); } // Set up an external DNS server. @@ -796,7 +961,9 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .mac_addrs .next() .expect("ran out of MAC addresses"); - self.rack_init_builder.add_service( + let zone_id = Uuid::new_v4(); + self.rack_init_builder.add_service_with_id( + zone_id, dropshot_address, ServiceKind::ExternalDns { external_address: (*dns_address.ip()).into(), @@ -814,6 +981,33 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { internal_dns::ServiceName::ExternalDns, sled_id, ); + + let zpool_id = Uuid::new_v4(); + let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id) + .to_string() + .parse() + .unwrap(); + self.omicron_zones.push(OmicronZoneConfig { + id: zone_id, + underlay_address: *dropshot_address.ip(), + zone_type: OmicronZoneType::ExternalDns { + dataset: OmicronZoneDataset { pool_name }, + dns_address: dns_address.to_string(), + http_address: dropshot_address.to_string(), + nic: NetworkInterface { + id: Uuid::new_v4(), + ip: (*dns_address.ip()).into(), + kind: NetworkInterfaceKind::Service { id: zone_id }, + mac, + name: format!("external-dns-{}", zone_id).parse().unwrap(), + primary: true, + slot: 0, + subnet: (*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + }, + }, + }); + self.external_dns = Some(dns); } @@ -826,13 +1020,32 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let SocketAddr::V6(address) = dns.dropshot_server.local_addr() else { panic!("Unsupported IPv4 DNS address"); }; - self.rack_init_builder.add_service( + let zone_id = Uuid::new_v4(); + self.rack_init_builder.add_service_with_id( + zone_id, address, ServiceKind::InternalDns, internal_dns::ServiceName::InternalDns, sled_id, ); + let zpool_id = Uuid::new_v4(); + let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id) + .to_string() + .parse() + .unwrap(); + self.omicron_zones.push(OmicronZoneConfig { + id: zone_id, + underlay_address: *address.ip(), + zone_type: OmicronZoneType::InternalDns { + dataset: OmicronZoneDataset { pool_name }, + dns_address: dns.dns_server.local_address().to_string(), + http_address: address.to_string(), + gz_address: Ipv6Addr::LOCALHOST, + gz_address_index: 0, + }, + }); + self.internal_dns = Some(dns); } @@ -846,10 +1059,12 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { clickhouse: self.clickhouse.unwrap(), sled_agent_storage: self.sled_agent_storage.unwrap(), sled_agent: self.sled_agent.unwrap(), + sled_agent2_storage: self.sled_agent2_storage.unwrap(), + sled_agent2: self.sled_agent2.unwrap(), oximeter: self.oximeter.unwrap(), producer: self.producer.unwrap(), logctx: self.logctx, - gateway: self.gateway.unwrap(), + gateway: self.gateway, dendrite: self.dendrite, mgd: self.mgd, external_dns_zone_name: self.external_dns_zone_name.unwrap(), @@ -873,13 +1088,16 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { if let Some(sled_agent) = self.sled_agent { sled_agent.http_server.close().await.unwrap(); } + if let Some(sled_agent2) = self.sled_agent2 { + sled_agent2.http_server.close().await.unwrap(); + } if let Some(oximeter) = self.oximeter { oximeter.close().await.unwrap(); } if let Some(producer) = self.producer { producer.close().await.unwrap(); } - if let Some(gateway) = self.gateway { + for (_, gateway) in self.gateway { gateway.teardown().await; } for (_, mut dendrite) in self.dendrite { @@ -990,8 +1208,20 @@ async fn setup_with_config_impl( Box::new(|builder| builder.start_clickhouse().boxed()), ), ( - "start_gateway", - Box::new(|builder| builder.start_gateway().boxed()), + "start_gateway_switch0", + Box::new(|builder| { + builder + .start_gateway(SwitchLocation::Switch0, None) + .boxed() + }), + ), + ( + "start_gateway_switch1", + Box::new(|builder| { + builder + .start_gateway(SwitchLocation::Switch1, None) + .boxed() + }), ), ( "start_dendrite_switch0", @@ -1017,6 +1247,10 @@ async fn setup_with_config_impl( builder.start_mgd(SwitchLocation::Switch1).boxed() }), ), + ( + "record_switch_dns", + Box::new(|builder| builder.record_switch_dns().boxed()), + ), ( "start_internal_dns", Box::new(|builder| builder.start_internal_dns().boxed()), @@ -1030,9 +1264,19 @@ async fn setup_with_config_impl( Box::new(|builder| builder.start_nexus_internal().boxed()), ), ( - "start_sled", + "start_sled1", Box::new(move |builder| { - builder.start_sled(sim_mode).boxed() + builder + .start_sled(SwitchLocation::Switch0, sim_mode) + .boxed() + }), + ), + ( + "start_sled2", + Box::new(move |builder| { + builder + .start_sled(SwitchLocation::Switch1, sim_mode) + .boxed() }), ), ( @@ -1043,6 +1287,22 @@ async fn setup_with_config_impl( "populate_internal_dns", Box::new(|builder| builder.populate_internal_dns().boxed()), ), + ( + "configure_sled_agent1", + Box::new(|builder| { + builder + .configure_sled_agent(SwitchLocation::Switch0) + .boxed() + }), + ), + ( + "configure_sled_agent2", + Box::new(|builder| { + builder + .configure_sled_agent(SwitchLocation::Switch1) + .boxed() + }), + ), ( "start_nexus_external", Box::new(|builder| { diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index b77a121080..a76aef832e 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -2,17 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::collections::HashMap; -use std::net::{Ipv6Addr, SocketAddrV6}; - -use gateway_messages::SpPort; -use gateway_test_utils::setup as mgs_setup; use nexus_config::Database; use nexus_config::InternalDns; use nexus_test_interface::NexusServer; use nexus_test_utils::{load_test_config, ControlPlaneTestContextBuilder}; use omicron_common::address::MGS_PORT; use omicron_common::api::internal::shared::SwitchLocation; +use std::collections::HashMap; use tokio::time::sleep; use tokio::time::timeout; use tokio::time::Duration; @@ -78,19 +74,6 @@ async fn test_nexus_boots_before_cockroach() { #[tokio::test] async fn test_nexus_boots_before_dendrite() { - // Start MGS + Sim SP. This is needed for the Dendrite client initialization - // inside of Nexus initialization - let (mgs_config, sp_sim_config) = mgs_setup::load_test_config(); - let mgs_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, MGS_PORT, 0, 0); - let mgs = mgs_setup::test_setup_with_config( - "test_nexus_boots_before_dendrite", - SpPort::One, - mgs_config, - &sp_sim_config, - Some(mgs_addr), - ) - .await; - let mut config = load_test_config(); let mut builder = @@ -101,6 +84,14 @@ async fn test_nexus_boots_before_dendrite() { let log = builder.logctx.log.new(o!("component" => "test")); + // Start MGS + Sim SP. This is needed for the Dendrite client initialization + // inside of Nexus initialization. We must use MGS_PORT here because Nexus + // hardcodes it. + info!(&log, "Starting MGS"); + builder.start_gateway(SwitchLocation::Switch0, Some(MGS_PORT)).await; + builder.start_gateway(SwitchLocation::Switch1, None).await; + info!(&log, "Started MGS"); + let populate = true; builder.start_crdb(populate).await; builder.start_internal_dns().await; @@ -150,6 +141,7 @@ async fn test_nexus_boots_before_dendrite() { info!(log, "Started mgd"); info!(log, "Populating internal DNS records"); + builder.record_switch_dns().await; builder.populate_internal_dns().await; info!(log, "Populated internal DNS records"); @@ -157,7 +149,6 @@ async fn test_nexus_boots_before_dendrite() { nexus_handle.await.expect("Test: Task starting Nexus has failed"); builder.teardown().await; - mgs.teardown().await; } // Helper to ensure we perform the same setup for the positive and negative test diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index b551cf51b5..743a76be17 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -48,9 +48,9 @@ async fn sled_instance_list( async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; - // Verify that there is one sled to begin with. + // Verify that there are two sleds to begin with. let sleds_url = "/v1/system/hardware/sleds"; - assert_eq!(sleds_list(&client, &sleds_url).await.len(), 1); + assert_eq!(sleds_list(&client, &sleds_url).await.len(), 2); // Now start a few more sled agents. let nsleds = 3; @@ -76,7 +76,7 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { // List sleds again. let sleds_found = sleds_list(&client, &sleds_url).await; - assert_eq!(sleds_found.len(), nsleds + 1); + assert_eq!(sleds_found.len(), nsleds + 2); let sledids_found = sleds_found.iter().map(|sv| sv.identity.id).collect::>(); @@ -97,9 +97,9 @@ async fn test_physical_disk_create_list_delete( let external_client = &cptestctx.external_client; let internal_client = &cptestctx.internal_client; - // Verify that there is one sled to begin with. + // Verify that there are two sleds to begin with. let sleds_url = "/v1/system/hardware/sleds"; - assert_eq!(sleds_list(&external_client, &sleds_url).await.len(), 1); + assert_eq!(sleds_list(&external_client, &sleds_url).await.len(), 2); // The test framework may set up some disks initially. let disks_url = @@ -140,9 +140,9 @@ async fn test_physical_disk_create_list_delete( async fn test_sled_instance_list(cptestctx: &ControlPlaneTestContext) { let external_client = &cptestctx.external_client; - // Verify that there is one sled to begin with. + // Verify that there are two sleds to begin with. let sleds_url = "/v1/system/hardware/sleds"; - assert_eq!(sleds_list(&external_client, &sleds_url).await.len(), 1); + assert_eq!(sleds_list(&external_client, &sleds_url).await.len(), 2); // Verify that there are no instances. let instances_url = diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 483b2d6aa8..0b90bef590 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -740,7 +740,7 @@ impl SledAgent { Ok(Inventory { sled_id: self.id, sled_agent_address, - sled_role: SledRole::Gimlet, + sled_role: SledRole::Scrimlet, baseboard: self.config.hardware.baseboard.clone(), usable_hardware_threads: self.config.hardware.hardware_threads, usable_physical_ram: ByteCount::try_from(