From 79fee6a2c9f0099fb9cca6effbb862fd75c0a4e4 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 21 Sep 2023 09:24:04 -0700 Subject: [PATCH 1/8] Move instance networking functions into their own module (#4123) `nexus/src/app/instance.rs` and `sled.rs` implement several networking-related functions that are less directly concerned with instance or sled management than their sibling routines. Tidy things up a bit by creating an `instance_network` module and moving instance V2P and NAT management functions there. Also, move instance NAT entry deletion logic into its own function that's called from the instance delete saga instead of implementing it inline in that saga. These changes aim to reduce clutter in `instance.rs` and to move NAT entry deletion to a function that can be reused by subsequent changes to the way Nexus handles instance stop. Except for some minor edits to error handling in `instance_delete_dpd_config` (needed because this function no longer returns a `steno::ActionError`), this PR only rearranges existing code and has no functional changes. --- nexus/src/app/instance.rs | 144 ------- nexus/src/app/instance_network.rs | 497 +++++++++++++++++++++++++ nexus/src/app/mod.rs | 1 + nexus/src/app/sagas/instance_delete.rs | 67 +--- nexus/src/app/sled.rs | 251 ------------- 5 files changed, 503 insertions(+), 457 deletions(-) create mode 100644 nexus/src/app/instance_network.rs diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1ac63d19f6..f07ceae4a0 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -11,7 +11,6 @@ use super::MAX_NICS_PER_INSTANCE; use super::MAX_VCPU_PER_INSTANCE; use super::MIN_MEMORY_BYTES_PER_INSTANCE; use crate::app::sagas; -use crate::app::sagas::retry_until_known_result; use crate::cidata::InstanceCiData; use crate::external_api::params; use cancel_safe_futures::prelude::*; @@ -41,7 +40,6 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; -use omicron_common::api::internal::shared::SwitchLocation; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; use propolis_client::support::tungstenite::Message as WebSocketMessage; @@ -54,9 +52,7 @@ use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; -use std::collections::HashSet; use std::net::SocketAddr; -use std::str::FromStr; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; use uuid::Uuid; @@ -1232,146 +1228,6 @@ impl super::Nexus { Ok(()) } - // Switches with uplinks configured and boundary services enabled - pub(crate) async fn boundary_switches( - &self, - opctx: &OpContext, - ) -> Result, Error> { - let mut boundary_switches: HashSet = HashSet::new(); - let uplinks = self.list_switch_ports_with_uplinks(opctx).await?; - for uplink in &uplinks { - let location: SwitchLocation = - uplink.switch_location.parse().map_err(|_| { - Error::internal_error(&format!( - "invalid switch location in uplink config: {}", - uplink.switch_location - )) - })?; - boundary_switches.insert(location); - } - Ok(boundary_switches) - } - - /// Ensures that the Dendrite configuration for the supplied instance is - /// up-to-date. 
- /// - /// # Parameters - /// - /// - `opctx`: An operation context that grants read and list-children - /// permissions on the identified instance. - /// - `instance_id`: The ID of the instance to act on. - /// - `sled_ip_address`: The internal IP address assigned to the sled's - /// sled agent. - /// - `ip_index_filter`: An optional filter on the index into the instance's - /// external IP array. - /// - If this is `Some(n)`, this routine configures DPD state for only the - /// Nth external IP in the collection returned from CRDB. The caller is - /// responsible for ensuring that the IP collection has stable indices - /// when making this call. - /// - If this is `None`, this routine configures DPD for all external - /// IPs. - pub(crate) async fn instance_ensure_dpd_config( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_ip_address: &std::net::SocketAddrV6, - ip_index_filter: Option, - dpd_client: &Arc, - ) -> Result<(), Error> { - let log = &self.log; - - info!(log, "looking up instance's primary network interface"; - "instance_id" => %instance_id); - - let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::ListChildren) - .await?; - - // All external IPs map to the primary network interface, so find that - // interface. If there is no such interface, there's no way to route - // traffic destined to those IPs, so there's nothing to configure and - // it's safe to return early. - let network_interface = match self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await? - .into_iter() - .find(|interface| interface.primary) - { - Some(interface) => interface, - None => { - info!(log, "Instance has no primary network interface"; - "instance_id" => %instance_id); - return Ok(()); - } - }; - - let mac_address = - macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) - .map_err(|e| { - Error::internal_error(&format!( - "failed to convert mac address: {e}" - )) - })?; - - let vni: u32 = network_interface.vni.into(); - - info!(log, "looking up instance's external IPs"; - "instance_id" => %instance_id); - - let ips = self - .db_datastore - .instance_lookup_external_ips(&opctx, instance_id) - .await?; - - if let Some(wanted_index) = ip_index_filter { - if let None = ips.get(wanted_index) { - return Err(Error::internal_error(&format!( - "failed to find external ip address at index: {}", - wanted_index - ))); - } - } - - for target_ip in ips - .iter() - .enumerate() - .filter(|(index, _)| { - if let Some(wanted_index) = ip_index_filter { - *index == wanted_index - } else { - true - } - }) - .map(|(_, ip)| ip) - { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: {e}" - )) - })?; - } - - Ok(()) - } - /// Returns the requested range of serial console output bytes, /// provided they are still in the propolis-server's cache. pub(crate) async fn instance_serial_console_data( diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs new file mode 100644 index 0000000000..6b819a9e11 --- /dev/null +++ b/nexus/src/app/instance_network.rs @@ -0,0 +1,497 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Routines that manage instance-related networking state.
+
+use crate::app::sagas::retry_until_known_result;
+use nexus_db_queries::authz;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::identity::Asset;
+use nexus_db_queries::db::lookup::LookupPath;
+use omicron_common::api::external::DataPageParams;
+use omicron_common::api::external::Error;
+use omicron_common::api::internal::shared::SwitchLocation;
+use sled_agent_client::types::SetVirtualNetworkInterfaceHost;
+use std::collections::HashSet;
+use std::str::FromStr;
+use std::sync::Arc;
+use uuid::Uuid;
+
+impl super::Nexus {
+    /// Returns the set of switches with uplinks configured and boundary
+    /// services enabled.
+    pub(crate) async fn boundary_switches(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<HashSet<SwitchLocation>, Error> {
+        let mut boundary_switches: HashSet<SwitchLocation> = HashSet::new();
+        let uplinks = self.list_switch_ports_with_uplinks(opctx).await?;
+        for uplink in &uplinks {
+            let location: SwitchLocation =
+                uplink.switch_location.parse().map_err(|_| {
+                    Error::internal_error(&format!(
+                        "invalid switch location in uplink config: {}",
+                        uplink.switch_location
+                    ))
+                })?;
+            boundary_switches.insert(location);
+        }
+        Ok(boundary_switches)
+    }
+
+    /// Ensures that V2P mappings exist that indicate that the instance with ID
+    /// `instance_id` is resident on the sled with ID `sled_id`.
+    pub(crate) async fn create_instance_v2p_mappings(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+        sled_id: Uuid,
+    ) -> Result<(), Error> {
+        info!(&self.log, "creating V2P mappings for instance";
+              "instance_id" => %instance_id,
+              "sled_id" => %sled_id);
+
+        // For every sled that isn't the sled this instance was allocated to,
+        // create a virtual to physical mapping for each of this instance's
+        // NICs.
+        //
+        // For the mappings to be correct, a few invariants must hold:
+        //
+        // - mappings must be set whenever an instance's sled changes (e.g.,
+        //   during instance creation, migration, stop + start)
+        //
+        // - an instance's sled must not change while its corresponding
+        //   mappings are being created
+        //
+        // - the same mapping creation must be broadcast to all sleds
+        //
+        // A more targeted approach would be to see what other instances share
+        // the VPC this instance is in (or more generally, what instances
+        // should have connectivity to this one), see what sleds those are
+        // allocated to, and only create V2P mappings for those sleds.
+        //
+        // There's additional work with this approach:
+        //
+        // - it means that delete calls are required as well as set calls,
+        //   meaning that now the ordering of those matters (this may also
+        //   necessitate a generation number for V2P mappings)
+        //
+        // - V2P mappings have to be bidirectional in order for both
+        //   instances' packets to make a round trip. This isn't a problem
+        //   with the broadcast approach because one of the sides will exist
+        //   already, but it is something to orchestrate with a more targeted
+        //   approach.
+        //
+        // TODO-correctness Default firewall rules currently will block
+        // instances in different VPCs from connecting to each other. If it
+        // ever stops doing this, the broadcast approach will create V2P
+        // mappings that shouldn't exist.
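An aside on the broadcast approach described in the comment above: because each per-sled "set" call is idempotent, re-broadcasting the full mapping set is always safe, which is what makes the retries in the code below harmless. The following self-contained sketch models that property. `SledId`, `V2pTable`, and `broadcast_v2p` are illustrative stand-ins, not Omicron APIs:

```rust
use std::collections::HashMap;
use std::net::Ipv6Addr;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SledId(u32);

/// One sled's virtual-to-physical table: guest IP -> physical host IP.
#[derive(Default)]
struct V2pTable(HashMap<Ipv6Addr, Ipv6Addr>);

impl V2pTable {
    /// Idempotent: re-inserting an identical mapping is a no-op.
    fn set_v2p(&mut self, guest_ip: Ipv6Addr, host_ip: Ipv6Addr) {
        self.0.insert(guest_ip, host_ip);
    }
}

/// Push one mapping to every sled except the one hosting the instance,
/// mirroring the broadcast described above.
fn broadcast_v2p(
    sleds: &mut HashMap<SledId, V2pTable>,
    instance_sled: SledId,
    guest_ip: Ipv6Addr,
    host_ip: Ipv6Addr,
) {
    for (id, table) in sleds.iter_mut() {
        // The instance's own sled is skipped: OPTE installs that mapping
        // implicitly (the TODO(#3107) mentioned above).
        if *id != instance_sled {
            table.set_v2p(guest_ip, host_ip);
        }
    }
}

fn main() {
    let mut sleds: HashMap<SledId, V2pTable> =
        (0u32..3).map(|i| (SledId(i), V2pTable::default())).collect();
    let guest: Ipv6Addr = "fd00::1".parse().unwrap();
    let host: Ipv6Addr = "fd00:1122::10".parse().unwrap();

    // Broadcasting twice demonstrates why repeating the operation is safe.
    broadcast_v2p(&mut sleds, SledId(0), guest, host);
    broadcast_v2p(&mut sleds, SledId(0), guest, host);

    assert!(sleds[&SledId(0)].0.is_empty()); // own sled untouched
    assert_eq!(sleds[&SledId(1)].0[&guest], host);
}
```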
+        let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::Read)
+            .await?;
+
+        let instance_nics = self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?;
+
+        // Look up the supplied sled's physical host IP.
+        let physical_host_ip =
+            *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip;
+
+        let mut last_sled_id: Option<Uuid> = None;
+        loop {
+            let pagparams = DataPageParams {
+                marker: last_sled_id.as_ref(),
+                direction: dropshot::PaginationOrder::Ascending,
+                limit: std::num::NonZeroU32::new(10).unwrap(),
+            };
+
+            let sleds_page =
+                self.sled_list(&self.opctx_alloc, &pagparams).await?;
+            let mut join_handles =
+                Vec::with_capacity(sleds_page.len() * instance_nics.len());
+
+            for sled in &sleds_page {
+                // set_v2p is not required for the sled this instance was
+                // allocated to; OPTE currently does that automatically.
+                //
+                // TODO(#3107): Remove this when XDE stops creating mappings
+                // implicitly.
+                if sled.id() == sled_id {
+                    continue;
+                }
+
+                for nic in &instance_nics {
+                    let client = self.sled_client(&sled.id()).await?;
+                    let nic_id = nic.id;
+                    let mapping = SetVirtualNetworkInterfaceHost {
+                        virtual_ip: nic.ip,
+                        virtual_mac: nic.mac.clone(),
+                        physical_host_ip,
+                        vni: nic.vni.clone(),
+                    };
+
+                    let log = self.log.clone();
+
+                    // This function is idempotent: calling the set_v2p ioctl
+                    // with the same information is a no-op.
+                    join_handles.push(tokio::spawn(futures::future::lazy(
+                        move |_ctx| async move {
+                            retry_until_known_result(&log, || async {
+                                client.set_v2p(&nic_id, &mapping).await
+                            })
+                            .await
+                        },
+                    )));
+                }
+            }
+
+            // Concurrently run each future to completion, but return the last
+            // error seen.
+            let mut error = None;
+            for join_handle in join_handles {
+                let result = join_handle
+                    .await
+                    .map_err(|e| Error::internal_error(&e.to_string()))?
+                    .await;
+
+                if result.is_err() {
+                    error!(self.log, "{:?}", result);
+                    error = Some(result);
+                }
+            }
+            if let Some(e) = error {
+                return e.map(|_| ()).map_err(|e| e.into());
+            }
+
+            if sleds_page.len() < 10 {
+                break;
+            }
+
+            if let Some(last) = sleds_page.last() {
+                last_sled_id = Some(last.id());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Ensure that the necessary v2p mappings for an instance are deleted.
+    pub(crate) async fn delete_instance_v2p_mappings(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+    ) -> Result<(), Error> {
+        // For every sled that isn't the sled this instance was allocated to,
+        // delete the virtual to physical mapping for each of this instance's
+        // NICs. If there isn't a V2P mapping, del_v2p should be a no-op.
+        let (.., authz_instance, db_instance) =
+            LookupPath::new(&opctx, &self.db_datastore)
+                .instance_id(instance_id)
+                .fetch_for(authz::Action::Read)
+                .await?;
+
+        let instance_nics = self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?;
+
+        // Look up the physical host IP of the sled hosting this instance.
+        let instance_sled_id = db_instance.runtime().sled_id;
+        let physical_host_ip = *self
+            .sled_lookup(&self.opctx_alloc, &instance_sled_id)?
+            .fetch()
+            .await?
+            .1
+            .ip;
+
+        let mut last_sled_id: Option<Uuid> = None;
+
+        loop {
+            let pagparams = DataPageParams {
+                marker: last_sled_id.as_ref(),
+                direction: dropshot::PaginationOrder::Ascending,
+                limit: std::num::NonZeroU32::new(10).unwrap(),
+            };
+
+            let sleds_page =
+                self.sled_list(&self.opctx_alloc, &pagparams).await?;
+            let mut join_handles =
+                Vec::with_capacity(sleds_page.len() * instance_nics.len());
+
+            for sled in &sleds_page {
+                // del_v2p is not required for the sled this instance was
+                // allocated to; OPTE currently does that automatically.
+                //
+                // TODO(#3107): Remove this when XDE stops deleting mappings
+                // implicitly.
+                if sled.id() == instance_sled_id {
+                    continue;
+                }
+
+                for nic in &instance_nics {
+                    let client = self.sled_client(&sled.id()).await?;
+                    let nic_id = nic.id;
+                    let mapping = SetVirtualNetworkInterfaceHost {
+                        virtual_ip: nic.ip,
+                        virtual_mac: nic.mac.clone(),
+                        physical_host_ip,
+                        vni: nic.vni.clone(),
+                    };
+
+                    let log = self.log.clone();
+
+                    // This function is idempotent: calling the del_v2p ioctl
+                    // with the same information is a no-op.
+                    join_handles.push(tokio::spawn(futures::future::lazy(
+                        move |_ctx| async move {
+                            retry_until_known_result(&log, || async {
+                                client.del_v2p(&nic_id, &mapping).await
+                            })
+                            .await
+                        },
+                    )));
+                }
+            }
+
+            // Concurrently run each future to completion, but return the last
+            // error seen.
+            let mut error = None;
+            for join_handle in join_handles {
+                let result = join_handle
+                    .await
+                    .map_err(|e| Error::internal_error(&e.to_string()))?
+                    .await;
+
+                if result.is_err() {
+                    error!(self.log, "{:?}", result);
+                    error = Some(result);
+                }
+            }
+            if let Some(e) = error {
+                return e.map(|_| ()).map_err(|e| e.into());
+            }
+
+            if sleds_page.len() < 10 {
+                break;
+            }
+
+            if let Some(last) = sleds_page.last() {
+                last_sled_id = Some(last.id());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Ensures that the Dendrite configuration for the supplied instance is
+    /// up-to-date.
+    ///
+    /// # Parameters
+    ///
+    /// - `opctx`: An operation context that grants read and list-children
+    ///   permissions on the identified instance.
+    /// - `instance_id`: The ID of the instance to act on.
+    /// - `sled_ip_address`: The internal IP address assigned to the sled's
+    ///   sled agent.
+    /// - `ip_index_filter`: An optional filter on the index into the
+    ///   instance's external IP array.
+    ///   - If this is `Some(n)`, this routine configures DPD state for only
+    ///     the Nth external IP in the collection returned from CRDB. The
+    ///     caller is responsible for ensuring that the IP collection has
+    ///     stable indices when making this call.
+    ///   - If this is `None`, this routine configures DPD for all external
+    ///     IPs.
+    pub(crate) async fn instance_ensure_dpd_config(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+        sled_ip_address: &std::net::SocketAddrV6,
+        ip_index_filter: Option<usize>,
+        dpd_client: &Arc<dpd_client::Client>,
+    ) -> Result<(), Error> {
+        let log = &self.log;
+
+        info!(log, "looking up instance's primary network interface";
+              "instance_id" => %instance_id);
+
+        let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::ListChildren)
+            .await?;
+
+        // All external IPs map to the primary network interface, so find that
+        // interface. If there is no such interface, there's no way to route
+        // traffic destined to those IPs, so there's nothing to configure and
+        // it's safe to return early.
+        let network_interface = match self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?
+ .into_iter() + .find(|interface| interface.primary) + { + Some(interface) => interface, + None => { + info!(log, "Instance has no primary network interface"; + "instance_id" => %instance_id); + return Ok(()); + } + }; + + let mac_address = + macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) + .map_err(|e| { + Error::internal_error(&format!( + "failed to convert mac address: {e}" + )) + })?; + + let vni: u32 = network_interface.vni.into(); + + info!(log, "looking up instance's external IPs"; + "instance_id" => %instance_id); + + let ips = self + .db_datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await?; + + if let Some(wanted_index) = ip_index_filter { + if let None = ips.get(wanted_index) { + return Err(Error::internal_error(&format!( + "failed to find external ip address at index: {}", + wanted_index + ))); + } + } + + for target_ip in ips + .iter() + .enumerate() + .filter(|(index, _)| { + if let Some(wanted_index) = ip_index_filter { + *index == wanted_index + } else { + true + } + }) + .map(|(_, ip)| ip) + { + retry_until_known_result(log, || async { + dpd_client + .ensure_nat_entry( + &log, + target_ip.ip, + dpd_client::types::MacAddr { + a: mac_address.into_array(), + }, + *target_ip.first_port, + *target_ip.last_port, + vni, + sled_ip_address.ip(), + ) + .await + }) + .await + .map_err(|e| { + Error::internal_error(&format!( + "failed to ensure dpd entry: {e}" + )) + })?; + } + + Ok(()) + } + + /// Attempts to delete all of the Dendrite NAT configuration for the + /// instance identified by `authz_instance`. + /// + /// # Return value + /// + /// - `Ok(())` if all NAT entries were successfully deleted. + /// - If an operation fails before this routine begins to walk and delete + /// individual NAT entries, this routine returns `Err` and reports that + /// error. + /// - If an operation fails while this routine is walking NAT entries, it + /// will continue trying to delete subsequent entries but will return the + /// first error it encountered. 
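A note on the error-handling contract spelled out in the doc comment above (keep walking entries after a failure, then surface the first error): it is the same accumulate-then-report pattern used by the old saga code this PR replaces. A minimal, dependency-free sketch, with `delete_entry` as a hypothetical stand-in for the per-entry DPD call; the function itself follows below:

```rust
// Stand-in for one NAT-entry deletion; odd entries fail for demonstration.
fn delete_entry(n: u32) -> Result<(), String> {
    if n % 2 == 0 { Ok(()) } else { Err(format!("entry {n} failed")) }
}

fn delete_all(entries: &[u32]) -> Result<(), String> {
    let mut errors = Vec::new();
    for &e in entries {
        if let Err(err) = delete_entry(e) {
            // Keep walking the remaining entries instead of bailing out.
            errors.push(err);
        }
    }
    // Surface only the first error, as the routine below does.
    match errors.into_iter().next() {
        Some(first) => Err(first),
        None => Ok(()),
    }
}

fn main() {
    assert_eq!(delete_all(&[2, 4]), Ok(()));
    assert_eq!(delete_all(&[2, 3, 5]), Err("entry 3 failed".to_string()));
}
```

Returning only the first error keeps the signature simple while, in the real routine, the log still records every failure encountered along the way.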
+    pub(crate) async fn instance_delete_dpd_config(
+        &self,
+        opctx: &OpContext,
+        authz_instance: &authz::Instance,
+    ) -> Result<(), Error> {
+        let log = &self.log;
+        let instance_id = authz_instance.id();
+
+        info!(log, "deleting instance dpd configuration";
+              "instance_id" => %instance_id);
+
+        let external_ips = self
+            .db_datastore
+            .instance_lookup_external_ips(opctx, instance_id)
+            .await?;
+
+        let boundary_switches = self.boundary_switches(opctx).await?;
+
+        let mut errors = vec![];
+        for entry in external_ips {
+            for switch in &boundary_switches {
+                debug!(log, "deleting instance nat mapping";
+                       "instance_id" => %instance_id,
+                       "switch" => switch.to_string(),
+                       "entry" => #?entry);
+
+                let client_result =
+                    self.dpd_clients.get(switch).ok_or_else(|| {
+                        Error::internal_error(&format!(
+                            "unable to find dendrite client for {switch}"
+                        ))
+                    });
+
+                let dpd_client = match client_result {
+                    Ok(client) => client,
+                    Err(new_error) => {
+                        errors.push(new_error);
+                        continue;
+                    }
+                };
+
+                let result = retry_until_known_result(log, || async {
+                    dpd_client
+                        .ensure_nat_entry_deleted(
+                            log,
+                            entry.ip,
+                            *entry.first_port,
+                        )
+                        .await
+                })
+                .await;
+
+                if let Err(e) = result {
+                    let e = Error::internal_error(&format!(
+                        "failed to delete nat entry via dpd: {e}"
+                    ));
+
+                    error!(log, "error deleting nat mapping: {e:#?}";
+                           "instance_id" => %instance_id,
+                           "switch" => switch.to_string(),
+                           "entry" => #?entry);
+                    errors.push(e);
+                } else {
+                    debug!(log, "deleting nat mapping successful";
+                           "instance_id" => %instance_id,
+                           "switch" => switch.to_string(),
+                           "entry" => #?entry);
+                }
+            }
+        }
+
+        if let Some(e) = errors.into_iter().next() {
+            return Err(e);
+        }
+
+        Ok(())
+    }
+}
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index 4dd93f7707..5bab5e2820 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -42,6 +42,7 @@ mod external_ip;
 mod iam;
 mod image;
 mod instance;
+mod instance_network;
 mod ip_pool;
 mod metrics;
 mod network_interface;
diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs
index 88a5823ad0..005e9724a6 100644
--- a/nexus/src/app/sagas/instance_delete.rs
+++ b/nexus/src/app/sagas/instance_delete.rs
@@ -8,7 +8,6 @@ use super::ActionRegistry;
 use super::NexusActionContext;
 use super::NexusSaga;
 use crate::app::sagas::declare_saga_actions;
-use crate::app::sagas::retry_until_known_result;
 use nexus_db_queries::db;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_db_queries::{authn, authz};
@@ -151,70 +150,14 @@ async fn sid_delete_network_config(
         &sagactx,
         &params.serialized_authn,
     );
+    let authz_instance = &params.authz_instance;
     let osagactx = sagactx.user_data();
-    let datastore = &osagactx.datastore();
     let log = sagactx.user_data().log();
-
-    debug!(log, "fetching external ip addresses");
-
-    let external_ips = &datastore
-        .instance_lookup_external_ips(&opctx, params.authz_instance.id())
+    osagactx
+        .nexus()
+        .instance_delete_dpd_config(&opctx, authz_instance)
         .await
-        .map_err(ActionError::action_failed)?;
-
-    let mut errors: Vec<ActionError> = vec![];
-
-    // Here we are attempting to delete every existing NAT entry while deferring
-    // any error handling. If we don't defer error handling, we might end up
-    // bailing out before we've attempted deletion of all entries.
- for entry in external_ips { - for switch in ¶ms.boundary_switches { - debug!(log, "deleting nat mapping"; "switch" => switch.to_string(), "entry" => #?entry); - - let client_result = - osagactx.nexus().dpd_clients.get(switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - ))) - }); - - let dpd_client = match client_result { - Ok(client) => client, - Err(new_error) => { - errors.push(new_error); - continue; - } - }; - - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, entry.ip, *entry.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "deleting nat mapping successful"; "switch" => switch.to_string(), "entry" => format!("{entry:#?}")); - } - Err(e) => { - let new_error = - ActionError::action_failed(Error::internal_error( - &format!("failed to delete nat entry via dpd: {e}"), - )); - error!(log, "{new_error:#?}"); - errors.push(new_error); - } - } - } - } - - if let Some(error) = errors.first() { - return Err(error.clone()); - } - - Ok(()) + .map_err(ActionError::action_failed) } async fn sid_delete_instance_record( diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 56a330f526..da89e7e25a 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -4,15 +4,12 @@ //! Sleds, and the hardware and services within them. -use crate::app::sagas::retry_until_known_result; use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; -use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; @@ -20,7 +17,6 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddrV6; use std::sync::Arc; @@ -317,251 +313,4 @@ impl super::Nexus { .await?; Ok(()) } - - // OPTE V2P mappings - - /// Ensures that V2P mappings exist that indicate that the instance with ID - /// `instance_id` is resident on the sled with ID `sled_id`. - pub(crate) async fn create_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_id: Uuid, - ) -> Result<(), Error> { - info!(&self.log, "creating V2P mappings for instance"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - // For every sled that isn't the sled this instance was allocated to, create - // a virtual to physical mapping for each of this instance's NICs. - // - // For the mappings to be correct, a few invariants must hold: - // - // - mappings must be set whenever an instance's sled changes (eg. - // during instance creation, migration, stop + start) - // - // - an instances' sled must not change while its corresponding mappings - // are being created - // - // - the same mapping creation must be broadcast to all sleds - // - // A more targeted approach would be to see what other instances share - // the VPC this instance is in (or more generally, what instances should - // have connectivity to this one), see what sleds those are allocated - // to, and only create V2P mappings for those sleds. 
- // - // There's additional work with this approach: - // - // - it means that delete calls are required as well as set calls, - // meaning that now the ordering of those matters (this may also - // necessitate a generation number for V2P mappings) - // - // - V2P mappings have to be bidirectional in order for both instances's - // packets to make a round trip. This isn't a problem with the - // broadcast approach because one of the sides will exist already, but - // it is something to orchestrate with a more targeted approach. - // - // TODO-correctness Default firewall rules currently will block - // instances in different VPCs from connecting to each other. If it ever - // stops doing this, the broadcast approach will create V2P mappings - // that shouldn't exist. - let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Look up the supplied sled's physical host IP. - let physical_host_ip = - *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip; - - let mut last_sled_id: Option = None; - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = - self.sled_list(&self.opctx_alloc, &pagparams).await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // set_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops creating mappings - // implicitly. - if sled.id() == sled_id { - continue; - } - - for nic in &instance_nics { - let client = self.sled_client(&sled.id()).await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), - physical_host_ip, - vni: nic.vni.clone(), - }; - - let log = self.log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.set_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(self.log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) - } - - /// Ensure that the necessary v2p mappings for an instance are deleted - pub(crate) async fn delete_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - ) -> Result<(), Error> { - // For every sled that isn't the sled this instance was allocated to, delete - // the virtual to physical mapping for each of this instance's NICs. If - // there isn't a V2P mapping, del_v2p should be a no-op. 
- let (.., authz_instance, db_instance) = - LookupPath::new(&opctx, &self.db_datastore) - .instance_id(instance_id) - .fetch_for(authz::Action::Read) - .await?; - - let instance_nics = self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Lookup the physical host IP of the sled hosting this instance - let instance_sled_id = db_instance.runtime().sled_id; - let physical_host_ip = *self - .sled_lookup(&self.opctx_alloc, &instance_sled_id)? - .fetch() - .await? - .1 - .ip; - - let mut last_sled_id: Option = None; - - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = - self.sled_list(&self.opctx_alloc, &pagparams).await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // del_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops deleting mappings - // implicitly. - if sled.id() == instance_sled_id { - continue; - } - - for nic in &instance_nics { - let client = self.sled_client(&sled.id()).await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), - physical_host_ip, - vni: nic.vni.clone(), - }; - - let log = self.log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.del_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? 
- .await; - - if result.is_err() { - error!(self.log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) - } } From 46ffb37edf51f86fa7b0d46bd0c3a834eb213a75 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 21 Sep 2023 15:12:15 -0700 Subject: [PATCH 2/8] consolidate omicron-dev, omdb, and xtask into "dev-tools" directory (#4129) --- Cargo.lock | 2 +- Cargo.toml | 14 +++++++------- {omdb => dev-tools/omdb}/Cargo.toml | 0 dev-tools/{ => omdb}/build.rs | 0 {omdb => dev-tools/omdb}/src/bin/omdb/db.rs | 0 {omdb => dev-tools/omdb}/src/bin/omdb/main.rs | 0 {omdb => dev-tools/omdb}/src/bin/omdb/nexus.rs | 0 .../omdb}/src/bin/omdb/sled_agent.rs | 0 dev-tools/omdb/tests/config.test.toml | 1 + {omdb => dev-tools/omdb}/tests/env.out | 0 {omdb => dev-tools/omdb}/tests/successes.out | 0 {omdb => dev-tools/omdb}/tests/test_all_output.rs | 0 {omdb => dev-tools/omdb}/tests/usage_errors.out | 0 dev-tools/{ => omicron-dev}/Cargo.toml | 2 +- {omdb => dev-tools/omicron-dev}/build.rs | 0 dev-tools/{ => omicron-dev}/src/bin/omicron-dev.rs | 2 +- .../tests/output/cmd-omicron-dev-bad-cmd-stderr | 0 .../tests/output/cmd-omicron-dev-bad-cmd-stdout | 0 .../cmd-omicron-dev-db-populate-noargs-stderr | 0 .../cmd-omicron-dev-db-populate-noargs-stdout | 0 .../output/cmd-omicron-dev-db-wipe-noargs-stderr | 0 .../output/cmd-omicron-dev-db-wipe-noargs-stdout | 0 .../tests/output/cmd-omicron-dev-noargs-stderr | 0 .../tests/output/cmd-omicron-dev-noargs-stdout | 0 .../{ => omicron-dev}/tests/test_omicron_dev.rs | 0 {xtask => dev-tools/xtask}/Cargo.toml | 0 {xtask => dev-tools/xtask}/src/main.rs | 0 omdb/tests/config.test.toml | 1 - 28 files changed, 11 insertions(+), 11 deletions(-) rename {omdb => dev-tools/omdb}/Cargo.toml (100%) rename dev-tools/{ => omdb}/build.rs (100%) rename {omdb => dev-tools/omdb}/src/bin/omdb/db.rs (100%) rename {omdb => dev-tools/omdb}/src/bin/omdb/main.rs (100%) rename {omdb => dev-tools/omdb}/src/bin/omdb/nexus.rs (100%) rename {omdb => dev-tools/omdb}/src/bin/omdb/sled_agent.rs (100%) create mode 120000 dev-tools/omdb/tests/config.test.toml rename {omdb => dev-tools/omdb}/tests/env.out (100%) rename {omdb => dev-tools/omdb}/tests/successes.out (100%) rename {omdb => dev-tools/omdb}/tests/test_all_output.rs (100%) rename {omdb => dev-tools/omdb}/tests/usage_errors.out (100%) rename dev-tools/{ => omicron-dev}/Cargo.toml (97%) rename {omdb => dev-tools/omicron-dev}/build.rs (100%) rename dev-tools/{ => omicron-dev}/src/bin/omicron-dev.rs (99%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-bad-cmd-stderr (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-bad-cmd-stdout (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-db-populate-noargs-stderr (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-db-populate-noargs-stdout (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-noargs-stderr (100%) rename dev-tools/{ => omicron-dev}/tests/output/cmd-omicron-dev-noargs-stdout (100%) rename dev-tools/{ => omicron-dev}/tests/test_omicron_dev.rs (100%) rename {xtask => dev-tools/xtask}/Cargo.toml (100%) rename 
{xtask => dev-tools/xtask}/src/main.rs (100%) delete mode 120000 omdb/tests/config.test.toml diff --git a/Cargo.lock b/Cargo.lock index 02f61b98b1..cfe40e849b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4860,7 +4860,7 @@ dependencies = [ ] [[package]] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" dependencies = [ "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 65caa502f8..cadf7dc367 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,9 @@ members = [ "common", "ddm-admin-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "dpd-client", @@ -37,7 +39,6 @@ members = [ "nexus/test-utils-macros", "nexus/test-utils", "nexus/types", - "omdb", "oxide-client", "oximeter-client", "oximeter/collector", @@ -62,7 +63,6 @@ members = [ "wicket", "wicketd-client", "wicketd", - "xtask", ] default-members = [ @@ -74,7 +74,9 @@ default-members = [ "ddm-admin-client", "dpd-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "gateway", @@ -98,7 +100,6 @@ default-members = [ "nexus/db-queries", "nexus/defaults", "nexus/types", - "omdb", "oxide-client", "oximeter-client", "oximeter/collector", @@ -123,7 +124,6 @@ default-members = [ "wicket-dbg", "wicketd", "wicketd-client", - "xtask", ] resolver = "2" @@ -234,7 +234,7 @@ nexus-types = { path = "nexus/types" } num-integer = "0.1.45" num = { version = "0.4.1", default-features = false, features = [ "libm" ] } omicron-common = { path = "common" } -omicron-dev-tools = { path = "dev-tools" } +omicron-dev = { path = "dev-tools/omicron-dev" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } omicron-omdb = { path = "omdb" } diff --git a/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml similarity index 100% rename from omdb/Cargo.toml rename to dev-tools/omdb/Cargo.toml diff --git a/dev-tools/build.rs b/dev-tools/omdb/build.rs similarity index 100% rename from dev-tools/build.rs rename to dev-tools/omdb/build.rs diff --git a/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs similarity index 100% rename from omdb/src/bin/omdb/db.rs rename to dev-tools/omdb/src/bin/omdb/db.rs diff --git a/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs similarity index 100% rename from omdb/src/bin/omdb/main.rs rename to dev-tools/omdb/src/bin/omdb/main.rs diff --git a/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs similarity index 100% rename from omdb/src/bin/omdb/nexus.rs rename to dev-tools/omdb/src/bin/omdb/nexus.rs diff --git a/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs similarity index 100% rename from omdb/src/bin/omdb/sled_agent.rs rename to dev-tools/omdb/src/bin/omdb/sled_agent.rs diff --git a/dev-tools/omdb/tests/config.test.toml b/dev-tools/omdb/tests/config.test.toml new file mode 120000 index 0000000000..6050ca47dd --- /dev/null +++ b/dev-tools/omdb/tests/config.test.toml @@ -0,0 +1 @@ +../../../nexus/tests/config.test.toml \ No newline at end of file diff --git a/omdb/tests/env.out b/dev-tools/omdb/tests/env.out similarity index 100% rename from omdb/tests/env.out rename to dev-tools/omdb/tests/env.out diff --git a/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out similarity index 100% rename from omdb/tests/successes.out rename to dev-tools/omdb/tests/successes.out diff --git a/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs similarity index 
100% rename from omdb/tests/test_all_output.rs rename to dev-tools/omdb/tests/test_all_output.rs diff --git a/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out similarity index 100% rename from omdb/tests/usage_errors.out rename to dev-tools/omdb/tests/usage_errors.out diff --git a/dev-tools/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml similarity index 97% rename from dev-tools/Cargo.toml rename to dev-tools/omicron-dev/Cargo.toml index 4a7c25b337..2061489cbb 100644 --- a/dev-tools/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" edition = "2021" license = "MPL-2.0" diff --git a/omdb/build.rs b/dev-tools/omicron-dev/build.rs similarity index 100% rename from omdb/build.rs rename to dev-tools/omicron-dev/build.rs diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs similarity index 99% rename from dev-tools/src/bin/omicron-dev.rs rename to dev-tools/omicron-dev/src/bin/omicron-dev.rs index 70018e9d9b..14617d6ba4 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -327,7 +327,7 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { let mut signal_stream = signals.fuse(); // Read configuration. - let config_str = include_str!("../../../nexus/examples/config.toml"); + let config_str = include_str!("../../../../nexus/examples/config.toml"); let mut config: omicron_common::nexus_config::Config = toml::from_str(config_str).context("parsing example config")?; config.pkg.log = dropshot::ConfigLogging::File { diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout rename to 
dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout diff --git a/dev-tools/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs similarity index 100% rename from dev-tools/tests/test_omicron_dev.rs rename to dev-tools/omicron-dev/tests/test_omicron_dev.rs diff --git a/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml similarity index 100% rename from xtask/Cargo.toml rename to dev-tools/xtask/Cargo.toml diff --git a/xtask/src/main.rs b/dev-tools/xtask/src/main.rs similarity index 100% rename from xtask/src/main.rs rename to dev-tools/xtask/src/main.rs diff --git a/omdb/tests/config.test.toml b/omdb/tests/config.test.toml deleted file mode 120000 index 9a42a12b61..0000000000 --- a/omdb/tests/config.test.toml +++ /dev/null @@ -1 +0,0 @@ -../../nexus/tests/config.test.toml \ No newline at end of file From db0dfa29f7fff93584e657f9f116f7231233ea59 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:12:52 -0700 Subject: [PATCH 3/8] Bump dropshot from `99cea06` to `fa728d0` (#4122) --- Cargo.lock | 85 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cfe40e849b..a82a44cb58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -853,7 +853,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "599aa35200ffff8f04c1925aa1acc92fa2e08874379ef42e210a80e527e60838" dependencies = [ "serde", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -1427,7 +1427,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "tracing", "usdt", "uuid", @@ -1482,7 +1482,7 @@ dependencies = [ "tempfile", "thiserror", "tokio-rustls", - "toml 0.7.6", + "toml 0.7.8", "twox-hash", "uuid", "vergen", @@ -1739,7 +1739,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -2011,7 +2011,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "trust-dns-client", "trust-dns-proto", "trust-dns-resolver", @@ -2093,14 +2093,14 @@ dependencies = [ "serde", "serde_json", "slog", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "async-stream", "async-trait", @@ -2116,6 +2116,7 @@ dependencies = [ "http", "hyper", "indexmap 2.0.0", + "multer", "openapiv3", "paste", "percent-encoding", @@ -2135,7 +2136,7 @@ dependencies = [ "slog-term", "tokio", "tokio-rustls", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", "version_check", @@ -2145,7 +2146,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "proc-macro2", "quote", @@ -2272,7 +2273,7 @@ dependencies = [ "russh-keys", "serde_json", "tokio", - "toml 0.7.6", + "toml 0.7.8", "trust-dns-resolver", "uuid", ] @@ -3199,7 +3200,7 @@ dependencies = [ "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git)", "tlvc-text", - "toml 0.7.6", + "toml 0.7.8", "x509-cert", "zerocopy 0.6.3", "zip", @@ -3374,7 +3375,7 @@ dependencies = [ "smf", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "uuid", "zone", ] @@ -3491,7 +3492,7 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "toml 0.7.6", + "toml 0.7.8", "tufaceous-lib", "update-engine", "uuid", @@ -4159,6 +4160,24 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http", + "httparse", + "log", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "nanorand" version = "0.7.0" @@ -4356,7 +4375,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", ] @@ -4801,7 +4820,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -4841,7 +4860,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -4856,7 +4875,7 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -4887,7 +4906,7 @@ dependencies = [ "subprocess", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -4929,7 +4948,7 @@ dependencies = [ "tokio-stream", "tokio-tungstenite 0.18.0", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -5037,7 +5056,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "tough", "trust-dns-resolver", "usdt", @@ -5110,7 +5129,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "topological-sort", "walkdir", ] @@ -5225,7 +5244,7 @@ dependencies = [ "tofino", "tokio", "tokio-tungstenite 0.18.0", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", "zeroize", @@ -5278,7 +5297,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "walkdir", ] @@ -5560,7 +5579,7 @@ dependencies = [ "subprocess", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -6464,7 +6483,7 @@ dependencies = [ "tokio", "tokio-tungstenite 0.17.2", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", ] @@ -6477,7 +6496,7 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -8143,7 +8162,7 @@ dependencies = [ "sprockets-rot", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -8869,9 +8888,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.6" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" dependencies = [ "serde", "serde_spanned", @@ -9145,7 +9164,7 @@ dependencies = [ "sha2", "slog", 
"tar", - "toml 0.7.6", + "toml 0.7.8", "tough", "url", "zip", @@ -9205,7 +9224,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "rand 0.8.5", "static_assertions", ] @@ -9758,7 +9777,7 @@ dependencies = [ "textwrap 0.16.0", "tokio", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "toml_edit", "tui-tree-widget", "unicode-width", @@ -9859,7 +9878,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "tough", "trust-dns-resolver", "tufaceous", From 9b13247d643a7dd052d91ce109cc758c833f0c8c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:13:23 -0700 Subject: [PATCH 4/8] Bump chrono from 0.4.30 to 0.4.31 (#4113) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a82a44cb58..cd4961fae4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -934,9 +934,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", From 394f5f4e3ab2dd65b620dfca7c671f659165f1c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Sep 2023 07:22:50 -0700 Subject: [PATCH 5/8] Bump progenitor from `3734e56` to `5c941c0` (#4120) --- Cargo.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd4961fae4..89cb31c915 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6317,7 +6317,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6328,7 +6328,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "bytes", "futures-core", @@ -6342,7 +6342,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "getopts", "heck 0.4.1", @@ -6364,7 +6364,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "openapiv3", "proc-macro2", @@ -9238,7 +9238,7 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" version = "0.0.13" -source = 
"git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "typify-impl", "typify-macro", @@ -9247,7 +9247,7 @@ dependencies = [ [[package]] name = "typify-impl" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "heck 0.4.1", "log", @@ -9264,7 +9264,7 @@ dependencies = [ [[package]] name = "typify-macro" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "proc-macro2", "quote", @@ -9305,9 +9305,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-linebreak" From 014387780fc9df84607dda172b402e1bf010186d Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 22 Sep 2023 11:30:14 -0700 Subject: [PATCH 6/8] omdb could be easier to configure (#4133) --- Cargo.lock | 1 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 38 ++++-- dev-tools/omdb/src/bin/omdb/main.rs | 102 ++++++++++++++-- dev-tools/omdb/src/bin/omdb/nexus.rs | 33 +++-- dev-tools/omdb/src/bin/omdb/sled_agent.rs | 14 ++- dev-tools/omdb/tests/env.out | 115 ++++++++++++++++++ dev-tools/omdb/tests/successes.out | 8 ++ dev-tools/omdb/tests/test_all_output.rs | 53 ++++++-- dev-tools/omdb/tests/usage_errors.out | 73 +++++++++-- dns-server/src/dns_server.rs | 4 +- dns-server/src/lib.rs | 2 +- dns-server/tests/basic_test.rs | 2 +- internal-dns-cli/src/bin/dnswait.rs | 2 +- internal-dns/src/config.rs | 4 +- internal-dns/src/names.rs | 2 +- internal-dns/src/resolver.rs | 42 +++---- nexus/src/context.rs | 13 +- nexus/test-utils/src/lib.rs | 7 +- nexus/tests/integration_tests/certificates.rs | 2 +- .../tests/integration_tests/initialization.rs | 4 +- 21 files changed, 423 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 89cb31c915..5037950172 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5075,6 +5075,7 @@ dependencies = [ "dropshot", "expectorate", "humantime", + "internal-dns 0.1.0", "nexus-client 0.1.0", "nexus-db-model", "nexus-db-queries", diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 0bf4ee17b7..c9ebbe35ad 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -15,6 +15,7 @@ clap.workspace = true diesel.workspace = true dropshot.workspace = true humantime.workspace = true +internal-dns.workspace = true nexus-client.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 0ebf16d46b..74b8dffedc 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -12,6 +12,7 @@ //! would be the only consumer -- and in that case it's okay to query the //! database directly. 
+use crate::Omdb; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; @@ -56,7 +57,7 @@ pub struct DbArgs { /// limit to apply to queries that fetch rows #[clap( long = "fetch-limit", - default_value_t = NonZeroU32::new(100).unwrap() + default_value_t = NonZeroU32::new(500).unwrap() )] fetch_limit: NonZeroU32, @@ -131,17 +132,38 @@ enum ServicesCommands { impl DbArgs { /// Run a `omdb db` subcommand. - pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The database URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(db_url) = &self.db_url else { - bail!( - "database URL must be specified with --db-url or OMDB_DB_URL" - ); + let db_url = match &self.db_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: database URL not specified. Will search DNS." + ); + eprintln!("note: (override with --db-url or OMDB_DB_URL)"); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Cockroach, + ) + .await?; + + format!( + "postgresql://root@{}/omicron?sslmode=disable", + addrs + .into_iter() + .map(|a| a.to_string()) + .collect::>() + .join(",") + ) + .parse() + .context("failed to parse constructed postgres URL")? + } }; + eprintln!("note: using database URL {}", &db_url); let db_config = db::Config { url: db_url.clone() }; let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index d0c47f8e2a..166ed3043f 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -13,7 +13,21 @@ //! But at this stage we'd rather have tools that work on latest than not //! have them because we couldn't prioritize keeping them stable. //! -//! 2. Where possible, when the tool encounters something unexpected, it should +//! 2. Debuggers should never lie! Documentation and command names should be +//! precise about what they're reporting. In a working system, these things +//! might all be the same: +//! +//! - the list of instances with zones and propolis processes running on +//! a sled +//! - the list of instances that sled agent knows about +//! - the list of instances that Nexus or the database reports should be +//! running on a sled +//! +//! But in a broken system, these things might be all different. People use +//! debuggers to understand broken systems. The debugger should say which of +//! these it's reporting, rather than "the list of instances on a sled". +//! +//! 3. Where possible, when the tool encounters something unexpected, it should //! print what it can (including the error message and bad data) and then //! continue. It generally shouldn't stop on the first error. (We often //! 
find strange things when debugging but we need our tools to tell us as @@ -22,6 +36,9 @@ use anyhow::Context; use clap::Parser; use clap::Subcommand; +use omicron_common::address::Ipv6Subnet; +use std::net::SocketAddr; +use std::net::SocketAddrV6; mod db; mod nexus; @@ -31,14 +48,16 @@ mod sled_agent; async fn main() -> Result<(), anyhow::Error> { let args = Omdb::parse(); - let log = dropshot::ConfigLogging::StderrTerminal { level: args.log_level } - .to_logger("omdb") - .context("failed to create logger")?; + let log = dropshot::ConfigLogging::StderrTerminal { + level: args.log_level.clone(), + } + .to_logger("omdb") + .context("failed to create logger")?; - match args.command { - OmdbCommands::Db(db) => db.run_cmd(&log).await, - OmdbCommands::Nexus(nexus) => nexus.run_cmd(&log).await, - OmdbCommands::SledAgent(sled) => sled.run_cmd(&log).await, + match &args.command { + OmdbCommands::Db(db) => db.run_cmd(&args, &log).await, + OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await, + OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await, } } @@ -58,10 +77,77 @@ struct Omdb { )] log_level: dropshot::ConfigLoggingLevel, + #[arg(env = "OMDB_DNS_SERVER", long)] + dns_server: Option<SocketAddr>, + #[command(subcommand)] command: OmdbCommands, } +impl Omdb { + async fn dns_lookup_all( + &self, + log: slog::Logger, + service_name: internal_dns::ServiceName, + ) -> Result<Vec<SocketAddrV6>, anyhow::Error> { + let resolver = self.dns_resolver(log).await?; + resolver + .lookup_all_socket_v6(service_name) + .await + .with_context(|| format!("looking up {:?} in DNS", service_name)) + } + + async fn dns_resolver( + &self, + log: slog::Logger, + ) -> Result<internal_dns::resolver::Resolver, anyhow::Error> { + match &self.dns_server { + Some(dns_server) => { + internal_dns::resolver::Resolver::new_from_addrs( + log, + &[*dns_server], + ) + .with_context(|| { + format!( + "creating DNS resolver for DNS server {:?}", + dns_server + ) + }) + } + None => { + // In principle, we should look at /etc/resolv.conf to find the + // DNS servers. In practice, this usually isn't populated + // today. See oxidecomputer/omicron#2122. + // + // However, the address selected below should work for most + // existing Omicron deployments today. That's because while the + // base subnet is in principle configurable in config-rss.toml, + // it's very uncommon to change it from the default value used + // here. + // + // Yet another option would be to find a local IP address that + // looks like it's probably on the underlay network and use that + // to find the subnet to use. But again, this is unlikely to be + // wrong and it's easy to override. + let subnet = + Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap()); + eprintln!("note: using DNS server for subnet {}", subnet.net()); + eprintln!( + "note: (if this is not right, use --dns-server \ + to specify an alternate DNS server)", + ); + internal_dns::resolver::Resolver::new_from_subnet(log, subnet) + .with_context(|| { + format!( + "creating DNS resolver for subnet {}", + subnet.net() + ) + }) + } + } + } +} + #[derive(Debug, Subcommand)] #[allow(clippy::large_enum_variant)] enum OmdbCommands { diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 531158ab16..7599fc209d 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -4,7 +4,7 @@ //! 
omdb commands that query or update specific Nexus instances -use anyhow::bail; +use crate::Omdb; use anyhow::Context; use chrono::SecondsFormat; use chrono::Utc; @@ -55,19 +55,32 @@ enum BackgroundTasksCommands { impl NexusArgs { /// Run a `omdb nexus` subcommand. - pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The nexus URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(nexus_url) = &self.nexus_internal_url else { - bail!( - "nexus URL must be specified with --nexus-internal-url or \ - OMDB_NEXUS_URL" - ); + let nexus_url = match &self.nexus_internal_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: Nexus URL not specified. Will pick one from DNS." + ); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Nexus, + ) + .await?; + let addr = addrs.into_iter().next().expect( + "expected at least one Nexus address from \ + successful DNS lookup", + ); + format!("http://{}", addr) + } }; - let client = nexus_client::Client::new(nexus_url, log.clone()); + eprintln!("note: using Nexus URL {}", &nexus_url); + let client = nexus_client::Client::new(&nexus_url, log.clone()); match &self.command { NexusCommands::BackgroundTasks(BackgroundTasksArgs { diff --git a/dev-tools/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs index 09f34936ca..c413a2ba43 100644 --- a/dev-tools/omdb/src/bin/omdb/sled_agent.rs +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -4,12 +4,13 @@ //! omdb commands that query or update specific Sleds +use crate::Omdb; use anyhow::bail; use anyhow::Context; use clap::Args; use clap::Subcommand; -/// Arguments to the "omdb sled" subcommand +/// Arguments to the "omdb sled-agent" subcommand #[derive(Debug, Args)] pub struct SledAgentArgs { /// URL of the Sled internal API @@ -20,7 +21,7 @@ pub struct SledAgentArgs { command: SledAgentCommands, } -/// Subcommands for the "omdb sled" subcommand +/// Subcommands for the "omdb sled-agent" subcommand #[derive(Debug, Subcommand)] enum SledAgentCommands { /// print information about zones @@ -45,9 +46,10 @@ enum ZpoolCommands { } impl SledAgentArgs { - /// Run a `omdb sled` subcommand. - pub async fn run_cmd( + /// Run a `omdb sled-agent` subcommand. + pub(crate) async fn run_cmd( &self, + _omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { // This is a little goofy. 
The sled URL is required, but can come @@ -72,7 +74,7 @@ impl SledAgentArgs { } } -/// Runs `omdb sled zones list` +/// Runs `omdb sled-agent zones list` async fn cmd_zones_list( client: &sled_agent_client::Client, ) -> Result<(), anyhow::Error> { @@ -91,7 +93,7 @@ async fn cmd_zones_list( Ok(()) } -/// Runs `omdb sled zpools list` +/// Runs `omdb sled-agent zpools list` async fn cmd_zpools_list( client: &sled_agent_client::Client, ) -> Result<(), anyhow::Error> { diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 237743be93..e7a50da935 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -6,6 +6,7 @@ SERIAL IP ROLE ID sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] @@ -58,6 +59,7 @@ task: "external_endpoints" --------------------------------------------- stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT ============================================= EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"] termination: Exited(1) @@ -65,6 +67,7 @@ termination: Exited(1) stdout: --------------------------------------------- stderr: +note: using Nexus URL junk Error: listing background tasks Caused by: @@ -72,3 +75,115 @@ Caused by: 1: builder error: relative URL without a base 2: relative URL without a base ============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. 
+note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. +note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. +note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. 
+note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 90e399f246..7532e9b61e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -7,6 +7,7 @@ internal control-plane.oxide.internal 1 rack setup external oxide-dev.test 2 create silo: "test-suite-silo" --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] @@ -22,6 +23,7 @@ changes: names added: 1, names removed: 0 + test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] @@ -33,6 +35,7 @@ External zone: oxide-dev.test test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] @@ -48,6 +51,7 @@ InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] @@ -66,6 +70,7 @@ sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "sleds"] @@ -76,6 +81,7 @@ SERIAL IP ROLE ID sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] @@ -118,6 +124,7 @@ task: "external_endpoints" --------------------------------------------- stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show"] termination: Exited(0) @@ -198,4 +205,5 @@ task: "external_endpoints" --------------------------------------------- stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 031bc225a6..0eddcb492c 100644 --- 
a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -38,12 +38,14 @@ async fn test_omdb_usage_errors() { &["db"], &["db", "--help"], &["db", "dns"], - &["db", "dns", "show"], &["db", "dns", "diff"], &["db", "dns", "names"], &["db", "services"], &["nexus"], &["nexus", "background-tasks"], + &["sled-agent"], + &["sled-agent", "zones"], + &["sled-agent", "zpools"], ]; for args in invocations { @@ -69,6 +71,9 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["db", "sleds"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + // We can't easily test the sled agent output because that's only + // provided by a real sled agent, which is not available in the + // ControlPlaneTestContext. ]; for args in invocations { @@ -102,6 +107,7 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = format!("http://{}", cptestctx.internal_client.bind_address); + let dns_sockaddr = cptestctx.internal_dns.dns_server.local_address(); let mut output = String::new(); // Database URL @@ -109,12 +115,12 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { let args = &["db", "--db-url", &postgres_url, "sleds"]; do_run(&mut output, |exec| exec, &cmd_path, args).await; - // Case 2: specified in both places. + // Case 2: specified in multiple places (command-line argument wins) let args = &["db", "--db-url", "junk", "sleds"]; let p = postgres_url.clone(); do_run( &mut output, - move |exec| exec.env("ODMB_DB_URL", &p), + move |exec| exec.env("OMDB_DB_URL", &p), &cmd_path, args, ) @@ -131,18 +137,51 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { ]; do_run(&mut output, |exec| exec, &cmd_path.clone(), args).await; - // Case 2: specified in both places. + // Case 2: specified in multiple places (command-line argument wins) let args = &["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"]; - let p = postgres_url.clone(); + let n = nexus_internal_url.clone(); + do_run( + &mut output, + move |exec| exec.env("OMDB_NEXUS_URL", &n), + &cmd_path, + args, + ) + .await; + + // Verify that if you provide a working internal DNS server, you can omit + // the URLs. That's true regardless of whether you pass it on the command + // line or via an environment variable. 
+ let args = &["nexus", "background-tasks", "doc"]; do_run( &mut output, - move |exec| exec.env("ODMB_DB_URL", &p), + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), &cmd_path, args, ) .await; + let args = &[ + "--dns-server", + &dns_sockaddr.to_string(), + "nexus", + "background-tasks", + "doc", + ]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + + let args = &["db", "sleds"]; + do_run( + &mut output, + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), + &cmd_path, + args, + ) + .await; + + let args = &["--dns-server", &dns_sockaddr.to_string(), "db", "sleds"]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + assert_contents("tests/env.out", &output); } @@ -194,7 +233,7 @@ async fn do_run( output.push_str(&redact_variable(&stdout_text)); write!(output, "---------------------------------------------\n").unwrap(); write!(output, "stderr:\n").unwrap(); - output.push_str(&stderr_text); + output.push_str(&redact_variable(&stderr_text)); write!(output, "=============================================\n").unwrap(); } diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index b381ab46c1..92d0212fb4 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -15,8 +15,9 @@ Commands: help Print this message or the help of the given subcommand(s) Options: - --log-level log level filter [env: LOG_LEVEL=] [default: warn] - -h, --help Print help (see more with '--help') + --log-level log level filter [env: LOG_LEVEL=] [default: warn] + --dns-server [env: OMDB_DNS_SERVER=] + -h, --help Print help (see more with '--help') ============================================= EXECUTING COMMAND: omdb ["--help"] termination: Exited(0) @@ -42,6 +43,9 @@ Options: [env: LOG_LEVEL=] [default: warn] + --dns-server + [env: OMDB_DNS_SERVER=] + -h, --help Print help (see a summary with '-h') --------------------------------------------- @@ -89,7 +93,7 @@ Commands: Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] - --fetch-limit limit to apply to queries that fetch rows [default: 100] + --fetch-limit limit to apply to queries that fetch rows [default: 500] -h, --help Print help ============================================= EXECUTING COMMAND: omdb ["db", "--help"] @@ -108,7 +112,7 @@ Commands: Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] - --fetch-limit limit to apply to queries that fetch rows [default: 100] + --fetch-limit limit to apply to queries that fetch rows [default: 500] -h, --help Print help --------------------------------------------- stderr: @@ -132,14 +136,6 @@ Commands: Options: -h, --help Print help ============================================= -EXECUTING COMMAND: omdb ["db", "dns", "show"] -termination: Exited(1) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -Error: database URL must be specified with --db-url or OMDB_DB_URL -============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff"] termination: Exited(2) --------------------------------------------- @@ -224,3 +220,56 @@ Commands: Options: -h, --help Print help ============================================= +EXECUTING COMMAND: omdb ["sled-agent"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Debug a specific Sled + +Usage: omdb sled-agent [OPTIONS] + +Commands: + zones print 
information about zones + zpools print information about zpools + help Print this message or the help of the given subcommand(s) + +Options: + --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zones"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zones + +Usage: omdb sled-agent zones + +Commands: + list Print list of all running control plane zones + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zpools"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zpools + +Usage: omdb sled-agent zpools + +Commands: + list Print list of all zpools managed by the sled agent + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index a857538531..01a8430b62 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -56,8 +56,8 @@ impl Drop for ServerHandle { } impl ServerHandle { - pub fn local_address(&self) -> &SocketAddr { - &self.local_address + pub fn local_address(&self) -> SocketAddr { + self.local_address } } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index d6dd75b4bd..ea8625a667 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -164,7 +164,7 @@ impl TransientServer { pub async fn resolver(&self) -> Result { let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *self.dns_server.local_address(), + socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index 0c10fd75c9..98cd1487ab 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -375,7 +375,7 @@ async fn init_client_server( let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index df4832f346..9e003ed14f 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -72,7 +72,7 @@ async fn main() -> Result<()> { } else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); - Resolver::new_from_addrs(log.clone(), addrs) + Resolver::new_from_addrs(log.clone(), &addrs) .context("creating resolver with explicit nameserver addresses")? 
}; diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 5572e193dc..e5272cd23a 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -281,7 +281,7 @@ impl DnsConfigBuilder { let set = self .service_instances_zones - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); match set.insert(zone.clone(), port) { None => Ok(()), @@ -320,7 +320,7 @@ let set = self .service_instances_sleds - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); let sled_id = sled.0; match set.insert(sled.clone(), port) { diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 663e04bcd9..44ed9228e2 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -14,7 +14,7 @@ pub const DNS_ZONE: &str = "control-plane.oxide.internal"; pub const DNS_ZONE_EXTERNAL_TESTING: &str = "oxide-dev.test"; /// Names of services within the control plane -#[derive(Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, ClickhouseKeeper, diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 68065db36d..114333cb61 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -55,13 +55,13 @@ impl Resolver { /// Construct a new DNS resolver from specific DNS server addresses. pub fn new_from_addrs( log: slog::Logger, - dns_addrs: Vec<SocketAddr>, + dns_addrs: &[SocketAddr], ) -> Result<Self, ResolverError> { info!(log, "new DNS resolver"; "addresses" => ?dns_addrs); let mut rc = ResolverConfig::new(); let dns_server_count = dns_addrs.len(); - for socket_addr in dns_addrs.into_iter() { + for &socket_addr in dns_addrs.into_iter() { rc.add_name_server(NameServerConfig { socket_addr, protocol: Protocol::Udp, @@ -137,7 +137,7 @@ subnet: Ipv6Subnet<AZ_PREFIX>, ) -> Result<Self, ResolverError> { let dns_ips = Self::servers_from_subnet(subnet); - Resolver::new_from_addrs(log, dns_ips) + Resolver::new_from_addrs(log, &dns_ips) } /// Remove all entries from the resolver's cache. 
@@ -455,7 +455,7 @@ mod test { } fn dns_server_address(&self) -> SocketAddr { - *self.dns_server.local_address() + self.dns_server.local_address() } fn cleanup_successful(mut self) { @@ -465,7 +465,7 @@ fn resolver(&self) -> anyhow::Result<Resolver> { let log = self.log.new(o!("component" => "DnsResolver")); - Resolver::new_from_addrs(log, vec![self.dns_server_address()]) + Resolver::new_from_addrs(log, &[self.dns_server_address()]) .context("creating resolver for test DNS server") } @@ -591,7 +591,7 @@ mod test { let zone = dns_builder.host_zone(Uuid::new_v4(), *db_ip.ip()).unwrap(); dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, db_ip.port()) + .service_backend_zone(srv_crdb, &zone, db_ip.port()) .unwrap(); } @@ -599,21 +599,13 @@ .host_zone(Uuid::new_v4(), *clickhouse_addr.ip()) .unwrap(); dns_builder - .service_backend_zone( - srv_clickhouse.clone(), - &zone, - clickhouse_addr.port(), - ) + .service_backend_zone(srv_clickhouse, &zone, clickhouse_addr.port()) .unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), *crucible_addr.ip()).unwrap(); dns_builder - .service_backend_zone( - srv_backend.clone(), - &zone, - crucible_addr.port(), - ) + .service_backend_zone(srv_backend, &zone, crucible_addr.port()) .unwrap(); let mut dns_config = dns_builder.build(); @@ -694,9 +686,7 @@ let ip1 = Ipv6Addr::from_str("ff::01").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip1).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 12345) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build(); dns_server.update(&dns_config).await.unwrap(); let found_ip = resolver @@ -711,9 +701,7 @@ let ip2 = Ipv6Addr::from_str("ee::02").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip2).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 54321) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 54321).unwrap(); let mut dns_config = dns_builder.build(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); @@ -802,7 +790,7 @@ let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); @@ -879,9 +867,9 @@ let dns_server2 = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![ - *dns_server1.dns_server.local_address(), - *dns_server2.dns_server.local_address(), + &[ + dns_server1.dns_server.local_address(), + dns_server2.dns_server.local_address(), ], ) .unwrap(); @@ -955,7 +943,7 @@ let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 73125cc617..7fd0a33c30 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -158,7 +158,7 @@ impl ServerContext { internal_dns::resolver::Resolver::new_from_addrs( log.new(o!("component" => "DnsResolver")), - vec![address], + &[address], ) .map_err(|e| format!("Failed to create DNS resolver: {}", e))? 
} @@ -169,11 +169,12 @@ impl ServerContext { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); - // It's been requested but unfortunately not supported to directly - // connect using SRV based lookup. - // TODO-robustness: the set of cockroachdb hosts we'll use will be - // fixed to whatever we got back from DNS at Nexus start. This means - // a new cockroachdb instance won't picked up until Nexus restarts. + // It's been requested but unfortunately not supported to + // directly connect using SRV based lookup. + // TODO-robustness: the set of cockroachdb hosts we'll use will + // be fixed to whatever we got back from DNS at Nexus start. + // This means a new cockroachdb instance won't picked up until + // Nexus restarts. let addrs = loop { match resolver .lookup_all_socket_v6(ServiceName::Cockroach) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 3c3247790a..d219da7e96 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -433,7 +433,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.config.deployment.internal_dns = nexus_config::InternalDns::FromAddress { - address: *self + address: self .internal_dns .as_ref() .expect("Must initialize internal DNS server first") @@ -659,8 +659,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let dns = dns_server::TransientServer::new(&log).await.unwrap(); - let SocketAddr::V6(dns_address) = *dns.dns_server.local_address() - else { + let SocketAddr::V6(dns_address) = dns.dns_server.local_address() else { panic!("Unsupported IPv4 DNS address"); }; let SocketAddr::V6(dropshot_address) = dns.dropshot_server.local_addr() @@ -998,7 +997,7 @@ pub async fn start_dns_server( let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index 9532dacf92..1843fc28c8 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -360,7 +360,7 @@ async fn test_silo_certificates() { // create the other Silos and their users. 
let resolver = Arc::new( CustomDnsResolver::new( - *cptestctx.external_dns.dns_server.local_address(), + cptestctx.external_dns.dns_server.local_address(), ) .unwrap(), ); diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 1251077c2e..2d4c76dc99 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -41,7 +41,7 @@ async fn test_nexus_boots_before_cockroach() { omicron_common::nexus_config::Database::FromDns; builder.config.deployment.internal_dns = omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") @@ -121,7 +121,7 @@ async fn test_nexus_boots_before_dendrite() { builder.config.pkg.dendrite = HashMap::new(); builder.config.deployment.internal_dns = omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") From eabdb5c432609e44a046aa08a10797a803dfe7bf Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 22 Sep 2023 14:17:21 -0700 Subject: [PATCH 7/8] Created lookup for zpool_id and added to omdb (#4134) Updated the db-queries to support the zpool_id lookup. Added a disks subcommand to the omdb db command, show information about disks in the database --------- Co-authored-by: Alan Hanson --- dev-tools/omdb/src/bin/omdb/db.rs | 216 ++++++++++++++++++ dev-tools/omdb/tests/usage_errors.out | 2 + nexus/db-queries/src/authz/api_resources.rs | 8 + nexus/db-queries/src/authz/oso_generic.rs | 1 + .../src/authz/policy_test/resources.rs | 7 + nexus/db-queries/src/db/lookup.rs | 14 ++ nexus/db-queries/tests/output/authz-roles.out | 14 ++ 7 files changed, 262 insertions(+) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 74b8dffedc..555f921bee 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -24,16 +24,20 @@ use clap::ValueEnum; use diesel::expression::SelectableHelper; use diesel::query_dsl::QueryDsl; use diesel::ExpressionMethods; +use nexus_db_model::Disk; use nexus_db_model::DnsGroup; use nexus_db_model::DnsName; use nexus_db_model::DnsVersion; use nexus_db_model::DnsZone; +use nexus_db_model::Instance; use nexus_db_model::Sled; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::DataStore; +use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; use omicron_common::api::external::DataPageParams; @@ -68,6 +72,8 @@ pub struct DbArgs { /// Subcommands that query or update the database #[derive(Debug, Subcommand)] enum DbCommands { + /// Print information about disks + Disks(DiskArgs), /// Print information about internal and external DNS Dns(DnsArgs), /// Print information about control plane services @@ -76,6 +82,26 @@ enum DbCommands { Sleds, } +#[derive(Debug, Args)] +struct DiskArgs { + #[command(subcommand)] + command: DiskCommands, +} + +#[derive(Debug, Subcommand)] +enum DiskCommands { + /// Get info for a specific disk + Info(DiskInfoArgs), + /// Summarize current disks + List, +} + +#[derive(Debug, Args)] +struct DiskInfoArgs { + /// The UUID of the volume + uuid: Uuid, +} + 
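The centerpiece of this patch is the new `LookupPath::zpool_id` lookup (added to `nexus/db-queries/src/db/lookup.rs` below), which lets omdb walk from a disk's allocated regions to the backing zpool and on to the hosting sled. A rough sketch of that composition, assuming only the `LookupPath` API shown in this patch (the helper name and signature are illustrative, not part of the change):

```rust
// Hedged sketch, not code from this patch: resolve a zpool to the serial
// number of the sled hosting it, the same walk cmd_db_disk_info does below.
use anyhow::Context;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_db_queries::db::DataStore;
use uuid::Uuid;

async fn zpool_sled_serial(
    opctx: &OpContext,
    datastore: &DataStore,
    pool_id: Uuid,
) -> Result<String, anyhow::Error> {
    // New in this patch: look up the zpool by its ID to get the model row.
    let (_, zpool) = LookupPath::new(opctx, datastore)
        .zpool_id(pool_id)
        .fetch()
        .await
        .context("failed to look up zpool")?;

    // Chain into the existing sled lookup via the zpool's sled_id.
    let (_, sled) = LookupPath::new(opctx, datastore)
        .sled_id(zpool.sled_id)
        .fetch()
        .await
        .context("failed to look up sled")?;

    Ok(sled.serial_number().to_string())
}
```

`cmd_db_disk_info` below performs this walk once per allocated region to fill in the host serial and physical disk columns of the downstairs table.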
#[derive(Debug, Args)] struct DnsArgs { #[command(subcommand)] @@ -180,6 +206,12 @@ impl DbArgs { let opctx = OpContext::for_tests(log.clone(), datastore.clone()); match &self.command { + DbCommands::Disks(DiskArgs { + command: DiskCommands::Info(uuid), + }) => cmd_db_disk_info(&opctx, &datastore, uuid).await, + DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { + cmd_db_disk_list(&datastore, self.fetch_limit).await + } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await } @@ -288,6 +320,190 @@ fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { } } +// Disks + +/// Run `omdb db disk list`. +async fn cmd_db_disk_list( + datastore: &DataStore, + limit: NonZeroU32, +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DiskRow { + name: String, + id: String, + size: String, + state: String, + attached_to: String, + } + + let ctx = || "listing disks".to_string(); + + use db::schema::disk::dsl; + let disks = dsl::disk + .filter(dsl::time_deleted.is_null()) + .limit(i64::from(u32::from(limit))) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading disks")?; + + check_limit(&disks, limit, ctx); + + let rows = disks.into_iter().map(|disk| DiskRow { + name: disk.name().to_string(), + id: disk.id().to_string(), + size: disk.size.to_string(), + state: disk.runtime().disk_state, + attached_to: match disk.runtime().attach_instance_id { + Some(uuid) => uuid.to_string(), + None => "-".to_string(), + }, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +/// Run `omdb db disk info `. +async fn cmd_db_disk_info( + opctx: &OpContext, + datastore: &DataStore, + args: &DiskInfoArgs, +) -> Result<(), anyhow::Error> { + // The row describing the instance + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct UpstairsRow { + host_serial: String, + disk_name: String, + instance_name: String, + propolis_zone: String, + } + + // The rows describing the downstairs regions for this disk/volume + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DownstairsRow { + host_serial: String, + region: String, + zone: String, + physical_disk: String, + } + + use db::schema::disk::dsl as disk_dsl; + + let disk = disk_dsl::disk + .filter(disk_dsl::id.eq(args.uuid)) + .limit(1) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading requested disk")?; + + let Some(disk) = disk.into_iter().next() else { + bail!("no disk: {} found", args.uuid); + }; + + // For information about where this disk is attached. + let mut rows = Vec::new(); + + // If the disk is attached to an instance, show information + // about that instance. + if let Some(instance_uuid) = disk.runtime().attach_instance_id { + // Get the instance this disk is attached to + use db::schema::instance::dsl as instance_dsl; + let instance = instance_dsl::instance + .filter(instance_dsl::id.eq(instance_uuid)) + .limit(1) + .select(Instance::as_select()) + .load_async(datastore.pool_for_tests().await?) 
+ .await + .context("loading requested instance")?; + + let Some(instance) = instance.into_iter().next() else { + bail!("no instance: {} found", instance_uuid); + }; + + let instance_name = instance.name().to_string(); + let propolis_id = instance.runtime().propolis_id.to_string(); + let my_sled_id = instance.runtime().sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + let usr = UpstairsRow { + host_serial: my_sled.serial_number().to_string(), + disk_name: disk.name().to_string(), + instance_name, + propolis_zone: format!("oxz_propolis-server_{}", propolis_id), + }; + rows.push(usr); + } else { + // If the disk is not attached to anything, just print empty + // fields. + let usr = UpstairsRow { + host_serial: "-".to_string(), + disk_name: disk.name().to_string(), + instance_name: "-".to_string(), + propolis_zone: "-".to_string(), + }; + rows.push(usr); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + // Get the dataset backing this volume. + let regions = datastore.get_allocated_regions(disk.volume_id).await?; + + let mut rows = Vec::with_capacity(3); + for (dataset, region) in regions { + let my_pool_id = dataset.pool_id; + let (_, my_zpool) = LookupPath::new(opctx, datastore) + .zpool_id(my_pool_id) + .fetch() + .await + .context("failed to look up zpool")?; + + let my_sled_id = my_zpool.sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + rows.push(DownstairsRow { + host_serial: my_sled.serial_number().to_string(), + region: region.id().to_string(), + zone: format!("oxz_crucible_{}", dataset.id()), + physical_disk: my_zpool.physical_disk_id.to_string(), + }); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + // SERVICES #[derive(Tabled)] diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 92d0212fb4..8afcf13fb1 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -86,6 +86,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + disks Print information about disks dns Print information about internal and external DNS services Print information about control plane services sleds Print information about sleds @@ -105,6 +106,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + disks Print information about disks dns Print information about internal and external DNS services Print information about control plane services sleds Print information about sleds diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 6cacfe4cb2..ec959e2907 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -905,6 +905,14 @@ authz_resource! { polar_snippet = FleetChild, } +authz_resource! { + name = "Zpool", + parent = "Fleet", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = FleetChild, +} + authz_resource! 
{ name = "SledInstance", parent = "Fleet", diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index 1ce86950bc..bcd7a42945 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -150,6 +150,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { IdentityProvider::init(), SamlIdentityProvider::init(), Sled::init(), + Zpool::init(), Service::init(), UpdateArtifact::init(), UserBuiltin::init(), diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index a0dad0e17e..054fe6430b 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -88,6 +88,13 @@ pub async fn make_resources( LookupType::ById(sled_id), )); + let zpool_id = "aaaaaaaa-1233-af7d-9220-afe1d8090900".parse().unwrap(); + builder.new_resource(authz::Zpool::new( + authz::FLEET, + zpool_id, + LookupType::ById(zpool_id), + )); + make_services(&mut builder).await; builder.new_resource(authz::PhysicalDisk::new( diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index c01daa9c87..e7e7bb47fc 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -347,6 +347,11 @@ impl<'a> LookupPath<'a> { Sled::PrimaryKey(Root { lookup_root: self }, id) } + /// Select a resource of type Zpool, identified by its id + pub fn zpool_id(self, id: Uuid) -> Zpool<'a> { + Zpool::PrimaryKey(Root { lookup_root: self }, id) + } + /// Select a resource of type Service, identified by its id pub fn service_id(self, id: Uuid) -> Service<'a> { Service::PrimaryKey(Root { lookup_root: self }, id) @@ -788,6 +793,15 @@ lookup_resource! { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] } +lookup_resource! { + name = "Zpool", + ancestors = [], + children = [], + lookup_by_name = false, + soft_deletes = true, + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] +} + lookup_resource! { name = "SledInstance", ancestors = [], diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 7531672514..72031c567e 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -782,6 +782,20 @@ resource: Sled id "8a785566-adaf-c8d8-e886-bee7f9b73ca7" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: Zpool id "aaaaaaaa-1233-af7d-9220-afe1d8090900" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Service id "6b1f15ee-d6b3-424c-8436-94413a0b682d" USER Q R LC RP M MP CC D From dfb6853f28404469e3f91f850bc0d39d5c8f3ff0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 24 Sep 2023 14:20:51 -0700 Subject: [PATCH 8/8] Bump crossterm from 0.26.1 to 0.27.0 (#4109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [crossterm](https://github.com/crossterm-rs/crossterm) from 0.26.1 to 0.27.0.
Release notes

Sourced from crossterm's releases.

0.27.0

Version 0.27

Added ⭐

  • Add NO_COLOR support (https://no-color.org/)
  • Add option to force overwrite NO_COLOR (#802)
  • Add support for scroll left/right events on windows and unix systems (#788).
  • Add window_size function to fetch pixel width/height of screen for more sophisticated rendering in terminals.
  • Add support for deserializing hex color strings to `Color`, e.g. `#fffff`.

Changes

  • Make the events module an optional feature `events` (to make crossterm more lightweight) (#776)

Breaking ⚠️

  • Set minimum rustc version to 1.58 (#798)
  • Change all error types to std::io::Result (#765)
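Of these, the error-type change is the one most likely to require edits in code that consumes crossterm. A hedged sketch of what 0.27-style call sites look like, based only on the APIs named in the notes above (this example is not part of the PR):

```rust
use std::io::{self, Write};

use crossterm::{
    execute,
    style::{Color, Print, ResetColor, SetForegroundColor},
    terminal,
};

fn main() -> io::Result<()> {
    // Per #765, crossterm commands now return std::io::Result directly, so
    // `?` composes with ordinary I/O code instead of a crate-specific Result.
    execute!(
        io::stdout(),
        SetForegroundColor(Color::Blue),
        Print("colored output\n"),
        ResetColor
    )?;

    // window_size() is new in 0.27; it reports pixel as well as cell
    // dimensions and fails with an io::Error when stdout is not a terminal.
    let size = terminal::window_size()?;
    println!(
        "{} cols x {} rows ({}x{} px)",
        size.columns, size.rows, size.width, size.height
    );

    io::stdout().flush()
}
```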

@​Gronis, @​kevin-vigor, @​Wilfred, @​benjajaja, @​blt-r, @​Piturnah, @​kdheepak, @​DeathVenom54, @​senekor, @​joseluis, @​gibbz00, @​lesleyrs, @​jhartzell42

Changelog

Sourced from crossterm's changelog; the Version 0.27 entries duplicate the release notes quoted above verbatim.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=crossterm&package-manager=cargo&previous-version=0.26.1&new-version=0.27.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5037950172..f32a5a6ad9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1355,7 +1355,6 @@ checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" dependencies = [ "bitflags 1.3.2", "crossterm_winapi", - "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -1373,6 +1372,7 @@ checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ "bitflags 2.4.0", "crossterm_winapi", + "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -9751,7 +9751,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "futures", "hex", "humantime", @@ -9811,7 +9811,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "ratatui", "reedline", "serde", diff --git a/Cargo.toml b/Cargo.toml index cadf7dc367..5390b61086 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -158,7 +158,7 @@ clap = { version = "4.4", features = ["derive", "env", "wrap_help"] } cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" -crossterm = { version = "0.26.1", features = ["event-stream"] } +crossterm = { version = "0.27.0", features = ["event-stream"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" }
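As an aside, the `event-stream` feature retained in the workspace dependency above is what enables crossterm's async `EventStream`. A minimal sketch of consuming it under 0.27, assuming `tokio` and `futures` as additional dependencies (none of this code is part of the patch):

```rust
use crossterm::event::{Event, EventStream, KeyCode};
use futures::StreamExt;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // With the "event-stream" feature, EventStream yields io::Result<Event>
    // items under 0.27 (previously a crossterm-specific Result type).
    let mut events = EventStream::new();
    while let Some(event) = events.next().await {
        if let Event::Key(key) = event? {
            println!("key: {:?}", key.code);
            if key.code == KeyCode::Esc {
                break;
            }
        }
    }
    Ok(())
}
```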