diff --git a/Cargo.lock b/Cargo.lock index dc9954e3f7..f32a5a6ad9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -934,9 +934,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1355,7 +1355,6 @@ checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" dependencies = [ "bitflags 1.3.2", "crossterm_winapi", - "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -1373,6 +1372,7 @@ checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ "bitflags 2.4.0", "crossterm_winapi", + "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -2100,7 +2100,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "async-stream", "async-trait", @@ -2116,6 +2116,7 @@ dependencies = [ "http", "hyper", "indexmap 2.0.0", + "multer", "openapiv3", "paste", "percent-encoding", @@ -2145,7 +2146,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "proc-macro2", "quote", @@ -4159,6 +4160,24 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http", + "httparse", + "log", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "nanorand" version = "0.7.0" @@ -4860,7 +4879,7 @@ dependencies = [ ] [[package]] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" dependencies = [ "anyhow", @@ -5054,18 +5073,27 @@ dependencies = [ "clap 4.4.3", "diesel", "dropshot", + "expectorate", "humantime", + "internal-dns 0.1.0", "nexus-client 0.1.0", "nexus-db-model", "nexus-db-queries", + "nexus-test-utils", + "nexus-test-utils-macros", "nexus-types", "omicron-common 0.1.0", + "omicron-nexus", "omicron-rpaths", + "omicron-test-utils", "pq-sys", + "regex", "serde", "serde_json", + "sled-agent-client", "slog", "strum", + "subprocess", "tabled", "textwrap 0.16.0", "tokio", @@ -6280,9 +6308,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] @@ -6290,7 +6318,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6301,7 +6329,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "bytes", 
"futures-core", @@ -6315,7 +6343,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "getopts", "heck 0.4.1", @@ -6337,7 +6365,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "openapiv3", "proc-macro2", @@ -7557,9 +7585,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.106" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ "itoa", "ryu", @@ -9073,9 +9101,9 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "trybuild" -version = "1.0.84" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c89fd17b7536f2cf66c97cff6e811e89e728ca0ed13caeed610c779360d8b4" +checksum = "196a58260a906cedb9bf6d8034b6379d0c11f552416960452f267402ceeddff1" dependencies = [ "basic-toml", "glob", @@ -9197,7 +9225,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "rand 0.8.5", "static_assertions", ] @@ -9211,7 +9239,7 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" version = "0.0.13" -source = 
"git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "typify-impl", "typify-macro", @@ -9220,7 +9248,7 @@ dependencies = [ [[package]] name = "typify-impl" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "heck 0.4.1", "log", @@ -9237,7 +9265,7 @@ dependencies = [ [[package]] name = "typify-macro" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "proc-macro2", "quote", @@ -9278,9 +9306,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-linebreak" @@ -9723,7 +9751,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "futures", "hex", "humantime", @@ -9783,7 +9811,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "ratatui", "reedline", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5463d0a262..945fdf469a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,9 @@ members = [ "common", "ddm-admin-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "dpd-client", @@ -37,7 +39,6 @@ members = [ "nexus/test-utils-macros", 
"nexus/test-utils", "nexus/types", - "omdb", "oxide-client", "oximeter-client", "oximeter/collector", @@ -62,7 +63,6 @@ members = [ "wicket", "wicketd-client", "wicketd", - "xtask", ] default-members = [ @@ -74,7 +74,9 @@ default-members = [ "ddm-admin-client", "dpd-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "gateway", @@ -98,7 +100,6 @@ default-members = [ "nexus/db-queries", "nexus/defaults", "nexus/types", - "omdb", "oxide-client", "oximeter-client", "oximeter/collector", @@ -123,7 +124,6 @@ default-members = [ "wicket-dbg", "wicketd", "wicketd-client", - "xtask", ] resolver = "2" @@ -154,11 +154,11 @@ chacha20poly1305 = "0.10.1" ciborium = "0.2.1" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } -clap = { version = "4.4", features = ["derive", "env"] } +clap = { version = "4.4", features = ["derive", "env", "wrap_help"] } cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" -crossterm = { version = "0.26.1", features = ["event-stream"] } +crossterm = { version = "0.27.0", features = ["event-stream"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } @@ -234,7 +234,7 @@ nexus-types = { path = "nexus/types" } num-integer = "0.1.45" num = { version = "0.4.1", default-features = false, features = [ "libm" ] } omicron-common = { path = "common" } -omicron-dev-tools = { path = "dev-tools" } +omicron-dev = { path = "dev-tools/omicron-dev" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } omicron-omdb = { path = "omdb" } @@ -301,7 +301,7 @@ semver = { 
version = "1.0.18", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.106" +serde_json = "1.0.107" serde_path_to_error = "0.1.14" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" @@ -359,7 +359,7 @@ trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" trust-dns-server = "0.22" -trybuild = "1.0.84" +trybuild = "1.0.85" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } unicode-width = "0.1.10" diff --git a/certificates/src/lib.rs b/certificates/src/lib.rs index a2240306d2..6bd7fa32de 100644 --- a/certificates/src/lib.rs +++ b/certificates/src/lib.rs @@ -9,6 +9,7 @@ use omicron_common::api::external::Error; use openssl::asn1::Asn1Time; use openssl::pkey::PKey; use openssl::x509::X509; +use std::borrow::Borrow; use std::ffi::CString; mod openssl_ext; @@ -38,7 +39,7 @@ pub enum CertificateError { #[error("Error validating certificate hostname")] ErrorValidatingHostname(#[source] openssl::error::ErrorStack), - #[error("Certificate not valid for {hostname:?}: {cert_description:?}")] + #[error("Certificate not valid for given hostnames {hostname:?}: {cert_description}")] NoDnsNameMatchingHostname { hostname: String, cert_description: String }, #[error("Unsupported certificate purpose (not usable for server auth)")] @@ -103,15 +104,15 @@ impl CertificateValidator { /// `key` is expected to be the private key for the leaf certificate of /// `certs` in PEM format. /// - /// If `hostname` is not `None`, the leaf certificate of `certs` must be - /// valid for `hostname`, as determined by a dNSName entry in its subject - /// alternate names or (if there are no dNSName SANs) the cert's common - /// name. - pub fn validate( + /// If `possible_hostnames` is empty, no hostname validation is performed. 
+ /// If `possible_hostnames` is not empty, we require _at least one_ of its + /// hostnames to match the SANs (or CN, if no SANs are present) of the leaf + /// certificate. + pub fn validate<S: Borrow<str>>( &self, certs: &[u8], key: &[u8], - hostname: Option<&str>, + possible_hostnames: &[S], ) -> Result<(), CertificateError> { // Checks on the certs themselves. let mut certs = X509::stack_from_pem(certs) @@ -134,16 +135,25 @@ impl CertificateValidator { // to use with verifying the private key. let cert = certs.swap_remove(0); - if let Some(hostname) = hostname { - let c_hostname = CString::new(hostname).map_err(|_| { - CertificateError::InvalidValidationHostname( - hostname.to_string(), - ) - })?; - if !cert - .valid_for_hostname(&c_hostname) - .map_err(CertificateError::ErrorValidatingHostname)? - { + if !possible_hostnames.is_empty() { + let mut found_valid_hostname = false; + for hostname in possible_hostnames { + let hostname = hostname.borrow(); + let c_hostname = CString::new(hostname).map_err(|_| { + CertificateError::InvalidValidationHostname( + hostname.to_string(), + ) + })?; + if cert + .valid_for_hostname(&c_hostname) + .map_err(CertificateError::ErrorValidatingHostname)? 
+ { + found_valid_hostname = true; + break; + } + } + + if !found_valid_hostname { let cert_description = cert.hostname_description().unwrap_or_else(|err| { format!( @@ -152,7 +162,7 @@ impl CertificateValidator { ) }); return Err(CertificateError::NoDnsNameMatchingHostname { - hostname: hostname.to_string(), + hostname: possible_hostnames.join(", "), cert_description, }); } @@ -197,13 +207,13 @@ mod tests { fn validate_cert_with_params( params: CertificateParams, - hostname: Option<&str>, + possible_hostnames: &[&str], ) -> Result<(), CertificateError> { let cert_chain = CertificateChain::with_params(params); CertificateValidator::default().validate( cert_chain.cert_chain_as_pem().as_bytes(), cert_chain.end_cert_private_key_as_pem().as_bytes(), - hostname, + possible_hostnames, ) } @@ -218,7 +228,7 @@ mod tests { let mut params = CertificateParams::new([]); params.subject_alt_names = vec![SanType::DnsName(dns_name.to_string())]; - match validate_cert_with_params(params, Some(hostname)) { + match validate_cert_with_params(params, &[hostname]) { Ok(()) => (), Err(err) => panic!( "certificate with SAN {dns_name} \ @@ -236,7 +246,7 @@ mod tests { let mut params = CertificateParams::new([]); params.subject_alt_names = vec![SanType::DnsName(dns_name.to_string())]; - match validate_cert_with_params(params, Some(server_hostname)) { + match validate_cert_with_params(params, &[server_hostname]) { Ok(()) => panic!( "certificate with SAN {dns_name} unexpectedly \ passed validation for hostname {server_hostname}" @@ -269,7 +279,7 @@ mod tests { let mut params = CertificateParams::new([]); params.distinguished_name = dn; - match validate_cert_with_params(params, Some(hostname)) { + match validate_cert_with_params(params, &[hostname]) { Ok(()) => (), Err(err) => panic!( "certificate with SAN {dns_name} \ @@ -289,7 +299,7 @@ mod tests { let mut params = CertificateParams::new([]); params.distinguished_name = dn; - match validate_cert_with_params(params, Some(server_hostname)) { + 
match validate_cert_with_params(params, &[server_hostname]) { Ok(()) => panic!( "certificate with SAN {dns_name} unexpectedly \ passed validation for hostname {server_hostname}" @@ -326,7 +336,7 @@ mod tests { params.subject_alt_names = vec![SanType::DnsName(SUBJECT_ALT_NAME.to_string())]; - match validate_cert_with_params(params, Some(HOSTNAME)) { + match validate_cert_with_params(params, &[HOSTNAME]) { Ok(()) => panic!( "certificate unexpectedly passed validation for hostname" ), @@ -346,6 +356,40 @@ mod tests { } } + #[test] + fn cert_validated_if_any_possible_hostname_is_valid() { + // Expected-successful matches that contain a mix of valid and invalid + // possible hostnames + for (dns_name, hostnames) in &[ + ( + "oxide.computer", + // Since "any valid hostname" is allowed, an empty list of + // hostnames is also allowed + &[] as &[&str], + ), + ("oxide.computer", &["oxide.computer", "not-oxide.computer"]), + ( + "*.oxide.computer", + &["*.oxide.computer", "foo.bar.oxide.computer"], + ), + ( + "*.oxide.computer", + &["foo.bar.not-oxide.computer", "foo.oxide.computer"], + ), + ] { + let mut params = CertificateParams::new([]); + params.subject_alt_names = + vec![SanType::DnsName(dns_name.to_string())]; + match validate_cert_with_params(params, hostnames) { + Ok(()) => (), + Err(err) => panic!( + "certificate with SAN {dns_name} \ + failed to validate for hostname {hostnames:?}: {err}" + ), + } + } + } + #[test] fn test_cert_extended_key_usage() { const HOST: &str = "foo.oxide.computer"; @@ -371,7 +415,7 @@ mod tests { params.extended_key_usages = ext_key_usage.clone(); assert!( - validate_cert_with_params(params, Some(HOST)).is_ok(), + validate_cert_with_params(params, &[HOST]).is_ok(), "unexpected failure with {ext_key_usage:?}" ); } @@ -391,7 +435,7 @@ mod tests { assert!( matches!( - validate_cert_with_params(params, Some(HOST)), + validate_cert_with_params(params, &[HOST]), Err(CertificateError::UnsupportedPurpose) ), "unexpected success with 
{ext_key_usage:?}" diff --git a/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml similarity index 75% rename from omdb/Cargo.toml rename to dev-tools/omdb/Cargo.toml index 0be0bdce6f..c9ebbe35ad 100644 --- a/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -15,6 +15,7 @@ clap.workspace = true diesel.workspace = true dropshot.workspace = true humantime.workspace = true +internal-dns.workspace = true nexus-client.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true @@ -24,6 +25,7 @@ omicron-common.workspace = true pq-sys = "*" serde.workspace = true serde_json.workspace = true +sled-agent-client.workspace = true slog.workspace = true strum.workspace = true tabled.workspace = true @@ -31,6 +33,15 @@ textwrap.workspace = true tokio = { workspace = true, features = [ "full" ] } uuid.workspace = true +[dev-dependencies] +expectorate.workspace = true +nexus-test-utils.workspace = true +nexus-test-utils-macros.workspace = true +omicron-nexus.workspace = true +omicron-test-utils.workspace = true +regex.workspace = true +subprocess.workspace = true + # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. [[bin]] diff --git a/dev-tools/build.rs b/dev-tools/omdb/build.rs similarity index 100% rename from dev-tools/build.rs rename to dev-tools/omdb/build.rs diff --git a/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs similarity index 73% rename from omdb/src/bin/omdb/db.rs rename to dev-tools/omdb/src/bin/omdb/db.rs index e9f1911375..555f921bee 100644 --- a/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -12,6 +12,7 @@ //! would be the only consumer -- and in that case it's okay to query the //! database directly. 
+use crate::Omdb; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; @@ -23,16 +24,20 @@ use clap::ValueEnum; use diesel::expression::SelectableHelper; use diesel::query_dsl::QueryDsl; use diesel::ExpressionMethods; +use nexus_db_model::Disk; use nexus_db_model::DnsGroup; use nexus_db_model::DnsName; use nexus_db_model::DnsVersion; use nexus_db_model::DnsZone; +use nexus_db_model::Instance; use nexus_db_model::Sled; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::DataStore; +use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; use omicron_common::api::external::DataPageParams; @@ -56,7 +61,7 @@ pub struct DbArgs { /// limit to apply to queries that fetch rows #[clap( long = "fetch-limit", - default_value_t = NonZeroU32::new(100).unwrap() + default_value_t = NonZeroU32::new(500).unwrap() )] fetch_limit: NonZeroU32, @@ -67,6 +72,8 @@ pub struct DbArgs { /// Subcommands that query or update the database #[derive(Debug, Subcommand)] enum DbCommands { + /// Print information about disks + Disks(DiskArgs), /// Print information about internal and external DNS Dns(DnsArgs), /// Print information about control plane services @@ -75,6 +82,26 @@ enum DbCommands { Sleds, } +#[derive(Debug, Args)] +struct DiskArgs { + #[command(subcommand)] + command: DiskCommands, +} + +#[derive(Debug, Subcommand)] +enum DiskCommands { + /// Get info for a specific disk + Info(DiskInfoArgs), + /// Summarize current disks + List, +} + +#[derive(Debug, Args)] +struct DiskInfoArgs { + /// The UUID of the volume + uuid: Uuid, +} + #[derive(Debug, Args)] struct DnsArgs { #[command(subcommand)] @@ -131,17 +158,38 @@ enum ServicesCommands { impl DbArgs { /// Run a `omdb db` subcommand. 
- pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The database URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(db_url) = &self.db_url else { - bail!( - "database URL must be specified with --db-url or OMDB_DB_URL" - ); + let db_url = match &self.db_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: database URL not specified. Will search DNS." + ); + eprintln!("note: (override with --db-url or OMDB_DB_URL)"); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Cockroach, + ) + .await?; + + format!( + "postgresql://root@{}/omicron?sslmode=disable", + addrs + .into_iter() + .map(|a| a.to_string()) + .collect::>() + .join(",") + ) + .parse() + .context("failed to parse constructed postgres URL")? + } }; + eprintln!("note: using database URL {}", &db_url); let db_config = db::Config { url: db_url.clone() }; let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); @@ -158,6 +206,12 @@ impl DbArgs { let opctx = OpContext::for_tests(log.clone(), datastore.clone()); match &self.command { + DbCommands::Disks(DiskArgs { + command: DiskCommands::Info(uuid), + }) => cmd_db_disk_info(&opctx, &datastore, uuid).await, + DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { + cmd_db_disk_list(&datastore, self.fetch_limit).await + } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await } @@ -210,7 +264,7 @@ async fn check_schema_version(datastore: &DataStore) { Ok(found_version) => { if found_version == expected_version { eprintln!( - "note: databaase schema version matches expected ({})", + "note: database schema version matches expected ({})", expected_version ); return; @@ -266,6 +320,190 @@ fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { 
} } +// Disks + +/// Run `omdb db disk list`. +async fn cmd_db_disk_list( + datastore: &DataStore, + limit: NonZeroU32, +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DiskRow { + name: String, + id: String, + size: String, + state: String, + attached_to: String, + } + + let ctx = || "listing disks".to_string(); + + use db::schema::disk::dsl; + let disks = dsl::disk + .filter(dsl::time_deleted.is_null()) + .limit(i64::from(u32::from(limit))) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading disks")?; + + check_limit(&disks, limit, ctx); + + let rows = disks.into_iter().map(|disk| DiskRow { + name: disk.name().to_string(), + id: disk.id().to_string(), + size: disk.size.to_string(), + state: disk.runtime().disk_state, + attached_to: match disk.runtime().attach_instance_id { + Some(uuid) => uuid.to_string(), + None => "-".to_string(), + }, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +/// Run `omdb db disk info `. 
+async fn cmd_db_disk_info( + opctx: &OpContext, + datastore: &DataStore, + args: &DiskInfoArgs, +) -> Result<(), anyhow::Error> { + // The row describing the instance + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct UpstairsRow { + host_serial: String, + disk_name: String, + instance_name: String, + propolis_zone: String, + } + + // The rows describing the downstairs regions for this disk/volume + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DownstairsRow { + host_serial: String, + region: String, + zone: String, + physical_disk: String, + } + + use db::schema::disk::dsl as disk_dsl; + + let disk = disk_dsl::disk + .filter(disk_dsl::id.eq(args.uuid)) + .limit(1) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading requested disk")?; + + let Some(disk) = disk.into_iter().next() else { + bail!("no disk: {} found", args.uuid); + }; + + // For information about where this disk is attached. + let mut rows = Vec::new(); + + // If the disk is attached to an instance, show information + // about that instance. + if let Some(instance_uuid) = disk.runtime().attach_instance_id { + // Get the instance this disk is attached to + use db::schema::instance::dsl as instance_dsl; + let instance = instance_dsl::instance + .filter(instance_dsl::id.eq(instance_uuid)) + .limit(1) + .select(Instance::as_select()) + .load_async(datastore.pool_for_tests().await?) 
+ .await + .context("loading requested instance")?; + + let Some(instance) = instance.into_iter().next() else { + bail!("no instance: {} found", instance_uuid); + }; + + let instance_name = instance.name().to_string(); + let propolis_id = instance.runtime().propolis_id.to_string(); + let my_sled_id = instance.runtime().sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + let usr = UpstairsRow { + host_serial: my_sled.serial_number().to_string(), + disk_name: disk.name().to_string(), + instance_name, + propolis_zone: format!("oxz_propolis-server_{}", propolis_id), + }; + rows.push(usr); + } else { + // If the disk is not attached to anything, just print empty + // fields. + let usr = UpstairsRow { + host_serial: "-".to_string(), + disk_name: disk.name().to_string(), + instance_name: "-".to_string(), + propolis_zone: "-".to_string(), + }; + rows.push(usr); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + // Get the dataset backing this volume. 
+ let regions = datastore.get_allocated_regions(disk.volume_id).await?; + + let mut rows = Vec::with_capacity(3); + for (dataset, region) in regions { + let my_pool_id = dataset.pool_id; + let (_, my_zpool) = LookupPath::new(opctx, datastore) + .zpool_id(my_pool_id) + .fetch() + .await + .context("failed to look up zpool")?; + + let my_sled_id = my_zpool.sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + rows.push(DownstairsRow { + host_serial: my_sled.serial_number().to_string(), + region: region.id().to_string(), + zone: format!("oxz_crucible_{}", dataset.id()), + physical_disk: my_zpool.physical_disk_id.to_string(), + }); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + // SERVICES #[derive(Tabled)] diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs new file mode 100644 index 0000000000..166ed3043f --- /dev/null +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -0,0 +1,166 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! CLI for debugging Omicron internal state +//! +//! GROUND RULES: +//! +//! 1. There aren't a lot of ground rules here. At least for now, this is a +//! place to put any kind of runtime tooling for Omicron that seems useful. +//! You can query the database directly (see notes in db.rs), use internal +//! APIs, etc. To the degree that we can stick to stable interfaces, great. +//! But at this stage we'd rather have tools that work on latest than not +//! have them because we couldn't prioritize keeping them stable. +//! +//! 2. Debuggers should never lie! Documentation and command names should be +//! 
precise about what they're reporting. In a working system, these things +//! might all be the same: +//! +//! - the list of instances with zones and propolis processes running on +//! a sled +//! - the list of instances that sled agent knows about +//! - the list of instances that Nexus or the database reports should be +//! running on a sled +//! +//! But in a broken system, these things might be all different. People use +//! debuggers to understand broken systems. The debugger should say which of +//! these it's reporting, rather than "the list of instances on a sled". +//! +//! 3. Where possible, when the tool encounters something unexpected, it should +//! print what it can (including the error message and bad data) and then +//! continue. It generally shouldn't stop on the first error. (We often +//! find strange things when debugging but we need our tools to tell us as +//! much as they can!) + +use anyhow::Context; +use clap::Parser; +use clap::Subcommand; +use omicron_common::address::Ipv6Subnet; +use std::net::SocketAddr; +use std::net::SocketAddrV6; + +mod db; +mod nexus; +mod sled_agent; + +#[tokio::main] +async fn main() -> Result<(), anyhow::Error> { + let args = Omdb::parse(); + + let log = dropshot::ConfigLogging::StderrTerminal { + level: args.log_level.clone(), + } + .to_logger("omdb") + .context("failed to create logger")?; + + match &args.command { + OmdbCommands::Db(db) => db.run_cmd(&args, &log).await, + OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await, + OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await, + } +} + +/// Omicron debugger (unstable) +/// +/// This tool provides commands for directly querying Omicron components about +/// their internal state using internal APIs. This is a prototype. The +/// commands and output are unstable and may change. 
+#[derive(Debug, Parser)] +struct Omdb { + /// log level filter + #[arg( + env, + long, + value_parser = parse_dropshot_log_level, + default_value = "warn", + )] + log_level: dropshot::ConfigLoggingLevel, + + #[arg(env = "OMDB_DNS_SERVER", long)] + dns_server: Option, + + #[command(subcommand)] + command: OmdbCommands, +} + +impl Omdb { + async fn dns_lookup_all( + &self, + log: slog::Logger, + service_name: internal_dns::ServiceName, + ) -> Result, anyhow::Error> { + let resolver = self.dns_resolver(log).await?; + resolver + .lookup_all_socket_v6(service_name) + .await + .with_context(|| format!("looking up {:?} in DNS", service_name)) + } + + async fn dns_resolver( + &self, + log: slog::Logger, + ) -> Result { + match &self.dns_server { + Some(dns_server) => { + internal_dns::resolver::Resolver::new_from_addrs( + log, + &[*dns_server], + ) + .with_context(|| { + format!( + "creating DNS resolver for DNS server {:?}", + dns_server + ) + }) + } + None => { + // In principle, we should look at /etc/resolv.conf to find the + // DNS servers. In practice, this usually isn't populated + // today. See oxidecomputer/omicron#2122. + // + // However, the address selected below should work for most + // existing Omicron deployments today. That's because while the + // base subnet is in principle configurable in config-rss.toml, + // it's very uncommon to change it from the default value used + // here. + // + // Yet another option would be to find a local IP address that + // looks like it's probably on the underlay network and use that + // to find the subnet to use. But again, this is unlikely to be + // wrong and it's easy to override. 
+ let subnet = + Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap()); + eprintln!("note: using DNS server for subnet {}", subnet.net()); + eprintln!( + "note: (if this is not right, use --dns-server \ + to specify an alternate DNS server)", + ); + internal_dns::resolver::Resolver::new_from_subnet(log, subnet) + .with_context(|| { + format!( + "creating DNS resolver for subnet {}", + subnet.net() + ) + }) + } + } + } +} + +#[derive(Debug, Subcommand)] +#[allow(clippy::large_enum_variant)] +enum OmdbCommands { + /// Query the control plane database (CockroachDB) + Db(db::DbArgs), + /// Debug a specific Nexus instance + Nexus(nexus::NexusArgs), + /// Debug a specific Sled + SledAgent(sled_agent::SledAgentArgs), +} + +fn parse_dropshot_log_level( + s: &str, +) -> Result { + serde_json::from_str(&format!("{:?}", s)).context("parsing log level") +} diff --git a/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs similarity index 90% rename from omdb/src/bin/omdb/nexus.rs rename to dev-tools/omdb/src/bin/omdb/nexus.rs index 0d59b28142..7599fc209d 100644 --- a/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -4,7 +4,7 @@ //! 
omdb commands that query or update specific Nexus instances -use anyhow::bail; +use crate::Omdb; use anyhow::Context; use chrono::SecondsFormat; use chrono::Utc; @@ -34,17 +34,17 @@ pub struct NexusArgs { #[derive(Debug, Subcommand)] enum NexusCommands { /// print information about background tasks - BackgroundTask(BackgroundTaskArgs), + BackgroundTasks(BackgroundTasksArgs), } #[derive(Debug, Args)] -struct BackgroundTaskArgs { +struct BackgroundTasksArgs { #[command(subcommand)] - command: BackgroundTaskCommands, + command: BackgroundTasksCommands, } #[derive(Debug, Subcommand)] -enum BackgroundTaskCommands { +enum BackgroundTasksCommands { /// Show documentation about background tasks Doc, /// Print a summary of the status of all background tasks @@ -55,41 +55,55 @@ enum BackgroundTaskCommands { impl NexusArgs { /// Run a `omdb nexus` subcommand. - pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The database URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(nexus_url) = &self.nexus_internal_url else { - bail!( - "nexus URL must be specified with --nexus-internal-url or \ - OMDB_NEXUS_URL" - ); + let nexus_url = match &self.nexus_internal_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: Nexus URL not specified. Will pick one from DNS." 
+ ); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Nexus, + ) + .await?; + let addr = addrs.into_iter().next().expect( + "expected at least one Nexus address from \ + successful DNS lookup", + ); + format!("http://{}", addr) + } }; - let client = nexus_client::Client::new(nexus_url, log.clone()); + eprintln!("note: using Nexus URL {}", &nexus_url); + let client = nexus_client::Client::new(&nexus_url, log.clone()); match &self.command { - NexusCommands::BackgroundTask(BackgroundTaskArgs { - command: BackgroundTaskCommands::Doc, - }) => cmd_nexus_background_task_doc(&client).await, - NexusCommands::BackgroundTask(BackgroundTaskArgs { - command: BackgroundTaskCommands::List, - }) => cmd_nexus_background_task_list(&client).await, - NexusCommands::BackgroundTask(BackgroundTaskArgs { - command: BackgroundTaskCommands::Show, - }) => cmd_nexus_background_task_show(&client).await, + NexusCommands::BackgroundTasks(BackgroundTasksArgs { + command: BackgroundTasksCommands::Doc, + }) => cmd_nexus_background_tasks_doc(&client).await, + NexusCommands::BackgroundTasks(BackgroundTasksArgs { + command: BackgroundTasksCommands::List, + }) => cmd_nexus_background_tasks_list(&client).await, + NexusCommands::BackgroundTasks(BackgroundTasksArgs { + command: BackgroundTasksCommands::Show, + }) => cmd_nexus_background_tasks_show(&client).await, } } } -/// Runs `omdb nexus background-task doc` -async fn cmd_nexus_background_task_doc( +/// Runs `omdb nexus background-tasks doc` +async fn cmd_nexus_background_tasks_doc( client: &nexus_client::Client, ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; let tasks = response.into_inner(); + let tasks: BTreeMap<_, _> = tasks.into_iter().collect(); for (_, bgtask) in &tasks { println!("task: {:?}", bgtask.name); println!( @@ -108,8 +122,8 @@ async fn cmd_nexus_background_task_doc( Ok(()) } -/// Runs `omdb nexus background-task list` -async fn 
cmd_nexus_background_task_list( +/// Runs `omdb nexus background-tasks list` +async fn cmd_nexus_background_tasks_list( client: &nexus_client::Client, ) -> Result<(), anyhow::Error> { let response = @@ -124,8 +138,8 @@ async fn cmd_nexus_background_task_list( Ok(()) } -/// Runs `omdb nexus background-task show` -async fn cmd_nexus_background_task_show( +/// Runs `omdb nexus background-tasks show` +async fn cmd_nexus_background_tasks_show( client: &nexus_client::Client, ) -> Result<(), anyhow::Error> { let response = diff --git a/dev-tools/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs new file mode 100644 index 0000000000..c413a2ba43 --- /dev/null +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -0,0 +1,112 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! omdb commands that query or update specific Sleds + +use crate::Omdb; +use anyhow::bail; +use anyhow::Context; +use clap::Args; +use clap::Subcommand; + +/// Arguments to the "omdb sled-agent" subcommand +#[derive(Debug, Args)] +pub struct SledAgentArgs { + /// URL of the Sled internal API + #[clap(long, env("OMDB_SLED_AGENT_URL"))] + sled_agent_url: Option, + + #[command(subcommand)] + command: SledAgentCommands, +} + +/// Subcommands for the "omdb sled-agent" subcommand +#[derive(Debug, Subcommand)] +enum SledAgentCommands { + /// print information about zones + #[clap(subcommand)] + Zones(ZoneCommands), + + /// print information about zpools + #[clap(subcommand)] + Zpools(ZpoolCommands), +} + +#[derive(Debug, Subcommand)] +enum ZoneCommands { + /// Print list of all running control plane zones + List, +} + +#[derive(Debug, Subcommand)] +enum ZpoolCommands { + /// Print list of all zpools managed by the sled agent + List, +} + +impl SledAgentArgs { + /// Run a `omdb sled-agent` subcommand. 
+ pub(crate) async fn run_cmd( + &self, + _omdb: &Omdb, + log: &slog::Logger, + ) -> Result<(), anyhow::Error> { + // This is a little goofy. The sled URL is required, but can come + // from the environment, in which case it won't be on the command line. + let Some(sled_agent_url) = &self.sled_agent_url else { + bail!( + "sled URL must be specified with --sled-agent-url or \ + OMDB_SLED_AGENT_URL" + ); + }; + let client = + sled_agent_client::Client::new(sled_agent_url, log.clone()); + + match &self.command { + SledAgentCommands::Zones(ZoneCommands::List) => { + cmd_zones_list(&client).await + } + SledAgentCommands::Zpools(ZpoolCommands::List) => { + cmd_zpools_list(&client).await + } + } + } +} + +/// Runs `omdb sled-agent zones list` +async fn cmd_zones_list( + client: &sled_agent_client::Client, +) -> Result<(), anyhow::Error> { + let response = client.zones_list().await.context("listing zones")?; + let zones = response.into_inner(); + let zones: Vec<_> = zones.into_iter().collect(); + + println!("zones:"); + if zones.is_empty() { + println!(" "); + } + for zone in &zones { + println!(" {:?}", zone); + } + + Ok(()) +} + +/// Runs `omdb sled-agent zpools list` +async fn cmd_zpools_list( + client: &sled_agent_client::Client, +) -> Result<(), anyhow::Error> { + let response = client.zpools_get().await.context("listing zpools")?; + let zpools = response.into_inner(); + + println!("zpools:"); + if zpools.is_empty() { + println!(" "); + } + for zpool in &zpools { + println!(" {:?}", zpool); + } + + Ok(()) +} diff --git a/dev-tools/omdb/tests/config.test.toml b/dev-tools/omdb/tests/config.test.toml new file mode 120000 index 0000000000..6050ca47dd --- /dev/null +++ b/dev-tools/omdb/tests/config.test.toml @@ -0,0 +1 @@ +../../../nexus/tests/config.test.toml \ No newline at end of file diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out new file mode 100644 index 0000000000..e7a50da935 --- /dev/null +++ b/dev-tools/omdb/tests/env.out @@ -0,0 +1,189 
@@ +EXECUTING COMMAND: omdb ["db", "--db-url", "postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: invalid value 'junk' for '--db-url ': invalid connection string: unexpected EOF + +For more information, try '--help'. +============================================= +EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDACTED_PORT", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of 
+ HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"] +termination: Exited(1) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +note: using Nexus URL junk +Error: listing background tasks + +Caused by: + 0: Communication Error: builder error: relative URL without a base + 1: builder error: relative URL without a base + 2: relative URL without a base +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. 
+note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. +note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. 
+note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. +note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out new file mode 100644 index 0000000000..7532e9b61e --- /dev/null +++ b/dev-tools/omdb/tests/successes.out @@ -0,0 +1,209 @@ +EXECUTING COMMAND: omdb ["db", "dns", "show"] +termination: Exited(0) +--------------------------------------------- +stdout: +GROUP ZONE ver UPDATED REASON +internal control-plane.oxide.internal 1 rack setup +external oxide-dev.test 2 create silo: "test-suite-silo" +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] +termination: Exited(0) +--------------------------------------------- +stdout: +DNS zone: oxide-dev.test (External) +requested version: 2 (created at ) +version created by Nexus: REDACTED_UUID_REDACTED_UUID_REDACTED +version created because: create silo: "test-suite-silo" +changes: names added: 1, names removed: 0 + ++ test-suite-silo.sys A 127.0.0.1 
+--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] +termination: Exited(0) +--------------------------------------------- +stdout: +External zone: oxide-dev.test + NAME RECORDS + test-suite-silo.sys A 127.0.0.1 +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "services", "list-instances"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERVICE INSTANCE_ID ADDR SLED_SERIAL +CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] +termination: Exited(0) +--------------------------------------------- +stdout: +sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) + + SERVICE INSTANCE_ID ADDR + CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + Dendrite 
REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT + +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: 
"external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: iter 3, triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: iter 3, triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: iter 5, triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: iter 3, triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: iter 3, triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: iter 5, triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + 
DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "external_endpoints" + configured period: every 1m + currently executing: no + last completed activation: iter 3, triggered by an explicit signal + started at (s ago) and ran for ms + external API endpoints: 2 ('*' below marks default) + + SILO_ID DNS_NAME + REDACTED_UUID_REDACTED_UUID_REDACTED default-silo.sys.oxide-dev.test + * REDACTED_UUID_REDACTED_UUID_REDACTED test-suite-silo.sys.oxide-dev.test + + warnings: 2 + warning: silo REDACTED_UUID_REDACTED_UUID_REDACTED with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates + warning: silo REDACTED_UUID_REDACTED_UUID_REDACTED with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates + + TLS certificates: 0 + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs new file mode 100644 index 0000000000..0eddcb492c --- /dev/null +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -0,0 +1,293 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Smoke tests for the `omdb` tool +//! +//! Feel free to change the tool's output. This test just makes it easy to make +//! sure you're only breaking what you intend. 
+ +use expectorate::assert_contents; +use nexus_test_utils_macros::nexus_test; +use omicron_test_utils::dev::test_cmds::path_to_executable; +use omicron_test_utils::dev::test_cmds::run_command; +use std::fmt::Write; +use std::path::Path; +use subprocess::Exec; + +/// name of the "omdb" executable +const CMD_OMDB: &str = env!("CARGO_BIN_EXE_omdb"); + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +#[tokio::test] +async fn test_omdb_usage_errors() { + let cmd_path = path_to_executable(CMD_OMDB); + let mut output = String::new(); + let invocations: &[&[&'static str]] = &[ + // No arguments + &[], + // Help output + &["--help"], + // Bogus command + &["not-a-command"], + // Bogus argument + &["--not-a-command"], + // Command help output + &["db"], + &["db", "--help"], + &["db", "dns"], + &["db", "dns", "diff"], + &["db", "dns", "names"], + &["db", "services"], + &["nexus"], + &["nexus", "background-tasks"], + &["sled-agent"], + &["sled-agent", "zones"], + &["sled-agent", "zpools"], + ]; + + for args in invocations { + do_run(&mut output, |exec| exec, &cmd_path, args).await; + } + + assert_contents("tests/usage_errors.out", &output); +} + +#[nexus_test] +async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + let cmd_path = path_to_executable(CMD_OMDB); + let postgres_url = cptestctx.database.listen_url(); + let nexus_internal_url = + format!("http://{}/", cptestctx.internal_client.bind_address); + let mut output = String::new(); + let invocations: &[&[&'static str]] = &[ + &["db", "dns", "show"], + &["db", "dns", "diff", "external", "2"], + &["db", "dns", "names", "external", "2"], + &["db", "services", "list-instances"], + &["db", "services", "list-by-sled"], + &["db", "sleds"], + &["nexus", "background-tasks", "doc"], + &["nexus", "background-tasks", "show"], + // We can't easily test the sled agent output because that's only + // provided by a real sled agent, which is not available in the + // 
ControlPlaneTestContext. + ]; + + for args in invocations { + println!("running commands with args: {:?}", args); + let p = postgres_url.to_string(); + let u = nexus_internal_url.clone(); + do_run( + &mut output, + move |exec| exec.env("OMDB_DB_URL", &p).env("OMDB_NEXUS_URL", &u), + &cmd_path, + args, + ) + .await; + } + + assert_contents("tests/successes.out", &output); +} + +/// Verify that we properly deal with cases where: +/// +/// - a URL is specified on the command line +/// - a URL is specified in both places +/// +/// for both of the URLs that we accept. We don't need to check the cases where +/// (1) no URL is specified in either place because that's covered by the usage +/// test above, nor (2) the URL is specified only in the environment because +/// that's covered by the success tests above. +#[nexus_test] +async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { + let cmd_path = path_to_executable(CMD_OMDB); + let postgres_url = cptestctx.database.listen_url().to_string(); + let nexus_internal_url = + format!("http://{}", cptestctx.internal_client.bind_address); + let dns_sockaddr = cptestctx.internal_dns.dns_server.local_address(); + let mut output = String::new(); + + // Database URL + // Case 1: specified on the command line + let args = &["db", "--db-url", &postgres_url, "sleds"]; + do_run(&mut output, |exec| exec, &cmd_path, args).await; + + // Case 2: specified in multiple places (command-line argument wins) + let args = &["db", "--db-url", "junk", "sleds"]; + let p = postgres_url.clone(); + do_run( + &mut output, + move |exec| exec.env("OMDB_DB_URL", &p), + &cmd_path, + args, + ) + .await; + + // Nexus URL + // Case 1: specified on the command line + let args = &[ + "nexus", + "--nexus-internal-url", + &nexus_internal_url.clone(), + "background-tasks", + "doc", + ]; + do_run(&mut output, |exec| exec, &cmd_path.clone(), args).await; + + // Case 2: specified in multiple places (command-line argument wins) + let args = + &["nexus", 
"--nexus-internal-url", "junk", "background-tasks", "doc"]; + let n = nexus_internal_url.clone(); + do_run( + &mut output, + move |exec| exec.env("OMDB_NEXUS_URL", &n), + &cmd_path, + args, + ) + .await; + + // Verify that if you provide a working internal DNS server, you can omit + // the URLs. That's true regardless of whether you pass it on the command + // line or via an environment variable. + let args = &["nexus", "background-tasks", "doc"]; + do_run( + &mut output, + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), + &cmd_path, + args, + ) + .await; + + let args = &[ + "--dns-server", + &dns_sockaddr.to_string(), + "nexus", + "background-tasks", + "doc", + ]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + + let args = &["db", "sleds"]; + do_run( + &mut output, + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), + &cmd_path, + args, + ) + .await; + + let args = &["--dns-server", &dns_sockaddr.to_string(), "db", "sleds"]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + + assert_contents("tests/env.out", &output); +} + +async fn do_run( + output: &mut String, + modexec: F, + cmd_path: &Path, + args: &[&str], +) where + F: FnOnce(Exec) -> Exec + Send + 'static, +{ + println!("running command with args: {:?}", args); + write!( + output, + "EXECUTING COMMAND: {} {:?}\n", + cmd_path.file_name().expect("missing command").to_string_lossy(), + args.iter().map(|r| redact_variable(r)).collect::>(), + ) + .unwrap(); + + // Using `subprocess`, the child process will be spawned synchronously. In + // some cases it then tries to make an HTTP request back into this process. + // But if the executor is blocked on the child process, the HTTP server + // acceptor won't run and we'll deadlock. So we use spawn_blocking() to run + // the child process from a different thread. 
tokio requires that these be + // 'static (it does not know that we're going to wait synchronously for the + // task) so we need to create owned versions of these arguments. + let cmd_path = cmd_path.to_owned(); + let owned_args: Vec<_> = args.into_iter().map(|s| s.to_string()).collect(); + let (exit_status, stdout_text, stderr_text) = + tokio::task::spawn_blocking(move || { + let exec = modexec( + Exec::cmd(cmd_path) + // Set RUST_BACKTRACE for consistency between CI and + // developers' local runs. We set it to 0 only to match + // what someone would see who wasn't debugging it, but we + // could as well use 1 or "full" to store that instead. + .env("RUST_BACKTRACE", "0") + .args(&owned_args), + ); + run_command(exec) + }) + .await + .unwrap(); + + write!(output, "termination: {:?}\n", exit_status).unwrap(); + write!(output, "---------------------------------------------\n").unwrap(); + write!(output, "stdout:\n").unwrap(); + output.push_str(&redact_variable(&stdout_text)); + write!(output, "---------------------------------------------\n").unwrap(); + write!(output, "stderr:\n").unwrap(); + output.push_str(&redact_variable(&stderr_text)); + write!(output, "=============================================\n").unwrap(); +} + +/// Redacts text from stdout/stderr that may change from invocation to invocation +/// (e.g., assigned TCP port numbers, timestamps) +/// +/// This allows us to use expectorate to verify the shape of the CLI output. +fn redact_variable(input: &str) -> String { + // Replace TCP port numbers. We include the localhost characters to avoid + // catching any random sequence of numbers. 
+ let s = regex::Regex::new(r"\[::1\]:\d{4,5}") + .unwrap() + .replace_all(input, "[::1]:REDACTED_PORT") + .to_string(); + let s = regex::Regex::new(r"\[::ffff:127.0.0.1\]:\d{4,5}") + .unwrap() + .replace_all(&s, "[::ffff:127.0.0.1]:REDACTED_PORT") + .to_string(); + let s = regex::Regex::new(r"127\.0\.0\.1:\d{4,5}") + .unwrap() + .replace_all(&s, "127.0.0.1:REDACTED_PORT") + .to_string(); + + // Replace uuids. + let s = regex::Regex::new( + "[a-zA-Z0-9]{8}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-\ + [a-zA-Z0-9]{4}-[a-zA-Z0-9]{12}", + ) + .unwrap() + .replace_all(&s, "REDACTED_UUID_REDACTED_UUID_REDACTED") + .to_string(); + + // Replace timestamps. + let s = regex::Regex::new(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z") + .unwrap() + .replace_all(&s, "") + .to_string(); + + let s = regex::Regex::new(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z") + .unwrap() + .replace_all(&s, "") + .to_string(); + + // Replace formatted durations. These are pretty specific to the background + // task output. + let s = regex::Regex::new(r"\d+s ago") + .unwrap() + .replace_all(&s, "s ago") + .to_string(); + + let s = regex::Regex::new(r"\d+ms") + .unwrap() + .replace_all(&s, "ms") + .to_string(); + + s +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out new file mode 100644 index 0000000000..8afcf13fb1 --- /dev/null +++ b/dev-tools/omdb/tests/usage_errors.out @@ -0,0 +1,277 @@ +EXECUTING COMMAND: omdb [] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Omicron debugger (unstable) + +Usage: omdb [OPTIONS] + +Commands: + db Query the control plane database (CockroachDB) + nexus Debug a specific Nexus instance + sled-agent Debug a specific Sled + help Print this message or the help of the given subcommand(s) + +Options: + --log-level log level filter [env: LOG_LEVEL=] [default: warn] + --dns-server [env: OMDB_DNS_SERVER=] + -h, --help Print help (see more with '--help') 
+============================================= +EXECUTING COMMAND: omdb ["--help"] +termination: Exited(0) +--------------------------------------------- +stdout: +Omicron debugger (unstable) + +This tool provides commands for directly querying Omicron components about their internal state +using internal APIs. This is a prototype. The commands and output are unstable and may change. + +Usage: omdb [OPTIONS] + +Commands: + db Query the control plane database (CockroachDB) + nexus Debug a specific Nexus instance + sled-agent Debug a specific Sled + help Print this message or the help of the given subcommand(s) + +Options: + --log-level + log level filter + + [env: LOG_LEVEL=] + [default: warn] + + --dns-server + [env: OMDB_DNS_SERVER=] + + -h, --help + Print help (see a summary with '-h') +--------------------------------------------- +stderr: +============================================= +EXECUTING COMMAND: omdb ["not-a-command"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: unrecognized subcommand 'not-a-command' + +Usage: omdb [OPTIONS] + +For more information, try '--help'. +============================================= +EXECUTING COMMAND: omdb ["--not-a-command"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: unexpected argument '--not-a-command' found + +Usage: omdb [OPTIONS] + +For more information, try '--help'. 
+============================================= +EXECUTING COMMAND: omdb ["db"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Query the control plane database (CockroachDB) + +Usage: omdb db [OPTIONS] + +Commands: + disks Print information about disks + dns Print information about internal and external DNS + services Print information about control plane services + sleds Print information about sleds + help Print this message or the help of the given subcommand(s) + +Options: + --db-url URL of the database SQL interface [env: OMDB_DB_URL=] + --fetch-limit limit to apply to queries that fetch rows [default: 500] + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["db", "--help"] +termination: Exited(0) +--------------------------------------------- +stdout: +Query the control plane database (CockroachDB) + +Usage: omdb db [OPTIONS] + +Commands: + disks Print information about disks + dns Print information about internal and external DNS + services Print information about control plane services + sleds Print information about sleds + help Print this message or the help of the given subcommand(s) + +Options: + --db-url URL of the database SQL interface [env: OMDB_DB_URL=] + --fetch-limit limit to apply to queries that fetch rows [default: 500] + -h, --help Print help +--------------------------------------------- +stderr: +============================================= +EXECUTING COMMAND: omdb ["db", "dns"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Print information about internal and external DNS + +Usage: omdb db dns + +Commands: + show Summarize current version of all DNS zones + diff Show what changed in a given DNS version + names Show the full contents of a given DNS zone and version + help Print this message or the help of the given 
subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["db", "dns", "diff"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: the following required arguments were not provided: + + + +Usage: omdb db dns diff + +For more information, try '--help'. +============================================= +EXECUTING COMMAND: omdb ["db", "dns", "names"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: the following required arguments were not provided: + + + +Usage: omdb db dns names + +For more information, try '--help'. +============================================= +EXECUTING COMMAND: omdb ["db", "services"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Print information about control plane services + +Usage: omdb db services + +Commands: + list-instances List service instances + list-by-sled List service instances, grouped by sled + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["nexus"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Debug a specific Nexus instance + +Usage: omdb nexus [OPTIONS] + +Commands: + background-tasks print information about background tasks + help Print this message or the help of the given subcommand(s) + +Options: + --nexus-internal-url URL of the Nexus internal API [env: + OMDB_NEXUS_URL=] + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks"] +termination: Exited(2) +--------------------------------------------- +stdout: 
+--------------------------------------------- +stderr: +print information about background tasks + +Usage: omdb nexus background-tasks + +Commands: + doc Show documentation about background tasks + list Print a summary of the status of all background tasks + show Print human-readable summary of the status of each background task + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Debug a specific Sled + +Usage: omdb sled-agent [OPTIONS] + +Commands: + zones print information about zones + zpools print information about zpools + help Print this message or the help of the given subcommand(s) + +Options: + --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zones"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zones + +Usage: omdb sled-agent zones + +Commands: + list Print list of all running control plane zones + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zpools"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zpools + +Usage: omdb sled-agent zpools + +Commands: + list Print list of all zpools managed by the sled agent + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= diff --git a/dev-tools/Cargo.toml 
b/dev-tools/omicron-dev/Cargo.toml similarity index 97% rename from dev-tools/Cargo.toml rename to dev-tools/omicron-dev/Cargo.toml index 4a7c25b337..2061489cbb 100644 --- a/dev-tools/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" edition = "2021" license = "MPL-2.0" diff --git a/omdb/build.rs b/dev-tools/omicron-dev/build.rs similarity index 100% rename from omdb/build.rs rename to dev-tools/omicron-dev/build.rs diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs similarity index 99% rename from dev-tools/src/bin/omicron-dev.rs rename to dev-tools/omicron-dev/src/bin/omicron-dev.rs index 70018e9d9b..14617d6ba4 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -327,7 +327,7 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { let mut signal_stream = signals.fuse(); // Read configuration. 
- let config_str = include_str!("../../../nexus/examples/config.toml"); + let config_str = include_str!("../../../../nexus/examples/config.toml"); let mut config: omicron_common::nexus_config::Config = toml::from_str(config_str).context("parsing example config")?; config.pkg.log = dropshot::ConfigLogging::File { diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout 
b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout diff --git a/dev-tools/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs similarity index 100% rename from dev-tools/tests/test_omicron_dev.rs rename to dev-tools/omicron-dev/tests/test_omicron_dev.rs diff --git a/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml similarity index 100% rename from xtask/Cargo.toml rename to dev-tools/xtask/Cargo.toml diff --git a/xtask/src/main.rs b/dev-tools/xtask/src/main.rs similarity index 100% rename from xtask/src/main.rs rename to dev-tools/xtask/src/main.rs diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index a857538531..01a8430b62 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -56,8 +56,8 @@ impl Drop for ServerHandle { } impl ServerHandle { - pub fn local_address(&self) -> &SocketAddr { - &self.local_address + pub fn local_address(&self) -> SocketAddr { + self.local_address } } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index d6dd75b4bd..ea8625a667 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -164,7 +164,7 @@ impl TransientServer { pub async fn resolver(&self) -> Result { let mut 
resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *self.dns_server.local_address(), + socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index 0c10fd75c9..98cd1487ab 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -375,7 +375,7 @@ async fn init_client_server( let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index df4832f346..9e003ed14f 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -72,7 +72,7 @@ async fn main() -> Result<()> { } else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); - Resolver::new_from_addrs(log.clone(), addrs) + Resolver::new_from_addrs(log.clone(), &addrs) .context("creating resolver with explicit nameserver addresses")? 
}; diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 5572e193dc..e5272cd23a 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -281,7 +281,7 @@ impl DnsConfigBuilder { let set = self .service_instances_zones - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); match set.insert(zone.clone(), port) { None => Ok(()), @@ -320,7 +320,7 @@ impl DnsConfigBuilder { let set = self .service_instances_sleds - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); let sled_id = sled.0; match set.insert(sled.clone(), port) { diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 663e04bcd9..44ed9228e2 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -14,7 +14,7 @@ pub const DNS_ZONE: &str = "control-plane.oxide.internal"; pub const DNS_ZONE_EXTERNAL_TESTING: &str = "oxide-dev.test"; /// Names of services within the control plane -#[derive(Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, ClickhouseKeeper, diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 68065db36d..114333cb61 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -55,13 +55,13 @@ impl Resolver { /// Construct a new DNS resolver from specific DNS server addresses. 
pub fn new_from_addrs( log: slog::Logger, - dns_addrs: Vec, + dns_addrs: &[SocketAddr], ) -> Result { info!(log, "new DNS resolver"; "addresses" => ?dns_addrs); let mut rc = ResolverConfig::new(); let dns_server_count = dns_addrs.len(); - for socket_addr in dns_addrs.into_iter() { + for &socket_addr in dns_addrs.into_iter() { rc.add_name_server(NameServerConfig { socket_addr, protocol: Protocol::Udp, @@ -137,7 +137,7 @@ impl Resolver { subnet: Ipv6Subnet, ) -> Result { let dns_ips = Self::servers_from_subnet(subnet); - Resolver::new_from_addrs(log, dns_ips) + Resolver::new_from_addrs(log, &dns_ips) } /// Remove all entries from the resolver's cache. @@ -455,7 +455,7 @@ mod test { } fn dns_server_address(&self) -> SocketAddr { - *self.dns_server.local_address() + self.dns_server.local_address() } fn cleanup_successful(mut self) { @@ -465,7 +465,7 @@ mod test { fn resolver(&self) -> anyhow::Result { let log = self.log.new(o!("component" => "DnsResolver")); - Resolver::new_from_addrs(log, vec![self.dns_server_address()]) + Resolver::new_from_addrs(log, &[self.dns_server_address()]) .context("creating resolver for test DNS server") } @@ -591,7 +591,7 @@ mod test { let zone = dns_builder.host_zone(Uuid::new_v4(), *db_ip.ip()).unwrap(); dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, db_ip.port()) + .service_backend_zone(srv_crdb, &zone, db_ip.port()) .unwrap(); } @@ -599,21 +599,13 @@ mod test { .host_zone(Uuid::new_v4(), *clickhouse_addr.ip()) .unwrap(); dns_builder - .service_backend_zone( - srv_clickhouse.clone(), - &zone, - clickhouse_addr.port(), - ) + .service_backend_zone(srv_clickhouse, &zone, clickhouse_addr.port()) .unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), *crucible_addr.ip()).unwrap(); dns_builder - .service_backend_zone( - srv_backend.clone(), - &zone, - crucible_addr.port(), - ) + .service_backend_zone(srv_backend, &zone, crucible_addr.port()) .unwrap(); let mut dns_config = dns_builder.build(); @@ -694,9 +686,7 @@ mod test { 
let ip1 = Ipv6Addr::from_str("ff::01").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip1).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 12345) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build(); dns_server.update(&dns_config).await.unwrap(); let found_ip = resolver @@ -711,9 +701,7 @@ mod test { let ip2 = Ipv6Addr::from_str("ee::02").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip2).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 54321) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 54321).unwrap(); let mut dns_config = dns_builder.build(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); @@ -802,7 +790,7 @@ mod test { let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); @@ -879,9 +867,9 @@ mod test { let dns_server2 = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![ - *dns_server1.dns_server.local_address(), - *dns_server2.dns_server.local_address(), + &[ + dns_server1.dns_server.local_address(), + dns_server2.dns_server.local_address(), ], ) .unwrap(); @@ -955,7 +943,7 @@ mod test { let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); diff --git a/nexus/db-model/src/certificate.rs b/nexus/db-model/src/certificate.rs index 18faa0d37d..2cd1bcf08a 100644 --- a/nexus/db-model/src/certificate.rs +++ b/nexus/db-model/src/certificate.rs @@ -45,15 +45,14 @@ impl Certificate { id: Uuid, service: ServiceKind, params: 
params::CertificateCreate, + possible_dns_names: &[String], ) -> Result { let validator = CertificateValidator::default(); validator.validate( params.cert.as_bytes(), params.key.as_bytes(), - // TODO-correctness: We should pass a hostname here for cert - // validation: https://github.com/oxidecomputer/omicron/issues/4045 - None, + possible_dns_names, )?; Ok(Self::new_unvalidated(silo_id, id, service, params)) diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 6cacfe4cb2..ec959e2907 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -905,6 +905,14 @@ authz_resource! { polar_snippet = FleetChild, } +authz_resource! { + name = "Zpool", + parent = "Fleet", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = FleetChild, +} + authz_resource! { name = "SledInstance", parent = "Fleet", diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index 1ce86950bc..bcd7a42945 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -150,6 +150,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { IdentityProvider::init(), SamlIdentityProvider::init(), Sled::init(), + Zpool::init(), Service::init(), UpdateArtifact::init(), UserBuiltin::init(), diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index a0dad0e17e..054fe6430b 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -88,6 +88,13 @@ pub async fn make_resources( LookupType::ById(sled_id), )); + let zpool_id = "aaaaaaaa-1233-af7d-9220-afe1d8090900".parse().unwrap(); + builder.new_resource(authz::Zpool::new( + authz::FLEET, + zpool_id, + LookupType::ById(zpool_id), + )); + make_services(&mut builder).await; builder.new_resource(authz::PhysicalDisk::new( diff --git 
a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 8bd9d857ac..ddf2718930 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -60,7 +60,18 @@ impl DataStore { /// /// We do not generally expect there to be more than 1-2 DNS zones in a /// group (and nothing today creates more than one). - async fn dns_zones_list_all( + pub async fn dns_zones_list_all( + &self, + opctx: &OpContext, + dns_group: DnsGroup, + ) -> ListResultVec { + let conn = self.pool_authorized(opctx).await?; + self.dns_zones_list_all_on_connection(opctx, conn, dns_group).await + } + + /// Variant of [`Self::dns_zones_list_all`] which may be called from a + /// transaction context. + pub(crate) async fn dns_zones_list_all_on_connection( &self, opctx: &OpContext, conn: &(impl async_bb8_diesel::AsyncConnection< @@ -396,7 +407,6 @@ impl DataStore { /// **Callers almost certainly want to wake up the corresponding Nexus /// background task to cause these changes to be propagated to the /// corresponding DNS servers.** - #[must_use] pub async fn dns_update( &self, opctx: &OpContext, @@ -413,8 +423,9 @@ impl DataStore { { opctx.authorize(authz::Action::Modify, &authz::DNS_CONFIG).await?; - let zones = - self.dns_zones_list_all(opctx, conn, update.dns_group).await?; + let zones = self + .dns_zones_list_all_on_connection(opctx, conn, update.dns_group) + .await?; let result = conn .transaction_async(|c| async move { diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index e507550fa9..54346b31c0 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -32,6 +32,8 @@ use async_bb8_diesel::PoolError; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; +use nexus_db_model::DnsGroup; +use nexus_db_model::DnsZone; use nexus_db_model::ExternalIp; use nexus_db_model::IncompleteNetworkInterface; use 
nexus_db_model::InitialDnsGroup; @@ -65,6 +67,7 @@ pub struct RackInit { pub internal_dns: InitialDnsGroup, pub external_dns: InitialDnsGroup, pub recovery_silo: external_params::SiloCreate, + pub recovery_silo_fq_dns_name: String, pub recovery_user_id: external_params::UserId, pub recovery_user_password_hash: omicron_passwords::PasswordHashString, pub dns_update: DnsVersionUpdateBuilder, @@ -197,6 +200,7 @@ impl DataStore { conn: &(impl AsyncConnection + Sync), log: &slog::Logger, recovery_silo: external_params::SiloCreate, + recovery_silo_fq_dns_name: String, recovery_user_id: external_params::UserId, recovery_user_password_hash: omicron_passwords::PasswordHashString, dns_update: DnsVersionUpdateBuilder, @@ -210,7 +214,14 @@ impl DataStore { AsyncConnection, { let db_silo = self - .silo_create_conn(conn, opctx, opctx, recovery_silo, dns_update) + .silo_create_conn( + conn, + opctx, + opctx, + recovery_silo, + &[recovery_silo_fq_dns_name], + dns_update, + ) .await .map_err(RackInitError::Silo) .map_err(TxnError::CustomError)?; @@ -521,6 +532,7 @@ impl DataStore { &conn, &log, rack_init.recovery_silo, + rack_init.recovery_silo_fq_dns_name, rack_init.recovery_user_id, rack_init.recovery_user_password_hash, rack_init.dns_update, @@ -594,16 +606,16 @@ impl DataStore { pub async fn nexus_external_addresses( &self, opctx: &OpContext, - ) -> Result, Error> { + ) -> Result<(Vec, Vec), Error> { opctx.authorize(authz::Action::Read, &authz::DNS_CONFIG).await?; use crate::db::schema::external_ip::dsl as extip_dsl; use crate::db::schema::service::dsl as service_dsl; - type TxnError = TransactionError<()>; + type TxnError = TransactionError; self.pool_authorized(opctx) .await? .transaction_async(|conn| async move { - Ok(extip_dsl::external_ip + let ips = extip_dsl::external_ip .inner_join( service_dsl::service.on(service_dsl::id .eq(extip_dsl::parent_id.assume_not_null())), @@ -617,11 +629,21 @@ impl DataStore { .await? 
.into_iter() .map(|external_ip| external_ip.ip.ip()) - .collect()) + .collect(); + + let dns_zones = self + .dns_zones_list_all_on_connection( + opctx, + &conn, + DnsGroup::External, + ) + .await?; + + Ok((ips, dns_zones)) }) .await .map_err(|error: TxnError| match error { - TransactionError::CustomError(()) => unimplemented!(), + TransactionError::CustomError(err) => err, TransactionError::Pool(e) => { public_error_from_diesel_pool(e, ErrorHandler::Server) } @@ -699,6 +721,10 @@ mod test { tls_certificates: vec![], mapped_fleet_roles: Default::default(), }, + recovery_silo_fq_dns_name: format!( + "test-silo.sys.{}", + internal_dns::DNS_ZONE + ), recovery_user_id: "test-user".parse().unwrap(), // empty string password recovery_user_password_hash: "$argon2id$v=19$m=98304,t=13,\ diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index fd86af8a6b..ed2b97257e 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -123,6 +123,7 @@ impl DataStore { opctx: &OpContext, nexus_opctx: &OpContext, new_silo_params: params::SiloCreate, + new_silo_dns_names: &[String], dns_update: DnsVersionUpdateBuilder, ) -> CreateResult { let conn = self.pool_authorized(opctx).await?; @@ -131,6 +132,7 @@ impl DataStore { opctx, nexus_opctx, new_silo_params, + new_silo_dns_names, dns_update, ) .await @@ -143,6 +145,7 @@ impl DataStore { opctx: &OpContext, nexus_opctx: &OpContext, new_silo_params: params::SiloCreate, + new_silo_dns_names: &[String], dns_update: DnsVersionUpdateBuilder, ) -> CreateResult where @@ -253,6 +256,7 @@ impl DataStore { Uuid::new_v4(), ServiceKind::Nexus, c, + new_silo_dns_names, ) }) .collect::, _>>() diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index c01daa9c87..e7e7bb47fc 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -347,6 +347,11 @@ impl<'a> LookupPath<'a> { Sled::PrimaryKey(Root { 
lookup_root: self }, id) } + /// Select a resource of type Zpool, identified by its id + pub fn zpool_id(self, id: Uuid) -> Zpool<'a> { + Zpool::PrimaryKey(Root { lookup_root: self }, id) + } + /// Select a resource of type Service, identified by its id pub fn service_id(self, id: Uuid) -> Service<'a> { Service::PrimaryKey(Root { lookup_root: self }, id) @@ -788,6 +793,15 @@ lookup_resource! { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] } +lookup_resource! { + name = "Zpool", + ancestors = [], + children = [], + lookup_by_name = false, + soft_deletes = true, + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] +} + lookup_resource! { name = "SledInstance", ancestors = [], diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 7531672514..72031c567e 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -782,6 +782,20 @@ resource: Sled id "8a785566-adaf-c8d8-e886-bee7f9b73ca7" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: Zpool id "aaaaaaaa-1233-af7d-9220-afe1d8090900" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! 
+ resource: Service id "6b1f15ee-d6b3-424c-8436-94413a0b682d" USER Q R LC RP M MP CC D diff --git a/nexus/src/app/certificate.rs b/nexus/src/app/certificate.rs index 200039b6f2..71be93f5b7 100644 --- a/nexus/src/app/certificate.rs +++ b/nexus/src/app/certificate.rs @@ -46,12 +46,17 @@ impl super::Nexus { .authn .silo_required() .internal_context("creating a Certificate")?; + + let silo_fq_dns_names = + self.silo_fq_dns_names(opctx, authz_silo.id()).await?; + let kind = params.service; let new_certificate = db::model::Certificate::new( authz_silo.id(), Uuid::new_v4(), kind.into(), params, + &silo_fq_dns_names, )?; let cert = self .db_datastore diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index c96258de0d..f95c64d3eb 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -933,6 +933,7 @@ mod test { Uuid::new_v4(), ServiceKind::Nexus, cert_create, + &["dummy.sys.oxide1.test".to_string()], ) .unwrap(); let ee2 = ExternalEndpoints::new(vec![], vec![cert], vec![]); @@ -962,6 +963,7 @@ mod test { Uuid::new_v4(), ServiceKind::Nexus, cert_create, + &["dummy.sys.oxide1.test".to_string()], ) .unwrap(); @@ -1095,6 +1097,7 @@ mod test { Uuid::new_v4(), ServiceKind::Nexus, silo1_cert1_params, + &["silo1.sys.oxide1.test".to_string()], ) .unwrap(); let silo1_cert2_params = @@ -1120,6 +1123,7 @@ mod test { Uuid::new_v4(), ServiceKind::Nexus, silo2_cert2_params, + &["silo2.sys.oxide1.test".to_string()], ) .unwrap(); let silo3_cert_params = @@ -1129,6 +1133,7 @@ mod test { Uuid::new_v4(), ServiceKind::Nexus, silo3_cert_params, + &["silo3.sys.oxide1.test".to_string()], ) .unwrap(); // Corrupt a byte of this last certificate. 
(This has to be done after diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1ac63d19f6..f07ceae4a0 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -11,7 +11,6 @@ use super::MAX_NICS_PER_INSTANCE; use super::MAX_VCPU_PER_INSTANCE; use super::MIN_MEMORY_BYTES_PER_INSTANCE; use crate::app::sagas; -use crate::app::sagas::retry_until_known_result; use crate::cidata::InstanceCiData; use crate::external_api::params; use cancel_safe_futures::prelude::*; @@ -41,7 +40,6 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; -use omicron_common::api::internal::shared::SwitchLocation; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; use propolis_client::support::tungstenite::Message as WebSocketMessage; @@ -54,9 +52,7 @@ use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; -use std::collections::HashSet; use std::net::SocketAddr; -use std::str::FromStr; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; use uuid::Uuid; @@ -1232,146 +1228,6 @@ impl super::Nexus { Ok(()) } - // Switches with uplinks configured and boundary services enabled - pub(crate) async fn boundary_switches( - &self, - opctx: &OpContext, - ) -> Result, Error> { - let mut boundary_switches: HashSet = HashSet::new(); - let uplinks = self.list_switch_ports_with_uplinks(opctx).await?; - for uplink in &uplinks { - let location: SwitchLocation = - uplink.switch_location.parse().map_err(|_| { - Error::internal_error(&format!( - "invalid switch location in uplink config: {}", - uplink.switch_location - )) - })?; - boundary_switches.insert(location); - } - Ok(boundary_switches) - } - - /// 
Ensures that the Dendrite configuration for the supplied instance is - /// up-to-date. - /// - /// # Parameters - /// - /// - `opctx`: An operation context that grants read and list-children - /// permissions on the identified instance. - /// - `instance_id`: The ID of the instance to act on. - /// - `sled_ip_address`: The internal IP address assigned to the sled's - /// sled agent. - /// - `ip_index_filter`: An optional filter on the index into the instance's - /// external IP array. - /// - If this is `Some(n)`, this routine configures DPD state for only the - /// Nth external IP in the collection returned from CRDB. The caller is - /// responsible for ensuring that the IP collection has stable indices - /// when making this call. - /// - If this is `None`, this routine configures DPD for all external - /// IPs. - pub(crate) async fn instance_ensure_dpd_config( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_ip_address: &std::net::SocketAddrV6, - ip_index_filter: Option, - dpd_client: &Arc, - ) -> Result<(), Error> { - let log = &self.log; - - info!(log, "looking up instance's primary network interface"; - "instance_id" => %instance_id); - - let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::ListChildren) - .await?; - - // All external IPs map to the primary network interface, so find that - // interface. If there is no such interface, there's no way to route - // traffic destined to those IPs, so there's nothing to configure and - // it's safe to return early. - let network_interface = match self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await? 
- .into_iter() - .find(|interface| interface.primary) - { - Some(interface) => interface, - None => { - info!(log, "Instance has no primary network interface"; - "instance_id" => %instance_id); - return Ok(()); - } - }; - - let mac_address = - macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) - .map_err(|e| { - Error::internal_error(&format!( - "failed to convert mac address: {e}" - )) - })?; - - let vni: u32 = network_interface.vni.into(); - - info!(log, "looking up instance's external IPs"; - "instance_id" => %instance_id); - - let ips = self - .db_datastore - .instance_lookup_external_ips(&opctx, instance_id) - .await?; - - if let Some(wanted_index) = ip_index_filter { - if let None = ips.get(wanted_index) { - return Err(Error::internal_error(&format!( - "failed to find external ip address at index: {}", - wanted_index - ))); - } - } - - for target_ip in ips - .iter() - .enumerate() - .filter(|(index, _)| { - if let Some(wanted_index) = ip_index_filter { - *index == wanted_index - } else { - true - } - }) - .map(|(_, ip)| ip) - { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: {e}" - )) - })?; - } - - Ok(()) - } - /// Returns the requested range of serial console output bytes, /// provided they are still in the propolis-server's cache. pub(crate) async fn instance_serial_console_data( diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs new file mode 100644 index 0000000000..6b819a9e11 --- /dev/null +++ b/nexus/src/app/instance_network.rs @@ -0,0 +1,497 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Routines that manage instance-related networking state. + +use crate::app::sagas::retry_until_known_result; +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::lookup::LookupPath; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::internal::shared::SwitchLocation; +use sled_agent_client::types::SetVirtualNetworkInterfaceHost; +use std::collections::HashSet; +use std::str::FromStr; +use std::sync::Arc; +use uuid::Uuid; + +impl super::Nexus { + /// Returns the set of switches with uplinks configured and boundary + /// services enabled. + pub(crate) async fn boundary_switches( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let mut boundary_switches: HashSet = HashSet::new(); + let uplinks = self.list_switch_ports_with_uplinks(opctx).await?; + for uplink in &uplinks { + let location: SwitchLocation = + uplink.switch_location.parse().map_err(|_| { + Error::internal_error(&format!( + "invalid switch location in uplink config: {}", + uplink.switch_location + )) + })?; + boundary_switches.insert(location); + } + Ok(boundary_switches) + } + + /// Ensures that V2P mappings exist that indicate that the instance with ID + /// `instance_id` is resident on the sled with ID `sled_id`. + pub(crate) async fn create_instance_v2p_mappings( + &self, + opctx: &OpContext, + instance_id: Uuid, + sled_id: Uuid, + ) -> Result<(), Error> { + info!(&self.log, "creating V2P mappings for instance"; + "instance_id" => %instance_id, + "sled_id" => %sled_id); + + // For every sled that isn't the sled this instance was allocated to, create + // a virtual to physical mapping for each of this instance's NICs. 
+ // + // For the mappings to be correct, a few invariants must hold: + // + // - mappings must be set whenever an instance's sled changes (eg. + // during instance creation, migration, stop + start) + // + // - an instances' sled must not change while its corresponding mappings + // are being created + // + // - the same mapping creation must be broadcast to all sleds + // + // A more targeted approach would be to see what other instances share + // the VPC this instance is in (or more generally, what instances should + // have connectivity to this one), see what sleds those are allocated + // to, and only create V2P mappings for those sleds. + // + // There's additional work with this approach: + // + // - it means that delete calls are required as well as set calls, + // meaning that now the ordering of those matters (this may also + // necessitate a generation number for V2P mappings) + // + // - V2P mappings have to be bidirectional in order for both instances's + // packets to make a round trip. This isn't a problem with the + // broadcast approach because one of the sides will exist already, but + // it is something to orchestrate with a more targeted approach. + // + // TODO-correctness Default firewall rules currently will block + // instances in different VPCs from connecting to each other. If it ever + // stops doing this, the broadcast approach will create V2P mappings + // that shouldn't exist. + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::Read) + .await?; + + let instance_nics = self + .db_datastore + .derive_guest_network_interface_info(&opctx, &authz_instance) + .await?; + + // Look up the supplied sled's physical host IP. 
+ let physical_host_ip = + *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip; + + let mut last_sled_id: Option = None; + loop { + let pagparams = DataPageParams { + marker: last_sled_id.as_ref(), + direction: dropshot::PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(10).unwrap(), + }; + + let sleds_page = + self.sled_list(&self.opctx_alloc, &pagparams).await?; + let mut join_handles = + Vec::with_capacity(sleds_page.len() * instance_nics.len()); + + for sled in &sleds_page { + // set_v2p not required for sled instance was allocated to, OPTE + // currently does that automatically + // + // TODO(#3107): Remove this when XDE stops creating mappings + // implicitly. + if sled.id() == sled_id { + continue; + } + + for nic in &instance_nics { + let client = self.sled_client(&sled.id()).await?; + let nic_id = nic.id; + let mapping = SetVirtualNetworkInterfaceHost { + virtual_ip: nic.ip, + virtual_mac: nic.mac.clone(), + physical_host_ip, + vni: nic.vni.clone(), + }; + + let log = self.log.clone(); + + // This function is idempotent: calling the set_v2p ioctl with + // the same information is a no-op. + join_handles.push(tokio::spawn(futures::future::lazy( + move |_ctx| async move { + retry_until_known_result(&log, || async { + client.set_v2p(&nic_id, &mapping).await + }) + .await + }, + ))); + } + } + + // Concurrently run each future to completion, but return the last + // error seen. + let mut error = None; + for join_handle in join_handles { + let result = join_handle + .await + .map_err(|e| Error::internal_error(&e.to_string()))? 
+ .await; + + if result.is_err() { + error!(self.log, "{:?}", result); + error = Some(result); + } + } + if let Some(e) = error { + return e.map(|_| ()).map_err(|e| e.into()); + } + + if sleds_page.len() < 10 { + break; + } + + if let Some(last) = sleds_page.last() { + last_sled_id = Some(last.id()); + } + } + + Ok(()) + } + + /// Ensure that the necessary v2p mappings for an instance are deleted + pub(crate) async fn delete_instance_v2p_mappings( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> Result<(), Error> { + // For every sled that isn't the sled this instance was allocated to, delete + // the virtual to physical mapping for each of this instance's NICs. If + // there isn't a V2P mapping, del_v2p should be a no-op. + let (.., authz_instance, db_instance) = + LookupPath::new(&opctx, &self.db_datastore) + .instance_id(instance_id) + .fetch_for(authz::Action::Read) + .await?; + + let instance_nics = self + .db_datastore + .derive_guest_network_interface_info(&opctx, &authz_instance) + .await?; + + // Lookup the physical host IP of the sled hosting this instance + let instance_sled_id = db_instance.runtime().sled_id; + let physical_host_ip = *self + .sled_lookup(&self.opctx_alloc, &instance_sled_id)? + .fetch() + .await? + .1 + .ip; + + let mut last_sled_id: Option = None; + + loop { + let pagparams = DataPageParams { + marker: last_sled_id.as_ref(), + direction: dropshot::PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(10).unwrap(), + }; + + let sleds_page = + self.sled_list(&self.opctx_alloc, &pagparams).await?; + let mut join_handles = + Vec::with_capacity(sleds_page.len() * instance_nics.len()); + + for sled in &sleds_page { + // del_v2p not required for sled instance was allocated to, OPTE + // currently does that automatically + // + // TODO(#3107): Remove this when XDE stops deleting mappings + // implicitly. 
+ if sled.id() == instance_sled_id { + continue; + } + + for nic in &instance_nics { + let client = self.sled_client(&sled.id()).await?; + let nic_id = nic.id; + let mapping = SetVirtualNetworkInterfaceHost { + virtual_ip: nic.ip, + virtual_mac: nic.mac.clone(), + physical_host_ip, + vni: nic.vni.clone(), + }; + + let log = self.log.clone(); + + // This function is idempotent: calling the set_v2p ioctl with + // the same information is a no-op. + join_handles.push(tokio::spawn(futures::future::lazy( + move |_ctx| async move { + retry_until_known_result(&log, || async { + client.del_v2p(&nic_id, &mapping).await + }) + .await + }, + ))); + } + } + + // Concurrently run each future to completion, but return the last + // error seen. + let mut error = None; + for join_handle in join_handles { + let result = join_handle + .await + .map_err(|e| Error::internal_error(&e.to_string()))? + .await; + + if result.is_err() { + error!(self.log, "{:?}", result); + error = Some(result); + } + } + if let Some(e) = error { + return e.map(|_| ()).map_err(|e| e.into()); + } + + if sleds_page.len() < 10 { + break; + } + + if let Some(last) = sleds_page.last() { + last_sled_id = Some(last.id()); + } + } + + Ok(()) + } + + /// Ensures that the Dendrite configuration for the supplied instance is + /// up-to-date. + /// + /// # Parameters + /// + /// - `opctx`: An operation context that grants read and list-children + /// permissions on the identified instance. + /// - `instance_id`: The ID of the instance to act on. + /// - `sled_ip_address`: The internal IP address assigned to the sled's + /// sled agent. + /// - `ip_index_filter`: An optional filter on the index into the instance's + /// external IP array. + /// - If this is `Some(n)`, this routine configures DPD state for only the + /// Nth external IP in the collection returned from CRDB. The caller is + /// responsible for ensuring that the IP collection has stable indices + /// when making this call. 
+ /// - If this is `None`, this routine configures DPD for all external + /// IPs. + pub(crate) async fn instance_ensure_dpd_config( + &self, + opctx: &OpContext, + instance_id: Uuid, + sled_ip_address: &std::net::SocketAddrV6, + ip_index_filter: Option, + dpd_client: &Arc, + ) -> Result<(), Error> { + let log = &self.log; + + info!(log, "looking up instance's primary network interface"; + "instance_id" => %instance_id); + + let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::ListChildren) + .await?; + + // All external IPs map to the primary network interface, so find that + // interface. If there is no such interface, there's no way to route + // traffic destined to those IPs, so there's nothing to configure and + // it's safe to return early. + let network_interface = match self + .db_datastore + .derive_guest_network_interface_info(&opctx, &authz_instance) + .await? + .into_iter() + .find(|interface| interface.primary) + { + Some(interface) => interface, + None => { + info!(log, "Instance has no primary network interface"; + "instance_id" => %instance_id); + return Ok(()); + } + }; + + let mac_address = + macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) + .map_err(|e| { + Error::internal_error(&format!( + "failed to convert mac address: {e}" + )) + })?; + + let vni: u32 = network_interface.vni.into(); + + info!(log, "looking up instance's external IPs"; + "instance_id" => %instance_id); + + let ips = self + .db_datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await?; + + if let Some(wanted_index) = ip_index_filter { + if let None = ips.get(wanted_index) { + return Err(Error::internal_error(&format!( + "failed to find external ip address at index: {}", + wanted_index + ))); + } + } + + for target_ip in ips + .iter() + .enumerate() + .filter(|(index, _)| { + if let Some(wanted_index) = ip_index_filter { + *index == wanted_index + } else { + true + } + }) + 
.map(|(_, ip)| ip) + { + retry_until_known_result(log, || async { + dpd_client + .ensure_nat_entry( + &log, + target_ip.ip, + dpd_client::types::MacAddr { + a: mac_address.into_array(), + }, + *target_ip.first_port, + *target_ip.last_port, + vni, + sled_ip_address.ip(), + ) + .await + }) + .await + .map_err(|e| { + Error::internal_error(&format!( + "failed to ensure dpd entry: {e}" + )) + })?; + } + + Ok(()) + } + + /// Attempts to delete all of the Dendrite NAT configuration for the + /// instance identified by `authz_instance`. + /// + /// # Return value + /// + /// - `Ok(())` if all NAT entries were successfully deleted. + /// - If an operation fails before this routine begins to walk and delete + /// individual NAT entries, this routine returns `Err` and reports that + /// error. + /// - If an operation fails while this routine is walking NAT entries, it + /// will continue trying to delete subsequent entries but will return the + /// first error it encountered. + pub(crate) async fn instance_delete_dpd_config( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = authz_instance.id(); + + info!(log, "deleting instance dpd configuration"; + "instance_id" => %instance_id); + + let external_ips = self + .db_datastore + .instance_lookup_external_ips(opctx, instance_id) + .await?; + + let boundary_switches = self.boundary_switches(opctx).await?; + + let mut errors = vec![]; + for entry in external_ips { + for switch in &boundary_switches { + debug!(log, "deleting instance nat mapping"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + + let client_result = + self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "unable to find dendrite client for {switch}" + )) + }); + + let dpd_client = match client_result { + Ok(client) => client, + Err(new_error) => { + errors.push(new_error); + continue; + } + }; + + let result = 
retry_until_known_result(log, || async { + dpd_client + .ensure_nat_entry_deleted( + log, + entry.ip, + *entry.first_port, + ) + .await + }) + .await; + + if let Err(e) = result { + let e = Error::internal_error(&format!( + "failed to delete nat entry via dpd: {e}" + )); + + error!(log, "error deleting nat mapping: {e:#?}"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + errors.push(e); + } else { + debug!(log, "deleting nat mapping successful"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + } + } + } + + if let Some(e) = errors.into_iter().nth(0) { + return Err(e); + } + + Ok(()) + } +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4dd93f7707..5bab5e2820 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -42,6 +42,7 @@ mod external_ip; mod iam; mod image; mod instance; +mod instance_network; mod ip_pool; mod metrics; mod network_interface; diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index b1ee6bc452..3ac4b9063d 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -164,7 +164,10 @@ impl super::Nexus { format!("create silo: {:?}", silo_name.as_str()), self.id.to_string(), ); - dns_update.add_name(silo_dns_name(silo_name), dns_records)?; + let silo_dns_name = silo_dns_name(silo_name); + let recovery_silo_fq_dns_name = + format!("{silo_dns_name}.{}", request.external_dns_zone_name); + dns_update.add_name(silo_dns_name, dns_records)?; // Administrators of the Recovery Silo are automatically made // administrators of the Fleet. 
@@ -196,6 +199,7 @@ impl super::Nexus { internal_dns, external_dns, recovery_silo, + recovery_silo_fq_dns_name, recovery_user_id: request.recovery_silo.user_name, recovery_user_password_hash: request .recovery_silo diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 88a5823ad0..005e9724a6 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -8,7 +8,6 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use crate::app::sagas::retry_until_known_result; use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::{authn, authz}; @@ -151,70 +150,14 @@ async fn sid_delete_network_config( &sagactx, ¶ms.serialized_authn, ); + let authz_instance = ¶ms.authz_instance; let osagactx = sagactx.user_data(); - let datastore = &osagactx.datastore(); - let log = sagactx.user_data().log(); - - debug!(log, "fetching external ip addresses"); - - let external_ips = &datastore - .instance_lookup_external_ips(&opctx, params.authz_instance.id()) + osagactx + .nexus() + .instance_delete_dpd_config(&opctx, authz_instance) .await - .map_err(ActionError::action_failed)?; - - let mut errors: Vec = vec![]; - - // Here we are attempting to delete every existing NAT entry while deferring - // any error handling. If we don't defer error handling, we might end up - // bailing out before we've attempted deletion of all entries. 
- for entry in external_ips { - for switch in ¶ms.boundary_switches { - debug!(log, "deleting nat mapping"; "switch" => switch.to_string(), "entry" => #?entry); - - let client_result = - osagactx.nexus().dpd_clients.get(switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - ))) - }); - - let dpd_client = match client_result { - Ok(client) => client, - Err(new_error) => { - errors.push(new_error); - continue; - } - }; - - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, entry.ip, *entry.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "deleting nat mapping successful"; "switch" => switch.to_string(), "entry" => format!("{entry:#?}")); - } - Err(e) => { - let new_error = - ActionError::action_failed(Error::internal_error( - &format!("failed to delete nat entry via dpd: {e}"), - )); - error!(log, "{new_error:#?}"); - errors.push(new_error); - } - } - } - } - - if let Some(error) = errors.first() { - return Err(error.clone()); - } - - Ok(()) + .map_err(ActionError::action_failed) } async fn sid_delete_instance_record( diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs index fd0414aad3..a6ffd8ef5e 100644 --- a/nexus/src/app/silo.rs +++ b/nexus/src/app/silo.rs @@ -65,6 +65,25 @@ impl super::Nexus { } } + pub(crate) async fn silo_fq_dns_names( + &self, + opctx: &OpContext, + silo_id: Uuid, + ) -> ListResultVec { + let (_, silo) = + self.silo_lookup(opctx, silo_id.into())?.fetch().await?; + let silo_dns_name = silo_dns_name(&silo.name()); + let external_dns_zones = self + .db_datastore + .dns_zones_list_all(opctx, nexus_db_model::DnsGroup::External) + .await?; + + Ok(external_dns_zones + .into_iter() + .map(|zone| format!("{silo_dns_name}.{}", zone.zone_name)) + .collect()) + } + pub(crate) async fn silo_create( &self, opctx: &OpContext, @@ -79,9 +98,9 @@ impl super::Nexus { // Set up an external DNS 
name for this Silo's API and console // endpoints (which are the same endpoint). - let dns_records: Vec = datastore - .nexus_external_addresses(nexus_opctx) - .await? + let (nexus_external_ips, nexus_external_dns_zones) = + datastore.nexus_external_addresses(nexus_opctx).await?; + let dns_records: Vec = nexus_external_ips .into_iter() .map(|addr| match addr { IpAddr::V4(addr) => DnsRecord::A(addr), @@ -95,10 +114,22 @@ impl super::Nexus { format!("create silo: {:?}", silo_name.as_str()), self.id.to_string(), ); - dns_update.add_name(silo_dns_name(silo_name), dns_records)?; + let silo_dns_name = silo_dns_name(silo_name); + let new_silo_dns_names = nexus_external_dns_zones + .into_iter() + .map(|zone| format!("{silo_dns_name}.{}", zone.zone_name)) + .collect::>(); + + dns_update.add_name(silo_dns_name, dns_records)?; let silo = datastore - .silo_create(&opctx, &nexus_opctx, new_silo_params, dns_update) + .silo_create( + &opctx, + &nexus_opctx, + new_silo_params, + &new_silo_dns_names, + dns_update, + ) .await?; self.background_tasks .activate(&self.background_tasks.task_external_dns_config); diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 56a330f526..da89e7e25a 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -4,15 +4,12 @@ //! Sleds, and the hardware and services within them. 
-use crate::app::sagas::retry_until_known_result; use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; -use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; @@ -20,7 +17,6 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddrV6; use std::sync::Arc; @@ -317,251 +313,4 @@ impl super::Nexus { .await?; Ok(()) } - - // OPTE V2P mappings - - /// Ensures that V2P mappings exist that indicate that the instance with ID - /// `instance_id` is resident on the sled with ID `sled_id`. - pub(crate) async fn create_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_id: Uuid, - ) -> Result<(), Error> { - info!(&self.log, "creating V2P mappings for instance"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - // For every sled that isn't the sled this instance was allocated to, create - // a virtual to physical mapping for each of this instance's NICs. - // - // For the mappings to be correct, a few invariants must hold: - // - // - mappings must be set whenever an instance's sled changes (eg. 
- // during instance creation, migration, stop + start) - // - // - an instances' sled must not change while its corresponding mappings - // are being created - // - // - the same mapping creation must be broadcast to all sleds - // - // A more targeted approach would be to see what other instances share - // the VPC this instance is in (or more generally, what instances should - // have connectivity to this one), see what sleds those are allocated - // to, and only create V2P mappings for those sleds. - // - // There's additional work with this approach: - // - // - it means that delete calls are required as well as set calls, - // meaning that now the ordering of those matters (this may also - // necessitate a generation number for V2P mappings) - // - // - V2P mappings have to be bidirectional in order for both instances's - // packets to make a round trip. This isn't a problem with the - // broadcast approach because one of the sides will exist already, but - // it is something to orchestrate with a more targeted approach. - // - // TODO-correctness Default firewall rules currently will block - // instances in different VPCs from connecting to each other. If it ever - // stops doing this, the broadcast approach will create V2P mappings - // that shouldn't exist. - let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Look up the supplied sled's physical host IP. 
- let physical_host_ip = - *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip; - - let mut last_sled_id: Option = None; - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = - self.sled_list(&self.opctx_alloc, &pagparams).await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // set_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops creating mappings - // implicitly. - if sled.id() == sled_id { - continue; - } - - for nic in &instance_nics { - let client = self.sled_client(&sled.id()).await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), - physical_host_ip, - vni: nic.vni.clone(), - }; - - let log = self.log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.set_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? 
- .await; - - if result.is_err() { - error!(self.log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) - } - - /// Ensure that the necessary v2p mappings for an instance are deleted - pub(crate) async fn delete_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - ) -> Result<(), Error> { - // For every sled that isn't the sled this instance was allocated to, delete - // the virtual to physical mapping for each of this instance's NICs. If - // there isn't a V2P mapping, del_v2p should be a no-op. - let (.., authz_instance, db_instance) = - LookupPath::new(&opctx, &self.db_datastore) - .instance_id(instance_id) - .fetch_for(authz::Action::Read) - .await?; - - let instance_nics = self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Lookup the physical host IP of the sled hosting this instance - let instance_sled_id = db_instance.runtime().sled_id; - let physical_host_ip = *self - .sled_lookup(&self.opctx_alloc, &instance_sled_id)? - .fetch() - .await? - .1 - .ip; - - let mut last_sled_id: Option = None; - - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = - self.sled_list(&self.opctx_alloc, &pagparams).await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // del_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops deleting mappings - // implicitly. 
- if sled.id() == instance_sled_id { - continue; - } - - for nic in &instance_nics { - let client = self.sled_client(&sled.id()).await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), - physical_host_ip, - vni: nic.vni.clone(), - }; - - let log = self.log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.del_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(self.log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) - } } diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 73125cc617..7fd0a33c30 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -158,7 +158,7 @@ impl ServerContext { internal_dns::resolver::Resolver::new_from_addrs( log.new(o!("component" => "DnsResolver")), - vec![address], + &[address], ) .map_err(|e| format!("Failed to create DNS resolver: {}", e))? } @@ -169,11 +169,12 @@ impl ServerContext { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { info!(log, "Accessing DB url from DNS"); - // It's been requested but unfortunately not supported to directly - // connect using SRV based lookup. 
- // TODO-robustness: the set of cockroachdb hosts we'll use will be - // fixed to whatever we got back from DNS at Nexus start. This means - // a new cockroachdb instance won't picked up until Nexus restarts. + // It's been requested but unfortunately not supported to + // directly connect using SRV based lookup. + // TODO-robustness: the set of cockroachdb hosts we'll use will + // be fixed to whatever we got back from DNS at Nexus start. + // This means a new cockroachdb instance won't picked up until + // Nexus restarts. let addrs = loop { match resolver .lookup_all_socket_v6(ServiceName::Cockroach) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index fb88ac83a5..d219da7e96 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -93,6 +93,10 @@ pub struct ControlPlaneTestContext { } impl ControlPlaneTestContext { + pub fn wildcard_silo_dns_name(&self) -> String { + format!("*.sys.{}", self.external_dns_zone_name) + } + pub async fn teardown(mut self) { self.server.close().await; self.database.cleanup().await.unwrap(); @@ -429,7 +433,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.config.deployment.internal_dns = nexus_config::InternalDns::FromAddress { - address: *self + address: self .internal_dns .as_ref() .expect("Must initialize internal DNS server first") @@ -655,8 +659,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let dns = dns_server::TransientServer::new(&log).await.unwrap(); - let SocketAddr::V6(dns_address) = *dns.dns_server.local_address() - else { + let SocketAddr::V6(dns_address) = dns.dns_server.local_address() else { panic!("Unsupported IPv4 DNS address"); }; let SocketAddr::V6(dropshot_address) = dns.dropshot_server.local_addr() @@ -994,7 +997,7 @@ pub async fn start_dns_server( let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: 
dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index a35b4dec5c..1843fc28c8 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -150,7 +150,7 @@ async fn test_crud(cptestctx: &ControlPlaneTestContext) { let certs = certs_list(&client).await; assert!(certs.is_empty()); - let chain = CertificateChain::new(); + let chain = CertificateChain::new(cptestctx.wildcard_silo_dns_name()); let (cert, key) = (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_pem()); @@ -190,14 +190,19 @@ async fn test_cannot_create_certificate_with_bad_key( ) { let client = &cptestctx.external_client; - let chain = CertificateChain::new(); + let chain = CertificateChain::new(cptestctx.wildcard_silo_dns_name()); let (cert, der_key) = (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_der()); let key = String::from_utf8_lossy(&der_key).to_string(); // Cannot create a certificate with a bad key (e.g. 
not PEM encoded) - cert_create_expect_error(&client, CERT_NAME, cert, key).await; + let error = cert_create_expect_error(&client, CERT_NAME, cert, key).await; + let expected = "Failed to parse private key"; + assert!( + error.contains(expected), + "{error:?} does not contain {expected:?}" + ); } #[nexus_test] @@ -206,14 +211,19 @@ async fn test_cannot_create_certificate_with_mismatched_key( ) { let client = &cptestctx.external_client; - let chain1 = CertificateChain::new(); + let chain1 = CertificateChain::new(cptestctx.wildcard_silo_dns_name()); let cert1 = chain1.cert_chain_as_pem(); - let chain2 = CertificateChain::new(); + let chain2 = CertificateChain::new(cptestctx.wildcard_silo_dns_name()); let key2 = chain2.end_cert_private_key_as_pem(); // Cannot create a certificate with a key that doesn't match the cert - cert_create_expect_error(&client, CERT_NAME, cert1, key2).await; + let error = cert_create_expect_error(&client, CERT_NAME, cert1, key2).await; + let expected = "Certificate and private key do not match"; + assert!( + error.contains(expected), + "{error:?} does not contain {expected:?}" + ); } #[nexus_test] @@ -222,7 +232,7 @@ async fn test_cannot_create_certificate_with_bad_cert( ) { let client = &cptestctx.external_client; - let chain = CertificateChain::new(); + let chain = CertificateChain::new(cptestctx.wildcard_silo_dns_name()); let (cert, key) = (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_pem()); @@ -231,7 +241,15 @@ async fn test_cannot_create_certificate_with_bad_cert( let cert = String::from_utf8(tmp).unwrap(); - cert_create_expect_error(&client, CERT_NAME, cert, key).await; + let error = cert_create_expect_error(&client, CERT_NAME, cert, key).await; + + // TODO-correctness It's surprising this is the error we get back instead of + // "Failed to parse certificate". Why? 
+ let expected = "Certificate exists, but is empty"; + assert!( + error.contains(expected), + "{error:?} does not contain {expected:?}" + ); } #[nexus_test] @@ -240,14 +258,45 @@ async fn test_cannot_create_certificate_with_expired_cert( ) { let client = &cptestctx.external_client; - let mut params = rcgen::CertificateParams::new(vec!["localhost".into()]); + let mut params = + rcgen::CertificateParams::new(vec![cptestctx.wildcard_silo_dns_name()]); params.not_after = std::time::SystemTime::UNIX_EPOCH.into(); let chain = CertificateChain::with_params(params); let (cert, key) = (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_pem()); - cert_create_expect_error(&client, CERT_NAME, cert, key).await; + let error = cert_create_expect_error(&client, CERT_NAME, cert, key).await; + let expected = "Certificate exists, but is expired"; + assert!( + error.contains(expected), + "{error:?} does not contain {expected:?}" + ); +} + +#[nexus_test] +async fn test_cannot_create_certificate_with_incorrect_subject_alt_name( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let bad_subject_alt_name = + format!("some-other-silo.sys.{}", cptestctx.external_dns_zone_name); + + let chain = CertificateChain::new(&bad_subject_alt_name); + let (cert, key) = + (chain.cert_chain_as_pem(), chain.end_cert_private_key_as_pem()); + + let error = cert_create_expect_error(&client, CERT_NAME, cert, key).await; + for expected in [ + "Certificate not valid for".to_string(), + format!("SANs: {bad_subject_alt_name}"), + ] { + assert!( + error.contains(&expected), + "{error:?} does not contain {expected:?}" + ); + } } #[tokio::test] @@ -311,7 +360,7 @@ async fn test_silo_certificates() { // create the other Silos and their users. 
let resolver = Arc::new( CustomDnsResolver::new( - *cptestctx.external_dns.dns_server.local_address(), + cptestctx.external_dns.dns_server.local_address(), ) .unwrap(), ); diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 2b802ef5e6..e04d26cc45 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -10,6 +10,7 @@ use crate::integration_tests::unauthorized::HTTP_SERVER; use chrono::Utc; use http::method::Method; +use internal_dns::names::DNS_ZONE_EXTERNAL_TESTING; use lazy_static::lazy_static; use nexus_db_queries::authn; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; @@ -334,7 +335,8 @@ lazy_static! { pub static ref DEMO_CERTIFICATES_URL: String = format!("/v1/certificates"); pub static ref DEMO_CERTIFICATE_URL: String = format!("/v1/certificates/demo-certificate"); - pub static ref DEMO_CERTIFICATE: CertificateChain = CertificateChain::new(); + pub static ref DEMO_CERTIFICATE: CertificateChain = + CertificateChain::new(format!("*.sys.{DNS_ZONE_EXTERNAL_TESTING}")); pub static ref DEMO_CERTIFICATE_CREATE: params::CertificateCreate = params::CertificateCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 1251077c2e..2d4c76dc99 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -41,7 +41,7 @@ async fn test_nexus_boots_before_cockroach() { omicron_common::nexus_config::Database::FromDns; builder.config.deployment.internal_dns = omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") @@ -121,7 +121,7 @@ async fn test_nexus_boots_before_dendrite() { builder.config.pkg.dendrite = HashMap::new(); builder.config.deployment.internal_dns = 
omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") diff --git a/omdb/src/bin/omdb/main.rs b/omdb/src/bin/omdb/main.rs deleted file mode 100644 index 861df47b51..0000000000 --- a/omdb/src/bin/omdb/main.rs +++ /dev/null @@ -1,76 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! CLI for debugging Omicron internal state -//! -//! GROUND RULES: -//! -//! 1. There aren't a lot of ground rules here. At least for now, this is a -//! place to put any kind of runtime tooling for Omicron that seems useful. -//! You can query the database directly (see notes in db.rs), use internal -//! APIs, etc. To the degree that we can stick to stable interfaces, great. -//! But at this stage we'd rather have tools that work on latest than not -//! have them because we couldn't prioritize keeping them stable. -//! -//! 2. Where possible, when the tool encounters something unexpected, it should -//! print what it can (including the error message and bad data) and then -//! continue. It generally shouldn't stop on the first error. (We often -//! find strange things when debugging but we need our tools to tell us as -//! much as they can!) 
- -use anyhow::Context; -use clap::Parser; -use clap::Subcommand; - -mod db; -mod nexus; - -#[tokio::main] -async fn main() -> Result<(), anyhow::Error> { - let args = Omdb::parse(); - - let log = dropshot::ConfigLogging::StderrTerminal { level: args.log_level } - .to_logger("omdb") - .context("failed to create logger")?; - - match args.command { - OmdbCommands::Nexus(nexus) => nexus.run_cmd(&log).await, - OmdbCommands::Db(db) => db.run_cmd(&log).await, - } -} - -/// Omicron debugger (unstable) -/// -/// This tool provides commands for directly querying Omicron components about -/// their internal state using internal APIs. This is a prototype. The -/// commands and output are unstable and may change. -#[derive(Debug, Parser)] -struct Omdb { - /// log level filter - #[arg( - env, - long, - value_parser = parse_dropshot_log_level, - default_value = "warn", - )] - log_level: dropshot::ConfigLoggingLevel, - - #[command(subcommand)] - command: OmdbCommands, -} - -#[derive(Debug, Subcommand)] -#[allow(clippy::large_enum_variant)] -enum OmdbCommands { - /// Query the control plane database (CockroachDB) - Db(db::DbArgs), - /// Debug a specific Nexus instance - Nexus(nexus::NexusArgs), -} - -fn parse_dropshot_log_level( - s: &str, -) -> Result { - serde_json::from_str(&format!("{:?}", s)).context("parsing log level") -} diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index c33653c468..3e7ec3b5e5 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2755,6 +2755,17 @@ "format": "uint32", "minimum": 0 }, + "ref_time": { + "description": "The NTP reference time (i.e. 
what chrony thinks the current time is, not necessarily the current system time).", + "type": "number", + "format": "double" + }, + "stratum": { + "description": "The NTP stratum (our upstream's stratum plus one).", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "sync": { "description": "The synchronization state of the sled, true when the system clock and the NTP clock are in sync (to within a small window).", "type": "boolean" @@ -2764,6 +2775,8 @@ "correction", "ip_addr", "ref_id", + "ref_time", + "stratum", "sync" ] }, diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 959d8e204a..a0146eee50 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -185,7 +185,9 @@ async fn do_target( subcommand: &TargetCommand, ) -> Result<()> { let target_dir = artifact_dir.join("target"); - tokio::fs::create_dir_all(&target_dir).await?; + tokio::fs::create_dir_all(&target_dir).await.with_context(|| { + format!("failed to create directory {}", target_dir.display()) + })?; match subcommand { TargetCommand::Create { image, machine, switch } => { let target = KnownTarget::new( @@ -195,7 +197,11 @@ async fn do_target( )?; let path = get_single_target(&target_dir, name).await?; - tokio::fs::write(&path, Target::from(target).to_string()).await?; + tokio::fs::write(&path, Target::from(target).to_string()) + .await + .with_context(|| { + format!("failed to write target to {}", path.display()) + })?; replace_active_link(&name, &target_dir).await?; @@ -263,7 +269,13 @@ async fn replace_active_link( bail!("Target file {} does not exist", src.display()); } let _ = tokio::fs::remove_file(&dst).await; - tokio::fs::symlink(src, dst).await?; + tokio::fs::symlink(src, &dst).await.with_context(|| { + format!( + "failed creating symlink to {} at {}", + src.display(), + dst.display() + ) + })?; Ok(()) } @@ -881,7 +893,9 @@ async fn main() -> Result<()> { if let Ok(manifest) = env::var("CARGO_MANIFEST_DIR") { 
let manifest_dir = PathBuf::from(manifest); let root = manifest_dir.parent().unwrap(); - env::set_current_dir(&root)?; + env::set_current_dir(root).with_context(|| { + format!("failed to set current directory to {}", root.display()) + })?; } match &args.subcommand { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 5ef9594a2a..d0fa2fbe4d 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -855,6 +855,11 @@ pub struct TimeSync { pub ref_id: u32, /// The NTP reference IP address. pub ip_addr: IpAddr, + /// The NTP stratum (our upstream's stratum plus one). + pub stratum: u8, + /// The NTP reference time (i.e. what chrony thinks the current time is, not + /// necessarily the current system time). + pub ref_time: f64, // This could be f32, but there is a problem with progenitor/typify // where, although the f32 correctly becomes "float" (and not "double") in // the API spec, that "float" gets converted back to f64 when generating diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index d127512aa6..805c889295 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -455,6 +455,8 @@ impl ServiceInner { sync: ts.sync, ref_id: ts.ref_id, ip_addr: ts.ip_addr, + stratum: ts.stratum, + ref_time: ts.ref_time, correction: ts.correction, }) } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ceda5c75b7..9868bb4f0d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2373,6 +2373,8 @@ impl ServiceManager { sync: true, ref_id: 0, ip_addr: IPV6_UNSPECIFIED, + stratum: 0, + ref_time: 0.0, correction: 0.00, }); }; @@ -2401,6 +2403,10 @@ impl ServiceManager { .map_err(|_| Error::NtpZoneNotReady)?; let ip_addr = IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED); + let stratum = u8::from_str(v[2]) + .map_err(|_| Error::NtpZoneNotReady)?; + let ref_time = f64::from_str(v[3]) + .map_err(|_| Error::NtpZoneNotReady)?; let 
correction = f64::from_str(v[4]) .map_err(|_| Error::NtpZoneNotReady)?; @@ -2410,13 +2416,23 @@ impl ServiceManager { let peer_sync = !ip_addr.is_unspecified() || (ref_id != 0 && ref_id != 0x7f7f0101); - let sync = peer_sync && correction.abs() <= 0.05; + let sync = stratum < 10 + && ref_time > 1234567890.0 + && peer_sync + && correction.abs() <= 0.05; if sync { self.boottime_rewrite(existing_zones.values()); } - Ok(TimeSync { sync, ref_id, ip_addr, correction }) + Ok(TimeSync { + sync, + ref_id, + ip_addr, + stratum, + ref_time, + correction, + }) } else { Err(Error::NtpZoneNotReady) } diff --git a/test-utils/src/certificates.rs b/test-utils/src/certificates.rs index 08698aabd5..ab84f30b15 100644 --- a/test-utils/src/certificates.rs +++ b/test-utils/src/certificates.rs @@ -13,8 +13,9 @@ pub struct CertificateChain { } impl CertificateChain { - pub fn new() -> Self { - let params = rcgen::CertificateParams::new(vec!["localhost".into()]); + pub fn new>(subject_alt_name: S) -> Self { + let params = + rcgen::CertificateParams::new(vec![subject_alt_name.into()]); Self::with_params(params) } diff --git a/tools/build-global-zone-packages.sh b/tools/build-global-zone-packages.sh index 8589dcd270..54af9d6327 100755 --- a/tools/build-global-zone-packages.sh +++ b/tools/build-global-zone-packages.sh @@ -5,20 +5,20 @@ set -eux TOOLS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" # Use the default "out" dir in omicron to find the needed packages if one isn't given -tarball_src_dir="$(readlink -f ${1:-"$TOOLS_DIR/../out"})" +tarball_src_dir="$(readlink -f "${1:-"$TOOLS_DIR/../out"}")" # Stash the final tgz in the given src dir if a different target isn't given -out_dir="$(readlink -f ${2:-$tarball_src_dir})" +out_dir="$(readlink -f "${2:-"$tarball_src_dir"}")" # Make sure needed packages exist deps=( - $tarball_src_dir/omicron-sled-agent.tar - $tarball_src_dir/maghemite.tar - $tarball_src_dir/propolis-server.tar.gz - $tarball_src_dir/overlay.tar.gz + 
"$tarball_src_dir/omicron-sled-agent.tar" + "$tarball_src_dir/maghemite.tar" + "$tarball_src_dir/propolis-server.tar.gz" + "$tarball_src_dir/overlay.tar.gz" ) -for dep in ${deps[@]}; do +for dep in "${deps[@]}"; do if [[ ! -e $dep ]]; then - echo "Missing Global Zone dep: $(basename $dep)" + echo "Missing Global Zone dep: $(basename "$dep")" exit 1 fi done @@ -57,4 +57,4 @@ cp "$tarball_src_dir/propolis-server.tar.gz" "$tmp_gz/root/opt/oxide" cp "$tarball_src_dir/overlay.tar.gz" "$tmp_gz/root/opt/oxide" # Create the final output and we're done -cd "$tmp_gz" && tar cvfz $out_dir/global-zone-packages.tar.gz oxide.json root +cd "$tmp_gz" && tar cvfz "$out_dir"/global-zone-packages.tar.gz oxide.json root diff --git a/tools/build-host-image.sh b/tools/build-host-image.sh index 211dbd3e77..d492e84b81 100755 --- a/tools/build-host-image.sh +++ b/tools/build-host-image.sh @@ -60,7 +60,7 @@ function main HELIOS_PATH=$1 GLOBAL_ZONE_TARBALL_PATH=$2 - TOOLS_DIR="$(pwd)/$(dirname $0)" + TOOLS_DIR="$(pwd)/$(dirname "$0")" # Grab the opte version OPTE_VER=$(cat "$TOOLS_DIR/opte_version") @@ -73,7 +73,7 @@ function main # Extract the global zone tarball into a tmp_gz directory echo "Extracting gz packages into $tmp_gz" - ptime -m tar xvzf $GLOBAL_ZONE_TARBALL_PATH -C $tmp_gz + ptime -m tar xvzf "$GLOBAL_ZONE_TARBALL_PATH" -C "$tmp_gz" # If the user specified a switch zone (which is probably named # `switch-SOME_VARIANT.tar.gz`), stage it in the right place and rename it @@ -90,7 +90,7 @@ function main fi # Move to the helios checkout - cd $HELIOS_PATH + cd "$HELIOS_PATH" # Create the "./helios-build" command, which lets us build images gmake setup @@ -106,7 +106,7 @@ function main *) exit $rc ;; esac - pfexec zfs create -p rpool/images/$USER + pfexec zfs create -p rpool/images/"$USER" HELIOS_REPO=https://pkg.oxide.computer/helios/2/dev/ diff --git a/tools/build-trampoline-global-zone-packages.sh b/tools/build-trampoline-global-zone-packages.sh index 6081e7c9a7..87013fb563 100755 
--- a/tools/build-trampoline-global-zone-packages.sh +++ b/tools/build-trampoline-global-zone-packages.sh @@ -5,18 +5,18 @@ set -eux TOOLS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" # Use the default "out" dir in omicron to find the needed packages if one isn't given -tarball_src_dir="$(readlink -f ${1:-"$TOOLS_DIR/../out"})" +tarball_src_dir="$(readlink -f "${1:-"$TOOLS_DIR/../out"}")" # Stash the final tgz in the given src dir if a different target isn't given -out_dir="$(readlink -f ${2:-$tarball_src_dir})" +out_dir="$(readlink -f "${2:-$tarball_src_dir}")" # Make sure needed packages exist deps=( - $tarball_src_dir/installinator.tar - $tarball_src_dir/maghemite.tar + "$tarball_src_dir"/installinator.tar + "$tarball_src_dir"/maghemite.tar ) -for dep in ${deps[@]}; do +for dep in "${deps[@]}"; do if [[ ! -e $dep ]]; then - echo "Missing Trampoline Global Zone dep: $(basename $dep)" + echo "Missing Trampoline Global Zone dep: $(basename "$dep")" exit 1 fi done @@ -48,4 +48,4 @@ tar -xvfz "$tarball_src_dir/maghemite.tar" cd - # Create the final output and we're done -cd "$tmp_trampoline" && tar cvfz $out_dir/trampoline-global-zone-packages.tar.gz oxide.json root +cd "$tmp_trampoline" && tar cvfz "$out_dir"/trampoline-global-zone-packages.tar.gz oxide.json root diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 2162df2a31..26382690e1 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -15,7 +15,7 @@ for rev in "${opte_deps_revs[@]}"; do done # Grab the API version for this revision -API_VER=$(curl -s https://raw.githubusercontent.com/oxidecomputer/opte/$OPTE_REV/crates/opte-api/src/lib.rs | sed -n 's/pub const API_VERSION: u64 = \([0-9]*\);/\1/p') +API_VER=$(curl -s https://raw.githubusercontent.com/oxidecomputer/opte/"$OPTE_REV"/crates/opte-api/src/lib.rs | sed -n 's/pub const API_VERSION: u64 = \([0-9]*\);/\1/p') # Grab the patch version which is based on the number of commits. 
# Essentially `git rev-list --count $OPTE_REV` but without cloning the repo. diff --git a/tools/ci_download_clickhouse b/tools/ci_download_clickhouse index 8e743d75e0..7d634a3237 100755 --- a/tools/ci_download_clickhouse +++ b/tools/ci_download_clickhouse @@ -11,7 +11,7 @@ set -o xtrace set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename ${BASH_SOURCE[0]})" +ARG0="$(basename "${BASH_SOURCE[0]}")" TARGET_DIR="out" # Location where intermediate artifacts are downloaded / unpacked. @@ -89,7 +89,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } @@ -124,7 +124,6 @@ function configure_os TARBALL_FILENAME="$TARBALL_DIRNAME$CIDL_DASHREV.$CIDL_PLATFORM.tar.gz" TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - TARBALL_DIR="$DOWNLOAD_DIR/$TARBALL_DIRNAME" } function do_download_curl diff --git a/tools/ci_download_cockroachdb b/tools/ci_download_cockroachdb index 2a4c5db6c7..8b002b4359 100755 --- a/tools/ci_download_cockroachdb +++ b/tools/ci_download_cockroachdb @@ -11,7 +11,7 @@ set -o xtrace set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename ${BASH_SOURCE[0]})" +ARG0="$(basename "${BASH_SOURCE[0]}")" # If you change this, you must also update the md5sums below CIDL_VERSION="$(cat "$SOURCE_DIR/cockroachdb_version")" @@ -93,7 +93,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } diff --git a/tools/ci_download_console b/tools/ci_download_console index 606b527878..a6837af0e9 100755 --- a/tools/ci_download_console +++ b/tools/ci_download_console @@ -9,15 +9,13 @@ set -o xtrace set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename ${BASH_SOURCE[0]})" +ARG0="$(basename "${BASH_SOURCE[0]}")" TARGET_DIR="out" # Location where intermediate artifacts are downloaded / unpacked. 
DOWNLOAD_DIR="$TARGET_DIR/downloads" # Location where the final console directory should end up. DEST_DIR="./$TARGET_DIR/console-assets" -# Base URL -URL_BASE="https://dl.oxide.computer/releases/console" source "$SOURCE_DIR/console_version" @@ -71,7 +69,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } diff --git a/tools/ci_download_dendrite_openapi b/tools/ci_download_dendrite_openapi index 0e4610b90e..fe5c91282e 100755 --- a/tools/ci_download_dendrite_openapi +++ b/tools/ci_download_dendrite_openapi @@ -9,7 +9,7 @@ set -o xtrace set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename ${BASH_SOURCE[0]})" +ARG0="$(basename "${BASH_SOURCE[0]}")" TARGET_DIR="out" # Location where intermediate artifacts are downloaded / unpacked. @@ -60,7 +60,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } diff --git a/tools/ci_download_dendrite_stub b/tools/ci_download_dendrite_stub index d9a2063d3a..870a847af0 100755 --- a/tools/ci_download_dendrite_stub +++ b/tools/ci_download_dendrite_stub @@ -75,7 +75,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } @@ -119,7 +119,7 @@ function do_assemble mkdir "$DEST_DIR" cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" # Symbolic links for backwards compatibility with existing setups - ln -s $PWD/out/dendrite-stub/root/opt/oxide/dendrite/bin/ $PWD/out/dendrite-stub/bin + ln -s "$PWD"/out/dendrite-stub/root/opt/oxide/dendrite/bin/ "$PWD"/out/dendrite-stub/bin } function fetch_and_verify diff --git a/tools/ci_download_maghemite_openapi b/tools/ci_download_maghemite_openapi index ce512a365d..37ff4f5547 100755 --- a/tools/ci_download_maghemite_openapi +++ b/tools/ci_download_maghemite_openapi @@ -9,7 +9,7 @@ set -o xtrace set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename ${BASH_SOURCE[0]})" +ARG0="$(basename 
"${BASH_SOURCE[0]}")" TARGET_DIR="out" # Location where intermediate artifacts are downloaded / unpacked. @@ -60,7 +60,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index e164e0300f..2575d6a186 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -20,7 +20,7 @@ SOFTNPU_COMMIT="64beaff129b7f63a04a53dd5ed0ec09f012f5756" # This is the softnpu ASIC simulator echo "fetching npuzone" mkdir -p $OUT_DIR -$TOOLS_DIR/ensure_buildomat_artifact.sh \ +"$TOOLS_DIR"/ensure_buildomat_artifact.sh \ -O $OUT_DIR \ "npuzone" \ "$SOFTNPU_REPO" \ diff --git a/tools/ci_download_transceiver_control b/tools/ci_download_transceiver_control index 3a52d7c72e..633e42167b 100755 --- a/tools/ci_download_transceiver_control +++ b/tools/ci_download_transceiver_control @@ -74,7 +74,7 @@ function main function fail { - echo "$ARG0: $@" >&2 + echo "$ARG0: $*" >&2 exit 1 } diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index f493d1f8d2..dd6d9af9dd 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -43,7 +43,7 @@ function ensure_simulated_links { done if [[ -z "$(get_vnic_name_if_exists "sc0_1")" ]]; then - dladm create-vnic -t "sc0_1" -l $PHYSICAL_LINK -m a8:e1:de:01:70:1d + dladm create-vnic -t "sc0_1" -l "$PHYSICAL_LINK" -m a8:e1:de:01:70:1d fi success "Vnic sc0_1 exists" } @@ -60,7 +60,7 @@ function ensure_softnpu_zone { --ports sc0_0,tfportrear0_0 \ --ports sc0_1,tfportqsfp0_0 } - $SOURCE_DIR/scrimlet/softnpu-init.sh + "$SOURCE_DIR"/scrimlet/softnpu-init.sh success "softnpu zone exists" } diff --git a/tools/destroy_gimlet_virtual_hardware.sh b/tools/destroy_gimlet_virtual_hardware.sh index 288cb32644..7c5fada14d 100644 --- a/tools/destroy_gimlet_virtual_hardware.sh +++ b/tools/destroy_gimlet_virtual_hardware.sh @@ -11,7 +11,7 @@ set -u set -x SOURCE_DIR="$( 
cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -cd "${SOURCE_DIR}/.." +cd "${SOURCE_DIR}/.." || exit OMICRON_TOP="$PWD" . "$SOURCE_DIR/virtual_hardware.sh" diff --git a/tools/destroy_virtual_hardware.sh b/tools/destroy_virtual_hardware.sh index c1f82c7c49..ae6fef0673 100755 --- a/tools/destroy_virtual_hardware.sh +++ b/tools/destroy_virtual_hardware.sh @@ -11,7 +11,7 @@ set -u set -x SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -cd "${SOURCE_DIR}/.." +cd "${SOURCE_DIR}/.." || exit OMICRON_TOP="$PWD" . "$SOURCE_DIR/virtual_hardware.sh" diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 7c769fa049..f670adf163 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -53,7 +53,7 @@ OPTE_VERSION="$(cat "$OMICRON_TOP/tools/opte_version")" # Actually install the xde kernel module and opteadm tool RC=0 -pfexec pkg install -v pkg://helios-dev/driver/network/opte@$OPTE_VERSION || RC=$? +pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$? 
if [[ "$RC" -eq 0 ]]; then echo "xde driver installed successfully" elif [[ "$RC" -eq 4 ]]; then diff --git a/tools/reflector/helpers.sh b/tools/reflector/helpers.sh index c521a0fa8a..92d132faae 100644 --- a/tools/reflector/helpers.sh +++ b/tools/reflector/helpers.sh @@ -96,9 +96,9 @@ function update_pr { # Compare the integration branch with the target branch local TARGET_TO_INTEGRATION - TARGET_TO_INTEGRATION="$(git rev-list --count $TARGET_BRANCH..$INTEGRATION_BRANCH)" + TARGET_TO_INTEGRATION="$(git rev-list --count "$TARGET_BRANCH".."$INTEGRATION_BRANCH")" local INTEGRATION_TO_TARGET - INTEGRATION_TO_TARGET="$(git rev-list --count $INTEGRATION_BRANCH..$TARGET_BRANCH)" + INTEGRATION_TO_TARGET="$(git rev-list --count "$INTEGRATION_BRANCH".."$TARGET_BRANCH")" # Check for an existing pull request from the integration branch to the target branch eval "$(gh pr view "$INTEGRATION_BRANCH" --repo "$GITHUB_REPOSITORY" --json url,number,state | jq -r 'to_entries[] | "\(.key | ascii_upcase)=\(.value)"')" diff --git a/tools/scrimlet/softnpu-init.sh b/tools/scrimlet/softnpu-init.sh index 57ef4566a3..6a2a9e10ce 100755 --- a/tools/scrimlet/softnpu-init.sh +++ b/tools/scrimlet/softnpu-init.sh @@ -9,13 +9,13 @@ GATEWAY_IP=${GATEWAY_IP:=$(netstat -rn -f inet | grep default | awk -F ' ' '{pri echo "Using $GATEWAY_IP as gateway ip" if [[ ! 
-v GATEWAY_MAC ]]; then - ping $GATEWAY_IP + ping "$GATEWAY_IP" sleep 1 - ping $GATEWAY_IP + ping "$GATEWAY_IP" sleep 1 - ping $GATEWAY_IP + ping "$GATEWAY_IP" sleep 1 - ping $GATEWAY_IP + ping "$GATEWAY_IP" sleep 1 fi @@ -52,16 +52,16 @@ z_scadm () { --server /softnpu/server \ --client /softnpu/client \ standalone \ - $@ + "$@" } # Configure upstream network gateway ARP entry -z_scadm add-arp-entry $GATEWAY_IP $GATEWAY_MAC +z_scadm add-arp-entry "$GATEWAY_IP" "$GATEWAY_MAC" PXA_MAC=${PXA_MAC:-a8:e1:de:01:70:1d} if [[ -v PXA_START ]]; then - z_scadm add-proxy-arp $PXA_START $PXA_END $PXA_MAC + z_scadm add-proxy-arp "$PXA_START" "$PXA_END" "$PXA_MAC" fi z_scadm dump-state diff --git a/tools/uninstall_opte.sh b/tools/uninstall_opte.sh index 6dfd581c77..a833d029aa 100755 --- a/tools/uninstall_opte.sh +++ b/tools/uninstall_opte.sh @@ -95,7 +95,7 @@ function to_stock_helios { # If we have packages to reject, prompt the user to confirm. if [[ ${#PKGS_TO_REJECT[@]} -gt 0 ]]; then - echo "These packages will be removed: ${PKGS_TO_REJECT[@]}" + echo "These packages will be removed: ${PKGS_TO_REJECT[*]}" read -p "Confirm (Y/n): " RESPONSE case $(echo $RESPONSE | tr '[A-Z]' '[a-z]') in n|no ) @@ -122,7 +122,7 @@ function to_stock_helios { pkg set-publisher --sticky --search-first "$HELIOS_PUBLISHER" # Now install stock consolidation but reject the packages we don't want. 
- pkg install --no-refresh -v ${REJECT_ARGS[@]} "$CONSOLIDATION" + pkg install --no-refresh -v "${REJECT_ARGS[@]}" "$CONSOLIDATION" } # If helios-dev exists, echo the full osnet-incorporation package we'll be diff --git a/tools/update_crucible.sh b/tools/update_crucible.sh index 07369586b4..af834091ca 100755 --- a/tools/update_crucible.sh +++ b/tools/update_crucible.sh @@ -4,7 +4,6 @@ set -o pipefail set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" function usage { echo "usage: $0 [-c COMMIT] [-n]" diff --git a/tools/update_dendrite.sh b/tools/update_dendrite.sh index 16f8d29692..0e3df107a4 100755 --- a/tools/update_dendrite.sh +++ b/tools/update_dendrite.sh @@ -4,7 +4,6 @@ set -o pipefail set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" function usage { echo "usage: $0 [-c COMMIT] [-n]" @@ -38,7 +37,7 @@ function update_openapi { fi echo "Updating Dendrite OpenAPI from: $TARGET_COMMIT" set -x - echo "$OUTPUT" > $OPENAPI_PATH + echo "$OUTPUT" > "$OPENAPI_PATH" set +x } @@ -61,7 +60,7 @@ function update_dendrite_stub_shas { fi echo "Updating Dendrite stub checksums from: $TARGET_COMMIT" set -x - echo "$OUTPUT" > $STUB_CHECKSUM_PATH + echo "$OUTPUT" > "$STUB_CHECKSUM_PATH" set +x } diff --git a/tools/update_maghemite.sh b/tools/update_maghemite.sh index 36c5591e0f..a4a9b1291e 100755 --- a/tools/update_maghemite.sh +++ b/tools/update_maghemite.sh @@ -4,7 +4,6 @@ set -o pipefail set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" function usage { echo "usage: $0 [-c COMMIT] [-n]" @@ -37,7 +36,7 @@ function update_openapi { fi echo "Updating Maghemite OpenAPI from: $TARGET_COMMIT" set -x - echo "$OUTPUT" > $OPENAPI_PATH + echo "$OUTPUT" > "$OPENAPI_PATH" set +x } diff --git a/tools/update_propolis.sh b/tools/update_propolis.sh 
index 8acd7809d0..f97aa8dad7 100755 --- a/tools/update_propolis.sh +++ b/tools/update_propolis.sh @@ -4,7 +4,6 @@ set -o pipefail set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" function usage { echo "usage: $0 [-c COMMIT] [-n]" diff --git a/tools/update_transceiver_control.sh b/tools/update_transceiver_control.sh index 94f7f6e3be..8493408cf7 100755 --- a/tools/update_transceiver_control.sh +++ b/tools/update_transceiver_control.sh @@ -4,7 +4,6 @@ set -o pipefail set -o errexit SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" function usage { echo "usage: $0 [-c COMMIT] [-n]" @@ -15,10 +14,6 @@ function usage { exit 1 } -PACKAGES=( - "xcvradm" -) - REPO="oxidecomputer/transceiver-control" . "$SOURCE_DIR/update_helpers.sh" @@ -37,7 +32,7 @@ function update_transceiver_control { fi echo "Updating transceiver control from: $TARGET_COMMIT" set -x - echo "$OUTPUT" > $OPENAPI_PATH + echo "$OUTPUT" > "$OPENAPI_PATH" set +x } diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index 8e6709a30f..1dc9f84985 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -556,10 +556,14 @@ impl CertificateValidator { } fn validate(&self, cert: &str, key: &str) -> Result<(), CertificateError> { - self.inner.validate( - cert.as_bytes(), - key.as_bytes(), - self.silo_dns_name.as_deref(), - ) + // Cert validation accepts multiple possible silo DNS names, but at rack + // setup time we only have one. Stuff it into a Vec. + let silo_dns_names = + if let Some(silo_dns_name) = self.silo_dns_name.as_deref() { + vec![silo_dns_name] + } else { + vec![] + }; + self.inner.validate(cert.as_bytes(), key.as_bytes(), &silo_dns_names) } }