diff --git a/Cargo.lock b/Cargo.lock index c33ede43ce..fb3228964d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -853,7 +853,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "599aa35200ffff8f04c1925aa1acc92fa2e08874379ef42e210a80e527e60838" dependencies = [ "serde", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -934,9 +934,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1355,7 +1355,6 @@ checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" dependencies = [ "bitflags 1.3.2", "crossterm_winapi", - "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -1373,6 +1372,7 @@ checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ "bitflags 2.4.0", "crossterm_winapi", + "futures-core", "libc", "mio", "parking_lot 0.12.1", @@ -1427,7 +1427,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "tracing", "usdt", "uuid", @@ -1482,7 +1482,7 @@ dependencies = [ "tempfile", "thiserror", "tokio-rustls", - "toml 0.7.6", + "toml 0.7.8", "twox-hash", "uuid", "vergen", @@ -1739,7 +1739,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -2011,7 +2011,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "trust-dns-client", "trust-dns-proto", "trust-dns-resolver", @@ -2093,14 +2093,14 @@ dependencies = [ "serde", "serde_json", "slog", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "async-stream", "async-trait", @@ -2116,6 +2116,7 @@ dependencies = [ "http", "hyper", "indexmap 2.0.0", + "multer", "openapiv3", "paste", "percent-encoding", @@ -2135,7 +2136,7 @@ dependencies = [ "slog-term", "tokio", "tokio-rustls", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", "version_check", @@ -2145,7 +2146,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#99cea069c61d1ff5be0ff4f9d338f24bbea93128" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" dependencies = [ "proc-macro2", "quote", @@ -2272,7 +2273,7 @@ dependencies = [ "russh-keys", "serde_json", "tokio", - "toml 0.7.6", + "toml 0.7.8", "trust-dns-resolver", "uuid", ] @@ -3199,7 +3200,7 @@ dependencies = [ "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git)", "tlvc-text", - "toml 0.7.6", + "toml 0.7.8", "x509-cert", "zerocopy 0.6.3", "zip", @@ -3374,7 +3375,7 @@ dependencies = [ "smf", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "uuid", "zone", ] @@ -3491,7 +3492,7 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "toml 0.7.6", + "toml 0.7.8", "tufaceous-lib", "update-engine", "uuid", @@ -4159,6 +4160,24 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http", + "httparse", + "log", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "nanorand" version = "0.7.0" @@ -4356,7 +4375,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", ] @@ -4801,7 +4820,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -4841,7 +4860,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -4856,11 +4875,11 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" dependencies = [ "anyhow", @@ -4887,7 +4906,7 @@ dependencies = [ "subprocess", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -4929,7 +4948,7 @@ dependencies = [ "tokio-stream", "tokio-tungstenite 0.18.0", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -5037,7 +5056,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.7.6", + "toml 0.7.8", "tough", "trust-dns-resolver", "usdt", @@ -5056,6 +5075,7 @@ dependencies = [ "dropshot", "expectorate", "humantime", + "internal-dns 0.1.0", "nexus-client 0.1.0", "nexus-db-model", "nexus-db-queries", @@ -5110,7 +5130,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "topological-sort", "walkdir", ] @@ -5225,7 +5245,7 @@ dependencies = [ "tofino", "tokio", "tokio-tungstenite 0.18.0", - "toml 0.7.6", + "toml 0.7.8", "usdt", "uuid", "zeroize", @@ -5278,7 +5298,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "walkdir", ] @@ -5560,7 +5580,7 @@ dependencies = [ "subprocess", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", "uuid", ] @@ -6298,7 +6318,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6309,7 +6329,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "bytes", "futures-core", @@ -6323,7 +6343,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "getopts", "heck 0.4.1", @@ -6345,7 +6365,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#3734e566108a173ce0cdad2d917bacc85d8090fc" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" dependencies = [ "openapiv3", "proc-macro2", @@ -6464,7 +6484,7 @@ dependencies = [ "tokio", "tokio-tungstenite 0.17.2", "tokio-util", - "toml 0.7.6", + "toml 
0.7.8", "usdt", "uuid", ] @@ -6477,7 +6497,7 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -8143,7 +8163,7 @@ dependencies = [ "sprockets-rot", "thiserror", "tokio", - "toml 0.7.6", + "toml 0.7.8", ] [[package]] @@ -8869,9 +8889,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.6" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" dependencies = [ "serde", "serde_spanned", @@ -9145,7 +9165,7 @@ dependencies = [ "sha2", "slog", "tar", - "toml 0.7.6", + "toml 0.7.8", "tough", "url", "zip", @@ -9205,7 +9225,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "rand 0.8.5", "static_assertions", ] @@ -9219,7 +9239,7 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "typify-impl", "typify-macro", @@ -9228,7 +9248,7 @@ dependencies = [ [[package]] name = "typify-impl" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "heck 0.4.1", "log", @@ -9245,7 +9265,7 @@ dependencies = [ [[package]] name = "typify-macro" version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#6cd9a7164a83169945be0bf697a43e6a7488fa31" +source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" dependencies = [ "proc-macro2", "quote", @@ -9286,9 +9306,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-linebreak" @@ -9731,7 +9751,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "futures", "hex", "humantime", @@ -9758,7 +9778,7 @@ dependencies = [ "textwrap 0.16.0", "tokio", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "toml_edit", "tui-tree-widget", "unicode-width", @@ -9791,7 +9811,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm 0.26.1", + "crossterm 0.27.0", "ratatui", "reedline", "serde", @@ -9859,7 +9879,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.7.6", + "toml 0.7.8", "tough", "trust-dns-resolver", "tufaceous", diff --git a/Cargo.toml b/Cargo.toml index 151b37bb30..ad2d8c9156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,9 @@ members = [ "common", "ddm-admin-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "dpd-client", @@ -37,7 +39,6 @@ members = [ "nexus/test-utils-macros", "nexus/test-utils", "nexus/types", - "omdb", "oxide-client", 
"oximeter-client", "oximeter/collector", @@ -62,7 +63,6 @@ members = [ "wicket", "wicketd-client", "wicketd", - "xtask", ] default-members = [ @@ -74,7 +74,9 @@ default-members = [ "ddm-admin-client", "dpd-client", "deploy", - "dev-tools", + "dev-tools/omdb", + "dev-tools/omicron-dev", + "dev-tools/xtask", "dns-server", "dns-service-client", "gateway", @@ -98,7 +100,6 @@ default-members = [ "nexus/db-queries", "nexus/defaults", "nexus/types", - "omdb", "oxide-client", "oximeter-client", "oximeter/collector", @@ -123,7 +124,6 @@ default-members = [ "wicket-dbg", "wicketd", "wicketd-client", - "xtask", ] resolver = "2" @@ -159,7 +159,7 @@ clap = { version = "4.4", features = ["derive", "env", "wrap_help"] } cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" -crossterm = { version = "0.26.1", features = ["event-stream"] } +crossterm = { version = "0.27.0", features = ["event-stream"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "aeb69dda26c7e1a8b6eada425670cd4b83f91c07" } @@ -235,7 +235,7 @@ nexus-types = { path = "nexus/types" } num-integer = "0.1.45" num = { version = "0.4.1", default-features = false, features = [ "libm" ] } omicron-common = { path = "common" } -omicron-dev-tools = { path = "dev-tools" } +omicron-dev = { path = "dev-tools/omicron-dev" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } omicron-omdb = { path = "omdb" } diff --git a/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml similarity index 97% rename from omdb/Cargo.toml rename to dev-tools/omdb/Cargo.toml index 0bf4ee17b7..c9ebbe35ad 100644 --- a/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -15,6 +15,7 @@ clap.workspace = true diesel.workspace = true dropshot.workspace = true humantime.workspace = true +internal-dns.workspace = true nexus-client.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/dev-tools/build.rs b/dev-tools/omdb/build.rs similarity index 100% rename from dev-tools/build.rs rename to dev-tools/omdb/build.rs diff --git a/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs similarity index 73% rename from omdb/src/bin/omdb/db.rs rename to dev-tools/omdb/src/bin/omdb/db.rs index 2d7470613c..e2525c7651 100644 --- a/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -12,6 +12,7 @@ //! would be the only consumer -- and in that case it's okay to query the //! database directly. 
+use crate::Omdb; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; @@ -23,16 +24,20 @@ use clap::ValueEnum; use diesel::expression::SelectableHelper; use diesel::query_dsl::QueryDsl; use diesel::ExpressionMethods; +use nexus_db_model::Disk; use nexus_db_model::DnsGroup; use nexus_db_model::DnsName; use nexus_db_model::DnsVersion; use nexus_db_model::DnsZone; +use nexus_db_model::Instance; use nexus_db_model::Sled; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::DataStore; +use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; use omicron_common::api::external::DataPageParams; @@ -56,7 +61,7 @@ pub struct DbArgs { /// limit to apply to queries that fetch rows #[clap( long = "fetch-limit", - default_value_t = NonZeroU32::new(100).unwrap() + default_value_t = NonZeroU32::new(500).unwrap() )] fetch_limit: NonZeroU32, @@ -67,6 +72,8 @@ pub struct DbArgs { /// Subcommands that query or update the database #[derive(Debug, Subcommand)] enum DbCommands { + /// Print information about disks + Disks(DiskArgs), /// Print information about internal and external DNS Dns(DnsArgs), /// Print information about control plane services @@ -75,6 +82,26 @@ enum DbCommands { Sleds, } +#[derive(Debug, Args)] +struct DiskArgs { + #[command(subcommand)] + command: DiskCommands, +} + +#[derive(Debug, Subcommand)] +enum DiskCommands { + /// Get info for a specific disk + Info(DiskInfoArgs), + /// Summarize current disks + List, +} + +#[derive(Debug, Args)] +struct DiskInfoArgs { + /// The UUID of the volume + uuid: Uuid, +} + #[derive(Debug, Args)] struct DnsArgs { #[command(subcommand)] @@ -131,17 +158,38 @@ enum ServicesCommands { impl DbArgs { /// Run a `omdb db` subcommand. - pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The database URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(db_url) = &self.db_url else { - bail!( - "database URL must be specified with --db-url or OMDB_DB_URL" - ); + let db_url = match &self.db_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: database URL not specified. Will search DNS." + ); + eprintln!("note: (override with --db-url or OMDB_DB_URL)"); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Cockroach, + ) + .await?; + + format!( + "postgresql://root@{}/omicron?sslmode=disable", + addrs + .into_iter() + .map(|a| a.to_string()) + .collect::<Vec<_>>() + .join(",") + ) + .parse() + .context("failed to parse constructed postgres URL")?
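+                    // Hedged note (assumption, not from the original change): libpq-style connection URLs may list several comma-separated host:port pairs, so joining every address DNS returned lets the client try each CockroachDB node in turn.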
+ } }; + eprintln!("note: using database URL {}", &db_url); let db_config = db::Config { url: db_url.clone() }; let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); @@ -158,6 +206,12 @@ impl DbArgs { let opctx = OpContext::for_tests(log.clone(), datastore.clone()); match &self.command { + DbCommands::Disks(DiskArgs { + command: DiskCommands::Info(uuid), + }) => cmd_db_disk_info(&opctx, &datastore, uuid).await, + DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { + cmd_db_disk_list(&datastore, self.fetch_limit).await + } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await } @@ -266,6 +320,190 @@ fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { } } +// Disks + +/// Run `omdb db disk list`. +async fn cmd_db_disk_list( + datastore: &DataStore, + limit: NonZeroU32, +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DiskRow { + name: String, + id: String, + size: String, + state: String, + attached_to: String, + } + + let ctx = || "listing disks".to_string(); + + use db::schema::disk::dsl; + let disks = dsl::disk + .filter(dsl::time_deleted.is_null()) + .limit(i64::from(u32::from(limit))) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading disks")?; + + check_limit(&disks, limit, ctx); + + let rows = disks.into_iter().map(|disk| DiskRow { + name: disk.name().to_string(), + id: disk.id().to_string(), + size: disk.size.to_string(), + state: disk.runtime().disk_state, + attached_to: match disk.runtime().attach_instance_id { + Some(uuid) => uuid.to_string(), + None => "-".to_string(), + }, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +/// Run `omdb db disk info `. +async fn cmd_db_disk_info( + opctx: &OpContext, + datastore: &DataStore, + args: &DiskInfoArgs, +) -> Result<(), anyhow::Error> { + // The row describing the instance + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct UpstairsRow { + host_serial: String, + disk_name: String, + instance_name: String, + propolis_zone: String, + } + + // The rows describing the downstairs regions for this disk/volume + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct DownstairsRow { + host_serial: String, + region: String, + zone: String, + physical_disk: String, + } + + use db::schema::disk::dsl as disk_dsl; + + let disk = disk_dsl::disk + .filter(disk_dsl::id.eq(args.uuid)) + .limit(1) + .select(Disk::as_select()) + .load_async(datastore.pool_for_tests().await?) + .await + .context("loading requested disk")?; + + let Some(disk) = disk.into_iter().next() else { + bail!("no disk: {} found", args.uuid); + }; + + // For information about where this disk is attached. + let mut rows = Vec::new(); + + // If the disk is attached to an instance, show information + // about that instance. + if let Some(instance_uuid) = disk.runtime().attach_instance_id { + // Get the instance this disk is attached to + use db::schema::instance::dsl as instance_dsl; + let instance = instance_dsl::instance + .filter(instance_dsl::id.eq(instance_uuid)) + .limit(1) + .select(Instance::as_select()) + .load_async(datastore.pool_for_tests().await?) 
+ .await + .context("loading requested instance")?; + + let Some(instance) = instance.into_iter().next() else { + bail!("no instance: {} found", instance_uuid); + }; + + let instance_name = instance.name().to_string(); + let propolis_id = instance.runtime().propolis_id.to_string(); + let my_sled_id = instance.runtime().sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + let usr = UpstairsRow { + host_serial: my_sled.serial_number().to_string(), + disk_name: disk.name().to_string(), + instance_name, + propolis_zone: format!("oxz_propolis-server_{}", propolis_id), + }; + rows.push(usr); + } else { + // If the disk is not attached to anything, just print empty + // fields. + let usr = UpstairsRow { + host_serial: "-".to_string(), + disk_name: disk.name().to_string(), + instance_name: "-".to_string(), + propolis_zone: "-".to_string(), + }; + rows.push(usr); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + // Get the dataset backing this volume. + let regions = datastore.get_allocated_regions(disk.volume_id).await?; + + let mut rows = Vec::with_capacity(3); + for (dataset, region) in regions { + let my_pool_id = dataset.pool_id; + let (_, my_zpool) = LookupPath::new(opctx, datastore) + .zpool_id(my_pool_id) + .fetch() + .await + .context("failed to look up zpool")?; + + let my_sled_id = my_zpool.sled_id; + + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; + + rows.push(DownstairsRow { + host_serial: my_sled.serial_number().to_string(), + region: region.id().to_string(), + zone: format!("oxz_crucible_{}", dataset.id()), + physical_disk: my_zpool.physical_disk_id.to_string(), + }); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} + // SERVICES #[derive(Tabled)] diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs new file mode 100644 index 0000000000..166ed3043f --- /dev/null +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -0,0 +1,166 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! CLI for debugging Omicron internal state +//! +//! GROUND RULES: +//! +//! 1. There aren't a lot of ground rules here. At least for now, this is a +//! place to put any kind of runtime tooling for Omicron that seems useful. +//! You can query the database directly (see notes in db.rs), use internal +//! APIs, etc. To the degree that we can stick to stable interfaces, great. +//! But at this stage we'd rather have tools that work on latest than not +//! have them because we couldn't prioritize keeping them stable. +//! +//! 2. Debuggers should never lie! Documentation and command names should be +//! precise about what they're reporting. In a working system, these things +//! might all be the same: +//! +//! - the list of instances with zones and propolis processes running on +//! a sled +//! - the list of instances that sled agent knows about +//! - the list of instances that Nexus or the database reports should be +//! running on a sled +//! +//! 
But in a broken system, these things might be all different. People use +//! debuggers to understand broken systems. The debugger should say which of +//! these it's reporting, rather than "the list of instances on a sled". +//! +//! 3. Where possible, when the tool encounters something unexpected, it should +//! print what it can (including the error message and bad data) and then +//! continue. It generally shouldn't stop on the first error. (We often +//! find strange things when debugging but we need our tools to tell us as +//! much as they can!) + +use anyhow::Context; +use clap::Parser; +use clap::Subcommand; +use omicron_common::address::Ipv6Subnet; +use std::net::SocketAddr; +use std::net::SocketAddrV6; + +mod db; +mod nexus; +mod sled_agent; + +#[tokio::main] +async fn main() -> Result<(), anyhow::Error> { + let args = Omdb::parse(); + + let log = dropshot::ConfigLogging::StderrTerminal { + level: args.log_level.clone(), + } + .to_logger("omdb") + .context("failed to create logger")?; + + match &args.command { + OmdbCommands::Db(db) => db.run_cmd(&args, &log).await, + OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await, + OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await, + } +} + +/// Omicron debugger (unstable) +/// +/// This tool provides commands for directly querying Omicron components about +/// their internal state using internal APIs. This is a prototype. The +/// commands and output are unstable and may change. +#[derive(Debug, Parser)] +struct Omdb { + /// log level filter + #[arg( + env, + long, + value_parser = parse_dropshot_log_level, + default_value = "warn", + )] + log_level: dropshot::ConfigLoggingLevel, + + #[arg(env = "OMDB_DNS_SERVER", long)] + dns_server: Option<SocketAddr>, + + #[command(subcommand)] + command: OmdbCommands, +} + +impl Omdb { + async fn dns_lookup_all( + &self, + log: slog::Logger, + service_name: internal_dns::ServiceName, + ) -> Result<Vec<SocketAddrV6>, anyhow::Error> { + let resolver = self.dns_resolver(log).await?; + resolver + .lookup_all_socket_v6(service_name) + .await + .with_context(|| format!("looking up {:?} in DNS", service_name)) + } + + async fn dns_resolver( + &self, + log: slog::Logger, + ) -> Result<internal_dns::resolver::Resolver, anyhow::Error> { + match &self.dns_server { + Some(dns_server) => { + internal_dns::resolver::Resolver::new_from_addrs( + log, + &[*dns_server], + ) + .with_context(|| { + format!( + "creating DNS resolver for DNS server {:?}", + dns_server + ) + }) + } + None => { + // In principle, we should look at /etc/resolv.conf to find the + // DNS servers. In practice, this usually isn't populated + // today. See oxidecomputer/omicron#2122. + // + // However, the address selected below should work for most + // existing Omicron deployments today. That's because while the + // base subnet is in principle configurable in config-rss.toml, + // it's very uncommon to change it from the default value used + // here. + // + // Yet another option would be to find a local IP address that + // looks like it's probably on the underlay network and use that + // to find the subnet to use. But again, this is unlikely to be + // wrong and it's easy to override.
+ let subnet = + Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap()); + eprintln!("note: using DNS server for subnet {}", subnet.net()); + eprintln!( + "note: (if this is not right, use --dns-server \ + to specify an alternate DNS server)", + ); + internal_dns::resolver::Resolver::new_from_subnet(log, subnet) + .with_context(|| { + format!( + "creating DNS resolver for subnet {}", + subnet.net() + ) + }) + } + } + } +} + +#[derive(Debug, Subcommand)] +#[allow(clippy::large_enum_variant)] +enum OmdbCommands { + /// Query the control plane database (CockroachDB) + Db(db::DbArgs), + /// Debug a specific Nexus instance + Nexus(nexus::NexusArgs), + /// Debug a specific Sled + SledAgent(sled_agent::SledAgentArgs), +} + +fn parse_dropshot_log_level( + s: &str, +) -> Result<dropshot::ConfigLoggingLevel, anyhow::Error> { + serde_json::from_str(&format!("{:?}", s)).context("parsing log level") +} diff --git a/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs similarity index 95% rename from omdb/src/bin/omdb/nexus.rs rename to dev-tools/omdb/src/bin/omdb/nexus.rs index 531158ab16..7599fc209d 100644 --- a/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -4,7 +4,7 @@ //! omdb commands that query or update specific Nexus instances -use anyhow::bail; +use crate::Omdb; use anyhow::Context; use chrono::SecondsFormat; use chrono::Utc; @@ -55,19 +55,32 @@ enum BackgroundTasksCommands { impl NexusArgs { /// Run a `omdb nexus` subcommand. - pub async fn run_cmd( + pub(crate) async fn run_cmd( &self, + omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { - // This is a little goofy. The nexus URL is required, but can come - // from the environment, in which case it won't be on the command line. - let Some(nexus_url) = &self.nexus_internal_url else { - bail!( - "nexus URL must be specified with --nexus-internal-url or \ - OMDB_NEXUS_URL" - ); + let nexus_url = match &self.nexus_internal_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: Nexus URL not specified. Will pick one from DNS." + ); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::Nexus, + ) + .await?; + let addr = addrs.into_iter().next().expect( + "expected at least one Nexus address from \ + successful DNS lookup", + ); + format!("http://{}", addr) + } }; - let client = nexus_client::Client::new(nexus_url, log.clone()); + eprintln!("note: using Nexus URL {}", &nexus_url); + let client = nexus_client::Client::new(&nexus_url, log.clone()); match &self.command { NexusCommands::BackgroundTasks(BackgroundTasksArgs { diff --git a/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs similarity index 90% rename from omdb/src/bin/omdb/sled_agent.rs rename to dev-tools/omdb/src/bin/omdb/sled_agent.rs index 09f34936ca..c413a2ba43 100644 --- a/omdb/src/bin/omdb/sled_agent.rs +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -4,12 +4,13 @@ //! omdb commands that query or update specific Sleds
+use crate::Omdb; use anyhow::bail; use anyhow::Context; use clap::Args; use clap::Subcommand; -/// Arguments to the "omdb sled" subcommand +/// Arguments to the "omdb sled-agent" subcommand #[derive(Debug, Args)] pub struct SledAgentArgs { /// URL of the Sled internal API @@ -20,7 +21,7 @@ command: SledAgentCommands, } -/// Subcommands for the "omdb sled" subcommand +/// Subcommands for the "omdb sled-agent" subcommand #[derive(Debug, Subcommand)] enum SledAgentCommands { /// print information about zones @@ -45,9 +46,10 @@ enum ZpoolCommands { } impl SledAgentArgs { - /// Run a `omdb sled` subcommand. - pub async fn run_cmd( + /// Run a `omdb sled-agent` subcommand. + pub(crate) async fn run_cmd( &self, + _omdb: &Omdb, log: &slog::Logger, ) -> Result<(), anyhow::Error> { // This is a little goofy. The sled URL is required, but can come @@ -72,7 +74,7 @@ } } -/// Runs `omdb sled zones list` +/// Runs `omdb sled-agent zones list` async fn cmd_zones_list( client: &sled_agent_client::Client, ) -> Result<(), anyhow::Error> { @@ -91,7 +93,7 @@ Ok(()) } -/// Runs `omdb sled zpools list` +/// Runs `omdb sled-agent zpools list` async fn cmd_zpools_list( client: &sled_agent_client::Client, ) -> Result<(), anyhow::Error> { diff --git a/dev-tools/omdb/tests/config.test.toml b/dev-tools/omdb/tests/config.test.toml new file mode 120000 index 0000000000..6050ca47dd --- /dev/null +++ b/dev-tools/omdb/tests/config.test.toml @@ -0,0 +1 @@ +../../../nexus/tests/config.test.toml \ No newline at end of file diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out new file mode 100644 index 0000000000..e7a50da935 --- /dev/null +++ b/dev-tools/omdb/tests/env.out @@ -0,0 +1,189 @@ +EXECUTING COMMAND: omdb ["db", "--db-url", "postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +error: invalid value 'junk' for '--db-url <DB_URL>': invalid connection string: unexpected EOF + +For more information, try '--help'.
+============================================= +EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDACTED_PORT", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"] +termination: Exited(1) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +note: using Nexus URL junk +Error: listing background tasks + +Caused by: + 0: Communication Error: builder error: relative URL without a base + 1: builder error: relative URL without a base + 2: relative URL without a base +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. 
+note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "background-tasks", "doc"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + watches external DNS data stored in CockroachDB + + +task: "dns_config_internal" + watches internal DNS data stored in CockroachDB + + +task: "dns_propagation_external" + propagates latest external DNS configuration (from "dns_config_external" + background task) to the latest list of DNS servers (from + "dns_servers_external" background task) + + +task: "dns_propagation_internal" + propagates latest internal DNS configuration (from "dns_config_internal" + background task) to the latest list of DNS servers (from + "dns_servers_internal" background task) + + +task: "dns_servers_external" + watches list of external DNS servers stored in CockroachDB + + +task: "dns_servers_internal" + watches list of internal DNS servers stored in CockroachDB + + +task: "external_endpoints" + reads config for silos and TLS certificates to determine the right set of + HTTP endpoints, their HTTP server names, and which TLS certificates to use + on each one + + +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. +note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +============================================= +EXECUTING COMMAND: omdb ["db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. +note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= +EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] +termination: Exited(0) +--------------------------------------------- +stdout: +SERIAL IP ROLE ID +sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED +--------------------------------------------- +stderr: +note: database URL not specified. Will search DNS. 
+note: (override with --db-url or OMDB_DB_URL) +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected (4.0.0) +============================================= diff --git a/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out similarity index 93% rename from omdb/tests/successes.out rename to dev-tools/omdb/tests/successes.out index 90e399f246..7532e9b61e 100644 --- a/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -7,6 +7,7 @@ internal control-plane.oxide.internal 1 rack setup external oxide-dev.test 2 create silo: "test-suite-silo" --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] @@ -22,6 +23,7 @@ changes: names added: 1, names removed: 0 + test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] @@ -33,6 +35,7 @@ External zone: oxide-dev.test test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] @@ -48,6 +51,7 @@ InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] @@ -66,6 +70,7 @@ sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "sleds"] @@ -76,6 +81,7 @@ SERIAL IP ROLE ID sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (4.0.0) ============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] @@ -118,6 +124,7 @@ task: "external_endpoints" --------------------------------------------- stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show"] termination: Exited(0) @@ -198,4 +205,5 @@ task: "external_endpoints" --------------------------------------------- stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= diff --git a/omdb/tests/test_all_output.rs 
b/dev-tools/omdb/tests/test_all_output.rs similarity index 83% rename from omdb/tests/test_all_output.rs rename to dev-tools/omdb/tests/test_all_output.rs index 031bc225a6..0eddcb492c 100644 --- a/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -38,12 +38,14 @@ async fn test_omdb_usage_errors() { &["db"], &["db", "--help"], &["db", "dns"], - &["db", "dns", "show"], &["db", "dns", "diff"], &["db", "dns", "names"], &["db", "services"], &["nexus"], &["nexus", "background-tasks"], + &["sled-agent"], + &["sled-agent", "zones"], + &["sled-agent", "zpools"], ]; for args in invocations { @@ -69,6 +71,9 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["db", "sleds"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + // We can't easily test the sled agent output because that's only + // provided by a real sled agent, which is not available in the + // ControlPlaneTestContext. ]; for args in invocations { @@ -102,6 +107,7 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = format!("http://{}", cptestctx.internal_client.bind_address); + let dns_sockaddr = cptestctx.internal_dns.dns_server.local_address(); let mut output = String::new(); // Database URL @@ -109,12 +115,12 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { let args = &["db", "--db-url", &postgres_url, "sleds"]; do_run(&mut output, |exec| exec, &cmd_path, args).await; - // Case 2: specified in both places. + // Case 2: specified in multiple places (command-line argument wins) let args = &["db", "--db-url", "junk", "sleds"]; let p = postgres_url.clone(); do_run( &mut output, - move |exec| exec.env("ODMB_DB_URL", &p), + move |exec| exec.env("OMDB_DB_URL", &p), &cmd_path, args, ) @@ -131,18 +137,51 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { ]; do_run(&mut output, |exec| exec, &cmd_path.clone(), args).await; - // Case 2: specified in both places. + // Case 2: specified in multiple places (command-line argument wins) let args = &["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"]; - let p = postgres_url.clone(); + let n = nexus_internal_url.clone(); + do_run( + &mut output, + move |exec| exec.env("OMDB_NEXUS_URL", &n), + &cmd_path, + args, + ) + .await; + + // Verify that if you provide a working internal DNS server, you can omit + // the URLs. That's true regardless of whether you pass it on the command + // line or via an environment variable. 
+ let args = &["nexus", "background-tasks", "doc"]; do_run( &mut output, - move |exec| exec.env("ODMB_DB_URL", &p), + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), &cmd_path, args, ) .await; + let args = &[ + "--dns-server", + &dns_sockaddr.to_string(), + "nexus", + "background-tasks", + "doc", + ]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + + let args = &["db", "sleds"]; + do_run( + &mut output, + move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), + &cmd_path, + args, + ) + .await; + + let args = &["--dns-server", &dns_sockaddr.to_string(), "db", "sleds"]; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; + assert_contents("tests/env.out", &output); } @@ -194,7 +233,7 @@ async fn do_run( output.push_str(&redact_variable(&stdout_text)); write!(output, "---------------------------------------------\n").unwrap(); write!(output, "stderr:\n").unwrap(); - output.push_str(&stderr_text); + output.push_str(&redact_variable(&stderr_text)); write!(output, "=============================================\n").unwrap(); } diff --git a/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out similarity index 79% rename from omdb/tests/usage_errors.out rename to dev-tools/omdb/tests/usage_errors.out index b381ab46c1..8afcf13fb1 100644 --- a/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -15,8 +15,9 @@ Commands: help Print this message or the help of the given subcommand(s) Options: - --log-level <LOG_LEVEL> log level filter [env: LOG_LEVEL=] [default: warn] - -h, --help Print help (see more with '--help') + --log-level <LOG_LEVEL> log level filter [env: LOG_LEVEL=] [default: warn] + --dns-server <DNS_SERVER> [env: OMDB_DNS_SERVER=] + -h, --help Print help (see more with '--help') ============================================= EXECUTING COMMAND: omdb ["--help"] termination: Exited(0) --------------------------------------------- stdout: @@ -42,6 +43,9 @@ Options: [env: LOG_LEVEL=] [default: warn] + --dns-server <DNS_SERVER> + [env: OMDB_DNS_SERVER=] + -h, --help Print help (see a summary with '-h') --------------------------------------------- stderr: @@ -82,6 +86,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] <COMMAND> Commands: + disks Print information about disks dns Print information about internal and external DNS services Print information about control plane services sleds Print information about sleds @@ -89,7 +94,7 @@ Commands: Options: --db-url <DB_URL> URL of the database SQL interface [env: OMDB_DB_URL=] - --fetch-limit <FETCH_LIMIT> limit to apply to queries that fetch rows [default: 100] + --fetch-limit <FETCH_LIMIT> limit to apply to queries that fetch rows [default: 500] -h, --help Print help ============================================= EXECUTING COMMAND: omdb ["db", "--help"] termination: Exited(0) --------------------------------------------- stdout: Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] <COMMAND> Commands: + disks Print information about disks dns Print information about internal and external DNS services Print information about control plane services sleds Print information about sleds @@ -108,7 +114,7 @@ Commands: Options: --db-url <DB_URL> URL of the database SQL interface [env: OMDB_DB_URL=] - --fetch-limit <FETCH_LIMIT> limit to apply to queries that fetch rows [default: 100] + --fetch-limit <FETCH_LIMIT> limit to apply to queries that fetch rows [default: 500] -h, --help Print help --------------------------------------------- stderr: @@ -132,14 +138,6 @@ Commands: Options: -h, --help Print help ============================================= -EXECUTING COMMAND: omdb ["db", "dns", "show"] -termination: Exited(1) ---------------------------------------------- -stdout:
---------------------------------------------- -stderr: -Error: database URL must be specified with --db-url or OMDB_DB_URL -============================================== EXECUTING COMMAND: omdb ["db", "dns", "diff"] termination: Exited(2) --------------------------------------------- @@ -224,3 +222,56 @@ Commands: Options: -h, --help Print help ============================================= +EXECUTING COMMAND: omdb ["sled-agent"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Debug a specific Sled + +Usage: omdb sled-agent [OPTIONS] <COMMAND> + +Commands: + zones print information about zones + zpools print information about zpools + help Print this message or the help of the given subcommand(s) + +Options: + --sled-agent-url <SLED_AGENT_URL> URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zones"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zones + +Usage: omdb sled-agent zones <COMMAND> + +Commands: + list Print list of all running control plane zones + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= +EXECUTING COMMAND: omdb ["sled-agent", "zpools"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +print information about zpools + +Usage: omdb sled-agent zpools <COMMAND> + +Commands: + list Print list of all zpools managed by the sled agent + help Print this message or the help of the given subcommand(s) + +Options: + -h, --help Print help +============================================= diff --git a/dev-tools/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml similarity index 97% rename from dev-tools/Cargo.toml rename to dev-tools/omicron-dev/Cargo.toml index 4a7c25b337..2061489cbb 100644 --- a/dev-tools/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "omicron-dev-tools" +name = "omicron-dev" version = "0.1.0" edition = "2021" license = "MPL-2.0" diff --git a/omdb/build.rs b/dev-tools/omicron-dev/build.rs similarity index 100% rename from omdb/build.rs rename to dev-tools/omicron-dev/build.rs diff --git a/dev-tools/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs similarity index 99% rename from dev-tools/src/bin/omicron-dev.rs rename to dev-tools/omicron-dev/src/bin/omicron-dev.rs index 70018e9d9b..14617d6ba4 100644 --- a/dev-tools/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -327,7 +327,7 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { let mut signal_stream = signals.fuse(); // Read configuration.
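+    // (include_str! resolves relative to this file's directory, dev-tools/omicron-dev/src/bin/, so four "../" components reach the repo root.)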
- let config_str = include_str!("../../../nexus/examples/config.toml"); + let config_str = include_str!("../../../../nexus/examples/config.toml"); let mut config: omicron_common::nexus_config::Config = toml::from_str(config_str).context("parsing example config")?; config.pkg.log = dropshot::ConfigLogging::File { diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-bad-cmd-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-bad-cmd-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-populate-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stderr rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr diff --git a/dev-tools/tests/output/cmd-omicron-dev-noargs-stdout b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout similarity index 100% rename from dev-tools/tests/output/cmd-omicron-dev-noargs-stdout rename to dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stdout diff --git a/dev-tools/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs similarity index 100% rename from dev-tools/tests/test_omicron_dev.rs rename to dev-tools/omicron-dev/tests/test_omicron_dev.rs diff --git a/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml similarity index 100% rename from xtask/Cargo.toml rename to dev-tools/xtask/Cargo.toml diff --git a/xtask/src/main.rs b/dev-tools/xtask/src/main.rs similarity index 100% rename from xtask/src/main.rs rename to dev-tools/xtask/src/main.rs diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index a857538531..01a8430b62 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -56,8 
+56,8 @@ impl Drop for ServerHandle { } impl ServerHandle { - pub fn local_address(&self) -> &SocketAddr { - &self.local_address + pub fn local_address(&self) -> SocketAddr { + self.local_address } } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index d6dd75b4bd..ea8625a667 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -164,7 +164,7 @@ impl TransientServer { pub async fn resolver(&self) -> Result<TokioAsyncResolver, anyhow::Error> { let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *self.dns_server.local_address(), + socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index 0c10fd75c9..98cd1487ab 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -375,7 +375,7 @@ async fn init_client_server( let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index df4832f346..9e003ed14f 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -72,7 +72,7 @@ async fn main() -> Result<()> { } else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); - Resolver::new_from_addrs(log.clone(), addrs) + Resolver::new_from_addrs(log.clone(), &addrs) .context("creating resolver with explicit nameserver addresses")? }; diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 5572e193dc..e5272cd23a 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -281,7 +281,7 @@ impl DnsConfigBuilder { let set = self .service_instances_zones - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); match set.insert(zone.clone(), port) { None => Ok(()), @@ -320,7 +320,7 @@ impl DnsConfigBuilder { let set = self .service_instances_sleds - .entry(service.clone()) + .entry(service) .or_insert_with(BTreeMap::new); let sled_id = sled.0; match set.insert(sled.clone(), port) { diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 663e04bcd9..44ed9228e2 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -14,7 +14,7 @@ pub const DNS_ZONE: &str = "control-plane.oxide.internal"; pub const DNS_ZONE_EXTERNAL_TESTING: &str = "oxide-dev.test"; /// Names of services within the control plane -#[derive(Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] pub enum ServiceName { Clickhouse, ClickhouseKeeper, diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 68065db36d..114333cb61 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -55,13 +55,13 @@ impl Resolver { /// Construct a new DNS resolver from specific DNS server addresses.
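+    /// (Note: takes a borrowed slice so callers keep ownership of their address list.)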
pub fn new_from_addrs( log: slog::Logger, - dns_addrs: Vec<SocketAddr>, + dns_addrs: &[SocketAddr], ) -> Result<Self, ResolveError> { info!(log, "new DNS resolver"; "addresses" => ?dns_addrs); let mut rc = ResolverConfig::new(); let dns_server_count = dns_addrs.len(); - for socket_addr in dns_addrs.into_iter() { + for &socket_addr in dns_addrs.into_iter() { rc.add_name_server(NameServerConfig { socket_addr, protocol: Protocol::Udp, @@ -137,7 +137,7 @@ subnet: Ipv6Subnet<AZ_PREFIX>, ) -> Result<Self, ResolveError> { let dns_ips = Self::servers_from_subnet(subnet); - Resolver::new_from_addrs(log, dns_ips) + Resolver::new_from_addrs(log, &dns_ips) } /// Remove all entries from the resolver's cache. @@ -455,7 +455,7 @@ mod test { } fn dns_server_address(&self) -> SocketAddr { - *self.dns_server.local_address() + self.dns_server.local_address() } fn cleanup_successful(mut self) { @@ -465,7 +465,7 @@ mod test { fn resolver(&self) -> anyhow::Result<Resolver> { let log = self.log.new(o!("component" => "DnsResolver")); - Resolver::new_from_addrs(log, vec![self.dns_server_address()]) + Resolver::new_from_addrs(log, &[self.dns_server_address()]) .context("creating resolver for test DNS server") } @@ -591,7 +591,7 @@ let zone = dns_builder.host_zone(Uuid::new_v4(), *db_ip.ip()).unwrap(); dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, db_ip.port()) + .service_backend_zone(srv_crdb, &zone, db_ip.port()) .unwrap(); } let zone = dns_builder .host_zone(Uuid::new_v4(), *clickhouse_addr.ip()) .unwrap(); dns_builder - .service_backend_zone( - srv_clickhouse.clone(), - &zone, - clickhouse_addr.port(), - ) + .service_backend_zone(srv_clickhouse, &zone, clickhouse_addr.port()) .unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), *crucible_addr.ip()).unwrap(); dns_builder - .service_backend_zone( - srv_backend.clone(), - &zone, - crucible_addr.port(), - ) + .service_backend_zone(srv_backend, &zone, crucible_addr.port()) .unwrap(); let mut dns_config = dns_builder.build(); @@ -694,9 +686,7 @@ let ip1 = Ipv6Addr::from_str("ff::01").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip1).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 12345) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build(); dns_server.update(&dns_config).await.unwrap(); let found_ip = resolver @@ -711,9 +701,7 @@ let ip2 = Ipv6Addr::from_str("ee::02").unwrap(); let zone = dns_builder.host_zone(Uuid::new_v4(), ip2).unwrap(); let srv_crdb = ServiceName::Cockroach; - dns_builder - .service_backend_zone(srv_crdb.clone(), &zone, 54321) - .unwrap(); + dns_builder.service_backend_zone(srv_crdb, &zone, 54321).unwrap(); let mut dns_config = dns_builder.build(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); @@ -802,7 +790,7 @@ let dns_server = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); @@ -879,9 +867,9 @@ let dns_server2 = DnsServer::create(&logctx.log).await; let resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![ - *dns_server1.dns_server.local_address(), - *dns_server2.dns_server.local_address(), + &[ + dns_server1.dns_server.local_address(), + dns_server2.dns_server.local_address(), ], ) .unwrap(); @@ -955,7 +943,7 @@ let dns_server = DnsServer::create(&logctx.log).await; let
resolver = Resolver::new_from_addrs( logctx.log.clone(), - vec![*dns_server.dns_server.local_address()], + &[dns_server.dns_server.local_address()], ) .unwrap(); diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 6cacfe4cb2..ec959e2907 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -905,6 +905,14 @@ authz_resource! { polar_snippet = FleetChild, } +authz_resource! { + name = "Zpool", + parent = "Fleet", + primary_key = Uuid, + roles_allowed = false, + polar_snippet = FleetChild, +} + authz_resource! { name = "SledInstance", parent = "Fleet", diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index 1ce86950bc..bcd7a42945 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -150,6 +150,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { IdentityProvider::init(), SamlIdentityProvider::init(), Sled::init(), + Zpool::init(), Service::init(), UpdateArtifact::init(), UserBuiltin::init(), diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index a0dad0e17e..054fe6430b 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -88,6 +88,13 @@ pub async fn make_resources( LookupType::ById(sled_id), )); + let zpool_id = "aaaaaaaa-1233-af7d-9220-afe1d8090900".parse().unwrap(); + builder.new_resource(authz::Zpool::new( + authz::FLEET, + zpool_id, + LookupType::ById(zpool_id), + )); + make_services(&mut builder).await; builder.new_resource(authz::PhysicalDisk::new( diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 0425660fbb..72a32f562c 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -347,6 +347,11 @@ impl<'a> LookupPath<'a> { Sled::PrimaryKey(Root { lookup_root: self }, id) } + /// Select a resource of type Zpool, identified by its id + pub fn zpool_id(self, id: Uuid) -> Zpool<'a> { + Zpool::PrimaryKey(Root { lookup_root: self }, id) + } + /// Select a resource of type Service, identified by its id pub fn service_id(self, id: Uuid) -> Service<'a> { Service::PrimaryKey(Root { lookup_root: self }, id) @@ -788,6 +793,15 @@ lookup_resource! { primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] } +lookup_resource! { + name = "Zpool", + ancestors = [], + children = [], + lookup_by_name = false, + soft_deletes = true, + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] +} + lookup_resource! { name = "SledInstance", ancestors = [], diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 7531672514..72031c567e 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -782,6 +782,20 @@ resource: Sled id "8a785566-adaf-c8d8-e886-bee7f9b73ca7" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: Zpool id "aaaaaaaa-1233-af7d-9220-afe1d8090900" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + fleet-collaborator ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! 
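For orientation: the new `lookup_resource!` entry above generates the `Zpool` lookup machinery, and `LookupPath::zpool_id` is the entry point callers use. A minimal sketch of the lookup-then-authorize flow this enables, assuming an `OpContext` and a datastore handle are in scope (the variable names here are illustrative, not part of the change):

```rust
// Hypothetical caller inside Nexus: resolve the authz handle for a zpool
// by ID, checking Read permission along the way.
let (.., authz_zpool) = LookupPath::new(&opctx, &datastore)
    .zpool_id(zpool_id)
    .lookup_for(authz::Action::Read)
    .await?;
```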
+ resource: Service id "6b1f15ee-d6b3-424c-8436-94413a0b682d" USER Q R LC RP M MP CC D diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1ac63d19f6..f07ceae4a0 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -11,7 +11,6 @@ use super::MAX_NICS_PER_INSTANCE; use super::MAX_VCPU_PER_INSTANCE; use super::MIN_MEMORY_BYTES_PER_INSTANCE; use crate::app::sagas; -use crate::app::sagas::retry_until_known_result; use crate::cidata::InstanceCiData; use crate::external_api::params; use cancel_safe_futures::prelude::*; @@ -41,7 +40,6 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; -use omicron_common::api::internal::shared::SwitchLocation; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; use propolis_client::support::tungstenite::Message as WebSocketMessage; @@ -54,9 +52,7 @@ use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; -use std::collections::HashSet; use std::net::SocketAddr; -use std::str::FromStr; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; use uuid::Uuid; @@ -1232,146 +1228,6 @@ impl super::Nexus { Ok(()) } - // Switches with uplinks configured and boundary services enabled - pub(crate) async fn boundary_switches( - &self, - opctx: &OpContext, - ) -> Result, Error> { - let mut boundary_switches: HashSet = HashSet::new(); - let uplinks = self.list_switch_ports_with_uplinks(opctx).await?; - for uplink in &uplinks { - let location: SwitchLocation = - uplink.switch_location.parse().map_err(|_| { - Error::internal_error(&format!( - "invalid switch location in uplink config: {}", - uplink.switch_location - )) - })?; - boundary_switches.insert(location); - } - Ok(boundary_switches) - } - - /// Ensures that the Dendrite configuration for the supplied instance is - /// up-to-date. - /// - /// # Parameters - /// - /// - `opctx`: An operation context that grants read and list-children - /// permissions on the identified instance. - /// - `instance_id`: The ID of the instance to act on. - /// - `sled_ip_address`: The internal IP address assigned to the sled's - /// sled agent. - /// - `ip_index_filter`: An optional filter on the index into the instance's - /// external IP array. - /// - If this is `Some(n)`, this routine configures DPD state for only the - /// Nth external IP in the collection returned from CRDB. The caller is - /// responsible for ensuring that the IP collection has stable indices - /// when making this call. - /// - If this is `None`, this routine configures DPD for all external - /// IPs. - pub(crate) async fn instance_ensure_dpd_config( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_ip_address: &std::net::SocketAddrV6, - ip_index_filter: Option, - dpd_client: &Arc, - ) -> Result<(), Error> { - let log = &self.log; - - info!(log, "looking up instance's primary network interface"; - "instance_id" => %instance_id); - - let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::ListChildren) - .await?; - - // All external IPs map to the primary network interface, so find that - // interface. 
If there is no such interface, there's no way to route - // traffic destined to those IPs, so there's nothing to configure and - // it's safe to return early. - let network_interface = match self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await? - .into_iter() - .find(|interface| interface.primary) - { - Some(interface) => interface, - None => { - info!(log, "Instance has no primary network interface"; - "instance_id" => %instance_id); - return Ok(()); - } - }; - - let mac_address = - macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) - .map_err(|e| { - Error::internal_error(&format!( - "failed to convert mac address: {e}" - )) - })?; - - let vni: u32 = network_interface.vni.into(); - - info!(log, "looking up instance's external IPs"; - "instance_id" => %instance_id); - - let ips = self - .db_datastore - .instance_lookup_external_ips(&opctx, instance_id) - .await?; - - if let Some(wanted_index) = ip_index_filter { - if let None = ips.get(wanted_index) { - return Err(Error::internal_error(&format!( - "failed to find external ip address at index: {}", - wanted_index - ))); - } - } - - for target_ip in ips - .iter() - .enumerate() - .filter(|(index, _)| { - if let Some(wanted_index) = ip_index_filter { - *index == wanted_index - } else { - true - } - }) - .map(|(_, ip)| ip) - { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: {e}" - )) - })?; - } - - Ok(()) - } - /// Returns the requested range of serial console output bytes, /// provided they are still in the propolis-server's cache. pub(crate) async fn instance_serial_console_data( diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs new file mode 100644 index 0000000000..6b819a9e11 --- /dev/null +++ b/nexus/src/app/instance_network.rs @@ -0,0 +1,497 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Routines that manage instance-related networking state. + +use crate::app::sagas::retry_until_known_result; +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::lookup::LookupPath; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::Error; +use omicron_common::api::internal::shared::SwitchLocation; +use sled_agent_client::types::SetVirtualNetworkInterfaceHost; +use std::collections::HashSet; +use std::str::FromStr; +use std::sync::Arc; +use uuid::Uuid; + +impl super::Nexus { + /// Returns the set of switches with uplinks configured and boundary + /// services enabled. 
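+    ///
+    /// Callers typically fan per-switch work out over the returned set.
+    /// A rough sketch of the shape (illustrative only, error handling
+    /// elided):
+    ///
+    /// ```ignore
+    /// for switch in nexus.boundary_switches(&opctx).await? {
+    ///     // look up the DPD client for `switch` and push config to it
+    /// }
+    /// ```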
+    pub(crate) async fn boundary_switches(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<HashSet<SwitchLocation>, Error> {
+        let mut boundary_switches: HashSet<SwitchLocation> = HashSet::new();
+        let uplinks = self.list_switch_ports_with_uplinks(opctx).await?;
+        for uplink in &uplinks {
+            let location: SwitchLocation =
+                uplink.switch_location.parse().map_err(|_| {
+                    Error::internal_error(&format!(
+                        "invalid switch location in uplink config: {}",
+                        uplink.switch_location
+                    ))
+                })?;
+            boundary_switches.insert(location);
+        }
+        Ok(boundary_switches)
+    }
+
+    /// Ensures that V2P mappings exist that indicate that the instance with ID
+    /// `instance_id` is resident on the sled with ID `sled_id`.
+    pub(crate) async fn create_instance_v2p_mappings(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+        sled_id: Uuid,
+    ) -> Result<(), Error> {
+        info!(&self.log, "creating V2P mappings for instance";
+              "instance_id" => %instance_id,
+              "sled_id" => %sled_id);
+
+        // For every sled that isn't the sled this instance was allocated to, create
+        // a virtual to physical mapping for each of this instance's NICs.
+        //
+        // For the mappings to be correct, a few invariants must hold:
+        //
+        // - mappings must be set whenever an instance's sled changes (e.g.
+        //   during instance creation, migration, stop + start)
+        //
+        // - an instance's sled must not change while its corresponding mappings
+        //   are being created
+        //
+        // - the same mapping creation must be broadcast to all sleds
+        //
+        // A more targeted approach would be to see what other instances share
+        // the VPC this instance is in (or more generally, what instances should
+        // have connectivity to this one), see what sleds those are allocated
+        // to, and only create V2P mappings for those sleds.
+        //
+        // There's additional work with this approach:
+        //
+        // - it means that delete calls are required as well as set calls,
+        //   meaning that now the ordering of those matters (this may also
+        //   necessitate a generation number for V2P mappings)
+        //
+        // - V2P mappings have to be bidirectional in order for both instances'
+        //   packets to make a round trip. This isn't a problem with the
+        //   broadcast approach because one of the sides will exist already, but
+        //   it is something to orchestrate with a more targeted approach.
+        //
+        // TODO-correctness Default firewall rules currently will block
+        // instances in different VPCs from connecting to each other. If it ever
+        // stops doing this, the broadcast approach will create V2P mappings
+        // that shouldn't exist.
+        let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::Read)
+            .await?;
+
+        let instance_nics = self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?;
+
+        // Look up the supplied sled's physical host IP.
+        let physical_host_ip =
+            *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip;
+
+        let mut last_sled_id: Option<Uuid> = None;
+        loop {
+            let pagparams = DataPageParams {
+                marker: last_sled_id.as_ref(),
+                direction: dropshot::PaginationOrder::Ascending,
+                limit: std::num::NonZeroU32::new(10).unwrap(),
+            };
+
+            let sleds_page =
+                self.sled_list(&self.opctx_alloc, &pagparams).await?;
+            let mut join_handles =
+                Vec::with_capacity(sleds_page.len() * instance_nics.len());
+
+            for sled in &sleds_page {
+                // set_v2p is not required for the sled this instance was
+                // allocated to; OPTE currently does that automatically.
+                //
+                // TODO(#3107): Remove this when XDE stops creating mappings
+                // implicitly.
+                if sled.id() == sled_id {
+                    continue;
+                }
+
+                for nic in &instance_nics {
+                    let client = self.sled_client(&sled.id()).await?;
+                    let nic_id = nic.id;
+                    let mapping = SetVirtualNetworkInterfaceHost {
+                        virtual_ip: nic.ip,
+                        virtual_mac: nic.mac.clone(),
+                        physical_host_ip,
+                        vni: nic.vni.clone(),
+                    };
+
+                    let log = self.log.clone();
+
+                    // This function is idempotent: calling the set_v2p ioctl with
+                    // the same information is a no-op.
+                    join_handles.push(tokio::spawn(futures::future::lazy(
+                        move |_ctx| async move {
+                            retry_until_known_result(&log, || async {
+                                client.set_v2p(&nic_id, &mapping).await
+                            })
+                            .await
+                        },
+                    )));
+                }
+            }
+
+            // Concurrently run each future to completion, but return the last
+            // error seen.
+            let mut error = None;
+            for join_handle in join_handles {
+                let result = join_handle
+                    .await
+                    .map_err(|e| Error::internal_error(&e.to_string()))?
+                    .await;
+
+                if result.is_err() {
+                    error!(self.log, "{:?}", result);
+                    error = Some(result);
+                }
+            }
+            if let Some(e) = error {
+                return e.map(|_| ()).map_err(|e| e.into());
+            }
+
+            if sleds_page.len() < 10 {
+                break;
+            }
+
+            if let Some(last) = sleds_page.last() {
+                last_sled_id = Some(last.id());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Ensures that the necessary V2P mappings for an instance are deleted.
+    pub(crate) async fn delete_instance_v2p_mappings(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+    ) -> Result<(), Error> {
+        // For every sled that isn't the sled this instance was allocated to, delete
+        // the virtual to physical mapping for each of this instance's NICs. If
+        // there isn't a V2P mapping, del_v2p should be a no-op.
+        let (.., authz_instance, db_instance) =
+            LookupPath::new(&opctx, &self.db_datastore)
+                .instance_id(instance_id)
+                .fetch_for(authz::Action::Read)
+                .await?;
+
+        let instance_nics = self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?;
+
+        // Look up the physical host IP of the sled hosting this instance.
+        let instance_sled_id = db_instance.runtime().sled_id;
+        let physical_host_ip = *self
+            .sled_lookup(&self.opctx_alloc, &instance_sled_id)?
+            .fetch()
+            .await?
+            .1
+            .ip;
+
+        let mut last_sled_id: Option<Uuid> = None;
+
+        loop {
+            let pagparams = DataPageParams {
+                marker: last_sled_id.as_ref(),
+                direction: dropshot::PaginationOrder::Ascending,
+                limit: std::num::NonZeroU32::new(10).unwrap(),
+            };
+
+            let sleds_page =
+                self.sled_list(&self.opctx_alloc, &pagparams).await?;
+            let mut join_handles =
+                Vec::with_capacity(sleds_page.len() * instance_nics.len());
+
+            for sled in &sleds_page {
+                // del_v2p is not required for the sled this instance was
+                // allocated to; OPTE currently does that automatically.
+                //
+                // TODO(#3107): Remove this when XDE stops deleting mappings
+                // implicitly.
+                if sled.id() == instance_sled_id {
+                    continue;
+                }
+
+                for nic in &instance_nics {
+                    let client = self.sled_client(&sled.id()).await?;
+                    let nic_id = nic.id;
+                    let mapping = SetVirtualNetworkInterfaceHost {
+                        virtual_ip: nic.ip,
+                        virtual_mac: nic.mac.clone(),
+                        physical_host_ip,
+                        vni: nic.vni.clone(),
+                    };
+
+                    let log = self.log.clone();
+
+                    // This function is idempotent: calling the del_v2p ioctl with
+                    // the same information is a no-op.
+                    join_handles.push(tokio::spawn(futures::future::lazy(
+                        move |_ctx| async move {
+                            retry_until_known_result(&log, || async {
+                                client.del_v2p(&nic_id, &mapping).await
+                            })
+                            .await
+                        },
+                    )));
+                }
+            }
+
+            // Concurrently run each future to completion, but return the last
+            // error seen.
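+            // (Shape of the aggregation below: each handle yields the join
+            // result wrapping the sled-agent call's result. A join error
+            // aborts immediately via `?`; inner call errors are logged and
+            // only the last one is returned, after every handle has been
+            // drained.)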
+            let mut error = None;
+            for join_handle in join_handles {
+                let result = join_handle
+                    .await
+                    .map_err(|e| Error::internal_error(&e.to_string()))?
+                    .await;
+
+                if result.is_err() {
+                    error!(self.log, "{:?}", result);
+                    error = Some(result);
+                }
+            }
+            if let Some(e) = error {
+                return e.map(|_| ()).map_err(|e| e.into());
+            }
+
+            if sleds_page.len() < 10 {
+                break;
+            }
+
+            if let Some(last) = sleds_page.last() {
+                last_sled_id = Some(last.id());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Ensures that the Dendrite configuration for the supplied instance is
+    /// up-to-date.
+    ///
+    /// # Parameters
+    ///
+    /// - `opctx`: An operation context that grants read and list-children
+    ///   permissions on the identified instance.
+    /// - `instance_id`: The ID of the instance to act on.
+    /// - `sled_ip_address`: The internal IP address assigned to the sled's
+    ///   sled agent.
+    /// - `ip_index_filter`: An optional filter on the index into the instance's
+    ///   external IP array.
+    ///   - If this is `Some(n)`, this routine configures DPD state for only the
+    ///     Nth external IP in the collection returned from CRDB. The caller is
+    ///     responsible for ensuring that the IP collection has stable indices
+    ///     when making this call.
+    ///   - If this is `None`, this routine configures DPD for all external
+    ///     IPs.
+    pub(crate) async fn instance_ensure_dpd_config(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+        sled_ip_address: &std::net::SocketAddrV6,
+        ip_index_filter: Option<usize>,
+        dpd_client: &Arc<dpd_client::Client>,
+    ) -> Result<(), Error> {
+        let log = &self.log;
+
+        info!(log, "looking up instance's primary network interface";
+              "instance_id" => %instance_id);
+
+        let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::ListChildren)
+            .await?;
+
+        // All external IPs map to the primary network interface, so find that
+        // interface. If there is no such interface, there's no way to route
+        // traffic destined to those IPs, so there's nothing to configure and
+        // it's safe to return early.
+        let network_interface = match self
+            .db_datastore
+            .derive_guest_network_interface_info(&opctx, &authz_instance)
+            .await?
+ .into_iter() + .find(|interface| interface.primary) + { + Some(interface) => interface, + None => { + info!(log, "Instance has no primary network interface"; + "instance_id" => %instance_id); + return Ok(()); + } + }; + + let mac_address = + macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) + .map_err(|e| { + Error::internal_error(&format!( + "failed to convert mac address: {e}" + )) + })?; + + let vni: u32 = network_interface.vni.into(); + + info!(log, "looking up instance's external IPs"; + "instance_id" => %instance_id); + + let ips = self + .db_datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await?; + + if let Some(wanted_index) = ip_index_filter { + if let None = ips.get(wanted_index) { + return Err(Error::internal_error(&format!( + "failed to find external ip address at index: {}", + wanted_index + ))); + } + } + + for target_ip in ips + .iter() + .enumerate() + .filter(|(index, _)| { + if let Some(wanted_index) = ip_index_filter { + *index == wanted_index + } else { + true + } + }) + .map(|(_, ip)| ip) + { + retry_until_known_result(log, || async { + dpd_client + .ensure_nat_entry( + &log, + target_ip.ip, + dpd_client::types::MacAddr { + a: mac_address.into_array(), + }, + *target_ip.first_port, + *target_ip.last_port, + vni, + sled_ip_address.ip(), + ) + .await + }) + .await + .map_err(|e| { + Error::internal_error(&format!( + "failed to ensure dpd entry: {e}" + )) + })?; + } + + Ok(()) + } + + /// Attempts to delete all of the Dendrite NAT configuration for the + /// instance identified by `authz_instance`. + /// + /// # Return value + /// + /// - `Ok(())` if all NAT entries were successfully deleted. + /// - If an operation fails before this routine begins to walk and delete + /// individual NAT entries, this routine returns `Err` and reports that + /// error. + /// - If an operation fails while this routine is walking NAT entries, it + /// will continue trying to delete subsequent entries but will return the + /// first error it encountered. 
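+    ///
+    /// A sketch of the intended call site (the instance-delete saga below
+    /// does exactly this, mapping the error into an `ActionError`):
+    ///
+    /// ```ignore
+    /// nexus.instance_delete_dpd_config(&opctx, authz_instance).await?;
+    /// ```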
+ pub(crate) async fn instance_delete_dpd_config( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = authz_instance.id(); + + info!(log, "deleting instance dpd configuration"; + "instance_id" => %instance_id); + + let external_ips = self + .db_datastore + .instance_lookup_external_ips(opctx, instance_id) + .await?; + + let boundary_switches = self.boundary_switches(opctx).await?; + + let mut errors = vec![]; + for entry in external_ips { + for switch in &boundary_switches { + debug!(log, "deleting instance nat mapping"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + + let client_result = + self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "unable to find dendrite client for {switch}" + )) + }); + + let dpd_client = match client_result { + Ok(client) => client, + Err(new_error) => { + errors.push(new_error); + continue; + } + }; + + let result = retry_until_known_result(log, || async { + dpd_client + .ensure_nat_entry_deleted( + log, + entry.ip, + *entry.first_port, + ) + .await + }) + .await; + + if let Err(e) = result { + let e = Error::internal_error(&format!( + "failed to delete nat entry via dpd: {e}" + )); + + error!(log, "error deleting nat mapping: {e:#?}"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + errors.push(e); + } else { + debug!(log, "deleting nat mapping successful"; + "instance_id" => %instance_id, + "switch" => switch.to_string(), + "entry" => #?entry); + } + } + } + + if let Some(e) = errors.into_iter().nth(0) { + return Err(e); + } + + Ok(()) + } +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4dd93f7707..5bab5e2820 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -42,6 +42,7 @@ mod external_ip; mod iam; mod image; mod instance; +mod instance_network; mod ip_pool; mod metrics; mod network_interface; diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 88a5823ad0..005e9724a6 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -8,7 +8,6 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use crate::app::sagas::retry_until_known_result; use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::{authn, authz}; @@ -151,70 +150,14 @@ async fn sid_delete_network_config( &sagactx, ¶ms.serialized_authn, ); + let authz_instance = ¶ms.authz_instance; let osagactx = sagactx.user_data(); - let datastore = &osagactx.datastore(); - let log = sagactx.user_data().log(); - - debug!(log, "fetching external ip addresses"); - - let external_ips = &datastore - .instance_lookup_external_ips(&opctx, params.authz_instance.id()) + osagactx + .nexus() + .instance_delete_dpd_config(&opctx, authz_instance) .await - .map_err(ActionError::action_failed)?; - - let mut errors: Vec = vec![]; - - // Here we are attempting to delete every existing NAT entry while deferring - // any error handling. If we don't defer error handling, we might end up - // bailing out before we've attempted deletion of all entries. 
- for entry in external_ips { - for switch in ¶ms.boundary_switches { - debug!(log, "deleting nat mapping"; "switch" => switch.to_string(), "entry" => #?entry); - - let client_result = - osagactx.nexus().dpd_clients.get(switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - ))) - }); - - let dpd_client = match client_result { - Ok(client) => client, - Err(new_error) => { - errors.push(new_error); - continue; - } - }; - - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, entry.ip, *entry.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "deleting nat mapping successful"; "switch" => switch.to_string(), "entry" => format!("{entry:#?}")); - } - Err(e) => { - let new_error = - ActionError::action_failed(Error::internal_error( - &format!("failed to delete nat entry via dpd: {e}"), - )); - error!(log, "{new_error:#?}"); - errors.push(new_error); - } - } - } - } - - if let Some(error) = errors.first() { - return Err(error.clone()); - } - - Ok(()) + .map_err(ActionError::action_failed) } async fn sid_delete_instance_record( diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 56a330f526..da89e7e25a 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -4,15 +4,12 @@ //! Sleds, and the hardware and services within them. -use crate::app::sagas::retry_until_known_result; use crate::internal_api::params::{ PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentStartupInfo, SledRole, ZpoolPutRequest, }; -use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; @@ -20,7 +17,6 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddrV6; use std::sync::Arc; @@ -317,251 +313,4 @@ impl super::Nexus { .await?; Ok(()) } - - // OPTE V2P mappings - - /// Ensures that V2P mappings exist that indicate that the instance with ID - /// `instance_id` is resident on the sled with ID `sled_id`. - pub(crate) async fn create_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - sled_id: Uuid, - ) -> Result<(), Error> { - info!(&self.log, "creating V2P mappings for instance"; - "instance_id" => %instance_id, - "sled_id" => %sled_id); - - // For every sled that isn't the sled this instance was allocated to, create - // a virtual to physical mapping for each of this instance's NICs. - // - // For the mappings to be correct, a few invariants must hold: - // - // - mappings must be set whenever an instance's sled changes (eg. - // during instance creation, migration, stop + start) - // - // - an instances' sled must not change while its corresponding mappings - // are being created - // - // - the same mapping creation must be broadcast to all sleds - // - // A more targeted approach would be to see what other instances share - // the VPC this instance is in (or more generally, what instances should - // have connectivity to this one), see what sleds those are allocated - // to, and only create V2P mappings for those sleds. 
- // - // There's additional work with this approach: - // - // - it means that delete calls are required as well as set calls, - // meaning that now the ordering of those matters (this may also - // necessitate a generation number for V2P mappings) - // - // - V2P mappings have to be bidirectional in order for both instances's - // packets to make a round trip. This isn't a problem with the - // broadcast approach because one of the sides will exist already, but - // it is something to orchestrate with a more targeted approach. - // - // TODO-correctness Default firewall rules currently will block - // instances in different VPCs from connecting to each other. If it ever - // stops doing this, the broadcast approach will create V2P mappings - // that shouldn't exist. - let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(instance_id) - .lookup_for(authz::Action::Read) - .await?; - - let instance_nics = self - .db_datastore - .derive_guest_network_interface_info(&opctx, &authz_instance) - .await?; - - // Look up the supplied sled's physical host IP. - let physical_host_ip = - *self.sled_lookup(&self.opctx_alloc, &sled_id)?.fetch().await?.1.ip; - - let mut last_sled_id: Option = None; - loop { - let pagparams = DataPageParams { - marker: last_sled_id.as_ref(), - direction: dropshot::PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(10).unwrap(), - }; - - let sleds_page = - self.sled_list(&self.opctx_alloc, &pagparams).await?; - let mut join_handles = - Vec::with_capacity(sleds_page.len() * instance_nics.len()); - - for sled in &sleds_page { - // set_v2p not required for sled instance was allocated to, OPTE - // currently does that automatically - // - // TODO(#3107): Remove this when XDE stops creating mappings - // implicitly. - if sled.id() == sled_id { - continue; - } - - for nic in &instance_nics { - let client = self.sled_client(&sled.id()).await?; - let nic_id = nic.id; - let mapping = SetVirtualNetworkInterfaceHost { - virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), - physical_host_ip, - vni: nic.vni.clone(), - }; - - let log = self.log.clone(); - - // This function is idempotent: calling the set_v2p ioctl with - // the same information is a no-op. - join_handles.push(tokio::spawn(futures::future::lazy( - move |_ctx| async move { - retry_until_known_result(&log, || async { - client.set_v2p(&nic_id, &mapping).await - }) - .await - }, - ))); - } - } - - // Concurrently run each future to completion, but return the last - // error seen. - let mut error = None; - for join_handle in join_handles { - let result = join_handle - .await - .map_err(|e| Error::internal_error(&e.to_string()))? - .await; - - if result.is_err() { - error!(self.log, "{:?}", result); - error = Some(result); - } - } - if let Some(e) = error { - return e.map(|_| ()).map_err(|e| e.into()); - } - - if sleds_page.len() < 10 { - break; - } - - if let Some(last) = sleds_page.last() { - last_sled_id = Some(last.id()); - } - } - - Ok(()) - } - - /// Ensure that the necessary v2p mappings for an instance are deleted - pub(crate) async fn delete_instance_v2p_mappings( - &self, - opctx: &OpContext, - instance_id: Uuid, - ) -> Result<(), Error> { - // For every sled that isn't the sled this instance was allocated to, delete - // the virtual to physical mapping for each of this instance's NICs. If - // there isn't a V2P mapping, del_v2p should be a no-op. 
-        let (.., authz_instance, db_instance) =
-            LookupPath::new(&opctx, &self.db_datastore)
-                .instance_id(instance_id)
-                .fetch_for(authz::Action::Read)
-                .await?;
-
-        let instance_nics = self
-            .db_datastore
-            .derive_guest_network_interface_info(&opctx, &authz_instance)
-            .await?;
-
-        // Lookup the physical host IP of the sled hosting this instance
-        let instance_sled_id = db_instance.runtime().sled_id;
-        let physical_host_ip = *self
-            .sled_lookup(&self.opctx_alloc, &instance_sled_id)?
-            .fetch()
-            .await?
-            .1
-            .ip;
-
-        let mut last_sled_id: Option<Uuid> = None;
-
-        loop {
-            let pagparams = DataPageParams {
-                marker: last_sled_id.as_ref(),
-                direction: dropshot::PaginationOrder::Ascending,
-                limit: std::num::NonZeroU32::new(10).unwrap(),
-            };
-
-            let sleds_page =
-                self.sled_list(&self.opctx_alloc, &pagparams).await?;
-            let mut join_handles =
-                Vec::with_capacity(sleds_page.len() * instance_nics.len());
-
-            for sled in &sleds_page {
-                // del_v2p not required for sled instance was allocated to, OPTE
-                // currently does that automatically
-                //
-                // TODO(#3107): Remove this when XDE stops deleting mappings
-                // implicitly.
-                if sled.id() == instance_sled_id {
-                    continue;
-                }
-
-                for nic in &instance_nics {
-                    let client = self.sled_client(&sled.id()).await?;
-                    let nic_id = nic.id;
-                    let mapping = SetVirtualNetworkInterfaceHost {
-                        virtual_ip: nic.ip,
-                        virtual_mac: nic.mac.clone(),
-                        physical_host_ip,
-                        vni: nic.vni.clone(),
-                    };
-
-                    let log = self.log.clone();
-
-                    // This function is idempotent: calling the set_v2p ioctl with
-                    // the same information is a no-op.
-                    join_handles.push(tokio::spawn(futures::future::lazy(
-                        move |_ctx| async move {
-                            retry_until_known_result(&log, || async {
-                                client.del_v2p(&nic_id, &mapping).await
-                            })
-                            .await
-                        },
-                    )));
-                }
-            }
-
-            // Concurrently run each future to completion, but return the last
-            // error seen.
-            let mut error = None;
-            for join_handle in join_handles {
-                let result = join_handle
-                    .await
-                    .map_err(|e| Error::internal_error(&e.to_string()))?
-                    .await;
-
-                if result.is_err() {
-                    error!(self.log, "{:?}", result);
-                    error = Some(result);
-                }
-            }
-            if let Some(e) = error {
-                return e.map(|_| ()).map_err(|e| e.into());
-            }
-
-            if sleds_page.len() < 10 {
-                break;
-            }
-
-            if let Some(last) = sleds_page.last() {
-                last_sled_id = Some(last.id());
-            }
-        }
-
-        Ok(())
-    }
 }
diff --git a/nexus/src/context.rs b/nexus/src/context.rs
index 73125cc617..7fd0a33c30 100644
--- a/nexus/src/context.rs
+++ b/nexus/src/context.rs
@@ -158,7 +158,7 @@ impl ServerContext {
             internal_dns::resolver::Resolver::new_from_addrs(
                 log.new(o!("component" => "DnsResolver")),
-                vec![address],
+                &[address],
             )
             .map_err(|e| format!("Failed to create DNS resolver: {}", e))?
         }
@@ -169,11 +169,12 @@ impl ServerContext {
             nexus_config::Database::FromUrl { url } => url.clone(),
             nexus_config::Database::FromDns => {
                 info!(log, "Accessing DB url from DNS");
-                // It's been requested but unfortunately not supported to directly
-                // connect using SRV based lookup.
-                // TODO-robustness: the set of cockroachdb hosts we'll use will be
-                // fixed to whatever we got back from DNS at Nexus start. This means
-                // a new cockroachdb instance won't picked up until Nexus restarts.
+                // It's been requested but unfortunately not supported to
+                // directly connect using SRV-based lookup.
+                // TODO-robustness: the set of cockroachdb hosts we'll use will
+                // be fixed to whatever we got back from DNS at Nexus start.
+                // This means a new cockroachdb instance won't be picked up
+                // until Nexus restarts.
let addrs = loop { match resolver .lookup_all_socket_v6(ServiceName::Cockroach) diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 3c3247790a..d219da7e96 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -433,7 +433,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.config.deployment.internal_dns = nexus_config::InternalDns::FromAddress { - address: *self + address: self .internal_dns .as_ref() .expect("Must initialize internal DNS server first") @@ -659,8 +659,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let dns = dns_server::TransientServer::new(&log).await.unwrap(); - let SocketAddr::V6(dns_address) = *dns.dns_server.local_address() - else { + let SocketAddr::V6(dns_address) = dns.dns_server.local_address() else { panic!("Unsupported IPv4 DNS address"); }; let SocketAddr::V6(dropshot_address) = dns.dropshot_server.local_addr() @@ -998,7 +997,7 @@ pub async fn start_dns_server( let mut resolver_config = ResolverConfig::new(); resolver_config.add_name_server(NameServerConfig { - socket_addr: *dns_server.local_address(), + socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, trust_nx_responses: false, diff --git a/nexus/tests/integration_tests/certificates.rs b/nexus/tests/integration_tests/certificates.rs index 9532dacf92..1843fc28c8 100644 --- a/nexus/tests/integration_tests/certificates.rs +++ b/nexus/tests/integration_tests/certificates.rs @@ -360,7 +360,7 @@ async fn test_silo_certificates() { // create the other Silos and their users. let resolver = Arc::new( CustomDnsResolver::new( - *cptestctx.external_dns.dns_server.local_address(), + cptestctx.external_dns.dns_server.local_address(), ) .unwrap(), ); diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 1251077c2e..2d4c76dc99 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -41,7 +41,7 @@ async fn test_nexus_boots_before_cockroach() { omicron_common::nexus_config::Database::FromDns; builder.config.deployment.internal_dns = omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") @@ -121,7 +121,7 @@ async fn test_nexus_boots_before_dendrite() { builder.config.pkg.dendrite = HashMap::new(); builder.config.deployment.internal_dns = omicron_common::nexus_config::InternalDns::FromAddress { - address: *builder + address: builder .internal_dns .as_ref() .expect("Must start Internal DNS before acquiring an address") diff --git a/omdb/src/bin/omdb/main.rs b/omdb/src/bin/omdb/main.rs deleted file mode 100644 index d0c47f8e2a..0000000000 --- a/omdb/src/bin/omdb/main.rs +++ /dev/null @@ -1,80 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! CLI for debugging Omicron internal state -//! -//! GROUND RULES: -//! -//! 1. There aren't a lot of ground rules here. At least for now, this is a -//! place to put any kind of runtime tooling for Omicron that seems useful. -//! You can query the database directly (see notes in db.rs), use internal -//! APIs, etc. To the degree that we can stick to stable interfaces, great. -//! 
But at this stage we'd rather have tools that work on latest than not -//! have them because we couldn't prioritize keeping them stable. -//! -//! 2. Where possible, when the tool encounters something unexpected, it should -//! print what it can (including the error message and bad data) and then -//! continue. It generally shouldn't stop on the first error. (We often -//! find strange things when debugging but we need our tools to tell us as -//! much as they can!) - -use anyhow::Context; -use clap::Parser; -use clap::Subcommand; - -mod db; -mod nexus; -mod sled_agent; - -#[tokio::main] -async fn main() -> Result<(), anyhow::Error> { - let args = Omdb::parse(); - - let log = dropshot::ConfigLogging::StderrTerminal { level: args.log_level } - .to_logger("omdb") - .context("failed to create logger")?; - - match args.command { - OmdbCommands::Db(db) => db.run_cmd(&log).await, - OmdbCommands::Nexus(nexus) => nexus.run_cmd(&log).await, - OmdbCommands::SledAgent(sled) => sled.run_cmd(&log).await, - } -} - -/// Omicron debugger (unstable) -/// -/// This tool provides commands for directly querying Omicron components about -/// their internal state using internal APIs. This is a prototype. The -/// commands and output are unstable and may change. -#[derive(Debug, Parser)] -struct Omdb { - /// log level filter - #[arg( - env, - long, - value_parser = parse_dropshot_log_level, - default_value = "warn", - )] - log_level: dropshot::ConfigLoggingLevel, - - #[command(subcommand)] - command: OmdbCommands, -} - -#[derive(Debug, Subcommand)] -#[allow(clippy::large_enum_variant)] -enum OmdbCommands { - /// Query the control plane database (CockroachDB) - Db(db::DbArgs), - /// Debug a specific Nexus instance - Nexus(nexus::NexusArgs), - /// Debug a specific Sled - SledAgent(sled_agent::SledAgentArgs), -} - -fn parse_dropshot_log_level( - s: &str, -) -> Result { - serde_json::from_str(&format!("{:?}", s)).context("parsing log level") -} diff --git a/omdb/tests/config.test.toml b/omdb/tests/config.test.toml deleted file mode 120000 index 9a42a12b61..0000000000 --- a/omdb/tests/config.test.toml +++ /dev/null @@ -1 +0,0 @@ -../../nexus/tests/config.test.toml \ No newline at end of file diff --git a/omdb/tests/env.out b/omdb/tests/env.out deleted file mode 100644 index 237743be93..0000000000 --- a/omdb/tests/env.out +++ /dev/null @@ -1,74 +0,0 @@ -EXECUTING COMMAND: omdb ["db", "--db-url", "postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable", "sleds"] -termination: Exited(0) ---------------------------------------------- -stdout: -SERIAL IP ROLE ID -sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED ---------------------------------------------- -stderr: -note: database schema version matches expected (4.0.0) -============================================= -EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] -termination: Exited(2) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -error: invalid value 'junk' for '--db-url ': invalid connection string: unexpected EOF - -For more information, try '--help'. 
-============================================= -EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDACTED_PORT", "background-tasks", "doc"] -termination: Exited(0) ---------------------------------------------- -stdout: -task: "dns_config_external" - watches external DNS data stored in CockroachDB - - -task: "dns_config_internal" - watches internal DNS data stored in CockroachDB - - -task: "dns_propagation_external" - propagates latest external DNS configuration (from "dns_config_external" - background task) to the latest list of DNS servers (from - "dns_servers_external" background task) - - -task: "dns_propagation_internal" - propagates latest internal DNS configuration (from "dns_config_internal" - background task) to the latest list of DNS servers (from - "dns_servers_internal" background task) - - -task: "dns_servers_external" - watches list of external DNS servers stored in CockroachDB - - -task: "dns_servers_internal" - watches list of internal DNS servers stored in CockroachDB - - -task: "external_endpoints" - reads config for silos and TLS certificates to determine the right set of - HTTP endpoints, their HTTP server names, and which TLS certificates to use - on each one - - ---------------------------------------------- -stderr: -============================================= -EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "junk", "background-tasks", "doc"] -termination: Exited(1) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -Error: listing background tasks - -Caused by: - 0: Communication Error: builder error: relative URL without a base - 1: builder error: relative URL without a base - 2: relative URL without a base -=============================================
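Taken together, the resolver-facing changes swap `Vec<SocketAddr>` arguments for borrowed slices and return `SocketAddr` (which is `Copy`) by value from `local_address()`. A minimal sketch of the updated calling pattern, assuming a `slog::Logger` named `log` and a `dns_server::TransientServer` named `dns` are already set up (error handling elided):

```rust
// local_address() now returns the SocketAddr by value...
let addr = dns.dns_server.local_address();

// ...and new_from_addrs borrows a slice rather than consuming a Vec, so
// callers no longer need to allocate.
let resolver =
    internal_dns::resolver::Resolver::new_from_addrs(log.clone(), &[addr])?;
```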