diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index d3d071cb3e..a0137eddde 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -6,7 +6,7 @@ #: rust_toolchain = true #: output_rules = [ #: "%/work/*", -#: "%/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/**/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index c4e99f0f2a..bc4de5cc1a 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -6,7 +6,7 @@ #: rust_toolchain = true #: output_rules = [ #: "%/work/*", -#: "%/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/**/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/Cargo.lock b/Cargo.lock index 007a1b6782..5cf2e3ad1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1398,7 +1398,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" +source = "git+https://github.com/oxidecomputer/crucible?rev=e10f8793f8414fdb9a165219f17e45fa014d088b#e10f8793f8414fdb9a165219f17e45fa014d088b" dependencies = [ "anyhow", "chrono", @@ -1414,7 +1414,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" +source = "git+https://github.com/oxidecomputer/crucible?rev=e10f8793f8414fdb9a165219f17e45fa014d088b#e10f8793f8414fdb9a165219f17e45fa014d088b" dependencies = [ "anyhow", "chrono", @@ -1431,7 +1431,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" +source = "git+https://github.com/oxidecomputer/crucible?rev=e10f8793f8414fdb9a165219f17e45fa014d088b#e10f8793f8414fdb9a165219f17e45fa014d088b" dependencies = [ "crucible-workspace-hack", "libc", @@ -3940,11 +3940,11 @@ checksum = "d3c48237b9604c5a4702de6b824e02006c3214327564636aef27c1028a8fa0ed" [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.5.2", + "spin 0.9.8", ] [[package]] @@ -4023,7 +4023,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -4806,6 +4806,7 @@ dependencies = [ "anyhow", "async-bb8-diesel", "chrono", + "cockroach-admin-client", "diesel", "dns-service-client", "futures", @@ -5300,6 +5301,7 @@ dependencies = [ "openapi-lint", "openapiv3", "pq-sys", + "proptest", "schemars", "serde", "serde_json", @@ -5308,6 +5310,7 @@ dependencies = [ "slog-dtrace", "slog-error-chain", "subprocess", + "test-strategy", "thiserror", "tokio", "tokio-postgres", @@ -8550,7 +8553,7 @@ dependencies = [ [[package]] name = 
"serde_human_bytes" version = "0.1.0" -source = "git+http://github.com/oxidecomputer/serde_human_bytes?branch=main#0a09794501b6208120528c3b457d5f3a8cb17424" +source = "git+https://github.com/oxidecomputer/serde_human_bytes?branch=main#0a09794501b6208120528c3b457d5f3a8cb17424" dependencies = [ "hex", "serde", @@ -9206,7 +9209,7 @@ dependencies = [ [[package]] name = "sprockets-common" version = "0.1.0" -source = "git+http://github.com/oxidecomputer/sprockets?rev=77df31efa5619d0767ffc837ef7468101608aee9#77df31efa5619d0767ffc837ef7468101608aee9" +source = "git+https://github.com/oxidecomputer/sprockets?rev=77df31efa5619d0767ffc837ef7468101608aee9#77df31efa5619d0767ffc837ef7468101608aee9" dependencies = [ "derive_more", "hubpack 0.1.0", @@ -9218,7 +9221,7 @@ dependencies = [ [[package]] name = "sprockets-rot" version = "0.1.0" -source = "git+http://github.com/oxidecomputer/sprockets?rev=77df31efa5619d0767ffc837ef7468101608aee9#77df31efa5619d0767ffc837ef7468101608aee9" +source = "git+https://github.com/oxidecomputer/sprockets?rev=77df31efa5619d0767ffc837ef7468101608aee9#77df31efa5619d0767ffc837ef7468101608aee9" dependencies = [ "corncobs", "derive_more", @@ -9286,9 +9289,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "steno" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a1e7ccea133c197729abfd16dccf91a3c4d0da1e94bb0c0aa164c2b8a227481" +checksum = "a77bea0b19d52c04783569b87bb3e8d47c85e285be239a9b0428241bdbb95460" dependencies = [ "anyhow", "async-trait", @@ -9865,7 +9868,7 @@ dependencies = [ [[package]] name = "tofino" version = "0.1.0" -source = "git+http://github.com/oxidecomputer/tofino?branch=main#1b66b89c3727d2191082df057b068ec52560e334" +source = "git+https://github.com/oxidecomputer/tofino?branch=main#1b66b89c3727d2191082df057b068ec52560e334" dependencies = [ "anyhow", "cc", diff --git a/Cargo.toml b/Cargo.toml index b65a1f7c32..8d4b783dcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -260,9 +260,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e10f8793f8414fdb9a165219f17e45fa014d088b" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e10f8793f8414fdb9a165219f17e45fa014d088b" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "e10f8793f8414fdb9a165219f17e45fa014d088b" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.9" @@ -437,7 +437,7 @@ schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.23", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } -serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } +serde_human_bytes = { git = "https://github.com/oxidecomputer/serde_human_bytes", branch = "main" } serde_json = "1.0.117" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" @@ -470,16 +470,16 @@ 
slog-term = "2.9.1" smf = "0.2" socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } -sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } -sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } -sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sprockets-common = { git = "https://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sprockets-host = { git = "https://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sprockets-rot = { git = "https://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sqlformat = "0.2.3" sqlparser = { version = "0.45.0", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific # SHA). Cut a new Steno release instead. See omicron#2117. -steno = "0.4.0" +steno = "0.4.1" strum = { version = "0.26", features = [ "derive" ] } subprocess = "0.2.9" supports-color = "3.0.0" @@ -494,7 +494,7 @@ termios = "0.3" textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" -tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } +tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" } tokio = "1.37.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 9043592414..fc64c48a63 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -43,6 +43,7 @@ progenitor::generate_api!( TypedUuidForUpstairsKind = omicron_uuid_kinds::TypedUuid, TypedUuidForUpstairsRepairKind = omicron_uuid_kinds::TypedUuid, TypedUuidForUpstairsSessionKind = omicron_uuid_kinds::TypedUuid, + TypedUuidForZpoolKind = omicron_uuid_kinds::TypedUuid, }, patch = { SledAgentInfo = { derives = [PartialEq, Eq] }, diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml index 49401afb9d..07f9807463 100644 --- a/cockroach-admin/Cargo.toml +++ b/cockroach-admin/Cargo.toml @@ -40,8 +40,10 @@ nexus-test-utils.workspace = true omicron-test-utils.workspace = true openapi-lint.workspace = true openapiv3.workspace = true +proptest.workspace = true serde_json.workspace = true subprocess.workspace = true +test-strategy.workspace = true url.workspace = true [lints] diff --git a/cockroach-admin/proptest-regressions/cockroach_cli.txt b/cockroach-admin/proptest-regressions/cockroach_cli.txt new file mode 100644 index 0000000000..7583f353c0 --- /dev/null +++ b/cockroach-admin/proptest-regressions/cockroach_cli.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 924830db4b84d94683d81ff27010fec0df19a72155729e60fda503b58460390e # shrinks to input = _NodeDecommissionParseDoesntPanicOnArbitraryInputArgs { input: [10, 10] } diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs index 00478b81a1..1951866ce7 100644 --- a/cockroach-admin/src/cockroach_cli.rs +++ b/cockroach-admin/src/cockroach_cli.rs @@ -82,10 +82,41 @@ impl CockroachCli { pub async fn node_status( &self, ) -> Result, CockroachCliError> { + self.invoke_cli_with_format_csv( + ["node", "status"].into_iter(), + NodeStatus::parse_from_csv, + "node status", + ) + .await + } + + pub async fn node_decommission( + &self, + node_id: &str, + ) -> Result { + self.invoke_cli_with_format_csv( + ["node", "decommission", node_id, "--wait", "none"].into_iter(), + NodeDecommission::parse_from_csv, + "node decommission", + ) + .await + } + + async fn invoke_cli_with_format_csv<'a, F, I, T>( + &self, + subcommand_args: I, + parse_output: F, + subcommand_description: &'static str, + ) -> Result + where + F: FnOnce(&[u8]) -> Result, + I: Iterator, + { let mut command = Command::new(&self.path_to_cockroach_binary); + for arg in subcommand_args { + command.arg(arg); + } command - .arg("node") - .arg("status") .arg("--host") .arg(&format!("{}", self.cockroach_address)) .arg("--insecure") @@ -97,14 +128,14 @@ impl CockroachCli { if !output.status.success() { return Err(output_to_exec_error(command.as_std(), &output).into()); } - NodeStatus::parse_from_csv(io::Cursor::new(&output.stdout)).map_err( - |err| CockroachCliError::ParseOutput { - subcommand: "node status", + parse_output(&output.stdout).map_err(|err| { + CockroachCliError::ParseOutput { + subcommand: subcommand_description, stdout: String::from_utf8_lossy(&output.stdout).to_string(), stderr: String::from_utf8_lossy(&output.stderr).to_string(), err, - }, - ) + } + }) } } @@ -123,10 +154,8 @@ pub struct NodeStatus { } // Slightly different `NodeStatus` that matches what we get from `cockroach`: -// -// * `id` column instead of `node_id` -// * timestamps are a fixed format with no timezone, so we have a custom -// deserializer +// timestamps are a fixed format with no timezone (but are actually UTC), so we +// have a custom deserializer, and the ID column is `id` instead of `node_id`. #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] struct CliNodeStatus { id: String, @@ -189,12 +218,9 @@ where } impl NodeStatus { - pub fn parse_from_csv(reader: R) -> Result, csv::Error> - where - R: io::Read, - { + pub fn parse_from_csv(data: &[u8]) -> Result, csv::Error> { let mut statuses = Vec::new(); - let mut reader = csv::Reader::from_reader(reader); + let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); for result in reader.deserialize() { let record: CliNodeStatus = result?; statuses.push(record.into()); @@ -203,17 +229,146 @@ impl NodeStatus { } } +// The cockroach CLI and `crdb_internal.gossip_liveness` table use a string for +// node membership, but there are only three meaningful values per +// https://github.com/cockroachdb/cockroach/blob/0c92c710d2baadfdc5475be8d2238cf26cb152ca/pkg/kv/kvserver/liveness/livenesspb/liveness.go#L96, +// so we'll convert into a Rust enum and leave the "unknown" case for future +// changes that expand or reword these values. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "state", rename_all = "lowercase")] +pub enum NodeMembership { + Active, + Decommissioning, + Decommissioned, + Unknown { value: String }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeDecommission { + pub node_id: String, + pub is_live: bool, + pub replicas: i64, + pub is_decommissioning: bool, + pub membership: NodeMembership, + pub is_draining: bool, + pub notes: Vec, +} + +// Slightly different `NodeDecommission` that matches what we get from +// `cockroach`: this omits `notes` (which isn't really a CSV field at all, but +// is instead where we collect the non-CSV string output from the CLI), uses +// a custom deserializer for `membership` to handle unknown variants, and the ID +// column is `id` instead of `node_id`. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +struct CliNodeDecommission { + pub id: String, + pub is_live: bool, + pub replicas: i64, + pub is_decommissioning: bool, + #[serde(deserialize_with = "parse_node_membership")] + pub membership: NodeMembership, + pub is_draining: bool, +} + +impl From<(CliNodeDecommission, Vec)> for NodeDecommission { + fn from((cli, notes): (CliNodeDecommission, Vec)) -> Self { + Self { + node_id: cli.id, + is_live: cli.is_live, + replicas: cli.replicas, + is_decommissioning: cli.is_decommissioning, + membership: cli.membership, + is_draining: cli.is_draining, + notes, + } + } +} + +fn parse_node_membership<'de, D>(d: D) -> Result +where + D: serde::Deserializer<'de>, +{ + struct CockroachNodeMembershipVisitor; + + impl<'de> de::Visitor<'de> for CockroachNodeMembershipVisitor { + type Value = NodeMembership; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter.write_str("a Cockroach node membership string") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let membership = match v { + "active" => NodeMembership::Active, + "decommissioning" => NodeMembership::Decommissioning, + "decommissioned" => NodeMembership::Decommissioned, + _ => NodeMembership::Unknown { value: v.to_string() }, + }; + Ok(membership) + } + } + + d.deserialize_str(CockroachNodeMembershipVisitor) +} + +impl NodeDecommission { + pub fn parse_from_csv(data: &[u8]) -> Result { + // Reading the node decommission output is awkward because it isn't + // fully CSV. We expect a CSV header, then a row for each node being + // decommissioned, then (maybe) a blank line followed by a note that is + // just a string, not related to the initial CSV data. Even though the + // CLI supports decommissioning more than one node in one invocation, we + // only provide an API to decommission a single node, so we expect: + // + // 1. The CSV header line + // 2. The one row of CSV data + // 3. Trailing notes + // + // We'll collect the notes as a separate field and return them to our + // caller. + + // First we'll run the data through a csv::Reader; this will pull out + // the header row and the one row of data. + let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); + let record: CliNodeDecommission = + reader.deserialize().next().ok_or_else(|| { + io::Error::other("fewer than two lines of output") + })??; + + // Get the position where the reader ended after that one row; we'll + // collect any remaining nonempty lines as `notes`.
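+ // (`csv::Reader::position()` reports the byte offset just past the + // record parsed so far, so slicing `data` there leaves only the + // trailing non-CSV output.)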
+ let extra_data = &data[reader.position().byte() as usize..]; + let mut notes = Vec::new(); + for line in String::from_utf8_lossy(extra_data).lines() { + let line = line.trim(); + if !line.is_empty() { + notes.push(line.to_string()); + } + } + + Ok(Self::from((record, notes))) + } +} + #[cfg(test)] mod tests { use super::*; use chrono::NaiveDate; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; + use test_strategy::proptest; use url::Url; #[test] fn test_node_status_parse_single_line_from_csv() { - let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live + let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live 1,[::1]:42021,[::1]:42021,v22.1.9,2024-05-21 15:19:50.523796,2024-05-21 16:31:28.050069,,true,true"#; let expected = NodeStatus { node_id: "1".to_string(), @@ -239,14 +394,13 @@ mod tests { is_live: true, }; - let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input)) - .expect("parsed input"); + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); assert_eq!(statuses, vec![expected]); } #[test] fn test_node_status_parse_multiple_lines_from_csv() { - let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live + let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live 1,[fd00:1122:3344:109::3]:32221,[fd00:1122:3344:109::3]:32221,v22.1.9-dirty,2024-05-18 19:18:00.597145,2024-05-21 15:22:34.290434,,true,true 2,[fd00:1122:3344:105::3]:32221,[fd00:1122:3344:105::3]:32221,v22.1.9-dirty,2024-05-18 19:17:01.796714,2024-05-21 15:22:34.901268,,true,true 3,[fd00:1122:3344:10b::3]:32221,[fd00:1122:3344:10b::3]:32221,v22.1.9-dirty,2024-05-18 19:18:52.37564,2024-05-21 15:22:36.341146,,true,true @@ -370,14 +524,78 @@ mod tests { }, ]; - let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input)) - .expect("parsed input"); + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); assert_eq!(statuses.len(), expected.len()); for (status, expected) in statuses.iter().zip(&expected) { assert_eq!(status, expected); } } + #[test] + fn test_node_decommission_parse_with_no_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,true,24,true,decommissioning,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: true, + replicas: 24, + is_decommissioning: true, + membership: NodeMembership::Decommissioning, + is_draining: false, + notes: vec![], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,decommissioned,false + +No more data reported on target nodes. Please verify cluster health before removing the nodes. +"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Decommissioned, + is_draining: false, + notes: vec!["No more data reported on target nodes. \ + Please verify cluster health before removing the nodes." 
+ .to_string()], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_unexpected_membership_value() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,foobar,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Unknown { value: "foobar".to_string() }, + is_draining: false, + notes: vec![], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + // Ensure that if `cockroach node status` changes in a future CRDB version // bump, we have a test that will fail to force us to check whether our // current parsing is still valid. @@ -435,4 +653,82 @@ mod tests { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Ensure that if `cockroach node decommission` changes in a future CRDB + // version bump, we have a test that will fail to force us to check whether + // our current parsing is still valid. + #[tokio::test] + async fn test_node_decommission_compatibility() { + let logctx = + dev::test_setup_log("test_node_decommission_compatibility"); + let mut db = test_setup_database(&logctx.log).await; + let db_url = db.listen_url().to_string(); + + let expected_headers = + "id,is_live,replicas,is_decommissioning,membership,is_draining"; + + // Manually run cockroach node decommission to grab just the CSV header + // line (which the `csv` crate normally eats on our behalf) and check + // it's exactly what we expect. + let mut command = Command::new("cockroach"); + command + .arg("node") + .arg("decommission") + .arg("1") + .arg("--wait") + .arg("none") + .arg("--url") + .arg(&db_url) + .arg("--format") + .arg("csv"); + let output = + command.output().await.expect("ran `cockroach node decommission`"); + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut lines = stdout.lines(); + let headers = lines.next().expect("header line"); + assert_eq!( + headers, expected_headers, + "`cockroach node decommission --format csv` headers \ + may have changed?" + ); + + // We should also be able to run our wrapper against this cockroach. + let url: Url = db_url.parse().expect("valid url"); + let cockroach_address: SocketAddrV6 = format!( + "{}:{}", + url.host().expect("url has host"), + url.port().expect("url has port") + ) + .parse() + .expect("valid SocketAddrV6"); + let cli = CockroachCli::new("cockroach".into(), cockroach_address); + let result = cli + .node_decommission("1") + .await + .expect("got node decommission result"); + + // We can't check all the fields exactly (e.g., replicas), but most we + // know based on the fact that our test database is a single node, so + // won't actually decommission itself. 
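+ // (With only a single node there is nowhere to move replicas, so the + // node stays in the `decommissioning` membership state rather than + // reaching `decommissioned`.)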
+ assert_eq!(result.node_id, "1"); + assert_eq!(result.is_live, true); + assert_eq!(result.is_decommissioning, true); + assert_eq!(result.membership, NodeMembership::Decommissioning); + assert_eq!(result.is_draining, false); + assert_eq!(result.notes, &[] as &[&str]); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[proptest] + fn node_status_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeStatus::parse_from_csv(&input); + } + + #[proptest] + fn node_decommission_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeDecommission::parse_from_csv(&input); + } } diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs index bf12eb933b..1c02d23ae2 100644 --- a/cockroach-admin/src/http_entrypoints.rs +++ b/cockroach-admin/src/http_entrypoints.rs @@ -2,12 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::cockroach_cli::NodeDecommission; use crate::cockroach_cli::NodeStatus; use crate::context::ServerContext; use dropshot::endpoint; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::RequestContext; +use dropshot::TypedBody; use omicron_uuid_kinds::OmicronZoneUuid; use schemars::JsonSchema; use serde::Deserialize; @@ -18,8 +20,9 @@ type CrdbApiDescription = dropshot::ApiDescription>; pub fn api() -> CrdbApiDescription { fn register_endpoints(api: &mut CrdbApiDescription) -> Result<(), String> { - api.register(node_id)?; + api.register(local_node_id)?; api.register(node_status)?; + api.register(node_decommission)?; Ok(()) } @@ -53,7 +56,7 @@ async fn node_status( /// CockroachDB Node ID #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case")] -pub struct NodeId { +pub struct LocalNodeId { /// The ID of this Omicron zone. 
/// /// This is included to ensure correctness even if a socket address on a @@ -75,11 +78,33 @@ pub struct NodeId { method = GET, path = "/node/id", }] -async fn node_id( +async fn local_node_id( rqctx: RequestContext>, -) -> Result, HttpError> { +) -> Result, HttpError> { let ctx = rqctx.context(); let node_id = ctx.node_id().await?.to_string(); let zone_id = ctx.zone_id(); - Ok(HttpResponseOk(NodeId { zone_id, node_id })) + Ok(HttpResponseOk(LocalNodeId { zone_id, node_id })) +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeId { + pub node_id: String, +} + +/// Decommission a node from the CRDB cluster +#[endpoint { + method = POST, + path = "/node/decommission", +}] +async fn node_decommission( + rqctx: RequestContext>, + body: TypedBody, +) -> Result, HttpError> { + let ctx = rqctx.context(); + let NodeId { node_id } = body.into_inner(); + let decommission_status = + ctx.cockroach_cli().node_decommission(&node_id).await?; + Ok(HttpResponseOk(decommission_status)) } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index bc212281b2..3234a3fdc2 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -433,6 +433,8 @@ enum BlueprintEditCommands { /// sled on which to deploy the new instance sled_id: SledUuid, }, + /// add a CockroachDB instance to a particular sled + AddCockroach { sled_id: SledUuid }, } #[derive(Debug, Args)] @@ -756,6 +758,18 @@ fn cmd_blueprint_edit( ); format!("added Nexus zone to sled {}", sled_id) } + BlueprintEditCommands::AddCockroach { sled_id } => { + let current = + builder.sled_num_zones_of_kind(sled_id, ZoneKind::CockroachDb); + let added = builder + .sled_ensure_zone_multiple_cockroachdb(sled_id, current + 1) + .context("failed to add CockroachDB zone")?; + assert_matches::assert_matches!( + added, + EnsureMultiple::Changed { added: 1, removed: 0 } + ); + format!("added CockroachDB zone to sled {}", sled_id) + } }; let new_blueprint = builder.build(); diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 1ae78487a3..0a997c0118 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -8,6 +8,7 @@ use expectorate::assert_contents; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; +use nexus_test_utils::resource_helpers::DiskTestBuilder; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::Blueprint; @@ -56,6 +57,15 @@ type ControlPlaneTestContext = #[nexus_test] async fn test_blueprint_edit(cptestctx: &ControlPlaneTestContext) { // Setup + // + // Add a zpool to all sleds, just to ensure that all new zones can find + // a transient filesystem wherever they end up being placed. 
+ DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); let log = &cptestctx.logctx.log; diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index e6a66543c7..c398f6d11c 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -31,7 +31,7 @@ use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; -use omicron_uuid_kinds::{ExternalIpKind, SledKind}; +use omicron_uuid_kinds::{ExternalIpKind, SledKind, ZpoolKind}; use uuid::Uuid; /// See [`nexus_types::deployment::Blueprint`]. @@ -248,6 +248,7 @@ pub struct BpOmicronZone { disposition: DbBpZoneDisposition, pub external_ip_id: Option>, + pub filesystem_pool: Option>, } impl BpOmicronZone { @@ -264,6 +265,7 @@ impl BpOmicronZone { sled_id, blueprint_zone.id.into_untyped_uuid(), blueprint_zone.underlay_address, + blueprint_zone.filesystem_pool.as_ref().map(|pool| pool.id()), &blueprint_zone.zone_type.clone().into(), external_ip_id, )?; @@ -291,6 +293,10 @@ impl BpOmicronZone { snat_last_port: zone.snat_last_port, disposition: to_db_bp_zone_disposition(blueprint_zone.disposition), external_ip_id: zone.external_ip_id.map(From::from), + filesystem_pool: blueprint_zone + .filesystem_pool + .as_ref() + .map(|pool| pool.id().into()), }) } @@ -302,6 +308,7 @@ impl BpOmicronZone { sled_id: self.sled_id.into(), id: self.id, underlay_address: self.underlay_address, + filesystem_pool: self.filesystem_pool.map(|id| id.into()), zone_type: self.zone_type, primary_service_ip: self.primary_service_ip, primary_service_port: self.primary_service_port, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 4abd7fe927..14c4684e1e 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -37,6 +37,7 @@ use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolKind; use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; @@ -1025,6 +1026,7 @@ pub struct InvOmicronZone { pub snat_ip: Option, pub snat_first_port: Option, pub snat_last_port: Option, + pub filesystem_pool: Option>, } impl InvOmicronZone { @@ -1039,6 +1041,7 @@ impl InvOmicronZone { sled_id, zone.id, zone.underlay_address, + zone.filesystem_pool.as_ref().map(|pool| pool.id()), &zone.zone_type, external_ip_id, )?; @@ -1064,6 +1067,7 @@ impl InvOmicronZone { snat_ip: zone.snat_ip, snat_first_port: zone.snat_first_port, snat_last_port: zone.snat_last_port, + filesystem_pool: zone.filesystem_pool.map(|id| id.into()), }) } @@ -1075,6 +1079,7 @@ impl InvOmicronZone { sled_id: self.sled_id.into(), id: self.id, underlay_address: self.underlay_address, + filesystem_pool: self.filesystem_pool.map(|id| id.into()), zone_type: self.zone_type, primary_service_ip: self.primary_service_ip, primary_service_port: self.primary_service_port, diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index 3b18a749a7..bb3eac7046 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -23,8 +23,9 @@ use nexus_types::deployment::{ }; use nexus_types::inventory::{NetworkInterface, OmicronZoneType}; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use 
omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::{ - ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, + ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid, }; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; @@ -34,6 +35,7 @@ pub(crate) struct OmicronZone { pub(crate) sled_id: SledUuid, pub(crate) id: Uuid, pub(crate) underlay_address: ipv6::Ipv6Addr, + pub(crate) filesystem_pool: Option, pub(crate) zone_type: ZoneType, pub(crate) primary_service_ip: ipv6::Ipv6Addr, pub(crate) primary_service_port: SqlU16, @@ -60,6 +62,7 @@ impl OmicronZone { sled_id: SledUuid, zone_id: Uuid, zone_underlay_address: Ipv6Addr, + filesystem_pool: Option, zone_type: &nexus_types::inventory::OmicronZoneType, external_ip_id: Option, ) -> anyhow::Result { @@ -201,6 +204,7 @@ impl OmicronZone { sled_id, id, underlay_address, + filesystem_pool, zone_type, primary_service_ip, primary_service_port, @@ -365,6 +369,9 @@ impl OmicronZone { disposition, id: OmicronZoneUuid::from_untyped_uuid(common.id), underlay_address: std::net::Ipv6Addr::from(common.underlay_address), + filesystem_pool: common + .filesystem_pool + .map(|id| ZpoolName::new_external(id)), zone_type, }) } @@ -468,6 +475,9 @@ impl OmicronZone { Ok(nexus_types::inventory::OmicronZoneConfig { id: common.id, underlay_address: std::net::Ipv6Addr::from(common.underlay_address), + filesystem_pool: common + .filesystem_pool + .map(|id| ZpoolName::new_external(id)), zone_type, }) } @@ -558,6 +568,7 @@ impl OmicronZone { Ok(ZoneConfigCommon { id: self.id, underlay_address: self.underlay_address, + filesystem_pool: self.filesystem_pool, zone_type: self.zone_type, primary_service_address, snat_ip: self.snat_ip, @@ -582,6 +593,7 @@ impl OmicronZone { struct ZoneConfigCommon { id: Uuid, underlay_address: ipv6::Ipv6Addr, + filesystem_pool: Option, zone_type: ZoneType, primary_service_address: SocketAddrV6, snat_ip: Option, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 7fa07aa131..990aab151c 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1476,6 +1476,7 @@ table! { snat_ip -> Nullable, snat_first_port -> Nullable, snat_last_port -> Nullable, + filesystem_pool -> Nullable, } } @@ -1592,6 +1593,7 @@ table! { snat_last_port -> Nullable, disposition -> crate::DbBpZoneDispositionEnum, external_ip_id -> Nullable, + filesystem_pool -> Nullable, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 6f537bb522..8255d684cb 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,8 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(80, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(81, 0, 0); + /// List of all past database schema versions, in *reverse* order /// /// If you want to change the Omicron database schema, you must update this. @@ -28,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(81, "add-nullable-filesystem-pool"), KnownVersion::new(80, "add-instance-id-to-migrations"), KnownVersion::new(79, "nic-spoof-allow"), KnownVersion::new(78, "vpc-subnet-routing"), diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 627f1f60ab..dac1c2847d 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -1035,6 +1035,7 @@ mod test { IdentityMetadataCreateParams, MacAddr, Vni, }; use omicron_common::api::internal::shared::SourceNatConfig; + use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev; use omicron_uuid_kinds::{ExternalIpUuid, OmicronZoneUuid}; use omicron_uuid_kinds::{GenericUuid, ZpoolUuid}; @@ -1287,6 +1288,10 @@ mod test { fn_to_get_all!(ip_pool_range, IpPoolRange); fn_to_get_all!(dataset, Dataset); + fn random_zpool() -> ZpoolName { + ZpoolName::new_external(ZpoolUuid::new_v4()) + } + fn random_dataset() -> OmicronZoneDataset { OmicronZoneDataset { pool_name: illumos_utils::zpool::ZpoolName::new_external( @@ -1361,6 +1366,7 @@ mod test { let mut macs = MacAddr::iter_system(); let mut blueprint_zones = BTreeMap::new(); + let dataset = random_dataset(); blueprint_zones.insert( SledUuid::from_untyped_uuid(sled1.id()), BlueprintZonesConfig { @@ -1370,9 +1376,10 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: external_dns_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(dataset.pool_name.clone()), zone_type: BlueprintZoneType::ExternalDns( blueprint_zone_type::ExternalDns { - dataset: random_dataset(), + dataset, http_address: "[::1]:80".parse().unwrap(), dns_address: OmicronZoneExternalFloatingAddr { id: ExternalIpUuid::new_v4(), @@ -1399,6 +1406,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: ntp1_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::BoundaryNtp( blueprint_zone_type::BoundaryNtp { address: "[::1]:80".parse().unwrap(), @@ -1441,6 +1449,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: "[::1]:80".parse().unwrap(), @@ -1473,6 +1482,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: ntp2_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::BoundaryNtp( blueprint_zone_type::BoundaryNtp { address: "[::1]:80".parse().unwrap(), @@ -1514,6 +1524,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: ntp3_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::InternalNtp( blueprint_zone_type::InternalNtp { address: "[::1]:80".parse().unwrap(), @@ -1696,6 +1707,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: nexus_id1, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: "[::1]:80".parse().unwrap(), @@ -1728,6 +1740,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: nexus_id2, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: 
"[::1]:80".parse().unwrap(), @@ -1969,6 +1982,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: "[::1]:80".parse().unwrap(), @@ -2067,6 +2081,7 @@ mod test { let mut macs = MacAddr::iter_system(); let mut blueprint_zones = BTreeMap::new(); + let dataset = random_dataset(); blueprint_zones.insert( SledUuid::from_untyped_uuid(sled.id()), BlueprintZonesConfig { @@ -2076,9 +2091,10 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: external_dns_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(dataset.pool_name.clone()), zone_type: BlueprintZoneType::ExternalDns( blueprint_zone_type::ExternalDns { - dataset: random_dataset(), + dataset, http_address: "[::1]:80".parse().unwrap(), dns_address: OmicronZoneExternalFloatingAddr { id: ExternalIpUuid::new_v4(), @@ -2105,6 +2121,7 @@ mod test { disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(random_zpool()), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: "[::1]:80".parse().unwrap(), diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index a64d092e1c..b65d87505f 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -379,7 +379,9 @@ mod test { use gateway_messages::SpPort; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; + use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; + use omicron_uuid_kinds::ZpoolUuid; use std::fmt::Write; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -547,6 +549,7 @@ mod test { let sled_url = format!("http://{}/", agent.http_server.local_addr()); let client = sled_agent_client::Client::new(&sled_url, log); + let filesystem_pool = ZpoolName::new_external(ZpoolUuid::new_v4()); let zone_address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0); client .omicron_zones_put(&sled_agent_client::types::OmicronZonesConfig { @@ -558,6 +561,7 @@ mod test { sled_agent_client::types::OmicronZoneType::Oximeter { address: zone_address.to_string(), }, + filesystem_pool: Some(filesystem_pool), }], }) .await diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index 34056b45a1..00103528bb 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -11,6 +11,7 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true +cockroach-admin-client.workspace = true dns-service-client.workspace = true chrono.workspace = true futures.workspace = true diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 101a7372c5..6bd72955c7 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -49,6 +49,7 @@ mod test { ) { let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); + let resolver = nexus.resolver(); let log = &cptestctx.logctx.log; let opctx = OpContext::for_background( log.clone(), @@ -92,6 +93,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint, "test-suite", &overrides, diff --git a/nexus/reconfigurator/execution/src/datasets.rs 
b/nexus/reconfigurator/execution/src/datasets.rs index c4f5cbae82..e007c2528e 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -278,6 +278,7 @@ mod tests { disposition: BlueprintZoneDisposition::InService, id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(ZpoolName::new_external(new_zpool_id)), zone_type: BlueprintZoneType::Crucible( blueprint_zone_type::Crucible { address: "[::1]:0".parse().unwrap(), diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index ef4996db54..f3b718ee54 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -457,6 +457,7 @@ mod test { use crate::overridables::Overridables; use crate::Sled; use dns_service_client::DnsDiff; + use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use internal_dns::DNS_ZONE; use nexus_db_model::DnsGroup; @@ -471,6 +472,7 @@ mod test { use nexus_reconfigurator_planning::example::example; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_test_utils::resource_helpers::create_silo; + use nexus_test_utils::resource_helpers::DiskTestBuilder; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; @@ -499,9 +501,11 @@ mod test { use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; + use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::ZoneKind; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -621,6 +625,9 @@ mod test { disposition: BlueprintZoneDisposition::Quiesced, id: out_of_service_id, underlay_address: out_of_service_addr, + filesystem_pool: Some(ZpoolName::new_external( + ZpoolUuid::new_v4(), + )), zone_type: BlueprintZoneType::Oximeter( blueprint_zone_type::Oximeter { address: SocketAddrV6::new( @@ -1134,8 +1141,17 @@ mod test { async fn test_silos_external_dns_end_to_end( cptestctx: &ControlPlaneTestContext, ) { + // Add a zpool to all sleds, just to ensure that all new zones can find + // a transient filesystem wherever they end up being placed. 
+ DiskTestBuilder::new(&cptestctx) + .on_all_sleds() + .with_zpool_count(1) + .build() + .await; + let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); + let resolver = nexus.resolver(); let log = &cptestctx.logctx.log; let opctx = OpContext::for_background( log.clone(), @@ -1169,6 +1185,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint, "test-suite", &overrides, @@ -1193,6 +1210,7 @@ mod test { cptestctx, &opctx, datastore, + resolver, &blueprint, &overrides, "squidport", @@ -1305,6 +1323,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint2, "test-suite", &overrides, @@ -1378,6 +1397,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint2, "test-suite", &overrides, @@ -1399,6 +1419,7 @@ mod test { &cptestctx, &opctx, datastore, + resolver, &blueprint2, &overrides, "tickety-boo", @@ -1412,6 +1433,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint2, "test-suite", &overrides, @@ -1457,6 +1479,7 @@ mod test { cptestctx: &ControlPlaneTestContext, opctx: &OpContext, datastore: &DataStore, + resolver: &Resolver, blueprint: &Blueprint, overrides: &Overridables, silo_name: &str, @@ -1504,6 +1527,7 @@ mod test { crate::realize_blueprint_with_overrides( &opctx, datastore, + resolver, &blueprint, "test-suite", &overrides, diff --git a/nexus/reconfigurator/execution/src/external_networking.rs b/nexus/reconfigurator/execution/src/external_networking.rs index 3ac1de96d5..a451eeda0f 100644 --- a/nexus/reconfigurator/execution/src/external_networking.rs +++ b/nexus/reconfigurator/execution/src/external_networking.rs @@ -445,7 +445,9 @@ mod tests { use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::MacAddr; use omicron_common::api::external::Vni; + use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::ExternalIpUuid; + use omicron_uuid_kinds::ZpoolUuid; use oxnet::IpNet; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -593,6 +595,9 @@ mod tests { disposition: BlueprintZoneDisposition::InService, id: self.nexus_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(ZpoolName::new_external( + ZpoolUuid::new_v4(), + )), zone_type: BlueprintZoneType::Nexus( blueprint_zone_type::Nexus { internal_address: "[::1]:0".parse().unwrap(), @@ -607,6 +612,9 @@ mod tests { disposition: BlueprintZoneDisposition::InService, id: self.dns_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(ZpoolName::new_external( + ZpoolUuid::new_v4(), + )), zone_type: BlueprintZoneType::ExternalDns( blueprint_zone_type::ExternalDns { dataset: OmicronZoneDataset { @@ -624,6 +632,9 @@ mod tests { disposition: BlueprintZoneDisposition::InService, id: self.ntp_id, underlay_address: Ipv6Addr::LOCALHOST, + filesystem_pool: Some(ZpoolName::new_external( + ZpoolUuid::new_v4(), + )), zone_type: BlueprintZoneType::BoundaryNtp( blueprint_zone_type::BoundaryNtp { address: "[::1]:0".parse().unwrap(), diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 63bb4b24f0..0e9ab394f1 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -7,6 +7,7 @@ //! See `nexus_reconfigurator_planning` crate-level docs for background. 
use anyhow::{anyhow, Context}; +use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; @@ -76,6 +77,7 @@ impl From for Sled { pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, + resolver: &Resolver, blueprint: &Blueprint, nexus_label: S, ) -> Result<(), Vec> @@ -85,6 +87,7 @@ where realize_blueprint_with_overrides( opctx, datastore, + resolver, blueprint, nexus_label, &Default::default(), @@ -95,6 +98,7 @@ where pub async fn realize_blueprint_with_overrides( opctx: &OpContext, datastore: &DataStore, + resolver: &Resolver, blueprint: &Blueprint, nexus_label: S, overrides: &Overridables, @@ -204,6 +208,14 @@ where .await .map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])?; + omicron_zones::clean_up_expunged_zones( + &opctx, + datastore, + resolver, + blueprint.all_omicron_zones(BlueprintZoneFilter::Expunged), + ) + .await?; + sled_state::decommission_sleds( &opctx, datastore, diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index a40d65411b..404124ba25 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -6,17 +6,32 @@ use crate::Sled; use anyhow::anyhow; +use anyhow::bail; use anyhow::Context; +use cockroach_admin_client::types::NodeDecommission; +use cockroach_admin_client::types::NodeId; use futures::stream; use futures::StreamExt; +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; +use omicron_common::address::COCKROACH_ADMIN_PORT; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use slog::info; use slog::warn; +use slog::Logger; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::net::SocketAddr; +use std::net::SocketAddrV6; /// Idempotently ensure that the specified Omicron zones are deployed to the /// corresponding sleds @@ -85,29 +100,228 @@ pub(crate) async fn deploy_zones( } } +/// Idempotently perform any cleanup actions necessary for expunged zones. +pub(crate) async fn clean_up_expunged_zones( + opctx: &OpContext, + datastore: &DataStore, + resolver: &R, + expunged_zones: impl Iterator, +) -> Result<(), Vec> { + let errors: Vec = stream::iter(expunged_zones) + .filter_map(|(sled_id, config)| async move { + // We expect to only be called with expunged zones; skip any with a + // different disposition. + if config.disposition != BlueprintZoneDisposition::Expunged { + return None; + } + + let log = opctx.log.new(slog::o!( + "sled_id" => sled_id.to_string(), + "zone_id" => config.id.to_string(), + "zone_type" => config.zone_type.kind().to_string(), + )); + + let result = match &config.zone_type { + // Zones which need no cleanup work after expungement. + BlueprintZoneType::Nexus(_) => None, + + // Zones which need cleanup after expungement.
+ BlueprintZoneType::CockroachDb(_) => Some( + decommission_cockroachdb_node( + opctx, datastore, resolver, config.id, &log, + ) + .await, + ), + + // Zones that may or may not need cleanup work - we haven't + // gotten to these yet! + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Oximeter(_) => { + warn!( + log, + "unsupported zone type for expungement cleanup; \ + not performing any cleanup"; + ); + None + } + }?; + + match result { + Err(error) => { + warn!( + log, "failed to clean up expunged zone"; + "error" => #%error, + ); + Some(error) + } + Ok(()) => { + info!(log, "successfully cleaned up after expunged zone"); + None + } + } + }) + .collect() + .await; + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} + +// Helper trait that is implemented by `Resolver`, but allows unit tests to +// inject a fake resolver that points to a mock server when calling +// `decommission_cockroachdb_node()`. +pub(crate) trait CleanupResolver { + async fn resolve_cockroach_admin_addresses( + &self, + ) -> anyhow::Result>; +} + +impl CleanupResolver for Resolver { + async fn resolve_cockroach_admin_addresses( + &self, + ) -> anyhow::Result> { + let crdb_ips = self + .lookup_all_ipv6(ServiceName::Cockroach) + .await + .context("failed to resolve cockroach IPs in internal DNS")?; + let ip_to_admin_addr = |ip| { + SocketAddr::V6(SocketAddrV6::new(ip, COCKROACH_ADMIN_PORT, 0, 0)) + }; + Ok(crdb_ips.into_iter().map(ip_to_admin_addr)) + } +} + +async fn decommission_cockroachdb_node( + opctx: &OpContext, + datastore: &DataStore, + resolver: &R, + zone_id: OmicronZoneUuid, + log: &Logger, +) -> anyhow::Result<()> { + // We need the node ID of this zone. Check and see whether the + // crdb_node_id_collector RPW found the node ID for this zone. If it hasn't, + // we're in trouble: we don't know whether this zone came up far enough to + // join the cockroach cluster (and therefore needs decommissioning) but was + // expunged before the collector RPW could identify it, or if the zone never + // joined the cockroach cluster (and therefore doesn't have a node ID and + // doesn't need decommissioning). + // + // For now, we punt on this problem. If we were certain that the zone's + // socket address could never be reused, we could ask one of the running + // cockroach nodes for the status of all nodes, find the ID of the one with + // this zone's listening address (if one exists), and decommission that ID. + // But if the address could be reused, that risks decommissioning a live + // node! We'll just log a warning and require a human to figure out how to + // clean this up. + // + // TODO-cleanup Can we decommission nodes in this state automatically? If + // not, can we be noisier than just a `warn!` log without failing blueprint + // execution entirely? + let Some(node_id) = + datastore.cockroachdb_node_id(opctx, zone_id).await.with_context( + || format!("failed to look up node ID of cockroach zone {zone_id}"), + )? + else { + warn!( + log, + "expunged cockroach zone has no known node ID; \ + support intervention is required for zone cleanup" + ); + return Ok(()); + }; + + // To decommission a CRDB node, we need to talk to one of the still-running + // nodes; look them up in DNS and try them all in order.
(It would probably be + // fine to try them all concurrently, but this isn't a very common + // operation, so keeping it simple seems fine.) + // + // TODO-cleanup: Replace this with a qorb-based connection pool once it's + // ready. + let admin_addrs = resolver.resolve_cockroach_admin_addresses().await?; + + let mut num_admin_addrs_tried = 0; + for admin_addr in admin_addrs { + let admin_url = format!("http://{admin_addr}"); + let log = log.new(slog::o!("admin_url" => admin_url.clone())); + let client = + cockroach_admin_client::Client::new(&admin_url, log.clone()); + + let body = NodeId { node_id: node_id.clone() }; + match client + .node_decommission(&body) + .await + .map(|response| response.into_inner()) + { + Ok(NodeDecommission { + is_decommissioning, + is_draining, + is_live, + membership, + node_id: _, + notes, + replicas, + }) => { + info!( + log, "successfully sent cockroach decommission request"; + "is_decommissioning" => is_decommissioning, + "is_draining" => is_draining, + "is_live" => is_live, + "membership" => ?membership, + "notes" => ?notes, + "replicas" => replicas, + ); + + return Ok(()); + } + + Err(err) => { + warn!( + log, "failed sending decommission request \ + (will try other servers)"; + "err" => InlineErrorChain::new(&err), + ); + num_admin_addrs_tried += 1; + } + } + } + + bail!( + "failed to decommission cockroach zone {zone_id} (node {node_id}): \ + failed to contact {num_admin_addrs_tried} admin servers" + ); +} + #[cfg(test)] mod test { - use super::deploy_zones; + use super::*; use crate::Sled; + use cockroach_admin_client::types::NodeMembership; use httptest::matchers::{all_of, json_decoded, request}; - use httptest::responders::status_code; + use httptest::responders::{json_encoded, status_code}; use httptest::Expectation; - use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ - blueprint_zone_type, BlueprintZoneType, CockroachDbPreserveDowngrade, - OmicronZonesConfig, - }; - use nexus_types::deployment::{ - Blueprint, BlueprintTarget, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, + blueprint_zone_type, Blueprint, BlueprintTarget, + CockroachDbPreserveDowngrade, OmicronZonesConfig, }; use nexus_types::inventory::OmicronZoneDataset; use omicron_common::api::external::Generation; + use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; + use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; - use std::net::SocketAddr; + use std::iter; use uuid::Uuid; type ControlPlaneTestContext = @@ -183,19 +397,17 @@ mod test { // the full set of zones that must be running. // See `rack_setup::service::ServiceInner::run` for more details. 
fn make_zones() -> BlueprintZonesConfig { + let zpool = ZpoolName::new_external(ZpoolUuid::new_v4()); BlueprintZonesConfig { generation: Generation::new(), zones: vec![BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(zpool.clone()), zone_type: BlueprintZoneType::InternalDns( blueprint_zone_type::InternalDns { - dataset: OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) - .parse() - .unwrap(), - }, + dataset: OmicronZoneDataset { pool_name: zpool }, dns_address: "[::1]:0".parse().unwrap(), gz_address: "::1".parse().unwrap(), gz_address_index: 0, @@ -295,6 +507,9 @@ mod test { disposition, id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(ZpoolName::new_external( + ZpoolUuid::new_v4(), + )), zone_type: BlueprintZoneType::InternalNtp( blueprint_zone_type::InternalNtp { address: "[::1]:0".parse().unwrap(), @@ -343,4 +558,172 @@ mod test { s1.verify_and_clear(); s2.verify_and_clear(); } + + #[nexus_test] + async fn test_clean_up_cockroach_zones( + cptestctx: &ControlPlaneTestContext, + ) { + // Test setup boilerplate. + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Construct the cockroach zone we're going to try to clean up. + let any_sled_id = SledUuid::new_v4(); + let crdb_zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::Expunged, + id: OmicronZoneUuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(ZpoolName::new_external(ZpoolUuid::new_v4())), + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address: "[::1]:0".parse().unwrap(), + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + }, + ), + }; + + // Start a mock cockroach-admin server. + let mut mock_admin = httptest::Server::run(); + + // Create our fake resolver that will point to our mock server. + struct FixedResolver(Vec); + impl CleanupResolver for FixedResolver { + async fn resolve_cockroach_admin_addresses( + &self, + ) -> anyhow::Result> { + Ok(self.0.clone().into_iter()) + } + } + let fake_resolver = FixedResolver(vec![mock_admin.addr()]); + + // We haven't yet inserted a mapping from zone ID to cockroach node ID + // in the db, so trying to clean up the zone should log a warning but + // otherwise succeed, without attempting to contact our mock admin + // server. (We don't have a good way to confirm the warning was logged, + // so we'll just check for an Ok return and no contact to mock_admin.) + clean_up_expunged_zones( + &opctx, + datastore, + &fake_resolver, + iter::once((any_sled_id, &crdb_zone)), + ) + .await + .expect("unknown node ID: no cleanup"); + mock_admin.verify_and_clear(); + + // Record a zone ID <-> node ID mapping. + let crdb_node_id = "test-node"; + datastore + .set_cockroachdb_node_id( + &opctx, + crdb_zone.id, + crdb_node_id.to_string(), + ) + .await + .expect("assigned node ID"); + + // Cleaning up the zone should now contact the mock-admin server and + // attempt to decommission it. 
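+        // (The helper closure below registers an httptest expectation for a
+        // single POST to `/node/decommission` whose body carries the node ID
+        // we just recorded, and answers it with a canned `NodeDecommission`
+        // response - the same request shape `decommission_cockroachdb_node`
+        // sends to a real cockroach-admin server.)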
+ let add_decommission_expecation = + move |mock_server: &mut httptest::Server| { + mock_server.expect( + Expectation::matching(all_of![ + request::method_path("POST", "/node/decommission"), + request::body(json_decoded(move |n: &NodeId| { + n.node_id == crdb_node_id + })) + ]) + .respond_with(json_encoded( + NodeDecommission { + is_decommissioning: true, + is_draining: true, + is_live: false, + membership: NodeMembership::Decommissioning, + node_id: crdb_node_id.to_string(), + notes: vec![], + replicas: 0, + }, + )), + ); + }; + add_decommission_expecation(&mut mock_admin); + clean_up_expunged_zones( + &opctx, + datastore, + &fake_resolver, + iter::once((any_sled_id, &crdb_zone)), + ) + .await + .expect("decommissioned test node"); + mock_admin.verify_and_clear(); + + // If we have multiple cockroach-admin servers, and the first N of them + // don't respond successfully, we should keep trying until we find one + // that does (or they all fail). We'll start with the "all fail" case. + let mut mock_bad1 = httptest::Server::run(); + let mut mock_bad2 = httptest::Server::run(); + let add_decommission_failure_expecation = + move |mock_server: &mut httptest::Server| { + mock_server.expect( + Expectation::matching(all_of![ + request::method_path("POST", "/node/decommission"), + request::body(json_decoded(move |n: &NodeId| { + n.node_id == crdb_node_id + })) + ]) + .respond_with(status_code(503)), + ); + }; + add_decommission_failure_expecation(&mut mock_bad1); + add_decommission_failure_expecation(&mut mock_bad2); + let mut fake_resolver = + FixedResolver(vec![mock_bad1.addr(), mock_bad2.addr()]); + let mut err = clean_up_expunged_zones( + &opctx, + datastore, + &fake_resolver, + iter::once((any_sled_id, &crdb_zone)), + ) + .await + .expect_err("no successful response should result in failure"); + assert_eq!(err.len(), 1); + let err = err.pop().unwrap(); + assert_eq!( + err.to_string(), + format!( + "failed to decommission cockroach zone {} \ + (node {crdb_node_id}): failed to contact 2 admin servers", + crdb_zone.id + ) + ); + mock_bad1.verify_and_clear(); + mock_bad2.verify_and_clear(); + mock_admin.verify_and_clear(); + + // Now we try again, but put the good server at the end of the list; we + // should contact the two bad servers, but then succeed on the good one. + add_decommission_failure_expecation(&mut mock_bad1); + add_decommission_failure_expecation(&mut mock_bad2); + add_decommission_expecation(&mut mock_admin); + fake_resolver.0.push(mock_admin.addr()); + clean_up_expunged_zones( + &opctx, + datastore, + &fake_resolver, + iter::once((any_sled_id, &crdb_zone)), + ) + .await + .expect("decommissioned test node"); + mock_bad1.verify_and_clear(); + mock_bad2.verify_and_clear(); + mock_admin.verify_and_clear(); + } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 1609a6c0cd..4177d4884f 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -6,7 +6,6 @@ use crate::ip_allocator::IpAllocator; use crate::planner::zone_needs_expungement; -use crate::planner::DiscretionaryOmicronZone; use crate::planner::ZoneExpungeReason; use anyhow::anyhow; use internal_dns::config::Host; @@ -631,7 +630,6 @@ impl<'a> BlueprintBuilder<'a> { let sled_subnet = sled_info.subnet; let ip = self.sled_alloc_ip(sled_id)?; let ntp_address = SocketAddrV6::new(ip, NTP_PORT, 0, 0); - // Construct the list of internal DNS servers. 
// // It'd be tempting to get this list from the other internal NTP @@ -659,18 +657,22 @@ impl<'a> BlueprintBuilder<'a> { }) .collect(); + let zone_type = + BlueprintZoneType::InternalNtp(blueprint_zone_type::InternalNtp { + address: ntp_address, + ntp_servers, + dns_servers, + domain: None, + }); + let filesystem_pool = + self.sled_select_zpool(sled_id, zone_type.kind())?; + let zone = BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: self.rng.zone_rng.next(), underlay_address: ip, - zone_type: BlueprintZoneType::InternalNtp( - blueprint_zone_type::InternalNtp { - address: ntp_address, - ntp_servers, - dns_servers, - domain: None, - }, - ), + filesystem_pool: Some(filesystem_pool), + zone_type, }; self.sled_add_zone(sled_id, zone)?; @@ -713,16 +715,19 @@ impl<'a> BlueprintBuilder<'a> { let ip = self.sled_alloc_ip(sled_id)?; let port = omicron_common::address::CRUCIBLE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); + let zone_type = + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, + }); + let filesystem_pool = pool_name; + let zone = BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: self.rng.zone_rng.next(), underlay_address: ip, - zone_type: BlueprintZoneType::Crucible( - blueprint_zone_type::Crucible { - address, - dataset: OmicronZoneDataset { pool_name }, - }, - ), + filesystem_pool: Some(filesystem_pool), + zone_type, }; self.sled_add_zone(sled_id, zone)?; @@ -835,19 +840,23 @@ impl<'a> BlueprintBuilder<'a> { let ip = self.sled_alloc_ip(sled_id)?; let port = omicron_common::address::NEXUS_INTERNAL_PORT; let internal_address = SocketAddrV6::new(ip, port, 0, 0); + let zone_type = + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers: external_dns_servers.clone(), + }); + let filesystem_pool = + self.sled_select_zpool(sled_id, zone_type.kind())?; + let zone = BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: ip, - zone_type: BlueprintZoneType::Nexus( - blueprint_zone_type::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers: external_dns_servers.clone(), - }, - ), + filesystem_pool: Some(filesystem_pool), + zone_type, }; self.sled_add_zone(sled_id, zone)?; } @@ -884,22 +893,26 @@ impl<'a> BlueprintBuilder<'a> { for _ in 0..num_crdb_to_add { let zone_id = self.rng.zone_rng.next(); let underlay_ip = self.sled_alloc_ip(sled_id)?; - let pool_name = self.sled_alloc_zpool( - sled_id, - DiscretionaryOmicronZone::CockroachDb, - )?; + let pool_name = + self.sled_select_zpool(sled_id, ZoneKind::CockroachDb)?; let port = omicron_common::address::COCKROACH_PORT; let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone_type = BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address, + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + }, + ); + let filesystem_pool = pool_name; + let zone = BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: zone_id, underlay_address: underlay_ip, - zone_type: BlueprintZoneType::CockroachDb( - blueprint_zone_type::CockroachDb { - address, - dataset: OmicronZoneDataset { pool_name }, - }, - ), + filesystem_pool: Some(filesystem_pool), + zone_type, }; self.sled_add_zone(sled_id, zone)?; } @@ -963,44 +976,40 @@ impl<'a> BlueprintBuilder<'a> { 
allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } - fn sled_alloc_zpool( - &mut self, + // Selects a zpool for this zone type. + // + // This zpool may be used for either durable storage or transient + // storage - the usage is up to the caller. + // + // If `zone_kind` already exists on `sled_id`, it is prevented + // from using the same zpool as existing zones with the same kind. + fn sled_select_zpool( + &self, sled_id: SledUuid, - kind: DiscretionaryOmicronZone, + zone_kind: ZoneKind, ) -> Result { - let resources = self.sled_resources(sled_id)?; + let all_in_service_zpools = + self.sled_resources(sled_id)?.all_zpools(ZpoolFilter::InService); - // We refuse to choose a zpool for a zone of a given `kind` if this - // sled already has a zone of that kind on the same zpool. Build up a - // set of invalid zpools for this sled/kind pair. + // We refuse to choose a zpool for a zone of a given `zone_kind` if this + // sled already has a durable zone of that kind on the same zpool. Build + // up a set of invalid zpools for this sled/kind pair. let mut skip_zpools = BTreeSet::new(); for zone_config in self.current_sled_zones(sled_id) { - match (kind, &zone_config.zone_type) { - ( - DiscretionaryOmicronZone::Nexus, - BlueprintZoneType::Nexus(_), - ) => { - // TODO handle this case once we track transient datasets + if let Some(zpool) = zone_config.zone_type.durable_zpool() { + if zone_kind == zone_config.zone_type.kind() { + skip_zpools.insert(zpool); } - ( - DiscretionaryOmicronZone::CockroachDb, - BlueprintZoneType::CockroachDb(crdb), - ) => { - skip_zpools.insert(&crdb.dataset.pool_name); - } - (DiscretionaryOmicronZone::Nexus, _) - | (DiscretionaryOmicronZone::CockroachDb, _) => (), } } - for &zpool_id in resources.all_zpools(ZpoolFilter::InService) { + for &zpool_id in all_in_service_zpools { let zpool_name = ZpoolName::new_external(zpool_id); if !skip_zpools.contains(&zpool_name) { return Ok(zpool_name); } } - - Err(Error::NoAvailableZpool { sled_id, kind: kind.into() }) + Err(Error::NoAvailableZpool { sled_id, kind: zone_kind }) } fn sled_resources( @@ -1298,12 +1307,14 @@ pub mod test { // On any given zpool, we should have at most one zone of any given // kind. + // + // TODO: we may want a similar check for non-durable datasets? let mut kinds_by_zpool: BTreeMap< ZpoolUuid, BTreeMap, > = BTreeMap::new(); for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { - if let Some(dataset) = zone.zone_type.dataset() { + if let Some(dataset) = zone.zone_type.durable_dataset() { let kind = zone.zone_type.kind(); if let Some(previous) = kinds_by_zpool .entry(dataset.pool_name.id()) @@ -1647,6 +1658,31 @@ pub mod test { logctx.cleanup_successful(); } + // Tests that provisioning zones with durable datasets co-locates their zone filesystems. + #[test] + fn test_zone_filesystem_zpool_colocated() { + static TEST_NAME: &str = + "blueprint_builder_test_zone_filesystem_zpool_colocated"; + let logctx = test_setup_log(TEST_NAME); + let (_, _, blueprint) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + + for (_, zone_config) in &blueprint.blueprint_zones { + for zone in &zone_config.zones { + // The pool should only be optional for backwards compatibility.
+ let filesystem_pool = zone + .filesystem_pool + .as_ref() + .expect("Should have filesystem pool"); + + if let Some(durable_pool) = zone.zone_type.durable_zpool() { + assert_eq!(durable_pool, filesystem_pool); + } + } + } + logctx.cleanup_successful(); + } + #[test] fn test_add_nexus_with_no_existing_nexus_zones() { static TEST_NAME: &str = diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs index 68e2b9c2a2..6cb76539ec 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs @@ -190,6 +190,9 @@ mod tests { }; use maplit::btreeset; + use nexus_types::deployment::SledDisk; + use nexus_types::external_api::views::PhysicalDiskPolicy; + use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::{ deployment::{ blueprint_zone_type, BlueprintZoneType, SledDetails, SledFilter, @@ -198,7 +201,11 @@ mod tests { external_api::views::{SledPolicy, SledState}, }; use omicron_common::address::Ipv6Subnet; + use omicron_common::disk::DiskIdentity; + use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::PhysicalDiskUuid; + use omicron_uuid_kinds::ZpoolUuid; use crate::{ blueprint_builder::{ @@ -233,7 +240,19 @@ mod tests { subnet: Ipv6Subnet::new( "fd00:1::".parse().unwrap(), ), - zpools: BTreeMap::new(), + zpools: BTreeMap::from([( + ZpoolUuid::new_v4(), + SledDisk { + disk_identity: DiskIdentity { + vendor: String::from("fake-vendor"), + serial: String::from("fake-serial"), + model: String::from("fake-model"), + }, + disk_id: PhysicalDiskUuid::new_v4(), + policy: PhysicalDiskPolicy::InService, + state: PhysicalDiskState::Active, + }, + )]), }, }, ) @@ -280,11 +299,15 @@ mod tests { let change = builder.zones.change_sled_zones(existing_sled_id); let new_zone_id = OmicronZoneUuid::new_v4(); + // NOTE: This pool doesn't actually exist on the sled, but nothing is + // checking for that in this test? + let filesystem_pool = ZpoolName::new_external(ZpoolUuid::new_v4()); change .add_zone(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, id: new_zone_id, underlay_address: Ipv6Addr::UNSPECIFIED, + filesystem_pool: Some(filesystem_pool), zone_type: BlueprintZoneType::Oximeter( blueprint_zone_type::Oximeter { address: SocketAddrV6::new( diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 4e288e8d8a..08c25c20fd 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -672,13 +672,21 @@ pub(crate) fn zone_needs_expungement( } // Should we expunge the zone because durable storage is gone? - if let Some(durable_storage_zpool) = zone_config.zone_type.zpool() { + if let Some(durable_storage_zpool) = zone_config.zone_type.durable_zpool() { let zpool_id = durable_storage_zpool.id(); if !sled_details.resources.zpool_is_provisionable(&zpool_id) { return Some(ZoneExpungeReason::DiskExpunged); } }; + // Should we expunge the zone because transient storage is gone? 
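+    // (This mirrors the durable-storage check above: if the zpool backing the
+    // zone's filesystem is no longer provisionable, the zone cannot run there
+    // and should be expunged as well.)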
+ if let Some(ref filesystem_zpool) = zone_config.filesystem_pool { + let zpool_id = filesystem_zpool.id(); + if !sled_details.resources.zpool_is_provisionable(&zpool_id) { + return Some(ZoneExpungeReason::DiskExpunged); + } + }; + None } @@ -730,6 +738,7 @@ mod test { use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::ZoneKind; + use std::collections::HashMap; use std::mem; use typed_rng::TypedUuidRng; @@ -1204,8 +1213,9 @@ mod test { } #[test] - fn test_disk_expungement_removes_zones() { - static TEST_NAME: &str = "planner_disk_expungement_removes_zones"; + fn test_disk_expungement_removes_zones_durable_zpool() { + static TEST_NAME: &str = + "planner_disk_expungement_removes_zones_durable_zpool"; let logctx = test_setup_log(TEST_NAME); // Create an example system with a single sled @@ -1221,9 +1231,30 @@ mod test { // The example system should be assigning crucible zones to each // in-service disk. When we expunge one of these disks, the planner // should remove the associated zone. + // + // Find a disk which is only used by a single zone, if one exists. + // + // If we don't do this, we might select a physical disk supporting + // multiple zones of distinct types. + let mut zpool_by_zone_usage = HashMap::new(); + for zones in blueprint1.blueprint_zones.values() { + for zone in &zones.zones { + let pool = zone.filesystem_pool.as_ref().unwrap(); + zpool_by_zone_usage + .entry(pool.id()) + .and_modify(|count| *count += 1) + .or_insert_with(|| 1); + } + } let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap(); - let (_, disk) = - sled_details.resources.zpools.iter_mut().next().unwrap(); + let (_, disk) = sled_details + .resources + .zpools + .iter_mut() + .find(|(zpool_id, _disk)| { + *zpool_by_zone_usage.get(*zpool_id).unwrap() == 1 + }) + .expect("Couldn't find zpool only used by a single zone"); disk.policy = PhysicalDiskPolicy::Expunged; let input = builder.build(); @@ -1270,6 +1301,113 @@ mod test { logctx.cleanup_successful(); } + #[test] + fn test_disk_expungement_removes_zones_transient_filesystem() { + static TEST_NAME: &str = + "planner_disk_expungement_removes_zones_transient_filesystem"; + let logctx = test_setup_log(TEST_NAME); + + // Create an example system with a single sled + let (collection, input, blueprint1) = + example(&logctx.log, TEST_NAME, 1); + + let mut builder = input.into_builder(); + + // Aside: Avoid churning on the quantity of Nexus zones - we're okay + // staying at one. + builder.policy_mut().target_nexus_zone_count = 1; + + // Find whatever pool NTP was using + let pool_to_expunge = blueprint1 + .blueprint_zones + .iter() + .find_map(|(_, zones_config)| { + for zone_config in &zones_config.zones { + if zone_config.zone_type.is_ntp() { + return zone_config.filesystem_pool.clone(); + } + } + None + }) + .expect("No NTP zone pool?"); + + // This is mostly for test stability across "example system" changes: + // Find how many other zones are using this same zpool. 
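+        // (Count both zones whose `filesystem_pool` is this zpool and zones
+        // whose durable dataset lives on it; expunging the backing disk should
+        // expunge zones that reference it either way, and this total is
+        // checked against the set of modified zones after planning below.)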
+ let zones_using_zpool = blueprint1.blueprint_zones.iter().fold( + 0, + |acc, (_, zones_config)| { + let mut zones_using_zpool = 0; + for zone_config in &zones_config.zones { + if let Some(pool) = &zone_config.filesystem_pool { + if pool == &pool_to_expunge { + zones_using_zpool += 1; + continue; + } + } + if let Some(pool) = zone_config.zone_type.durable_zpool() { + if pool == &pool_to_expunge { + zones_using_zpool += 1; + continue; + } + } + } + acc + zones_using_zpool + }, + ); + assert!( + zones_using_zpool > 0, + "We should be expunging at least one zone using this zpool" + ); + + // For that pool, find the physical disk behind it, and mark it + // expunged. + let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap(); + let disk = sled_details + .resources + .zpools + .get_mut(&pool_to_expunge.id()) + .unwrap(); + disk.policy = PhysicalDiskPolicy::Expunged; + + let input = builder.build(); + + let blueprint2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &input, + "test: expunge a disk with a zone on top", + &collection, + ) + .expect("failed to create planner") + .with_rng_seed((TEST_NAME, "bp2")) + .plan() + .expect("failed to plan"); + + let diff = blueprint2.diff_since_blueprint(&blueprint1); + println!("1 -> 2 (expunge a disk):\n{}", diff.display()); + assert_eq!(diff.sleds_added.len(), 0); + assert_eq!(diff.sleds_removed.len(), 0); + assert_eq!(diff.sleds_modified.len(), 1); + + // We should be removing all zones using this zpool + assert_eq!(diff.zones.added.len(), 0); + assert_eq!(diff.zones.removed.len(), 0); + assert_eq!(diff.zones.modified.len(), 1); + + let (_zone_id, modified_zones) = + diff.zones.modified.iter().next().unwrap(); + assert_eq!(modified_zones.zones.len(), zones_using_zpool); + for modified_zone in &modified_zones.zones { + assert_eq!( + modified_zone.zone.disposition(), + BlueprintZoneDisposition::Expunged, + "Should have expunged this zone" + ); + } + + logctx.cleanup_successful(); + } + /// Check that the planner will skip non-provisionable sleds when allocating /// extra Nexus zones #[test] diff --git a/nexus/src/app/background/driver.rs b/nexus/src/app/background/driver.rs index ad547a443c..c93729a335 100644 --- a/nexus/src/app/background/driver.rs +++ b/nexus/src/app/background/driver.rs @@ -54,9 +54,9 @@ struct Task { status: watch::Receiver, /// join handle for the tokio task that's executing this background task tokio_task: tokio::task::JoinHandle<()>, - /// `Notify` used to wake up the tokio task when a caller explicit wants to + /// `Activator` used to wake up the tokio task when a caller explicit wants to /// activate the background task - notify: Arc, + activator: Activator, } impl Driver { @@ -97,17 +97,16 @@ impl Driver { /// /// This function panics if the `name` or `activator` has previously been /// passed to a call to this function. - #[allow(clippy::too_many_arguments)] - pub fn register( + pub fn register( &mut self, - name: String, - description: String, - period: Duration, - imp: Box, - opctx: OpContext, - watchers: Vec>, - activator: &Activator, - ) -> TaskName { + taskdef: TaskDefinition<'_, N, D>, + ) -> TaskName + where + N: ToString, + D: ToString, + { + let name = taskdef.name.to_string(); + // Activation of the background task happens in a separate tokio task. // Set up a channel so that tokio task can report status back to us. 
let (status_tx, status_rx) = watch::channel(TaskStatus { @@ -118,7 +117,8 @@ impl Driver { // We'll use a `Notify` to wake up that tokio task when an activation is // requested. The caller provides their own Activator, which just // provides a specific Notify for us to use here. - if let Err(previous) = activator.wired_up.compare_exchange( + let activator = taskdef.activator; + if let Err(previous) = activator.0.wired_up.compare_exchange( false, true, Ordering::SeqCst, @@ -131,23 +131,32 @@ impl Driver { previous, name ); } - let notify = Arc::clone(&activator.notify); // Spawn the tokio task that will manage activation of the background // task. - let opctx = opctx.child(BTreeMap::from([( + let opctx = taskdef.opctx.child(BTreeMap::from([( "background_task".to_string(), name.clone(), )])); - let task_exec = - TaskExec::new(period, imp, Arc::clone(¬ify), opctx, status_tx); - let tokio_task = tokio::task::spawn(task_exec.run(watchers)); + let task_exec = TaskExec::new( + taskdef.period, + taskdef.task_impl, + Arc::clone(&activator.0), + opctx, + status_tx, + ); + let tokio_task = tokio::task::spawn(task_exec.run(taskdef.watchers)); // Create an object to track our side of the background task's state. // This just provides the handles we need to read status and wake up the // tokio task. - let task = - Task { description, period, status: status_rx, tokio_task, notify }; + let task = Task { + description: taskdef.description.to_string(), + period: taskdef.period, + status: status_rx, + tokio_task, + activator: activator.clone(), + }; if self.tasks.insert(TaskName(name.clone()), task).is_some() { panic!("started two background tasks called {:?}", name); } @@ -190,7 +199,7 @@ impl Driver { /// If the task is currently running, it will be activated again when it /// finishes. pub(super) fn activate(&self, task: &TaskName) { - self.task_required(task).notify.notify_one(); + self.task_required(task).activator.activate(); } /// Returns the runtime status of the background task @@ -212,6 +221,26 @@ impl Drop for Driver { } } +/// Describes a background task to be registered with [`Driver::register()`] +/// +/// See [`Driver::register()`] for more on how these fields get used. +pub struct TaskDefinition<'a, N: ToString, D: ToString> { + /// identifier for this task + pub name: N, + /// short human-readable summary of this task + pub description: D, + /// driver should activate the task if it hasn't run in this long + pub period: Duration, + /// impl of [`BackgroundTask`] that represents the work of the task + pub task_impl: Box, + /// `OpContext` used for task activations + pub opctx: OpContext, + /// list of watchers that will trigger activation of this task + pub watchers: Vec>, + /// an [`Activator]` that will be wired up to activate this task + pub activator: &'a Activator, +} + /// Activates a background task /// /// See [`crate::app::background`] module-level documentation for more on what @@ -227,18 +256,22 @@ impl Drop for Driver { /// corresponding task has been created and then wired up with just an /// `&Activator` (not a `&mut Activator`). See the [`super::init`] module-level /// documentation for more on why. -pub struct Activator { - pub(super) notify: Arc, +#[derive(Clone)] +pub struct Activator(Arc); + +/// Shared state for an `Activator`. 
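+///
+/// `notify` carries activation requests to the background task's tokio task;
+/// `wired_up` records whether a `Driver` has registered a task against this
+/// activator, which lets us catch double registration (and waiting on an
+/// activator that was never wired up).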
+struct ActivatorInner { + pub(super) notify: Notify, pub(super) wired_up: AtomicBool, } impl Activator { /// Create an activator that is not yet wired up to any background task pub fn new() -> Activator { - Activator { - notify: Arc::new(Notify::new()), + Self(Arc::new(ActivatorInner { + notify: Notify::new(), wired_up: AtomicBool::new(false), - } + })) } /// Activate the background task that this Activator has been wired up to @@ -246,7 +279,18 @@ impl Activator { /// If this Activator has not yet been wired up with [`Driver::register()`], /// then whenever it _is_ wired up, that task will be immediately activated. pub fn activate(&self) { - self.notify.notify_one(); + self.0.notify.notify_one(); + } +} + +impl ActivatorInner { + async fn activated(&self) { + debug_assert!( + self.wired_up.load(Ordering::SeqCst), + "nothing should await activation from an activator that hasn't \ + been wired up" + ); + self.notify.notified().await } } @@ -259,7 +303,7 @@ struct TaskExec { imp: Box, /// used to receive notifications from the Driver that someone has requested /// explicit activation - notify: Arc, + activation: Arc, /// passed through to the background task impl when activated opctx: OpContext, /// used to send current status back to the Driver @@ -272,11 +316,11 @@ impl TaskExec { fn new( period: Duration, imp: Box, - notify: Arc, + activation: Arc, opctx: OpContext, status_tx: watch::Sender, ) -> TaskExec { - TaskExec { period, imp, notify, opctx, status_tx, iteration: 0 } + TaskExec { period, imp, activation, opctx, status_tx, iteration: 0 } } /// Body of the tokio task that manages activation of this background task @@ -296,7 +340,7 @@ impl TaskExec { self.activate(ActivationReason::Timeout).await; }, - _ = self.notify.notified() => { + _ = self.activation.activated() => { self.activate(ActivationReason::Signaled).await; } @@ -390,8 +434,8 @@ impl GenericWatcher for watch::Receiver { mod test { use super::BackgroundTask; use super::Driver; + use crate::app::background::driver::TaskDefinition; use crate::app::background::Activator; - use crate::app::sagas::SagaRequest; use assert_matches::assert_matches; use chrono::Utc; use futures::future::BoxFuture; @@ -403,7 +447,6 @@ mod test { use std::time::Instant; use tokio::sync::mpsc; use tokio::sync::mpsc::error::TryRecvError; - use tokio::sync::mpsc::Sender; use tokio::sync::watch; type ControlPlaneTestContext = @@ -481,35 +524,38 @@ mod test { let mut driver = Driver::new(); assert_eq!(*rx1.borrow(), 0); - let h1 = driver.register( - "t1".to_string(), - "test task".to_string(), - Duration::from_millis(100), - Box::new(t1), - opctx.child(std::collections::BTreeMap::new()), - vec![Box::new(dep_rx1.clone()), Box::new(dep_rx2.clone())], - &act1, - ); + let h1 = driver.register(TaskDefinition { + name: "t1", + description: "test task", + period: Duration::from_millis(100), + task_impl: Box::new(t1), + opctx: opctx.child(std::collections::BTreeMap::new()), + watchers: vec![ + Box::new(dep_rx1.clone()), + Box::new(dep_rx2.clone()), + ], + activator: &act1, + }); - let h2 = driver.register( - "t2".to_string(), - "test task".to_string(), - Duration::from_secs(300), // should never fire in this test - Box::new(t2), - opctx.child(std::collections::BTreeMap::new()), - vec![Box::new(dep_rx1.clone())], - &act2, - ); + let h2 = driver.register(TaskDefinition { + name: "t2", + description: "test task", + period: Duration::from_secs(300), // should never fire in this test + task_impl: Box::new(t2), + opctx: opctx.child(std::collections::BTreeMap::new()), + 
watchers: vec![Box::new(dep_rx1.clone())], + activator: &act2, + }); - let h3 = driver.register( - "t3".to_string(), - "test task".to_string(), - Duration::from_secs(300), // should never fire in this test - Box::new(t3), + let h3 = driver.register(TaskDefinition { + name: "t3", + description: "test task", + period: Duration::from_secs(300), // should never fire in this test + task_impl: Box::new(t3), opctx, - vec![Box::new(dep_rx1), Box::new(dep_rx2)], - &act3, - ); + watchers: vec![Box::new(dep_rx1), Box::new(dep_rx2)], + activator: &act3, + }); // Wait for four activations of our task. (This is three periods.) That // should take between 300ms and 400ms. Allow extra time for a busy @@ -654,15 +700,15 @@ mod test { let before_wall = Utc::now(); let before_instant = Instant::now(); let act1 = Activator::new(); - let h1 = driver.register( - "t1".to_string(), - "test task".to_string(), - Duration::from_secs(300), // should not elapse during test - Box::new(t1), - opctx.child(std::collections::BTreeMap::new()), - vec![Box::new(dep_rx1.clone())], - &act1, - ); + let h1 = driver.register(TaskDefinition { + name: "t1", + description: "test task", + period: Duration::from_secs(300), // should not elapse during test + task_impl: Box::new(t1), + opctx: opctx.child(std::collections::BTreeMap::new()), + watchers: vec![Box::new(dep_rx1.clone())], + activator: &act1, + }); // Wait to enter the first activation. let which = ready_rx1.recv().await.unwrap(); @@ -760,84 +806,4 @@ mod test { // such a task that would allow us to reliably distinguish between these // two without also spending a lot of wall-clock time on this test. } - - /// Simple BackgroundTask impl that sends a test-only SagaRequest - struct SagaRequestTask { - saga_request: Sender, - } - - impl SagaRequestTask { - fn new(saga_request: Sender) -> SagaRequestTask { - SagaRequestTask { saga_request } - } - } - - impl BackgroundTask for SagaRequestTask { - fn activate<'a>( - &'a mut self, - _: &'a OpContext, - ) -> BoxFuture<'a, serde_json::Value> { - async { - let _ = self.saga_request.send(SagaRequest::TestOnly).await; - serde_json::Value::Null - } - .boxed() - } - } - - #[nexus_test(server = crate::Server)] - async fn test_saga_request_flow(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.server_context().nexus; - let datastore = nexus.datastore(); - let opctx = OpContext::for_tests( - cptestctx.logctx.log.clone(), - datastore.clone(), - ); - - let (saga_request, mut saga_request_recv) = SagaRequest::channel(); - let t1 = SagaRequestTask::new(saga_request); - - let mut driver = Driver::new(); - let (_dep_tx1, dep_rx1) = watch::channel(0); - let act1 = Activator::new(); - - let h1 = driver.register( - "t1".to_string(), - "test saga request flow task".to_string(), - Duration::from_secs(300), // should not fire in this test - Box::new(t1), - opctx.child(std::collections::BTreeMap::new()), - vec![Box::new(dep_rx1.clone())], - &act1, - ); - - assert!(matches!( - saga_request_recv.try_recv(), - Err(mpsc::error::TryRecvError::Empty), - )); - - driver.activate(&h1); - - // wait 1 second for the saga request to arrive - tokio::select! 
{ - _ = tokio::time::sleep(tokio::time::Duration::from_secs(1)) => { - assert!(false); - } - - saga_request = saga_request_recv.recv() => { - match saga_request { - None => { - assert!(false); - } - - Some(saga_request) => { - assert!(matches!( - saga_request, - SagaRequest::TestOnly, - )); - } - } - } - } - } } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index fd37de344d..3c66d3242b 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -87,6 +87,7 @@ //! It's not foolproof but hopefully these mechanisms will catch the easy //! mistakes. +use super::driver::TaskDefinition; use super::tasks::abandoned_vmm_reaper; use super::tasks::bfd; use super::tasks::blueprint_execution; @@ -112,7 +113,7 @@ use super::tasks::vpc_routes; use super::Activator; use super::Driver; use crate::app::oximeter::PRODUCER_LEASE_DURATION; -use crate::app::sagas::SagaRequest; +use crate::app::saga::StartSaga; use nexus_config::BackgroundTaskConfig; use nexus_config::DnsTasksConfig; use nexus_db_model::DnsGroup; @@ -121,7 +122,6 @@ use nexus_db_queries::db::DataStore; use oximeter::types::ProducerRegistry; use std::collections::BTreeMap; use std::sync::Arc; -use tokio::sync::mpsc::Sender; use tokio::sync::watch; use uuid::Uuid; @@ -253,8 +253,7 @@ impl BackgroundTasksInitializer { rack_id: Uuid, nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, - saga_request: Sender, - v2p_watcher: (watch::Sender<()>, watch::Receiver<()>), + sagas: Arc, producer_registry: ProducerRegistry, ) -> Driver { let mut driver = self.driver; @@ -328,18 +327,18 @@ impl BackgroundTasksInitializer { datastore.clone(), PRODUCER_LEASE_DURATION, ); - driver.register( - String::from("metrics_producer_gc"), - String::from( + + driver.register(TaskDefinition { + name: "metrics_producer_gc", + description: "unregisters Oximeter metrics producers that have not \ renewed their lease", - ), - config.metrics_producer_gc.period_secs, - Box::new(gc), - opctx.child(BTreeMap::new()), - vec![], - task_metrics_producer_gc, - ) + period: config.metrics_producer_gc.period_secs, + task_impl: Box::new(gc), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_metrics_producer_gc, + }) }; // Background task: External endpoints list watcher @@ -348,95 +347,95 @@ impl BackgroundTasksInitializer { datastore.clone(), self.external_endpoints_tx, ); - driver.register( - String::from("external_endpoints"), - String::from( + driver.register(TaskDefinition { + name: "external_endpoints", + description: "reads config for silos and TLS certificates to determine \ the right set of HTTP endpoints, their HTTP server \ names, and which TLS certificates to use on each one", - ), - config.external_endpoints.period_secs, - Box::new(watcher), - opctx.child(BTreeMap::new()), - vec![], - task_external_endpoints, - ); + period: config.external_endpoints.period_secs, + task_impl: Box::new(watcher), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_external_endpoints, + }); } - driver.register( - "nat_v4_garbage_collector".to_string(), - String::from( + driver.register(TaskDefinition { + name: "nat_v4_garbage_collector", + description: "prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry \ table based on a predetermined retention policy", - ), - config.nat_cleanup.period_secs, - Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( + period: config.nat_cleanup.period_secs, + task_impl: Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), 
resolver.clone(), )), - opctx.child(BTreeMap::new()), - vec![], - task_nat_cleanup, - ); - - driver.register( - "bfd_manager".to_string(), - String::from( - "Manages bidirectional fowarding detection (BFD) \ + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_nat_cleanup, + }); + + driver.register(TaskDefinition { + name: "bfd_manager", + description: "Manages bidirectional fowarding detection (BFD) \ configuration on rack switches", - ), - config.bfd_manager.period_secs, - Box::new(bfd::BfdManager::new(datastore.clone(), resolver.clone())), - opctx.child(BTreeMap::new()), - vec![], - task_bfd_manager, - ); + period: config.bfd_manager.period_secs, + task_impl: Box::new(bfd::BfdManager::new( + datastore.clone(), + resolver.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_bfd_manager, + }); // Background task: phantom disk detection { let detector = phantom_disks::PhantomDiskDetector::new(datastore.clone()); - driver.register( - String::from("phantom_disks"), - String::from("detects and un-deletes phantom disks"), - config.phantom_disks.period_secs, - Box::new(detector), - opctx.child(BTreeMap::new()), - vec![], - task_phantom_disks, - ); + driver.register(TaskDefinition { + name: "phantom_disks", + description: "detects and un-deletes phantom disks", + period: config.phantom_disks.period_secs, + task_impl: Box::new(detector), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_phantom_disks, + }); }; // Background task: blueprint loader let blueprint_loader = blueprint_load::TargetBlueprintLoader::new(datastore.clone()); let rx_blueprint = blueprint_loader.watcher(); - driver.register( - String::from("blueprint_loader"), - String::from("Loads the current target blueprint from the DB"), - config.blueprints.period_secs_load, - Box::new(blueprint_loader), - opctx.child(BTreeMap::new()), - vec![], - task_blueprint_loader, - ); + driver.register(TaskDefinition { + name: "blueprint_loader", + description: "Loads the current target blueprint from the DB", + period: config.blueprints.period_secs_load, + task_impl: Box::new(blueprint_loader), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_blueprint_loader, + }); // Background task: blueprint executor let blueprint_executor = blueprint_execution::BlueprintExecutor::new( datastore.clone(), + resolver.clone(), rx_blueprint.clone(), nexus_id.to_string(), ); let rx_blueprint_exec = blueprint_executor.watcher(); - driver.register( - String::from("blueprint_executor"), - String::from("Executes the target blueprint"), - config.blueprints.period_secs_execute, - Box::new(blueprint_executor), - opctx.child(BTreeMap::new()), - vec![Box::new(rx_blueprint.clone())], - task_blueprint_executor, - ); + driver.register(TaskDefinition { + name: "blueprint_executor", + description: "Executes the target blueprint", + period: config.blueprints.period_secs_execute, + task_impl: Box::new(blueprint_executor), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![Box::new(rx_blueprint.clone())], + activator: task_blueprint_executor, + }); // Background task: CockroachDB node ID collector let crdb_node_id_collector = @@ -444,15 +443,15 @@ impl BackgroundTasksInitializer { datastore.clone(), rx_blueprint.clone(), ); - driver.register( - String::from("crdb_node_id_collector"), - String::from("Collects node IDs of running CockroachDB zones"), - config.blueprints.period_secs_collect_crdb_node_ids, - Box::new(crdb_node_id_collector), - 
opctx.child(BTreeMap::new()), - vec![Box::new(rx_blueprint)], - task_crdb_node_id_collector, - ); + driver.register(TaskDefinition { + name: "crdb_node_id_collector", + description: "Collects node IDs of running CockroachDB zones", + period: config.blueprints.period_secs_collect_crdb_node_ids, + task_impl: Box::new(crdb_node_id_collector), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![Box::new(rx_blueprint)], + activator: task_crdb_node_id_collector, + }); // Background task: inventory collector // @@ -472,97 +471,96 @@ impl BackgroundTasksInitializer { config.inventory.disable, ); let inventory_watcher = collector.watcher(); - driver.register( - String::from("inventory_collection"), - String::from( + driver.register(TaskDefinition { + name: "inventory_collection", + description: "collects hardware and software inventory data from the \ whole system", - ), - config.inventory.period_secs, - Box::new(collector), - opctx.child(BTreeMap::new()), - vec![Box::new(rx_blueprint_exec)], - task_inventory_collection, - ); + period: config.inventory.period_secs, + task_impl: Box::new(collector), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![Box::new(rx_blueprint_exec)], + activator: task_inventory_collection, + }); inventory_watcher }; - driver.register( - "physical_disk_adoption".to_string(), - "ensure new physical disks are automatically marked in-service" - .to_string(), - config.physical_disk_adoption.period_secs, - Box::new(physical_disk_adoption::PhysicalDiskAdoption::new( - datastore.clone(), - inventory_watcher.clone(), - config.physical_disk_adoption.disable, - rack_id, - )), - opctx.child(BTreeMap::new()), - vec![Box::new(inventory_watcher)], - task_physical_disk_adoption, - ); - - driver.register( - "service_zone_nat_tracker".to_string(), - String::from( + driver.register(TaskDefinition { + name: "physical_disk_adoption", + description: + "ensure new physical disks are automatically marked in-service", + period: config.physical_disk_adoption.period_secs, + task_impl: Box::new( + physical_disk_adoption::PhysicalDiskAdoption::new( + datastore.clone(), + inventory_watcher.clone(), + config.physical_disk_adoption.disable, + rack_id, + ), + ), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![Box::new(inventory_watcher)], + activator: task_physical_disk_adoption, + }); + + driver.register(TaskDefinition { + name: "service_zone_nat_tracker", + description: "ensures service zone nat records are recorded in NAT RPW \ table", - ), - config.sync_service_zone_nat.period_secs, - Box::new(ServiceZoneNatTracker::new( + period: config.sync_service_zone_nat.period_secs, + task_impl: Box::new(ServiceZoneNatTracker::new( datastore.clone(), resolver.clone(), )), - opctx.child(BTreeMap::new()), - vec![], - task_service_zone_nat_tracker, - ); - - driver.register( - "switch_port_config_manager".to_string(), - String::from("manages switch port settings for rack switches"), - config.switch_port_settings_manager.period_secs, - Box::new(SwitchPortSettingsManager::new( + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_service_zone_nat_tracker, + }); + + driver.register(TaskDefinition { + name: "switch_port_config_manager", + description: "manages switch port settings for rack switches", + period: config.switch_port_settings_manager.period_secs, + task_impl: Box::new(SwitchPortSettingsManager::new( datastore.clone(), resolver.clone(), )), - opctx.child(BTreeMap::new()), - vec![], - task_switch_port_settings_manager, - ); - - driver.register( - 
"v2p_manager".to_string(), - String::from("manages opte v2p mappings for vpc networking"), - config.v2p_mapping_propagation.period_secs, - Box::new(V2PManager::new(datastore.clone())), - opctx.child(BTreeMap::new()), - vec![Box::new(v2p_watcher.1)], - task_v2p_manager, - ); + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_switch_port_settings_manager, + }); + + driver.register(TaskDefinition { + name: "v2p_manager", + description: "manages opte v2p mappings for vpc networking", + period: config.v2p_mapping_propagation.period_secs, + task_impl: Box::new(V2PManager::new(datastore.clone())), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_v2p_manager, + }); // Background task: detect if a region needs replacement and begin the // process { let detector = region_replacement::RegionReplacementDetector::new( datastore.clone(), - saga_request.clone(), + sagas.clone(), ); - driver.register( - String::from("region_replacement"), - String::from( + driver.register(TaskDefinition { + name: "region_replacement", + description: "detects if a region requires replacing and begins the \ process", - ), - config.region_replacement.period_secs, - Box::new(detector), - opctx.child(BTreeMap::new()), - vec![], - task_region_replacement, - ); + period: config.region_replacement.period_secs, + task_impl: Box::new(detector), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_replacement, + }); }; // Background task: drive region replacements forward to completion @@ -570,18 +568,18 @@ impl BackgroundTasksInitializer { let detector = region_replacement_driver::RegionReplacementDriver::new( datastore.clone(), - saga_request.clone(), + sagas.clone(), ); - driver.register( - String::from("region_replacement_driver"), - String::from("drive region replacements forward to completion"), - config.region_replacement_driver.period_secs, - Box::new(detector), - opctx.child(BTreeMap::new()), - vec![], - task_region_replacement_driver, - ); + driver.register(TaskDefinition { + name: "region_replacement_driver", + description: "drive region replacements forward to completion", + period: config.region_replacement_driver.period_secs, + task_impl: Box::new(detector), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_replacement_driver, + }); }; { @@ -590,62 +588,64 @@ impl BackgroundTasksInitializer { resolver.clone(), producer_registry, instance_watcher::WatcherIdentity { nexus_id, rack_id }, - v2p_watcher.0, + task_v2p_manager.clone(), ); - driver.register( - "instance_watcher".to_string(), - "periodically checks instance states".to_string(), - config.instance_watcher.period_secs, - Box::new(watcher), - opctx.child(BTreeMap::new()), - vec![], - task_instance_watcher, - ) + driver.register(TaskDefinition { + name: "instance_watcher", + description: "periodically checks instance states", + period: config.instance_watcher.period_secs, + task_impl: Box::new(watcher), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_instance_watcher, + }) }; // Background task: service firewall rule propagation - driver.register( - String::from("service_firewall_rule_propagation"), - String::from( + driver.register(TaskDefinition { + name: "service_firewall_rule_propagation", + description: "propagates VPC firewall rules for Omicron services with \ external network connectivity", + period: config.service_firewall_propagation.period_secs, + task_impl: Box::new( + 
service_firewall_rules::ServiceRulePropagator::new( + datastore.clone(), + ), ), - config.service_firewall_propagation.period_secs, - Box::new(service_firewall_rules::ServiceRulePropagator::new( - datastore.clone(), - )), - opctx.child(BTreeMap::new()), - vec![], - task_service_firewall_propagation, - ); + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_service_firewall_propagation, + }); // Background task: OPTE port route propagation { let watcher = vpc_routes::VpcRouteManager::new(datastore.clone()); - driver.register( - "vpc_route_manager".to_string(), - "propagates updated VPC routes to all OPTE ports".into(), - config.switch_port_settings_manager.period_secs, - Box::new(watcher), - opctx.child(BTreeMap::new()), - vec![], - task_vpc_route_manager, - ) + driver.register(TaskDefinition { + name: "vpc_route_manager", + description: "propagates updated VPC routes to all OPTE ports", + period: config.switch_port_settings_manager.period_secs, + task_impl: Box::new(watcher), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_vpc_route_manager, + }) }; // Background task: abandoned VMM reaping - driver.register( - String::from("abandoned_vmm_reaper"), - String::from( + driver.register(TaskDefinition { + name: "abandoned_vmm_reaper", + description: "deletes sled reservations for VMMs that have been abandoned \ by their instances", - ), - config.abandoned_vmm_reaper.period_secs, - Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new(datastore)), - opctx.child(BTreeMap::new()), - vec![], - task_abandoned_vmm_reaper, - ); + period: config.abandoned_vmm_reaper.period_secs, + task_impl: Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( + datastore, + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_abandoned_vmm_reaper, + }); driver } @@ -673,32 +673,35 @@ fn init_dns( dns_config::DnsConfigWatcher::new(Arc::clone(&datastore), dns_group); let dns_config_watcher = dns_config.watcher(); let task_name_config = format!("dns_config_{}", dns_group); - driver.register( - task_name_config.clone(), - format!("watches {} DNS data stored in CockroachDB", dns_group), - config.period_secs_config, - Box::new(dns_config), - opctx.child(metadata.clone()), - vec![], - task_config, - ); + driver.register(TaskDefinition { + name: task_name_config.clone(), + description: format!( + "watches {} DNS data stored in CockroachDB", + dns_group + ), + period: config.period_secs_config, + task_impl: Box::new(dns_config), + opctx: opctx.child(metadata.clone()), + watchers: vec![], + activator: task_config, + }); // Background task: DNS server list watcher let dns_servers = dns_servers::DnsServersWatcher::new(dns_group, resolver); let dns_servers_watcher = dns_servers.watcher(); let task_name_servers = format!("dns_servers_{}", dns_group); - driver.register( - task_name_servers.clone(), - format!( + driver.register(TaskDefinition { + name: task_name_servers.clone(), + description: format!( "watches list of {} DNS servers stored in internal DNS", dns_group, ), - config.period_secs_servers, - Box::new(dns_servers), - opctx.child(metadata.clone()), - vec![], - task_servers, - ); + period: config.period_secs_servers, + task_impl: Box::new(dns_servers), + opctx: opctx.child(metadata.clone()), + watchers: vec![], + activator: task_servers, + }); // Background task: DNS propagation let dns_propagate = dns_propagation::DnsPropagator::new( @@ -706,25 +709,30 @@ fn init_dns( dns_servers_watcher.clone(), config.max_concurrent_server_updates, ); - 
driver.register( - format!("dns_propagation_{}", dns_group), - format!( + driver.register(TaskDefinition { + name: format!("dns_propagation_{}", dns_group), + description: format!( "propagates latest {} DNS configuration (from {:?} background \ task) to the latest list of DNS servers (from {:?} background \ task)", dns_group, task_name_config, task_name_servers, ), - config.period_secs_propagation, - Box::new(dns_propagate), - opctx.child(metadata), - vec![Box::new(dns_config_watcher), Box::new(dns_servers_watcher)], - task_propagation, - ); + period: config.period_secs_propagation, + task_impl: Box::new(dns_propagate), + opctx: opctx.child(metadata), + watchers: vec![ + Box::new(dns_config_watcher), + Box::new(dns_servers_watcher), + ], + activator: task_propagation, + }); } #[cfg(test)] pub mod test { + use crate::app::saga::StartSaga; use dropshot::HandlerTaskMode; + use futures::FutureExt; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; @@ -733,9 +741,39 @@ pub mod test { use nexus_types::internal_api::params as nexus_params; use omicron_test_utils::dev::poll; use std::net::SocketAddr; + use std::sync::atomic::AtomicU64; + use std::sync::atomic::Ordering; use std::time::Duration; use tempfile::TempDir; + /// Used by various tests of tasks that kick off sagas + pub(crate) struct NoopStartSaga { + count: AtomicU64, + } + + impl NoopStartSaga { + pub(crate) fn new() -> Self { + Self { count: AtomicU64::new(0) } + } + + pub(crate) fn count_reset(&self) -> u64 { + self.count.swap(0, Ordering::SeqCst) + } + } + + impl StartSaga for NoopStartSaga { + fn saga_start( + &self, + _: steno::SagaDag, + ) -> futures::prelude::future::BoxFuture< + '_, + Result<(), omicron_common::api::external::Error>, + > { + let _ = self.count.fetch_add(1, Ordering::SeqCst); + async { Ok(()) }.boxed() + } + } + type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index 253a89a18d..16bf872f2a 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -7,6 +7,7 @@ use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; +use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::{Blueprint, BlueprintTarget}; @@ -18,6 +19,7 @@ use tokio::sync::watch; /// the state of the system based on the `Blueprint`. 
pub struct BlueprintExecutor { datastore: Arc, + resolver: Resolver, rx_blueprint: watch::Receiver>>, nexus_label: String, tx: watch::Sender, @@ -26,13 +28,14 @@ pub struct BlueprintExecutor { impl BlueprintExecutor { pub fn new( datastore: Arc, + resolver: Resolver, rx_blueprint: watch::Receiver< Option>, >, nexus_label: String, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); - BlueprintExecutor { datastore, rx_blueprint, nexus_label, tx } + BlueprintExecutor { datastore, resolver, rx_blueprint, nexus_label, tx } } pub fn watcher(&self) -> watch::Receiver { @@ -76,6 +79,7 @@ impl BlueprintExecutor { let result = nexus_reconfigurator_execution::realize_blueprint( opctx, &self.datastore, + &self.resolver, blueprint, &self.nexus_label, ) @@ -129,9 +133,11 @@ mod test { use nexus_types::external_api::views::SledState; use nexus_types::inventory::OmicronZoneDataset; use omicron_common::api::external::Generation; + use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; + use omicron_uuid_kinds::ZpoolUuid; use serde::Deserialize; use serde_json::json; use std::collections::BTreeMap; @@ -184,6 +190,7 @@ mod test { // Set up the test. let nexus = &cptestctx.server.server_context().nexus; let datastore = nexus.datastore(); + let resolver = nexus.resolver(); let opctx = OpContext::for_background( cptestctx.logctx.log.clone(), nexus.authz.clone(), @@ -230,6 +237,7 @@ mod test { let (blueprint_tx, blueprint_rx) = watch::channel(None); let mut task = BlueprintExecutor::new( datastore.clone(), + resolver.clone(), blueprint_rx, String::from("test-suite"), ); @@ -260,16 +268,18 @@ mod test { fn make_zones( disposition: BlueprintZoneDisposition, ) -> BlueprintZonesConfig { + let pool_id = ZpoolUuid::new_v4(); BlueprintZonesConfig { generation: Generation::new(), zones: vec![BlueprintZoneConfig { disposition, id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(ZpoolName::new_external(pool_id)), zone_type: BlueprintZoneType::InternalDns( blueprint_zone_type::InternalDns { dataset: OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) + pool_name: format!("oxp_{}", pool_id) .parse() .unwrap(), }, diff --git a/nexus/src/app/background/tasks/crdb_node_id_collector.rs b/nexus/src/app/background/tasks/crdb_node_id_collector.rs index 2a0e1c6d3d..d33dfe2634 100644 --- a/nexus/src/app/background/tasks/crdb_node_id_collector.rs +++ b/nexus/src/app/background/tasks/crdb_node_id_collector.rs @@ -185,7 +185,7 @@ async fn ensure_node_id_known( let admin_client = cockroach_admin_client::Client::new(&admin_url, opctx.log.clone()); let node = admin_client - .node_id() + .local_node_id() .await .with_context(|| { format!("failed to fetch node ID for zone {zone_id} at {admin_url}") @@ -241,8 +241,10 @@ mod tests { use nexus_test_utils::db::test_setup_database; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; + use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev; use omicron_uuid_kinds::SledUuid; + use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; use std::iter; use std::net::SocketAddr; @@ -265,16 +267,18 @@ mod tests { .get_mut(&sled_id) .expect("found entry for test sled"); + let zpool_id = ZpoolUuid::new_v4(); let make_crdb_zone_config = |disposition, id, addr: SocketAddrV6| BlueprintZoneConfig { disposition, id, underlay_address: *addr.ip(), + filesystem_pool: 
Some(ZpoolName::new_external(zpool_id)), zone_type: BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address: addr, dataset: nexus_types::inventory::OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) + pool_name: format!("oxp_{}", zpool_id) .parse() .unwrap(), }, @@ -312,6 +316,7 @@ mod tests { disposition: BlueprintZoneDisposition::InService, id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), + filesystem_pool: Some(ZpoolName::new_external(ZpoolUuid::new_v4())), zone_type: BlueprintZoneType::CruciblePantry( blueprint_zone_type::CruciblePantry { address: "[::1]:0".parse().unwrap(), @@ -497,7 +502,7 @@ mod tests { // Node 1 succeeds. admin1.expect(Expectation::matching(any()).times(1).respond_with( - json_encoded(cockroach_admin_client::types::NodeId { + json_encoded(cockroach_admin_client::types::LocalNodeId { zone_id: crdb_zone_id1, node_id: crdb_node_id1.to_string(), }), @@ -510,7 +515,7 @@ mod tests { ); // Node 3 succeeds, but with an unexpected zone_id. admin3.expect(Expectation::matching(any()).times(1).respond_with( - json_encoded(cockroach_admin_client::types::NodeId { + json_encoded(cockroach_admin_client::types::LocalNodeId { zone_id: crdb_zone_id4, node_id: crdb_node_id3.to_string(), }), diff --git a/nexus/src/app/background/tasks/instance_watcher.rs b/nexus/src/app/background/tasks/instance_watcher.rs index a6e579eb8a..ce202a2a08 100644 --- a/nexus/src/app/background/tasks/instance_watcher.rs +++ b/nexus/src/app/background/tasks/instance_watcher.rs @@ -4,6 +4,7 @@ //! Background task for pulling instance state from sled-agents. +use crate::app::background::Activator; use crate::app::background::BackgroundTask; use futures::{future::BoxFuture, FutureExt}; use http::StatusCode; @@ -37,7 +38,7 @@ pub(crate) struct InstanceWatcher { resolver: internal_dns::resolver::Resolver, metrics: Arc>, id: WatcherIdentity, - v2p_notification_tx: tokio::sync::watch::Sender<()>, + v2p_manager: Activator, } const MAX_SLED_AGENTS: NonZeroU32 = unsafe { @@ -51,13 +52,13 @@ impl InstanceWatcher { resolver: internal_dns::resolver::Resolver, producer_registry: &ProducerRegistry, id: WatcherIdentity, - v2p_notification_tx: tokio::sync::watch::Sender<()>, + v2p_manager: Activator, ) -> Self { let metrics = Arc::new(Mutex::new(metrics::Metrics::default())); producer_registry .register_producer(metrics::Producer(metrics.clone())) .unwrap(); - Self { datastore, resolver, metrics, id, v2p_notification_tx } + Self { datastore, resolver, metrics, id, v2p_manager } } fn check_instance( @@ -77,7 +78,7 @@ impl InstanceWatcher { .collect(), ); let client = client.clone(); - let v2p_notification_tx = self.v2p_notification_tx.clone(); + let v2p_manager = self.v2p_manager.clone(); async move { slog::trace!(opctx.log, "checking on instance..."); @@ -162,7 +163,7 @@ impl InstanceWatcher { &opctx.log, &InstanceUuid::from_untyped_uuid(target.instance_id), &new_runtime_state, - v2p_notification_tx, + &v2p_manager, ) .await .map_err(|e| { diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index 9e14c294ba..f852f21734 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -12,7 +12,10 @@ use crate::app::authn; use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; use crate::app::sagas; +use crate::app::sagas::region_replacement_start::SagaRegionReplacementStart; +use crate::app::sagas::NexusSaga; use 
crate::app::RegionAllocationStrategy; use futures::future::BoxFuture; use futures::FutureExt; @@ -23,39 +26,31 @@ use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::TypedUuid; use serde_json::json; use std::sync::Arc; -use tokio::sync::mpsc::error::SendError; -use tokio::sync::mpsc::Sender; pub struct RegionReplacementDetector { datastore: Arc, - saga_request: Sender, + sagas: Arc, } impl RegionReplacementDetector { - pub fn new( - datastore: Arc, - saga_request: Sender, - ) -> Self { - RegionReplacementDetector { datastore, saga_request } + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionReplacementDetector { datastore, sagas } } async fn send_start_request( &self, serialized_authn: authn::saga::Serialized, request: RegionReplacement, - ) -> Result<(), SendError> { - let saga_request = sagas::SagaRequest::RegionReplacementStart { - params: sagas::region_replacement_start::Params { - serialized_authn, - request, - allocation_strategy: - RegionAllocationStrategy::RandomWithDistinctSleds { - seed: None, - }, - }, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_replacement_start::Params { + serialized_authn, + request, + allocation_strategy: + RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, }; - self.saga_request.send(saga_request).await + let saga_dag = SagaRegionReplacementStart::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await } } @@ -201,9 +196,9 @@ impl BackgroundTask for RegionReplacementDetector { #[cfg(test)] mod test { use super::*; + use crate::app::background::init::test::NoopStartSaga; use nexus_db_model::RegionReplacement; use nexus_test_utils_macros::nexus_test; - use tokio::sync::mpsc; use uuid::Uuid; type ControlPlaneTestContext = @@ -220,9 +215,9 @@ mod test { datastore.clone(), ); - let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let starter = Arc::new(NoopStartSaga::new()); let mut task = - RegionReplacementDetector::new(datastore.clone(), saga_request_tx); + RegionReplacementDetector::new(datastore.clone(), starter.clone()); // Noop test let result = task.activate(&opctx).await; @@ -253,6 +248,6 @@ mod test { }) ); - saga_request_rx.try_recv().unwrap(); + assert_eq!(starter.count_reset(), 1); } } diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 06155ffa24..9dd8b6055f 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -20,7 +20,11 @@ use crate::app::authn; use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; use crate::app::sagas; +use crate::app::sagas::region_replacement_drive::SagaRegionReplacementDrive; +use crate::app::sagas::region_replacement_finish::SagaRegionReplacementFinish; +use crate::app::sagas::NexusSaga; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_queries::context::OpContext; @@ -28,19 +32,15 @@ use nexus_db_queries::db::DataStore; use nexus_types::internal_api::background::RegionReplacementDriverStatus; use serde_json::json; use std::sync::Arc; -use tokio::sync::mpsc::Sender; pub struct RegionReplacementDriver { datastore: Arc, - saga_request: Sender, + sagas: Arc, } impl RegionReplacementDriver { - pub fn new( - datastore: Arc, - saga_request: Sender, - ) -> Self { - RegionReplacementDriver { datastore, saga_request } + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionReplacementDriver { datastore, sagas } } /// Drive 
running region replacements forward @@ -119,36 +119,32 @@ impl RegionReplacementDriver { // (or determine if it is complete). let request_id = request.id; - - let result = self - .saga_request - .send(sagas::SagaRequest::RegionReplacementDrive { - params: sagas::region_replacement_drive::Params { - serialized_authn: - authn::saga::Serialized::for_opctx(opctx), - request, - }, - }) - .await; - + let params = sagas::region_replacement_drive::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + request, + }; + let result = async { + let saga_dag = + SagaRegionReplacementDrive::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + .await; match result { - Ok(()) => { - let s = format!("{request_id}: drive invoked ok"); - + Ok(_) => { + let s = format!("{request_id}: drive saga started ok"); info!(&log, "{s}"); status.drive_invoked_ok.push(s); } - Err(e) => { let s = format!( - "sending region replacement drive request for \ + "starting region replacement drive saga for \ {request_id} failed: {e}", ); error!(&log, "{s}"); status.errors.push(s); } - }; + } } } } @@ -193,22 +189,19 @@ impl RegionReplacementDriver { }; let request_id = request.id; - - let result = - self.saga_request - .send(sagas::SagaRequest::RegionReplacementFinish { - params: sagas::region_replacement_finish::Params { - serialized_authn: - authn::saga::Serialized::for_opctx(opctx), - region_volume_id: old_region_volume_id, - request, - }, - }) - .await; - + let params = sagas::region_replacement_finish::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + region_volume_id: old_region_volume_id, + request, + }; + let result = async { + let saga_dag = SagaRegionReplacementFinish::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + .await; match result { - Ok(()) => { - let s = format!("{request_id}: finish invoked ok"); + Ok(_) => { + let s = format!("{request_id}: finish saga started ok"); info!(&log, "{s}"); status.finish_invoked_ok.push(s); @@ -216,14 +209,14 @@ impl RegionReplacementDriver { Err(e) => { let s = format!( - "sending region replacement finish request for \ + "starting region replacement finish saga for \ {request_id} failed: {e}" ); error!(&log, "{s}"); status.errors.push(s); } - }; + } } } } @@ -253,6 +246,7 @@ impl BackgroundTask for RegionReplacementDriver { #[cfg(test)] mod test { use super::*; + use crate::app::background::init::test::NoopStartSaga; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use nexus_db_model::Region; @@ -268,7 +262,6 @@ mod test { use omicron_uuid_kinds::UpstairsKind; use omicron_uuid_kinds::UpstairsRepairKind; use omicron_uuid_kinds::UpstairsSessionKind; - use tokio::sync::mpsc; use uuid::Uuid; type ControlPlaneTestContext = @@ -285,9 +278,9 @@ mod test { datastore.clone(), ); - let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let starter = Arc::new(NoopStartSaga::new()); let mut task = - RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + RegionReplacementDriver::new(datastore.clone(), starter.clone()); // Noop test let result = task.activate(&opctx).await; @@ -320,17 +313,12 @@ mod test { assert_eq!( result.drive_invoked_ok, - vec![format!("{request_id}: drive invoked ok")] + vec![format!("{request_id}: drive saga started ok")] ); assert!(result.finish_invoked_ok.is_empty()); assert!(result.errors.is_empty()); - let request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - request, - sagas::SagaRequest::RegionReplacementDrive { .. 
} - )); + assert_eq!(starter.count_reset(), 1); } #[nexus_test(server = crate::Server)] @@ -344,9 +332,9 @@ mod test { datastore.clone(), ); - let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let starter = Arc::new(NoopStartSaga::new()); let mut task = - RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + RegionReplacementDriver::new(datastore.clone(), starter.clone()); // Noop test let result = task.activate(&opctx).await; @@ -421,16 +409,11 @@ mod test { assert!(result.drive_invoked_ok.is_empty()); assert_eq!( result.finish_invoked_ok, - vec![format!("{request_id}: finish invoked ok")] + vec![format!("{request_id}: finish saga started ok")] ); assert!(result.errors.is_empty()); - let request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - request, - sagas::SagaRequest::RegionReplacementFinish { .. } - )); + assert_eq!(starter.count_reset(), 1); } #[nexus_test(server = crate::Server)] @@ -444,9 +427,9 @@ mod test { datastore.clone(), ); - let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let starter = Arc::new(NoopStartSaga::new()); let mut task = - RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + RegionReplacementDriver::new(datastore.clone(), starter.clone()); // Noop test let result = task.activate(&opctx).await; @@ -519,17 +502,12 @@ mod test { assert_eq!( result.drive_invoked_ok, - vec![format!("{request_id}: drive invoked ok")] + vec![format!("{request_id}: drive saga started ok")] ); assert!(result.finish_invoked_ok.is_empty()); assert!(result.errors.is_empty()); - let saga_request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - saga_request, - sagas::SagaRequest::RegionReplacementDrive { .. } - )); + assert_eq!(starter.count_reset(), 1); // Now, pretend that an Upstairs sent a notification that it // successfully finished a repair @@ -580,12 +558,7 @@ mod test { ); } - let saga_request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - saga_request, - sagas::SagaRequest::RegionReplacementFinish { .. } - )); + assert_eq!(starter.count_reset(), 1); } #[nexus_test(server = crate::Server)] @@ -599,9 +572,9 @@ mod test { datastore.clone(), ); - let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let starter = Arc::new(NoopStartSaga::new()); let mut task = - RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + RegionReplacementDriver::new(datastore.clone(), starter.clone()); // Noop test let result = task.activate(&opctx).await; @@ -673,17 +646,12 @@ mod test { assert_eq!( result.drive_invoked_ok, - vec![format!("{request_id}: drive invoked ok")] + vec![format!("{request_id}: drive saga started ok")] ); assert!(result.finish_invoked_ok.is_empty()); assert!(result.errors.is_empty()); - let saga_request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - saga_request, - sagas::SagaRequest::RegionReplacementDrive { .. } - )); + assert_eq!(starter.count_reset(), 1); // Now, pretend that an Upstairs sent a notification that it failed to // finish a repair @@ -721,16 +689,11 @@ mod test { assert_eq!( result.drive_invoked_ok, - vec![format!("{request_id}: drive invoked ok")] + vec![format!("{request_id}: drive saga started ok")] ); assert!(result.finish_invoked_ok.is_empty()); assert!(result.errors.is_empty()); - let saga_request = saga_request_rx.try_recv().unwrap(); - - assert!(matches!( - saga_request, - sagas::SagaRequest::RegionReplacementDrive { .. 
} - )); + assert_eq!(starter.count_reset(), 1); } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index a61cceda8b..a41fa0bd4e 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1546,13 +1546,13 @@ impl super::Nexus { ) -> Result<(), Error> { notify_instance_updated( &self.datastore(), - &self.resolver().await, + self.resolver(), &self.opctx_alloc, opctx, &self.log, instance_id, new_runtime_state, - self.v2p_notification_tx.clone(), + &self.background_tasks.task_v2p_manager, ) .await?; self.vpc_needed_notify_sleds(); @@ -2005,7 +2005,7 @@ pub(crate) async fn notify_instance_updated( log: &slog::Logger, instance_id: &InstanceUuid, new_runtime_state: &nexus::SledInstanceState, - v2p_notification_tx: tokio::sync::watch::Sender<()>, + v2p_manager: &crate::app::background::Activator, ) -> Result, Error> { let propolis_id = new_runtime_state.propolis_id; @@ -2045,7 +2045,7 @@ pub(crate) async fn notify_instance_updated( &authz_instance, db_instance.runtime(), &new_runtime_state.instance_state, - v2p_notification_tx.clone(), + v2p_manager, ) .await?; diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index c2d46c5499..5f5274dea2 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -4,6 +4,7 @@ //! Routines that manage instance-related networking state. +use crate::app::background; use crate::app::switch_port; use ipnetwork::IpNetwork; use nexus_db_model::ExternalIp; @@ -73,7 +74,7 @@ impl Nexus { instance_ensure_dpd_config( &self.db_datastore, &self.log, - &self.resolver().await, + self.resolver(), opctx, &self.opctx_alloc, instance_id, @@ -134,11 +135,10 @@ impl Nexus { opctx: &OpContext, authz_instance: &authz::Instance, ) -> Result<(), Error> { - let resolver = self.resolver().await; instance_delete_dpd_config( &self.db_datastore, &self.log, - &resolver, + self.resolver(), opctx, &self.opctx_alloc, authz_instance, @@ -177,7 +177,7 @@ impl Nexus { ) -> Result<(), Error> { delete_dpd_config_by_entry( &self.db_datastore, - &self.resolver().await, + self.resolver(), &self.log, opctx, &self.opctx_alloc, @@ -198,7 +198,7 @@ impl Nexus { probe_delete_dpd_config( &self.db_datastore, &self.log, - &self.resolver().await, + self.resolver(), opctx, &self.opctx_alloc, probe_id, @@ -267,7 +267,7 @@ pub(crate) async fn ensure_updated_instance_network_config( authz_instance: &authz::Instance, prev_instance_state: &db::model::InstanceRuntimeState, new_instance_state: &nexus::InstanceRuntimeState, - v2p_notification_tx: tokio::sync::watch::Sender<()>, + v2p_manager: &background::Activator, ) -> Result<(), Error> { let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id()); @@ -298,7 +298,7 @@ pub(crate) async fn ensure_updated_instance_network_config( opctx, opctx_alloc, authz_instance, - v2p_notification_tx, + v2p_manager, ) .await?; return Ok(()); @@ -379,13 +379,7 @@ pub(crate) async fn ensure_updated_instance_network_config( Err(e) => return Err(e), }; - if let Err(e) = v2p_notification_tx.send(()) { - error!( - log, - "error notifying background task of v2p change"; - "error" => ?e - ) - }; + v2p_manager.activate(); let (.., sled) = LookupPath::new(opctx, datastore).sled_id(new_sled_id).fetch().await?; @@ -703,15 +697,9 @@ async fn clear_instance_networking_state( opctx: &OpContext, opctx_alloc: &OpContext, authz_instance: &authz::Instance, - v2p_notification_tx: tokio::sync::watch::Sender<()>, + v2p_manager: &background::Activator, ) -> Result<(), Error> { - if let Err(e) = 
v2p_notification_tx.send(()) { - error!( - log, - "error notifying background task of v2p change"; - "error" => ?e - ) - }; + v2p_manager.activate(); instance_delete_dpd_config( datastore, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index b6874fa417..cee62f1107 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -7,7 +7,6 @@ use self::external_endpoints::NexusCertResolver; use self::saga::SagaExecutor; use crate::app::oximeter::LazyTimeseriesClient; -use crate::app::sagas::SagaRequest; use crate::populate::populate_start; use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; @@ -134,7 +133,7 @@ pub struct Nexus { authz: Arc, /// saga execution coordinator - sagas: SagaExecutor, + sagas: Arc, /// Task representing completion of recovered Sagas recovery_task: std::sync::Mutex>, @@ -206,9 +205,6 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, - - /// Channel for notifying background task of change to opte v2p state - v2p_notification_tx: tokio::sync::watch::Sender<()>, } impl Nexus { @@ -252,10 +248,10 @@ impl Nexus { sec_store, )); - let sagas = SagaExecutor::new( + let sagas = Arc::new(SagaExecutor::new( Arc::clone(&sec_client), log.new(o!("component" => "SagaExecutor")), - ); + )); let client_state = dpd_client::ClientState { tag: String::from("nexus"), @@ -405,10 +401,6 @@ impl Nexus { Arc::clone(&db_datastore) as Arc, ); - let v2p_watcher_channel = tokio::sync::watch::channel(()); - - let (saga_request, mut saga_request_recv) = SagaRequest::channel(); - let (background_tasks_initializer, background_tasks) = background::BackgroundTasksInitializer::new(); @@ -465,7 +457,6 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), - v2p_notification_tx: v2p_watcher_channel.0.clone(), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -523,42 +514,15 @@ impl Nexus { rack_id, task_config.deployment.id, resolver, - saga_request, - v2p_watcher_channel.clone(), + task_nexus.sagas.clone(), task_registry, ); if let Err(_) = task_nexus.background_tasks_driver.set(driver) { - panic!( - "concurrent initialization of \ - background_tasks_driver?!" - ) + panic!("multiple initialization of background_tasks_driver"); } }); - // Spawn a task to receive SagaRequests from RPWs, and execute them - { - let nexus = nexus.clone(); - tokio::spawn(async move { - loop { - match saga_request_recv.recv().await { - None => { - // If this channel is closed, then RPWs will not be - // able to request that sagas be run. This will - // likely only occur when Nexus itself is shutting - // down, so emit an error and exit the task. - error!(&nexus.log, "saga request channel closed!"); - break; - } - - Some(saga_request) => { - nexus.handle_saga_request(saga_request).await; - } - } - } - }); - } - Ok(nexus) } @@ -941,112 +905,23 @@ impl Nexus { *mid } - pub(crate) async fn resolver(&self) -> internal_dns::resolver::Resolver { - self.internal_resolver.clone() - } - - /// Reliable persistent workflows can request that sagas be executed by - /// sending a SagaRequest to a supplied channel. Execute those here. 
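A note on the `Activator` change above (instance_watcher.rs and instance_network.rs): replacing `tokio::sync::watch::Sender<()>` with a `background::Activator` handle means callers simply call `activate()` and never have to log a send error when no receiver is listening. The sketch below illustrates the same never-fails, coalescing-wakeup semantics using `tokio::sync::Notify` as a stand-in; it is not the actual `Activator` implementation, only an illustration of the idea:

    use std::sync::Arc;
    use tokio::sync::Notify;

    #[tokio::main]
    async fn main() {
        // Stand-in "activator": any number of clones can wake the background
        // task, and waking it is infallible.
        let activator = Arc::new(Notify::new());

        let task = {
            let activator = Arc::clone(&activator);
            tokio::spawn(async move {
                // Background task: wait until activated, then do one pass of
                // work (e.g. reconcile v2p mappings).
                activator.notified().await;
                println!("task activated; doing one pass of work");
            })
        };

        // Caller side: with a watch::Sender<()> this was a fallible send that
        // had to be logged on error; an activation handle simply fires.
        activator.notify_one();

        task.await.unwrap();
    }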
- pub(crate) async fn handle_saga_request( - self: &Arc, - saga_request: SagaRequest, - ) { - match saga_request { - #[cfg(test)] - SagaRequest::TestOnly => { - unimplemented!(); - } - - SagaRequest::RegionReplacementStart { params } => { - let nexus = self.clone(); - tokio::spawn(async move { - let saga_result = nexus - .sagas - .saga_execute::( - params, - ) - .await; - - match saga_result { - Ok(_) => { - info!( - nexus.log, - "region replacement start saga completed ok" - ); - } - - Err(e) => { - warn!(nexus.log, "region replacement start saga returned an error: {e}"); - } - } - }); - } - - SagaRequest::RegionReplacementDrive { params } => { - let nexus = self.clone(); - tokio::spawn(async move { - let saga_result = nexus - .sagas - .saga_execute::( - params, - ) - .await; - - match saga_result { - Ok(_) => { - info!( - nexus.log, - "region replacement drive saga completed ok" - ); - } - - Err(e) => { - warn!(nexus.log, "region replacement drive saga returned an error: {e}"); - } - } - }); - } - - SagaRequest::RegionReplacementFinish { params } => { - let nexus = self.clone(); - tokio::spawn(async move { - let saga_result = nexus - .sagas - .saga_execute::( - params, - ) - .await; - - match saga_result { - Ok(_) => { - info!( - nexus.log, - "region replacement finish saga completed ok" - ); - } - - Err(e) => { - warn!(nexus.log, "region replacement finish saga returned an error: {e}"); - } - } - }); - } - } + pub fn resolver(&self) -> &internal_dns::resolver::Resolver { + &self.internal_resolver } pub(crate) async fn dpd_clients( &self, ) -> Result, String> { - let resolver = self.resolver().await; - dpd_clients(&resolver, &self.log).await + let resolver = self.resolver(); + dpd_clients(resolver, &self.log).await } pub(crate) async fn mg_clients( &self, ) -> Result, String> { - let resolver = self.resolver().await; + let resolver = self.resolver(); let mappings = - switch_zone_address_mappings(&resolver, &self.log).await?; + switch_zone_address_mappings(resolver, &self.log).await?; let mut clients: Vec<(SwitchLocation, mg_admin_client::Client)> = vec![]; for (location, addr) in &mappings { diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index baa9513d7b..ed4ccf44fd 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -49,12 +49,12 @@ //! or inject errors. use super::sagas::NexusSaga; -use super::sagas::SagaInitError; use super::sagas::ACTION_REGISTRY; use crate::saga_interface::SagaContext; use crate::Nexus; use anyhow::Context; use futures::future::BoxFuture; +use futures::FutureExt; use futures::StreamExt; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -66,10 +66,8 @@ use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; use std::sync::Arc; use std::sync::OnceLock; -use steno::DagBuilder; use steno::SagaDag; use steno::SagaId; -use steno::SagaName; use steno::SagaResult; use steno::SagaResultOk; use uuid::Uuid; @@ -79,12 +77,31 @@ use uuid::Uuid; pub(crate) fn create_saga_dag( params: N::Params, ) -> Result { - let builder = DagBuilder::new(SagaName::new(N::NAME)); - let dag = N::make_saga_dag(¶ms, builder)?; - let params = serde_json::to_value(¶ms).map_err(|e| { - SagaInitError::SerializeError(String::from("saga params"), e) - })?; - Ok(SagaDag::new(dag, params)) + N::prepare(¶ms) +} + +/// Interface for kicking off sagas +/// +/// See [`SagaExecutor`] for the implementation within Nexus. Some tests use +/// alternate implementations that don't actually run the sagas. 
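The `StartSaga` trait introduced here replaces the `SagaRequest` channel being deleted above: a background task holds an `Arc<dyn StartSaga>`, builds a DAG with `NexusSaga::prepare`, and hands it off without waiting for the saga to finish. Below is a minimal, self-contained sketch of that shape; `SagaDag` and `Error` are stand-in aliases here, not the real steno/omicron types:

    use futures::future::BoxFuture;
    use futures::FutureExt;
    use std::sync::Arc;

    // Stand-ins for steno::SagaDag and omicron_common::api::external::Error.
    type SagaDag = String;
    type Error = String;

    trait StartSaga: Send + Sync {
        // Start the saga described by `dag`, but do not wait for it to finish.
        fn saga_start(&self, dag: SagaDag) -> BoxFuture<'_, Result<(), Error>>;
    }

    struct PrintingStarter;

    impl StartSaga for PrintingStarter {
        fn saga_start(&self, dag: SagaDag) -> BoxFuture<'_, Result<(), Error>> {
            async move {
                // A real executor would prepare and launch the saga here.
                println!("starting saga from dag: {dag}");
                Ok(())
            }
            .boxed()
        }
    }

    // A background task only needs the trait object; tests can substitute a
    // counting no-op implementation instead of the real SagaExecutor.
    async fn kick_off(sagas: &Arc<dyn StartSaga>) -> Result<(), Error> {
        // Stand-in for building the DAG via `prepare(&params)?`.
        let dag = String::from("region-replacement-start");
        sagas.saga_start(dag).await
    }

    #[tokio::main]
    async fn main() -> Result<(), Error> {
        let sagas: Arc<dyn StartSaga> = Arc::new(PrintingStarter);
        kick_off(&sagas).await
    }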
+pub(crate) trait StartSaga: Send + Sync { + /// Create a new saga (of type `N` with parameters `params`), start it + /// running, but do not wait for it to finish. + fn saga_start(&self, dag: SagaDag) -> BoxFuture<'_, Result<(), Error>>; +} + +impl StartSaga for SagaExecutor { + fn saga_start(&self, dag: SagaDag) -> BoxFuture<'_, Result<(), Error>> { + async move { + let runnable_saga = self.saga_prepare(dag).await?; + // start() returns a future that can be used to wait for the saga to + // complete. We don't need that here. (Cancelling this has no + // effect on the running saga.) + let _ = runnable_saga.start().await?; + Ok(()) + } + .boxed() + } } /// Handle to a self-contained subsystem for kicking off sagas @@ -242,7 +259,6 @@ impl SagaExecutor { &self, params: N::Params, ) -> Result { - // Construct the DAG specific to this saga. let dag = create_saga_dag::(params)?; let runnable_saga = self.saga_prepare(dag).await?; let running_saga = runnable_saga.start().await?; diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 44dd72c571..eaf144eaa9 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -27,7 +27,6 @@ pub(crate) async fn get_pantry_address( ) -> Result { nexus .resolver() - .await .lookup_socket_v6(ServiceName::CruciblePantry) .await .map_err(|e| e.to_string()) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 204d004938..bdccd7f79b 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -829,7 +829,6 @@ pub(crate) mod test { use nexus_db_queries::context::OpContext; use nexus_db_queries::{authn::saga::Serialized, db::datastore::DataStore}; use nexus_test_utils::resource_helpers::create_project; - use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -839,6 +838,8 @@ pub(crate) mod test { type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; + type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, crate::Server>; const DISK_NAME: &str = "my-disk"; const PROJECT_NAME: &str = "springfield-squidport"; @@ -977,9 +978,9 @@ pub(crate) mod test { async fn no_region_allocations_exist( datastore: &DataStore, - test: &DiskTest, + test: &DiskTest<'_>, ) -> bool { - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { if datastore .regions_total_occupied_size(dataset.id) @@ -996,9 +997,9 @@ pub(crate) mod test { async fn no_regions_ensured( sled_agent: &SledAgent, - test: &DiskTest, + test: &DiskTest<'_>, ) -> bool { - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { let crucible_dataset = sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; @@ -1012,7 +1013,7 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, - test: &DiskTest, + test: &DiskTest<'_>, ) { let sled_agent = &cptestctx.sled_agent.sled_agent; let datastore = cptestctx.server.server_context().nexus.datastore(); diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 1604d6013d..d278fb5600 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -15,9 +15,11 @@ use std::sync::Arc; use steno::new_action_noop_undo; use steno::ActionContext; use steno::ActionError; +use steno::DagBuilder; +use steno::SagaDag; 
+use steno::SagaName; use steno::SagaType; use thiserror::Error; -use tokio::sync::mpsc; use uuid::Uuid; pub mod disk_create; @@ -70,6 +72,17 @@ pub(crate) trait NexusSaga { params: &Self::Params, builder: steno::DagBuilder, ) -> Result; + + fn prepare( + params: &Self::Params, + ) -> Result { + let builder = DagBuilder::new(SagaName::new(Self::NAME)); + let dag = Self::make_saga_dag(¶ms, builder)?; + let params = serde_json::to_value(¶ms).map_err(|e| { + SagaInitError::SerializeError(format!("saga params: {params:?}"), e) + })?; + Ok(SagaDag::new(dag, params)) + } } #[derive(Debug, Error)] @@ -318,35 +331,3 @@ pub(crate) use __action_name; pub(crate) use __emit_action; pub(crate) use __stringify_ident; pub(crate) use declare_saga_actions; - -/// Reliable persistent workflows can request that sagas be run as part of their -/// activation by sending a SagaRequest through a supplied channel to Nexus. -pub enum SagaRequest { - #[cfg(test)] - TestOnly, - - RegionReplacementStart { - params: region_replacement_start::Params, - }, - - RegionReplacementDrive { - params: region_replacement_drive::Params, - }, - - RegionReplacementFinish { - params: region_replacement_finish::Params, - }, -} - -impl SagaRequest { - pub fn channel() -> (mpsc::Sender, mpsc::Receiver) - { - // Limit the maximum number of saga requests that background tasks can - // queue for Nexus to run. - // - // Note this value was chosen arbitrarily! - const MAX_QUEUED_SAGA_REQUESTS: usize = 128; - - mpsc::channel(MAX_QUEUED_SAGA_REQUESTS) - } -} diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index b71944a460..be16656813 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -799,7 +799,6 @@ pub(crate) mod test { use nexus_db_queries::context::OpContext; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_project; - use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::identity::Asset; use sled_agent_client::types::VolumeConstructionRequest; @@ -807,6 +806,8 @@ pub(crate) mod test { type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; + type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, crate::Server>; const DISK_NAME: &str = "my-disk"; const PROJECT_NAME: &str = "springfield-squidport"; @@ -816,7 +817,7 @@ pub(crate) mod test { cptestctx: &ControlPlaneTestContext, ) { let mut disk_test = DiskTest::new(cptestctx).await; - disk_test.add_zpool_with_dataset(&cptestctx).await; + disk_test.add_zpool_with_dataset(cptestctx.first_sled()).await; let client = &cptestctx.external_client; let nexus = &cptestctx.server.server_context().nexus; @@ -1008,7 +1009,7 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, - test: &DiskTest, + test: &DiskTest<'_>, request: &RegionReplacement, affected_volume_original: &Volume, affected_region_original: &Region, @@ -1032,11 +1033,11 @@ pub(crate) mod test { async fn three_region_allocations_exist( datastore: &DataStore, - test: &DiskTest, + test: &DiskTest<'_>, ) -> bool { let mut count = 0; - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { if datastore .regions_total_occupied_size(dataset.id) @@ -1131,7 +1132,7 @@ pub(crate) mod test { cptestctx: &ControlPlaneTestContext, ) { let mut disk_test = DiskTest::new(cptestctx).await; - 
disk_test.add_zpool_with_dataset(&cptestctx).await; + disk_test.add_zpool_with_dataset(cptestctx.first_sled()).await; let log = &cptestctx.logctx.log; @@ -1208,7 +1209,7 @@ pub(crate) mod test { cptestctx: &ControlPlaneTestContext, ) { let mut disk_test = DiskTest::new(cptestctx).await; - disk_test.add_zpool_with_dataset(&cptestctx).await; + disk_test.add_zpool_with_dataset(cptestctx.first_sled()).await; let client = &cptestctx.external_client; let nexus = &cptestctx.server.server_context().nexus; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index a16ec6932e..9e665a1de1 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1617,7 +1617,6 @@ mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::delete_disk; use nexus_test_utils::resource_helpers::object_create; - use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::InstanceDiskAttachment; use omicron_common::api::external::ByteCount; @@ -1630,6 +1629,9 @@ mod test { use sled_agent_client::TestInterfaces as SledAgentTestInterfaces; use std::str::FromStr; + type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, crate::Server>; + #[test] fn test_create_snapshot_from_disk_modify_request() { let disk = VolumeConstructionRequest::Volume { @@ -1956,7 +1958,7 @@ mod test { async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, - test: &DiskTest, + test: &DiskTest<'_>, ) { // Verifies: // - No disk records exist diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 7d69e6b3b0..38cdac5fcb 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -53,6 +53,7 @@ use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::api::internal::shared::SwitchLocation; +use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; use omicron_test_utils::dev; use omicron_uuid_kinds::ExternalIpUuid; @@ -129,6 +130,14 @@ pub struct ControlPlaneTestContext { } impl ControlPlaneTestContext { + pub fn first_sled(&self) -> SledUuid { + SledUuid::from_untyped_uuid(self.sled_agent.sled_agent.id) + } + + pub fn all_sled_agents(&self) -> impl Iterator { + [&self.sled_agent, &self.sled_agent2].into_iter() + } + pub fn wildcard_silo_dns_name(&self) -> String { format!("*.sys.{}", self.external_dns_zone_name) } @@ -421,6 +430,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: OmicronZoneUuid::from_untyped_uuid(dataset_id), underlay_address: *address.ip(), + filesystem_pool: Some(ZpoolName::new_external(zpool_id)), zone_type: BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, @@ -473,6 +483,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: OmicronZoneUuid::from_untyped_uuid(dataset_id), underlay_address: *address.ip(), + filesystem_pool: Some(ZpoolName::new_external(zpool_id)), zone_type: BlueprintZoneType::Clickhouse( blueprint_zone_type::Clickhouse { address, @@ -661,6 +672,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: *address.ip(), + filesystem_pool: 
Some(ZpoolName::new_external(ZpoolUuid::new_v4())), zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { external_dns_servers: self .config @@ -983,6 +995,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: zone_id, underlay_address: *address.ip(), + filesystem_pool: Some(ZpoolName::new_external(ZpoolUuid::new_v4())), zone_type: BlueprintZoneType::CruciblePantry( blueprint_zone_type::CruciblePantry { address }, ), @@ -1024,6 +1037,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: zone_id, underlay_address: *dropshot_address.ip(), + filesystem_pool: Some(ZpoolName::new_external(zpool_id)), zone_type: BlueprintZoneType::ExternalDns( blueprint_zone_type::ExternalDns { dataset: OmicronZoneDataset { pool_name }, @@ -1086,6 +1100,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { disposition: BlueprintZoneDisposition::InService, id: zone_id, underlay_address: *http_address.ip(), + filesystem_pool: Some(ZpoolName::new_external(zpool_id)), zone_type: BlueprintZoneType::InternalDns( blueprint_zone_type::InternalDns { dataset: OmicronZoneDataset { pool_name }, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index ccebffd197..013ce7b630 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -45,10 +45,12 @@ use omicron_sled_agent::sim::SledAgent; use omicron_test_utils::dev::poll::wait_for_condition; use omicron_test_utils::dev::poll::CondCheckError; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use oxnet::Ipv4Net; use oxnet::Ipv6Net; use slog::debug; +use std::collections::BTreeMap; use std::net::IpAddr; use std::sync::Arc; use std::time::Duration; @@ -807,45 +809,157 @@ pub struct TestZpool { pub datasets: Vec, } -pub struct DiskTest { - pub sled_agent: Arc, - pub zpools: Vec, +enum WhichSledAgents { + Specific(SledUuid), + All, } -impl DiskTest { +/// A test utility to add zpools to some sleds. +/// +/// This builder helps initialize a set of zpools on sled agents with +/// minimal overhead, though additional disks can be added via the [DiskTest] +/// API after initialization. +pub struct DiskTestBuilder<'a, N: NexusServer> { + cptestctx: &'a ControlPlaneTestContext, + sled_agents: WhichSledAgents, + zpool_count: u32, +} + +impl<'a, N: NexusServer> DiskTestBuilder<'a, N> { + /// Creates a new [DiskTestBuilder] with default configuration options. + pub fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + Self { + cptestctx, + sled_agents: WhichSledAgents::Specific( + SledUuid::from_untyped_uuid(cptestctx.sled_agent.sled_agent.id), + ), + zpool_count: DiskTest::<'a, N>::DEFAULT_ZPOOL_COUNT, + } + } + + /// Specifies that zpools should be added on all sleds + pub fn on_all_sleds(mut self) -> Self { + self.sled_agents = WhichSledAgents::All; + self + } + + /// Chooses a specific sled where zpools should be added + pub fn on_specific_sled(mut self, sled_id: SledUuid) -> Self { + self.sled_agents = WhichSledAgents::Specific(sled_id); + self + } + + /// Selects a specific number of zpools to be created + pub fn with_zpool_count(mut self, count: u32) -> Self { + self.zpool_count = count; + self + } + + /// Creates a DiskTest, actually creating the requested zpools. 
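As a quick orientation for the new builder: a typical call site, taken from the integration-test updates later in this diff, runs inside a #[nexus_test] function with `cptestctx: &ControlPlaneTestContext` in scope, so the snippet below is test-harness code rather than a standalone program:

    // Six 10 GiB zpools, one Crucible dataset each, all on the first sled.
    let _test = DiskTestBuilder::new(&cptestctx)
        .on_specific_sled(cptestctx.first_sled())
        .with_zpool_count(6)
        .build()
        .await;
    // Swapping `.on_specific_sled(...)` for `.on_all_sleds()` would instead
    // spread the zpools across every simulated sled agent.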
+ pub async fn build(self) -> DiskTest<'a, N> { + DiskTest::new_from_builder( + self.cptestctx, + self.sled_agents, + self.zpool_count, + ) + .await + } +} + +struct PerSledDiskState { + zpools: Vec, +} + +pub struct ZpoolIterator<'a> { + sleds: &'a BTreeMap, + sled: Option, + index: usize, +} + +impl<'a> Iterator for ZpoolIterator<'a> { + type Item = &'a TestZpool; + + fn next(&mut self) -> Option { + loop { + let Some(sled_id) = self.sled else { + return None; + }; + + let Some(pools) = self.sleds.get(&sled_id) else { + return None; + }; + + if let Some(pool) = pools.zpools.get(self.index) { + self.index += 1; + return Some(pool); + } + + self.sled = self + .sleds + .range(( + std::ops::Bound::Excluded(&sled_id), + std::ops::Bound::Unbounded, + )) + .map(|(id, _)| *id) + .next(); + self.index = 0; + } + } +} + +pub struct DiskTest<'a, N: NexusServer> { + cptestctx: &'a ControlPlaneTestContext, + sleds: BTreeMap, +} + +impl<'a, N: NexusServer> DiskTest<'a, N> { pub const DEFAULT_ZPOOL_SIZE_GIB: u32 = 10; pub const DEFAULT_ZPOOL_COUNT: u32 = 3; - /// Creates a new "DiskTest", but does not actually add any zpools. - pub async fn empty( - cptestctx: &ControlPlaneTestContext, - ) -> Self { - let sled_agent = cptestctx.sled_agent.sled_agent.clone(); - - Self { sled_agent, zpools: vec![] } + /// Creates a new [DiskTest] with default configuration. + /// + /// This is the same as calling [DiskTestBuilder::new] and + /// [DiskTestBuilder::build]. + pub async fn new(cptestctx: &'a ControlPlaneTestContext) -> Self { + DiskTestBuilder::new(cptestctx).build().await } - pub async fn new( - cptestctx: &ControlPlaneTestContext, + async fn new_from_builder( + cptestctx: &'a ControlPlaneTestContext, + sled_agents: WhichSledAgents, + zpool_count: u32, ) -> Self { - let sled_agent = cptestctx.sled_agent.sled_agent.clone(); + let input_sleds = match sled_agents { + WhichSledAgents::Specific(id) => { + vec![id] + } + WhichSledAgents::All => cptestctx + .all_sled_agents() + .map(|agent| SledUuid::from_untyped_uuid(agent.sled_agent.id)) + .collect(), + }; - let mut disk_test = Self { sled_agent, zpools: vec![] }; + let mut sleds = BTreeMap::new(); + for sled_id in input_sleds { + sleds.insert(sled_id, PerSledDiskState { zpools: vec![] }); + } + + let mut disk_test = Self { cptestctx, sleds }; - // Create three Zpools, each 10 GiB, each with one Crucible dataset. 
- for _ in 0..Self::DEFAULT_ZPOOL_COUNT { - disk_test.add_zpool_with_dataset(cptestctx).await; + for sled_id in + disk_test.sleds.keys().cloned().collect::>() + { + for _ in 0..zpool_count { + disk_test.add_zpool_with_dataset(sled_id).await; + } } disk_test } - pub async fn add_zpool_with_dataset( - &mut self, - cptestctx: &ControlPlaneTestContext, - ) { + pub async fn add_zpool_with_dataset(&mut self, sled_id: SledUuid) { self.add_zpool_with_dataset_ext( - cptestctx, + sled_id, Uuid::new_v4(), ZpoolUuid::new_v4(), Uuid::new_v4(), @@ -854,14 +968,38 @@ impl DiskTest { .await } - pub async fn add_zpool_with_dataset_ext( + fn get_sled(&self, sled_id: SledUuid) -> Arc { + let sleds = self.cptestctx.all_sled_agents(); + sleds + .into_iter() + .find_map(|server| { + if server.sled_agent.id == sled_id.into_untyped_uuid() { + Some(server.sled_agent.clone()) + } else { + None + } + }) + .expect("Cannot find sled") + } + + pub fn zpools(&self) -> ZpoolIterator { + ZpoolIterator { + sleds: &self.sleds, + sled: self.sleds.keys().next().copied(), + index: 0, + } + } + + pub async fn add_zpool_with_dataset_ext( &mut self, - cptestctx: &ControlPlaneTestContext, + sled_id: SledUuid, physical_disk_id: Uuid, zpool_id: ZpoolUuid, dataset_id: Uuid, gibibytes: u32, ) { + let cptestctx = self.cptestctx; + // To get a dataset, we actually need to create a new simulated physical // disk, zpool, and dataset, all contained within one another. let zpool = TestZpool { @@ -884,40 +1022,55 @@ impl DiskTest { model: disk_identity.model.clone(), variant: nexus_types::external_api::params::PhysicalDiskKind::U2, - sled_id: self.sled_agent.id, + sled_id: sled_id.into_untyped_uuid(), }; let zpool_request = nexus_types::internal_api::params::ZpoolPutRequest { id: zpool.id.into_untyped_uuid(), physical_disk_id, - sled_id: self.sled_agent.id, + sled_id: sled_id.into_untyped_uuid(), }; + // Find the sled on which we're adding a zpool + let sleds = cptestctx.all_sled_agents(); + let sled_agent = sleds + .into_iter() + .find_map(|server| { + if server.sled_agent.id == sled_id.into_untyped_uuid() { + Some(server.sled_agent.clone()) + } else { + None + } + }) + .expect("Cannot find sled"); + + let zpools = &mut self + .sleds + .entry(sled_id) + .or_insert_with(|| PerSledDiskState { zpools: Vec::new() }) + .zpools; + // Tell the simulated sled agent to create the disk and zpool containing // these datasets. - self.sled_agent + sled_agent .create_external_physical_disk( physical_disk_id, disk_identity.clone(), ) .await; - self.sled_agent + sled_agent .create_zpool(zpool.id, physical_disk_id, zpool.size.to_bytes()) .await; for dataset in &zpool.datasets { // Sled Agent side: Create the Dataset, make sure regions can be // created immediately if Nexus requests anything. 
- let address = self - .sled_agent - .create_crucible_dataset(zpool.id, dataset.id) - .await; - let crucible = self - .sled_agent - .get_crucible_dataset(zpool.id, dataset.id) - .await; + let address = + sled_agent.create_crucible_dataset(zpool.id, dataset.id).await; + let crucible = + sled_agent.get_crucible_dataset(zpool.id, dataset.id).await; crucible .set_create_callback(Box::new(|_| RegionState::Created)) .await; @@ -991,58 +1144,65 @@ impl DiskTest { .await .expect("expected to find inventory collection"); - self.zpools.push(zpool); + zpools.push(zpool); } pub async fn set_requested_then_created_callback(&self) { - for zpool in &self.zpools { - for dataset in &zpool.datasets { - let crucible = self - .sled_agent - .get_crucible_dataset(zpool.id, dataset.id) - .await; - let called = std::sync::atomic::AtomicBool::new(false); - crucible - .set_create_callback(Box::new(move |_| { - if !called.load(std::sync::atomic::Ordering::SeqCst) { - called.store( - true, - std::sync::atomic::Ordering::SeqCst, - ); - RegionState::Requested - } else { - RegionState::Created - } - })) - .await; + for (sled_id, state) in &self.sleds { + for zpool in &state.zpools { + for dataset in &zpool.datasets { + let crucible = self + .get_sled(*sled_id) + .get_crucible_dataset(zpool.id, dataset.id) + .await; + let called = std::sync::atomic::AtomicBool::new(false); + crucible + .set_create_callback(Box::new(move |_| { + if !called.load(std::sync::atomic::Ordering::SeqCst) + { + called.store( + true, + std::sync::atomic::Ordering::SeqCst, + ); + RegionState::Requested + } else { + RegionState::Created + } + })) + .await; + } } } } pub async fn set_always_fail_callback(&self) { - for zpool in &self.zpools { - for dataset in &zpool.datasets { - let crucible = self - .sled_agent - .get_crucible_dataset(zpool.id, dataset.id) - .await; - crucible - .set_create_callback(Box::new(|_| RegionState::Failed)) - .await; + for (sled_id, state) in &self.sleds { + for zpool in &state.zpools { + for dataset in &zpool.datasets { + let crucible = self + .get_sled(*sled_id) + .get_crucible_dataset(zpool.id, dataset.id) + .await; + crucible + .set_create_callback(Box::new(|_| RegionState::Failed)) + .await; + } } } } /// Returns true if all Crucible resources were cleaned up, false otherwise. 
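Since `DiskTest` no longer exposes a public `zpools` vector, call sites go through the `zpools()` iterator, which walks the pools of every sled the test knows about. The snippet below mirrors the call-site updates later in this diff; it assumes a `disk_test` and `datastore` from the usual test context:

    for zpool in disk_test.zpools() {
        for dataset in &zpool.datasets {
            // e.g. assert on datastore.regions_total_occupied_size(dataset.id)
        }
    }

    // Positional access also goes through the iterator:
    let third = disk_test.zpools().nth(2).expect("expected at least three zpools");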
pub async fn crucible_resources_deleted(&self) -> bool { - for zpool in &self.zpools { - for dataset in &zpool.datasets { - let crucible = self - .sled_agent - .get_crucible_dataset(zpool.id, dataset.id) - .await; - if !crucible.is_empty().await { - return false; + for (sled_id, state) in &self.sleds { + for zpool in &state.zpools { + for dataset in &zpool.datasets { + let crucible = self + .get_sled(*sled_id) + .get_crucible_dataset(zpool.id, dataset.id) + .await; + if !crucible.is_empty().await { + return false; + } } } } diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 4c707ba0fa..ded4a346fb 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -29,7 +29,6 @@ use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_instance_with; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; @@ -54,6 +53,12 @@ use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; +type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, omicron_nexus::Server>; +type DiskTestBuilder<'a> = nexus_test_utils::resource_helpers::DiskTestBuilder< + 'a, + omicron_nexus::Server, +>; const PROJECT_NAME: &str = "springfield-squidport-disks"; const PROJECT_NAME_2: &str = "bouncymeadow-octopusharbor-disks"; @@ -992,9 +997,9 @@ async fn test_disk_backed_by_multiple_region_sets( assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); // Create another three zpools, all 10 gibibytes, each with one dataset - test.add_zpool_with_dataset(cptestctx).await; - test.add_zpool_with_dataset(cptestctx).await; - test.add_zpool_with_dataset(cptestctx).await; + test.add_zpool_with_dataset(cptestctx.first_sled()).await; + test.add_zpool_with_dataset(cptestctx.first_sled()).await; + test.add_zpool_with_dataset(cptestctx.first_sled()).await; create_project_and_pool(client).await; @@ -1322,9 +1327,11 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( ); // Set the third agent to fail when deleting regions - let zpool = &disk_test.zpools[2]; + let zpool = + &disk_test.zpools().nth(2).expect("Expected at least three zpools"); let dataset = &zpool.datasets[0]; - disk_test + cptestctx + .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) .await @@ -1361,7 +1368,8 @@ async fn test_disk_virtual_provisioning_collection_failed_delete( assert_eq!(disk.state, DiskState::Faulted); // Set the third agent to respond normally - disk_test + cptestctx + .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) .await @@ -1545,7 +1553,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { create_project_and_pool(client).await; // Total occupied size should start at 0 - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { assert_eq!( datastore @@ -1584,7 +1592,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { // Total occupied size is 7 GiB * 3 (each Crucible disk requires three // regions to make a region set for an Upstairs, one region per dataset) - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { assert_eq!( datastore @@ -1621,7 +1629,7 @@ async fn 
test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { .expect("unexpected success creating 4 GiB disk"); // Total occupied size is still 7 GiB * 3 - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { assert_eq!( datastore @@ -1645,7 +1653,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { .expect("unexpected failure deleting 7 GiB disk"); // Total occupied size should be 0 - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { assert_eq!( datastore @@ -1681,7 +1689,7 @@ async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) { .expect("unexpected failure creating 10 GiB disk"); // Total occupied size should be 10 GiB * 3 - for zpool in &test.zpools { + for zpool in test.zpools() { for dataset in &zpool.datasets { assert_eq!( datastore @@ -1702,15 +1710,15 @@ async fn test_multiple_disks_multiple_zpools( let client = &cptestctx.external_client; // Create six 10 GB zpools, each with one dataset - let mut test = DiskTest::new(&cptestctx).await; + let _test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(6) + .build() + .await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); - test.add_zpool_with_dataset(cptestctx).await; - test.add_zpool_with_dataset(cptestctx).await; - test.add_zpool_with_dataset(cptestctx).await; - create_project_and_pool(client).await; // Ask for a 10 gibibyte disk, this should succeed @@ -2083,7 +2091,7 @@ async fn test_single_region_allocate(cptestctx: &ControlPlaneTestContext) { // request let mut number_of_matching_regions = 0; - for zpool in &disk_test.zpools { + for zpool in disk_test.zpools() { for dataset in &zpool.datasets { let total_size = datastore .regions_total_occupied_size(dataset.id) @@ -2114,8 +2122,11 @@ async fn test_region_allocation_strategy_random_is_idempotent( OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); // Create four 10 GiB zpools, each with one dataset. - let mut disk_test = DiskTest::new(&cptestctx).await; - disk_test.add_zpool_with_dataset(&cptestctx).await; + let _test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); @@ -2181,8 +2192,11 @@ async fn test_region_allocation_strategy_random_is_idempotent_arbitrary( OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); // Create four 10 GiB zpools, each with one dataset. - let mut disk_test = DiskTest::new(&cptestctx).await; - disk_test.add_zpool_with_dataset(&cptestctx).await; + let _test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); @@ -2240,12 +2254,15 @@ async fn test_single_region_allocate_for_replace( let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); - // Create three 10 GiB zpools, each with one dataset. - let mut disk_test = DiskTest::new(&cptestctx).await; - - // One more zpool and dataset is required to meet `region_allocate`'s - // redundancy requirement. - disk_test.add_zpool_with_dataset(&cptestctx).await; + // Create four 10 GiB zpools, each with one dataset. + // + // We add one more then the "three" default to meet `region_allocate`'s + // redundancy requirements. 
+ let _test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); @@ -2517,7 +2534,7 @@ async fn test_no_halt_disk_delete_one_region_on_expunged_agent( .unwrap(); // Choose one of the datasets, and drop the simulated Crucible agent - let zpool = &disk_test.zpools[0]; + let zpool = disk_test.zpools().next().expect("Expected at least one zpool"); let dataset = &zpool.datasets[0]; cptestctx.sled_agent.sled_agent.drop_dataset(zpool.id, dataset.id).await; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index a8e12ae5d9..a29b45c4ce 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -14,7 +14,6 @@ use internal_dns::names::DNS_ZONE_EXTERNAL_TESTING; use nexus_db_queries::authn; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::identity::Resource; -use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils::PHYSICAL_DISK_UUID; use nexus_test_utils::RACK_UUID; use nexus_test_utils::SLED_AGENT_UUID; @@ -41,6 +40,9 @@ use std::net::IpAddr; use std::net::Ipv4Addr; use std::str::FromStr; +type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, omicron_nexus::Server>; + pub static HARDWARE_RACK_URL: Lazy = Lazy::new(|| format!("/v1/system/hardware/racks/{}", RACK_UUID)); pub const HARDWARE_UNINITIALIZED_SLEDS: &'static str = diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index b4b8ad3090..987e8146de 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -25,7 +25,6 @@ use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::views; @@ -44,6 +43,12 @@ use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; +type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, omicron_nexus::Server>; +type DiskTestBuilder<'a> = nexus_test_utils::resource_helpers::DiskTestBuilder< + 'a, + omicron_nexus::Server, +>; const PROJECT_NAME: &str = "springfield-squidport-disks"; @@ -962,9 +967,10 @@ async fn test_snapshot_unwind(cptestctx: &ControlPlaneTestContext) { .unwrap(); // Set the third region's running snapshot callback so it fails - let zpool = &disk_test.zpools[2]; + let zpool = disk_test.zpools().nth(2).expect("Not enough zpools"); let dataset = &zpool.datasets[0]; - disk_test + cptestctx + .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) .await @@ -1460,12 +1466,16 @@ async fn test_region_allocation_for_snapshot( let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); - // Create three 10 GiB zpools, each with one dataset. - let mut disk_test = DiskTest::new(&cptestctx).await; - - // An additional disk is required, otherwise the allocation will fail with - // "not enough storage" - disk_test.add_zpool_with_dataset(&cptestctx).await; + // Create four 10 GiB zpools, each with one dataset. 
+ // + // We add one more than the "three" default to avoid failing + // with "not enough storage". + let sled_id = cptestctx.first_sled(); + let mut disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); @@ -1611,7 +1621,7 @@ async fn test_region_allocation_for_snapshot( assert_eq!(allocated_regions.len(), 1); // If an additional region is required, make sure that works too. - disk_test.add_zpool_with_dataset(&cptestctx).await; + disk_test.add_zpool_with_dataset(sled_id).await; datastore .arbitrary_region_allocate( diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index e1a37403c0..93e40dbc2e 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -18,13 +18,14 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::http_testing::TestResponse; -use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::ZpoolUuid; use once_cell::sync::Lazy; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; +type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, omicron_nexus::Server>; // This test hits a list Nexus API endpoints using both unauthenticated and // unauthorized requests to make sure we get the expected behavior (generally: @@ -56,9 +57,10 @@ type ControlPlaneTestContext = #[nexus_test] async fn test_unauthorized(cptestctx: &ControlPlaneTestContext) { let mut disk_test = DiskTest::new(cptestctx).await; + let sled_id = cptestctx.first_sled(); disk_test .add_zpool_with_dataset_ext( - cptestctx, + sled_id, nexus_test_utils::PHYSICAL_DISK_UUID.parse().unwrap(), ZpoolUuid::new_v4(), uuid::Uuid::new_v4(), diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 73322e518f..692ea3ef1e 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -17,7 +17,6 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::object_create; -use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::views; @@ -41,6 +40,12 @@ use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; +type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, omicron_nexus::Server>; +type DiskTestBuilder<'a> = nexus_test_utils::resource_helpers::DiskTestBuilder< + 'a, + omicron_nexus::Server, +>; const PROJECT_NAME: &str = "springfield-squidport-disks"; @@ -2159,8 +2164,17 @@ async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { let datastore = nexus.datastore(); // Four zpools, one dataset each - let mut disk_test = DiskTest::new(&cptestctx).await; - disk_test.add_zpool_with_dataset(&cptestctx).await; + let disk_test = DiskTestBuilder::new(&cptestctx) + .on_specific_sled(cptestctx.first_sled()) + .with_zpool_count(4) + .build() + .await; + + let mut iter = disk_test.zpools(); + let zpool0 = 
iter.next().expect("Expected four zpools"); + let zpool1 = iter.next().expect("Expected four zpools"); + let zpool2 = iter.next().expect("Expected four zpools"); + let zpool3 = iter.next().expect("Expected four zpools"); // This bug occurs when region_snapshot records share a snapshot_addr, so // insert those here manually. @@ -2169,38 +2183,38 @@ async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { let region_snapshots = vec![ // first snapshot-create ( - disk_test.zpools[0].datasets[0].id, + zpool0.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:101:7]:19016"), ), ( - disk_test.zpools[1].datasets[0].id, + zpool1.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:102:7]:19016"), ), ( - disk_test.zpools[2].datasets[0].id, + zpool2.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:103:7]:19016"), ), // second snapshot-create ( - disk_test.zpools[0].datasets[0].id, + zpool0.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:101:7]:19016"), // duplicate! ), ( - disk_test.zpools[3].datasets[0].id, + zpool3.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:104:7]:19016"), ), ( - disk_test.zpools[2].datasets[0].id, + zpool2.datasets[0].id, Uuid::new_v4(), Uuid::new_v4(), String::from("[fd00:1122:3344:103:7]:19017"), @@ -2460,9 +2474,10 @@ async fn test_disk_create_saga_unwinds_correctly( let base_disk_name: Name = "base-disk".parse().unwrap(); // Set the third agent to fail creating the region - let zpool = &disk_test.zpools[2]; + let zpool = &disk_test.zpools().nth(2).expect("Expected three zpools"); let dataset = &zpool.datasets[0]; - disk_test + cptestctx + .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) .await @@ -2528,9 +2543,11 @@ async fn test_snapshot_create_saga_unwinds_correctly( let _disk: Disk = object_create(client, &disks_url, &base_disk).await; // Set the third agent to fail creating the region for the snapshot - let zpool = &disk_test.zpools[2]; + let zpool = + &disk_test.zpools().nth(2).expect("Expected at least three zpools"); let dataset = &zpool.datasets[0]; - disk_test + cptestctx + .sled_agent .sled_agent .get_crucible_dataset(zpool.id, dataset.id) .await diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index d64cde2d06..4e655a1ed0 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -623,6 +623,7 @@ pub struct BlueprintZoneConfig { pub id: OmicronZoneUuid, pub underlay_address: Ipv6Addr, + pub filesystem_pool: Option, pub zone_type: BlueprintZoneType, } @@ -873,6 +874,7 @@ impl BlueprintZoneConfig { disposition, id: OmicronZoneUuid::from_untyped_uuid(config.id), underlay_address: config.underlay_address, + filesystem_pool: config.filesystem_pool, zone_type, }) } @@ -883,6 +885,7 @@ impl From for OmicronZoneConfig { Self { id: z.id.into_untyped_uuid(), underlay_address: z.underlay_address, + filesystem_pool: z.filesystem_pool, zone_type: z.zone_type.into(), } } @@ -931,6 +934,7 @@ impl BlueprintZoneDisposition { match self { Self::InService => match filter { BlueprintZoneFilter::All => true, + BlueprintZoneFilter::Expunged => false, BlueprintZoneFilter::ShouldBeRunning => true, BlueprintZoneFilter::ShouldBeExternallyReachable => true, BlueprintZoneFilter::ShouldBeInInternalDns => true, @@ -938,6 +942,7 @@ impl BlueprintZoneDisposition { }, Self::Quiesced => match filter { BlueprintZoneFilter::All => true, + BlueprintZoneFilter::Expunged 
=> false, // Quiesced zones are still running. BlueprintZoneFilter::ShouldBeRunning => true, @@ -954,6 +959,7 @@ impl BlueprintZoneDisposition { }, Self::Expunged => match filter { BlueprintZoneFilter::All => true, + BlueprintZoneFilter::Expunged => true, BlueprintZoneFilter::ShouldBeRunning => false, BlueprintZoneFilter::ShouldBeExternallyReachable => false, BlueprintZoneFilter::ShouldBeInInternalDns => false, @@ -998,6 +1004,9 @@ pub enum BlueprintZoneFilter { /// All zones. All, + /// Zones that have been expunged. + Expunged, + /// Zones that are desired to be in the RUNNING state ShouldBeRunning, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 028f2301ba..3e8bc9aa2c 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -402,6 +402,8 @@ pub struct SledResources { /// /// (used to allocate storage for control plane zones with persistent /// storage) + // NOTE: I'd really like to make this private, to make it harder to + // accidentally pick a zpool that is not in-service. pub zpools: BTreeMap, /// the IPv6 subnet of this sled on the underlay network diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index dc0fd98129..4c40bfc1de 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -35,33 +35,10 @@ pub enum BlueprintZoneType { impl BlueprintZoneType { /// Returns the zpool being used by this zone, if any. - pub fn zpool(&self) -> Option<&omicron_common::zpool_name::ZpoolName> { - match self { - BlueprintZoneType::ExternalDns( - blueprint_zone_type::ExternalDns { dataset, .. }, - ) - | BlueprintZoneType::Clickhouse( - blueprint_zone_type::Clickhouse { dataset, .. }, - ) - | BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { dataset, .. }, - ) - | BlueprintZoneType::CockroachDb( - blueprint_zone_type::CockroachDb { dataset, .. }, - ) - | BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { - dataset, - .. - }) - | BlueprintZoneType::InternalDns( - blueprint_zone_type::InternalDns { dataset, .. }, - ) => Some(&dataset.pool_name), - BlueprintZoneType::BoundaryNtp(_) - | BlueprintZoneType::InternalNtp(_) - | BlueprintZoneType::Nexus(_) - | BlueprintZoneType::Oximeter(_) - | BlueprintZoneType::CruciblePantry(_) => None, - } + pub fn durable_zpool( + &self, + ) -> Option<&omicron_common::zpool_name::ZpoolName> { + self.durable_dataset().map(|dataset| &dataset.pool_name) } pub fn external_networking( @@ -141,12 +118,8 @@ impl BlueprintZoneType { } } - // Returns the dataset associated with this zone. - // - // TODO-cleanup This currently returns `None` for zones that only have - // transient datasets. This should change to a non-optional value once Nexus - // is aware of them. - pub fn dataset(&self) -> Option<&OmicronZoneDataset> { + /// Returns a durable dataset associated with this zone, if any exists. + pub fn durable_dataset(&self) -> Option<&OmicronZoneDataset> { match self { BlueprintZoneType::Clickhouse( blueprint_zone_type::Clickhouse { dataset, .. 
}, diff --git a/openapi/cockroach-admin.json b/openapi/cockroach-admin.json index a46b0014a1..3b03475ec5 100644 --- a/openapi/cockroach-admin.json +++ b/openapi/cockroach-admin.json @@ -10,17 +10,51 @@ "version": "0.0.1" }, "paths": { + "/node/decommission": { + "post": { + "summary": "Decommission a node from the CRDB cluster", + "operationId": "node_decommission", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NodeId" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NodeDecommission" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/node/id": { "get": { "summary": "Get the CockroachDB node ID of the local cockroach instance.", - "operationId": "node_id", + "operationId": "local_node_id", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/NodeId" + "$ref": "#/components/schemas/LocalNodeId" } } } @@ -94,7 +128,7 @@ "request_id" ] }, - "NodeId": { + "LocalNodeId": { "description": "CockroachDB Node ID", "type": "object", "properties": { @@ -115,6 +149,120 @@ "zone_id" ] }, + "NodeDecommission": { + "type": "object", + "properties": { + "is_decommissioning": { + "type": "boolean" + }, + "is_draining": { + "type": "boolean" + }, + "is_live": { + "type": "boolean" + }, + "membership": { + "$ref": "#/components/schemas/NodeMembership" + }, + "node_id": { + "type": "string" + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + }, + "replicas": { + "type": "integer", + "format": "int64" + } + }, + "required": [ + "is_decommissioning", + "is_draining", + "is_live", + "membership", + "node_id", + "notes", + "replicas" + ] + }, + "NodeId": { + "type": "object", + "properties": { + "node_id": { + "type": "string" + } + }, + "required": [ + "node_id" + ] + }, + "NodeMembership": { + "oneOf": [ + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "active" + ] + } + }, + "required": [ + "state" + ] + }, + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "decommissioning" + ] + } + }, + "required": [ + "state" + ] + }, + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "decommissioned" + ] + } + }, + "required": [ + "state" + ] + }, + { + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "unknown" + ] + }, + "value": { + "type": "string" + } + }, + "required": [ + "state", + "value" + ] + } + ] + }, "NodeStatus": { "type": "object", "properties": { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 9d495a726c..6d380891aa 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -1967,6 +1967,14 @@ } ] }, + "filesystem_pool": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/ZpoolName" + } + ] + }, "id": { "$ref": "#/components/schemas/TypedUuidForOmicronZoneKind" }, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index be13ba7a8b..8165cfa9d6 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3732,6 +3732,15 @@ "description": "Describes one Omicron-managed zone running on a sled", "type": "object", "properties": { + "filesystem_pool": { + "nullable": true, + 
"description": "The pool on which we'll place this zone's filesystem.\n\nNote that this is transient -- the sled agent is permitted to destroy the zone's dataset on this pool each time the zone is initialized.", + "allOf": [ + { + "$ref": "#/components/schemas/ZpoolName" + } + ] + }, "id": { "type": "string", "format": "uuid" diff --git a/package-manifest.toml b/package-manifest.toml index add044a6d7..723cc4da84 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -504,10 +504,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" +source.commit = "e10f8793f8414fdb9a165219f17e45fa014d088b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "e9051934c7d6e274158d4afdb4523797c913acd1a1262f973bc0ab7a2a253b5f" +source.sha256 = "e3f7ace2da974da6379485c6871ffc88ce7430c9ff519d5ac9dc04c55ce9f189" output.type = "zone" output.intermediate_only = true @@ -516,10 +516,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" +source.commit = "e10f8793f8414fdb9a165219f17e45fa014d088b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "a8850bfaf08c11a7baa2e4b14b859613b77d9952dc8d20433ebea8136f8a00d3" +source.sha256 = "a5505a51871e910735f449acb6887610a08244773e19d474f66cb00e533842d0" output.type = "zone" output.intermediate_only = true @@ -533,10 +533,10 @@ service_name = "crucible_dtrace" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" +source.commit = "e10f8793f8414fdb9a165219f17e45fa014d088b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-dtrace.sha256.txt -source.sha256 = "fe51b1c771f990761c4f8bf95aa26febbfa452df97f8da7d2f329dad88f63e1d" +source.sha256 = "29e79e0df79fc46b244745fac807b0e1817954c0a17b1923d725f257d31010e9" output.type = "tarball" # Refer to diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 20b99b2064..910feb8c74 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -240,6 +240,17 @@ "zone_type" ], "properties": { + "filesystem_pool": { + "description": "The pool on which we'll place this zone's filesystem.\n\nNote that this is transient -- the sled agent is permitted to destroy the zone's dataset on this pool each time the zone is initialized.", + "anyOf": [ + { + "$ref": "#/definitions/ZpoolName" + }, + { + "type": "null" + } + ] + }, "id": { "type": "string", "format": "uuid" diff --git a/schema/crdb/add-nullable-filesystem-pool/up1.sql b/schema/crdb/add-nullable-filesystem-pool/up1.sql new file mode 100644 index 0000000000..53c58db51c --- /dev/null +++ b/schema/crdb/add-nullable-filesystem-pool/up1.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.inv_omicron_zone ADD COLUMN IF NOT EXISTS filesystem_pool UUID; diff --git a/schema/crdb/add-nullable-filesystem-pool/up2.sql b/schema/crdb/add-nullable-filesystem-pool/up2.sql new file mode 100644 index 0000000000..f1a3b71e30 --- /dev/null +++ 
b/schema/crdb/add-nullable-filesystem-pool/up2.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.bp_omicron_zone ADD COLUMN IF NOT EXISTS filesystem_pool UUID; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 990b1047a2..57f6838a55 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3247,6 +3247,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( snat_last_port INT4 CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + -- TODO: This is nullable for backwards compatibility. + -- Eventually, that nullability should be removed. + filesystem_pool UUID, + PRIMARY KEY (inv_collection_id, id) ); @@ -3493,6 +3497,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( -- created yet. external_ip_id UUID, + -- TODO: This is nullable for backwards compatibility. + -- Eventually, that nullability should be removed. + filesystem_pool UUID, + PRIMARY KEY (blueprint_id, id) ); @@ -4129,7 +4137,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '80.0.0', NULL) + (TRUE, NOW(), NOW(), '81.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json index 481c92cc36..fd4b9c7064 100644 --- a/schema/rss-service-plan-v3.json +++ b/schema/rss-service-plan-v3.json @@ -397,6 +397,17 @@ "zone_type" ], "properties": { + "filesystem_pool": { + "description": "The pool on which we'll place this zone's filesystem.\n\nNote that this is transient -- the sled agent is permitted to destroy the zone's dataset on this pool each time the zone is initialized.", + "anyOf": [ + { + "$ref": "#/definitions/ZpoolName" + }, + { + "type": "null" + } + ] + }, "id": { "type": "string", "format": "uuid" diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 9575e08c17..465a4abb56 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -341,6 +341,13 @@ impl From for sled_agent_client::types::OmicronZonesConfig { pub struct OmicronZoneConfig { pub id: Uuid, pub underlay_address: Ipv6Addr, + + /// The pool on which we'll place this zone's filesystem. + /// + /// Note that this is transient -- the sled agent is permitted to + /// destroy the zone's dataset on this pool each time the zone is + /// initialized. 
+ pub filesystem_pool: Option, pub zone_type: OmicronZoneType, } @@ -349,6 +356,7 @@ impl From for sled_agent_client::types::OmicronZoneConfig { Self { id: local.id, underlay_address: local.underlay_address, + filesystem_pool: local.filesystem_pool, zone_type: local.zone_type.into(), } } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index f13c15723c..39235b91eb 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -31,6 +31,7 @@ use omicron_common::backoff::{ }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; +use rand::prelude::SliceRandom; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_client::{ @@ -404,7 +405,8 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_from_u2_zpool(DatasetKind::InternalDns)?; + sled.alloc_dataset_from_u2s(DatasetKind::InternalDns)?; + let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere @@ -419,6 +421,7 @@ impl Plan { gz_address: dns_subnet.gz_address(), gz_address_index: i.try_into().expect("Giant indices?"), }, + filesystem_pool, }); } @@ -442,7 +445,8 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_from_u2_zpool(DatasetKind::CockroachDb)?; + sled.alloc_dataset_from_u2s(DatasetKind::CockroachDb)?; + let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere id: id.into_untyped_uuid(), @@ -453,6 +457,7 @@ impl Plan { }, address, }, + filesystem_pool, }); } @@ -485,7 +490,8 @@ impl Plan { let dns_port = omicron_common::address::DNS_PORT; let dns_address = SocketAddr::new(external_ip, dns_port); let dataset_kind = DatasetKind::ExternalDns; - let dataset_name = sled.alloc_from_u2_zpool(dataset_kind)?; + let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?; + let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere @@ -499,6 +505,7 @@ impl Plan { dns_address, nic, }, + filesystem_pool, }); } @@ -520,6 +527,7 @@ impl Plan { ) .unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; + let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere id: id.into_untyped_uuid(), @@ -542,6 +550,7 @@ impl Plan { external_tls: !config.external_certificates.is_empty(), external_dns_servers: config.dns_servers.clone(), }, + filesystem_pool, }); } @@ -563,6 +572,7 @@ impl Plan { omicron_common::address::OXIMETER_PORT, ) .unwrap(); + let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere id: id.into_untyped_uuid(), @@ -575,6 +585,7 @@ impl Plan { 0, ), }, + filesystem_pool, }) } @@ -599,7 +610,8 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_from_u2_zpool(DatasetKind::Clickhouse)?; + sled.alloc_dataset_from_u2s(DatasetKind::Clickhouse)?; + let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere id: id.into_untyped_uuid(), @@ -610,6 +622,7 @@ impl Plan { pool_name: dataset_name.pool().clone(), }, }, + filesystem_pool, }); } @@ -636,7 +649,8 @@ impl Plan { ) .unwrap(); let dataset_name = - 
sled.alloc_from_u2_zpool(DatasetKind::ClickhouseKeeper)?; + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseKeeper)?; + let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(OmicronZoneConfig { // TODO-cleanup use TypedUuid everywhere id: id.into_untyped_uuid(), @@ -647,6 +661,7 @@ impl Plan { pool_name: dataset_name.pool().clone(), }, }, + filesystem_pool, }); } @@ -661,6 +676,7 @@ impl Plan { let address = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CRUCIBLE_PANTRY_PORT; let id = OmicronZoneUuid::new_v4(); + let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); dns_builder .host_zone_with_one_backend( id, @@ -676,6 +692,7 @@ impl Plan { zone_type: OmicronZoneType::CruciblePantry { address: SocketAddrV6::new(address, port, 0, 0), }, + filesystem_pool, }); } @@ -704,6 +721,7 @@ impl Plan { address, dataset: OmicronZoneDataset { pool_name: pool.clone() }, }, + filesystem_pool: Some(pool.clone()), }); } } @@ -716,6 +734,7 @@ impl Plan { let id = OmicronZoneUuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0); + let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT { boundary_ntp_servers @@ -753,6 +772,7 @@ impl Plan { id: id.into_untyped_uuid(), underlay_address: address, zone_type, + filesystem_pool, }); } @@ -878,9 +898,16 @@ impl SledInfo { } } + fn alloc_zpool_from_u2s(&self) -> Result { + self.u2_zpools + .choose(&mut rand::thread_rng()) + .map(|z| z.clone()) + .ok_or_else(|| PlanError::NotEnoughSleds) + } + /// Allocates a dataset of the specified type from one of the U.2 pools on /// this Sled - fn alloc_from_u2_zpool( + fn alloc_dataset_from_u2s( &mut self, kind: DatasetKind, ) -> Result { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ae1d8243ca..58d854eea1 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2098,6 +2098,7 @@ impl ServiceManager { }, underlay_address, id, + .. }, .. }) => { @@ -3291,13 +3292,13 @@ impl ServiceManager { ) -> Result { let name = zone.zone_name(); - // For each new zone request, we pick a U.2 to store the zone - // filesystem. Note: This isn't known to Nexus right now, so it's a - // local-to-sled decision. + // If the caller has requested a specific durable dataset, + // ensure that it is encrypted and that it exists. // - // Currently, the zone filesystem should be destroyed between - // reboots, so it's fine to make this decision locally. - let root = if let Some(dataset) = zone.dataset_name() { + // Typically, the transient filesystem pool will be placed on the same + // zpool as the durable dataset (to reduce the fault domain), but that + // decision belongs to Nexus, and is not enforced here. + if let Some(dataset) = zone.dataset_name() { // Check that the dataset is actually ready to be used. let [zoned, canmount, encryption] = illumos_utils::zfs::Zfs::get_values( @@ -3327,11 +3328,6 @@ impl ServiceManager { check_property("encryption", encryption, "aes-256-gcm")?; } - // If the zone happens to already manage a dataset, then - // we co-locate the zone dataset on the same zpool. - // - // This slightly reduces the underlying fault domain for the - // service. 
let data_pool = dataset.pool(); if !all_u2_pools.contains(&data_pool) { warn!( @@ -3344,20 +3340,35 @@ impl ServiceManager { device: format!("zpool: {data_pool}"), }); } - data_pool.dataset_mountpoint(&mount_config.root, ZONE_DATASET) - } else { - // If the zone it not coupled to other datsets, we pick one - // arbitrarily. - let mut rng = rand::thread_rng(); - all_u2_pools - .choose(&mut rng) - .map(|pool| { - pool.dataset_mountpoint(&mount_config.root, ZONE_DATASET) - }) + } + + let filesystem_pool = match (&zone.filesystem_pool, zone.dataset_name()) + { + // If a pool was explicitly requested, use it. + (Some(pool), _) => pool.clone(), + // NOTE: The following cases are for backwards compatibility. + // + // If no pool was selected, prefer to use the same pool as the + // durable dataset. Otherwise, pick one randomly. + (None, Some(dataset)) => dataset.pool().clone(), + (None, None) => all_u2_pools + .choose(&mut rand::thread_rng()) .ok_or_else(|| Error::U2NotFound)? - .clone() + .clone(), }; - Ok(root) + + if !all_u2_pools.contains(&filesystem_pool) { + warn!( + self.inner.log, + "zone filesystem dataset requested on a zpool which doesn't exist"; + "zone" => &name, + "zpool" => %filesystem_pool + ); + return Err(Error::MissingDevice { + device: format!("zpool: {filesystem_pool}"), + }); + } + Ok(filesystem_pool.dataset_mountpoint(&mount_config.root, ZONE_DATASET)) } pub async fn cockroachdb_initialize(&self) -> Result<(), Error> { @@ -4361,6 +4372,7 @@ mod test { id, underlay_address: Ipv6Addr::LOCALHOST, zone_type, + filesystem_pool: None, }], }, Some(&tmp_dir), @@ -4392,6 +4404,7 @@ mod test { dns_servers: vec![], domain: None, }, + filesystem_pool: None, }], }, Some(&tmp_dir), @@ -4808,6 +4821,7 @@ mod test { dns_servers: vec![], domain: None, }, + filesystem_pool: None, }]; let tmp_dir = String::from(test_config.config_dir.path().as_str()); @@ -4836,6 +4850,7 @@ mod test { dns_servers: vec![], domain: None, }, + filesystem_pool: None, }); // Now try to apply that list with an older generation number. This diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 5b66342a1a..215cb7d5f4 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -356,19 +356,27 @@ pub async fn run_standalone_server( let internal_dns_version = Generation::try_from(dns_config.generation) .expect("invalid internal dns version"); + let all_u2_zpools = server.sled_agent.get_zpools().await; + let get_random_zpool = || { + use rand::seq::SliceRandom; + let pool = all_u2_zpools + .choose(&mut rand::thread_rng()) + .expect("No external zpools found, but we need one"); + ZpoolName::new_external(ZpoolUuid::from_untyped_uuid(pool.id)) + }; + // Record the internal DNS server as though RSS had provisioned it so // that Nexus knows about it. 
let http_bound = match dns.dropshot_server.local_addr() { SocketAddr::V4(_) => panic!("did not expect v4 address"), SocketAddr::V6(a) => a, }; + let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); let mut zones = vec![OmicronZoneConfig { id: Uuid::new_v4(), underlay_address: *http_bound.ip(), zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), - }, + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, http_address: http_bound, dns_address: match dns.dns_server.local_address() { SocketAddr::V4(_) => panic!("did not expect v4 address"), @@ -377,6 +385,8 @@ pub async fn run_standalone_server( gz_address: Ipv6Addr::LOCALHOST, gz_address_index: 0, }, + // Co-locate the filesystem pool with the dataset + filesystem_pool: Some(pool_name), }]; let mut internal_services_ip_pool_ranges = vec![]; @@ -415,6 +425,7 @@ pub async fn run_standalone_server( external_tls: false, external_dns_servers: vec![], }, + filesystem_pool: Some(get_random_zpool()), }); internal_services_ip_pool_ranges.push(match ip { @@ -432,13 +443,12 @@ pub async fn run_standalone_server( { let ip = *external_dns_internal_addr.ip(); let id = Uuid::new_v4(); + let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); zones.push(OmicronZoneConfig { id, underlay_address: ip, zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { - pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), - }, + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, http_address: external_dns_internal_addr, dns_address: SocketAddr::V6(external_dns_internal_addr), nic: nexus_types::inventory::NetworkInterface { @@ -457,6 +467,8 @@ pub async fn run_standalone_server( transit_ips: vec![], }, }, + // Co-locate the filesystem pool with the dataset + filesystem_pool: Some(pool_name), }); internal_services_ip_pool_ranges diff --git a/tools/console_version b/tools/console_version index c44dc93c45..8b0f764c7d 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="8febc79856f1b18b0fdc76f39e56876de25f112c" -SHA2="4c7e39db3ebd0602e3683e18d632a9008ab123479af15d48c86580a279fdeff7" +COMMIT="00b760269f08e207d70cf481f7d3e8f87a7d066d" +SHA2="c81245d15493f1e9437a8ee2ce3f8d1cd0b8cb0d4225eeb08954f2206e2b9b75" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7dfc9a1402..adcc511763 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -67,7 +67,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } -lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } +lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.155", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } @@ -171,7 +171,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } -lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } +lazy_static = { version = 
"1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.155", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] }