From 9f4ba0688873b6adf05e4c7359bf76ad91676be4 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 27 Aug 2024 18:06:57 -0700 Subject: [PATCH 1/5] add "live tests" (#6427) --- .config/nextest.toml | 14 +- .github/buildomat/build-and-test.sh | 6 + Cargo.lock | 43 ++++ Cargo.toml | 6 + README.adoc | 2 + dev-tools/xtask/Cargo.toml | 2 + dev-tools/xtask/src/check_workspace_deps.rs | 1 + dev-tools/xtask/src/clippy.rs | 25 +- dev-tools/xtask/src/common.rs | 34 +++ dev-tools/xtask/src/live_tests.rs | 156 ++++++++++++ dev-tools/xtask/src/main.rs | 6 + end-to-end-tests/README.adoc | 2 + live-tests/Cargo.toml | 41 ++++ live-tests/README.adoc | 78 ++++++ live-tests/build.rs | 10 + live-tests/macros/Cargo.toml | 16 ++ live-tests/macros/src/lib.rs | 86 +++++++ live-tests/tests/common/mod.rs | 249 ++++++++++++++++++++ live-tests/tests/common/reconfigurator.rs | 103 ++++++++ live-tests/tests/test_nexus_add_remove.rs | 229 ++++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 35 ++- nexus/reconfigurator/preparation/src/lib.rs | 130 +++++----- nexus/src/app/deployment.rs | 63 +---- 23 files changed, 1192 insertions(+), 145 deletions(-) create mode 100644 dev-tools/xtask/src/common.rs create mode 100644 dev-tools/xtask/src/live_tests.rs create mode 100644 live-tests/Cargo.toml create mode 100644 live-tests/README.adoc create mode 100644 live-tests/build.rs create mode 100644 live-tests/macros/Cargo.toml create mode 100644 live-tests/macros/src/lib.rs create mode 100644 live-tests/tests/common/mod.rs create mode 100644 live-tests/tests/common/reconfigurator.rs create mode 100644 live-tests/tests/test_nexus_add_remove.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index 95d4c20102..67cabe9f0e 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -10,7 +10,9 @@ experimental = ["setup-scripts"] [[profile.default.scripts]] # Exclude omicron-dev tests from crdb-seed as we explicitly want to simulate an # environment where the seed file doesn't exist. -filter = 'rdeps(nexus-test-utils) - package(omicron-dev)' +# Exclude omicron-live-tests because those don't need this and also don't have +# it available in the environment in which they run. +filter = 'rdeps(nexus-test-utils) - package(omicron-dev) - package(omicron-live-tests)' setup = 'crdb-seed' [profile.ci] @@ -21,18 +23,26 @@ fail-fast = false # invocations of nextest happen. command = 'cargo run -p crdb-seed --profile test' +[test-groups] # The ClickHouse cluster tests currently rely on a hard-coded set of ports for # the nodes in the cluster. We would like to relax this in the future, at which # point this test-group configuration can be removed or at least loosened to # support testing in parallel. For now, enforce strict serialization for all # tests with `replicated` in the name. -[test-groups] clickhouse-cluster = { max-threads = 1 } +# While most Omicron tests operate with their own simulated control plane, the +# live-tests operate on a more realistic, shared control plane and test +# behaviors that conflict with each other. They need to be run serially. +live-tests = { max-threads = 1 } [[profile.default.overrides]] filter = 'package(oximeter-db) and test(replicated)' test-group = 'clickhouse-cluster' +[[profile.default.overrides]] +filter = 'package(omicron-live-tests)' +test-group = 'live-tests' + [[profile.default.overrides]] # These tests can time out under heavy contention. 
filter = 'binary_id(omicron-nexus::test_all) and test(::schema::)' diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 1e4b655cb9..50bbd8194d 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -89,6 +89,12 @@ ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose banner doctest ptime -m timeout 1h cargo test --doc --locked --verbose --no-fail-fast +# Build the live-tests. This is only supported on illumos. +# We also can't actually run them here. See the README for more details. +if [[ $target_os == "illumos" ]]; then + ptime -m cargo xtask live-tests +fi + # We expect the seed CRDB to be placed here, so we explicitly remove it so the # rmdir check below doesn't get triggered. Nextest doesn't have support for # teardown scripts so this is the best we've got. diff --git a/Cargo.lock b/Cargo.lock index b9ca47dc1e..eaabc31493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4596,6 +4596,15 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +[[package]] +name = "live-tests-macros" +version = "0.1.0" +dependencies = [ + "omicron-workspace-hack", + "quote", + "syn 2.0.74", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -6066,6 +6075,38 @@ dependencies = [ "uuid", ] +[[package]] +name = "omicron-live-tests" +version = "0.1.0" +dependencies = [ + "anyhow", + "assert_matches", + "dropshot 0.10.2-dev", + "futures", + "internal-dns", + "live-tests-macros", + "nexus-client", + "nexus-config", + "nexus-db-model", + "nexus-db-queries", + "nexus-reconfigurator-planning", + "nexus-reconfigurator-preparation", + "nexus-sled-agent-shared", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "pq-sys", + "reqwest", + "serde", + "slog", + "slog-error-chain", + "textwrap", + "tokio", + "uuid", +] + [[package]] name = "omicron-nexus" version = "0.1.0" @@ -12305,6 +12346,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", + "camino-tempfile", "cargo_metadata", "cargo_toml", "clap", @@ -12313,6 +12355,7 @@ dependencies = [ "serde", "swrite", "tabled", + "textwrap", "toml 0.8.19", "usdt", ] diff --git a/Cargo.toml b/Cargo.toml index 5f74b259b4..c8df0dc17b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,8 @@ members = [ "internal-dns", "ipcc", "key-manager", + "live-tests", + "live-tests/macros", "nexus", "nexus-config", "nexus-sled-agent-shared", @@ -170,6 +172,9 @@ default-members = [ "internal-dns", "ipcc", "key-manager", + # Do not include live-tests in the list of default members because its tests + # only work in a deployed system. The macros can be here, though. 
+ "live-tests/macros", "nexus", "nexus-config", "nexus-sled-agent-shared", @@ -403,6 +408,7 @@ libc = "0.2.158" libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" } linear-map = "1.2.0" +live-tests-macros = { path = "live-tests/macros" } macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" diff --git a/README.adoc b/README.adoc index d48a5c9736..c2473a7a07 100644 --- a/README.adoc +++ b/README.adoc @@ -62,6 +62,8 @@ Nextest https://github.com/nextest-rs/nextest/issues/16[does not support doctest Similarly, you can run tests inside a https://github.com/oxidecomputer/falcon[Falcon] based VM. This is described in the `test-utils` https://github.com/oxidecomputer/omicron/tree/main/test-utils[README]. +There's also a xref:./live-tests/README.adoc[`live-tests`] test suite that can be run by hand in a _deployed_ Omicron system. + === rustfmt and clippy You can **format the code** using `cargo fmt`. Make sure to run this before pushing changes. The CI checks that the code is correctly formatted. diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index ec1b7825c6..508d0c73ee 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -24,6 +24,7 @@ workspace = true # downstream binaries do depend on it.) anyhow.workspace = true camino.workspace = true +camino-tempfile.workspace = true cargo_toml = "0.20" cargo_metadata.workspace = true clap.workspace = true @@ -32,5 +33,6 @@ macaddr.workspace = true serde.workspace = true swrite.workspace = true tabled.workspace = true +textwrap.workspace = true toml.workspace = true usdt.workspace = true diff --git a/dev-tools/xtask/src/check_workspace_deps.rs b/dev-tools/xtask/src/check_workspace_deps.rs index 73d5643ffb..92e83d8724 100644 --- a/dev-tools/xtask/src/check_workspace_deps.rs +++ b/dev-tools/xtask/src/check_workspace_deps.rs @@ -128,6 +128,7 @@ pub fn run_cmd() -> Result<()> { // The tests here should not be run by default, as they require // a running control plane. "end-to-end-tests", + "omicron-live-tests", ] .contains(&package.name.as_str()) .then_some(&package.id) diff --git a/dev-tools/xtask/src/clippy.rs b/dev-tools/xtask/src/clippy.rs index 7924a05574..229d0e126e 100644 --- a/dev-tools/xtask/src/clippy.rs +++ b/dev-tools/xtask/src/clippy.rs @@ -4,7 +4,8 @@ //! Subcommand: cargo xtask clippy -use anyhow::{bail, Context, Result}; +use crate::common::run_subcmd; +use anyhow::Result; use clap::Parser; use std::process::Command; @@ -51,25 +52,5 @@ pub fn run_cmd(args: ClippyArgs) -> Result<()> { .arg("--deny") .arg("warnings"); - eprintln!( - "running: {:?} {}", - &cargo, - command - .get_args() - .map(|arg| format!("{:?}", arg.to_str().unwrap())) - .collect::>() - .join(" ") - ); - - let exit_status = command - .spawn() - .context("failed to spawn child process")? - .wait() - .context("failed to wait for child process")?; - - if !exit_status.success() { - bail!("clippy failed: {}", exit_status); - } - - Ok(()) + run_subcmd(command) } diff --git a/dev-tools/xtask/src/common.rs b/dev-tools/xtask/src/common.rs new file mode 100644 index 0000000000..03b17a560f --- /dev/null +++ b/dev-tools/xtask/src/common.rs @@ -0,0 +1,34 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common xtask command helpers + +use anyhow::{bail, Context, Result}; +use std::process::Command; + +/// Runs the given command, printing some basic debug information around it, and +/// failing with an error message if the command does not exit successfully +pub fn run_subcmd(mut command: Command) -> Result<()> { + eprintln!( + "running: {} {}", + command.get_program().to_str().unwrap(), + command + .get_args() + .map(|arg| format!("{:?}", arg.to_str().unwrap())) + .collect::>() + .join(" ") + ); + + let exit_status = command + .spawn() + .context("failed to spawn child process")? + .wait() + .context("failed to wait for child process")?; + + if !exit_status.success() { + bail!("failed: {}", exit_status); + } + + Ok(()) +} diff --git a/dev-tools/xtask/src/live_tests.rs b/dev-tools/xtask/src/live_tests.rs new file mode 100644 index 0000000000..35d40343ee --- /dev/null +++ b/dev-tools/xtask/src/live_tests.rs @@ -0,0 +1,156 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Subcommand: cargo xtask live-tests + +use crate::common::run_subcmd; +use anyhow::{bail, Context, Result}; +use clap::Parser; +use std::process::Command; + +#[derive(Parser)] +pub struct Args {} + +pub fn run_cmd(_args: Args) -> Result<()> { + const NAME: &str = "live-tests-archive"; + + // The live tests operate in deployed environments, which always run + // illumos. Bail out quickly if someone tries to run this on a system whose + // binaries won't be usable. (We could compile this subcommand out + // altogether on non-illumos systems, but it seems more confusing to be + // silently missing something you might expect to be there. Plus, you can + // still check and even build *this* code on non-illumos systems.) + if cfg!(not(target_os = "illumos")) { + bail!("live-tests archive can only be built on illumos systems"); + } + + let tmpdir_root = + camino_tempfile::tempdir().context("creating temporary directory")?; + let final_tarball = camino::Utf8PathBuf::try_from( + std::env::current_dir() + .map(|d| d.join("target")) + .context("getting current directory")?, + ) + .context("non-UTF-8 current directory")? + .join(format!("{}.tgz", NAME)); + let proto_root = tmpdir_root.path().join(NAME); + let nextest_archive_file = proto_root.join("omicron-live-tests.tar.zst"); + + eprintln!("using temporary directory: {}", tmpdir_root.path()); + eprintln!("will create archive file: {}", nextest_archive_file); + eprintln!("output tarball: {}", final_tarball); + eprintln!(); + + std::fs::create_dir(&proto_root) + .with_context(|| format!("mkdir {:?}", &proto_root))?; + + let cargo = + std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo")); + let mut command = Command::new(&cargo); + + command.arg("nextest"); + command.arg("archive"); + command.arg("--package"); + command.arg("omicron-live-tests"); + command.arg("--archive-file"); + command.arg(&nextest_archive_file); + run_subcmd(command)?; + + // Using nextest archives requires that the source be separately transmitted + // to the system where the tests will be run. We're trying to automate + // that. So let's bundle up the source and the nextest archive into one big + // tarball. But which source files do we bundle? 
We need:
+    //
+    // - Cargo.toml (nextest expects to find this)
+    // - .config/nextest.toml (nextest's configuration, which is used while
+    //   running the tests)
+    // - live-tests (this is where the tests live, and they might expect stuff
+    //   that exists in here like expectorate files)
+    //
+    // plus the nextest archive file.
+    //
+    // To avoid creating a tarbomb, we want all the files prefixed with
+    // "live-tests-archive/".  There's no great way to do this with the
+    // illumos tar(1) except to create a temporary directory called
+    // "live-tests-archive" that contains the files and then tar that up.
+    //
+    // Ironically, an easy way to construct that directory is with tar(1).
+    let mut command = Command::new("bash");
+    command.arg("-c");
+    command.arg(format!(
+        "tar cf - Cargo.toml .config/nextest.toml live-tests | \
+         tar xf - -C {:?}",
+        &proto_root
+    ));
+    run_subcmd(command)?;
+
+    let mut command = Command::new("tar");
+    command.arg("cf");
+    command.arg(&final_tarball);
+    command.arg("-C");
+    command.arg(tmpdir_root.path());
+    command.arg(NAME);
+    run_subcmd(command)?;
+
+    drop(tmpdir_root);
+
+    eprint!("created: ");
+    println!("{}", &final_tarball);
+    eprintln!("\nTo use this:\n");
+    eprintln!(
+        "1. Copy the tarball to the switch zone in a deployed Omicron system.\n"
+    );
+    let raw = &[
+        "scp \\",
+        &format!("{} \\", &final_tarball),
+        "root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root",
+    ]
+    .join("\n");
+    let text = textwrap::wrap(
+        &raw,
+        textwrap::Options::new(160)
+            .initial_indent("    e.g., ")
+            .subsequent_indent("          "),
+    );
+    eprintln!("{}\n", text.join("\n"));
+    eprintln!("2. Copy the `cargo-nextest` binary to the same place.\n");
+    let raw = &[
+        "scp \\",
+        "$(which cargo-nextest) \\",
+        "root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root",
+    ]
+    .join("\n");
+    let text = textwrap::wrap(
+        &raw,
+        textwrap::Options::new(160)
+            .initial_indent("    e.g., ")
+            .subsequent_indent("          "),
+    );
+    eprintln!("{}\n", text.join("\n"));
+    eprintln!("3. On that system, unpack the tarball with:\n");
+    eprintln!("    tar xzf {}\n", final_tarball.file_name().unwrap());
+    eprintln!("4. On that system, run tests with:\n");
+    // TMPDIR=/var/tmp puts stuff on disk, cached as needed, rather than the
+    // default /tmp, which requires that stuff be in-memory.  That can lead to
+    // great sadness if the tests wind up writing a lot of data.
+    let raw = &[
+        "TMPDIR=/var/tmp ./cargo-nextest nextest run \\",
+        &format!(
+            "--archive-file {}/{} \\",
+            NAME,
+            nextest_archive_file.file_name().unwrap()
+        ),
+        &format!("--workspace-remap {}", NAME),
+    ]
+    .join("\n");
+    let text = textwrap::wrap(
+        &raw,
+        textwrap::Options::new(160)
+            .initial_indent("    ")
+            .subsequent_indent("    "),
+    );
+    eprintln!("{}\n", text.join("\n"));
+
+    Ok(())
+}
diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs
index 02fd05a198..9880adeb67 100644
--- a/dev-tools/xtask/src/main.rs
+++ b/dev-tools/xtask/src/main.rs
@@ -16,8 +16,10 @@ use std::process::Command;
 mod check_features;
 mod check_workspace_deps;
 mod clippy;
+mod common;
 #[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
 mod external;
+mod live_tests;
 mod usdt;
 
 #[cfg(target_os = "illumos")]
@@ -59,6 +61,9 @@ enum Cmds {
     /// Download binaries, OpenAPI specs, and other out-of-repo utilities.
     Download(external::External),
 
+    /// Create a bundle of live tests
+    LiveTests(live_tests::Args),
+
     /// Utilities for working with MGS.
     MgsDev(external::External),
     /// Utilities for working with Omicron.
@@ -127,6 +132,7 @@ fn main() -> Result<()> { external.exec_bin("xtask-downloader") } } + Cmds::LiveTests(args) => live_tests::run_cmd(args), Cmds::MgsDev(external) => external.exec_bin("mgs-dev"), Cmds::OmicronDev(external) => external.exec_bin("omicron-dev"), Cmds::Openapi(external) => external.exec_bin("openapi-manager"), diff --git a/end-to-end-tests/README.adoc b/end-to-end-tests/README.adoc index b9766db809..3e31f2b382 100644 --- a/end-to-end-tests/README.adoc +++ b/end-to-end-tests/README.adoc @@ -4,6 +4,8 @@ These tests run in Buildomat. They are built by the xref:../.github/buildomat/jo This package is not built or run by default (it is excluded from `default-members` in xref:../Cargo.toml[]). +See also: xref:../live-tests/README.adoc[omicron-live-tests]. + == Running these tests on your machine 1. xref:../docs/how-to-run.adoc[Make yourself a Gimlet]. diff --git a/live-tests/Cargo.toml b/live-tests/Cargo.toml new file mode 100644 index 0000000000..e0eaf2c338 --- /dev/null +++ b/live-tests/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "omicron-live-tests" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +omicron-workspace-hack.workspace = true + +[dev-dependencies] +anyhow.workspace = true +assert_matches.workspace = true +dropshot.workspace = true +futures.workspace = true +internal-dns.workspace = true +live-tests-macros.workspace = true +nexus-client.workspace = true +nexus-config.workspace = true +nexus-db-model.workspace = true +nexus-db-queries.workspace = true +nexus-reconfigurator-planning.workspace = true +nexus-reconfigurator-preparation.workspace = true +nexus-sled-agent-shared.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-test-utils.workspace = true +reqwest.workspace = true +serde.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +textwrap.workspace = true +tokio.workspace = true +uuid.workspace = true + +[lints] +workspace = true diff --git a/live-tests/README.adoc b/live-tests/README.adoc new file mode 100644 index 0000000000..9ea3a93e48 --- /dev/null +++ b/live-tests/README.adoc @@ -0,0 +1,78 @@ += Omicron live tests + +The `omicron-live-tests` package contains automated tests that operate in the context of an already-deployed "real" Oxide system (e.g., `a4x2` or our `london` or `madrid` test environments). This is a home for automated tests for all kinds of Reconfigurator behavior (e.g., add/expunge of all zones, add/expunge sled, upgrades, etc.). It can probably be used for non-Reconfigurator behavior, too. + +This package is not built or tested by default because the tests generally can't work in a dev environment and there's no way to have `cargo` build and check them but not run the tests by default. + +== Why a separate test suite? + +What makes these tests different from the rest of the test suite is that they require connectivity to the underlay network of the deployed system and they make API calls to various components in that system and they assume that this will behave like a real production system. By contrast, the normal tests instead _set up_ a bunch of components using simulated sled agents and localhost networking, which is great for starting from a predictable state and running tests in parallel, but the simulated sled agents and networking make it impossible to exercise quite a lot of Reconfigurator's functionality. 
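+
+For a concrete sense of what lives here, a live test is an ordinary Rust test
+that receives a context pointing at the deployed system (a sketch; the
+`live_test` macro and `LiveTestContext` are both defined in this package, and
+the assertion body is illustrative):
+
+```
+#[live_test]
+async fn test_example(lc: &LiveTestContext) {
+    // Talk to the real Nexus instances found in internal DNS.
+    let nexus_clients = lc.all_internal_nexus_clients().await.unwrap();
+    assert!(!nexus_clients.is_empty());
+}
+```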
+ +There are also the `end-to-end-tests`. That environment is more realistic than the main test suite, but not faithful enough for many Reconfigurator tests. + +== Production systems + +There are some safeguards so that these tests won't run on production systems: they refuse to run if they find any Oxide-hardware sleds in the system whose serial numbers don't correspond to known test environments. + +== Usage + +These tests are not currently run automatically (though they are _built_ in CI). + +You can run them yourself. First, deploy Omicron using `a4x2` or one of the hardware test rigs. In your Omicron workspace, run `cargo xtask live-tests` to build an archive and then follow the instructions: + +``` +$ cargo xtask live-tests + Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.96s + Running `target/debug/xtask live-tests` +using temporary directory: /dangerzone/omicron_tmp/.tmp0ItZUD +will create archive file: /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst +output tarball: /home/dap/omicron-work/target/live-tests-archive.tgz + +running: /home/dap/.rustup/toolchains/1.80.1-x86_64-unknown-illumos/bin/cargo "nextest" "archive" "--package" "omicron-live-tests" "--archive-file" "/dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst" + Finished `test` profile [unoptimized + debuginfo] target(s) in 0.89s +info: experimental features enabled: setup-scripts + Archiving 1 binary, 1 build script output directory, and 1 linked path to /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst + Archived 35 files to /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst in 0.31s +running: bash "-c" "tar cf - Cargo.toml .config/nextest.toml live-tests | tar xf - -C \"/dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive\"" +running: tar "cf" "/home/dap/omicron-work/target/live-tests-archive.tgz" "-C" "/dangerzone/omicron_tmp/.tmp0ItZUD" "live-tests-archive" +created: /home/dap/omicron-work/target/live-tests-archive.tgz + +To use this: + +1. Copy the tarball to the switch zone in a deployed Omicron system. + + e.g., scp \ + /home/dap/omicron-work/target/live-tests-archive.tgz \ + root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root + +2. Copy the `cargo-nextest` binary to the same place. + + e.g., scp \ + $(which cargo-nextest) \ + root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root + +3. On that system, unpack the tarball with: + + tar xzf live-tests-archive.tgz + +4. 
On that system, run tests with: + + TMPDIR=/var/tmp ./cargo-nextest nextest run \ + --archive-file live-tests-archive/omicron-live-tests.tar.zst \ + --workspace-remap live-tests-archive +``` + +Follow the instructions, run the tests, and you'll see the usual `nextest`-style output: + +``` +root@oxz_switch:~# TMPDIR=/var/tmp ./cargo-nextest nextest run --archive-file live-tests-archive/omicron-live-tests.tar.zst --workspace-remap live-tests-archive + Extracting 1 binary, 1 build script output directory, and 1 linked path to /var/tmp/nextest-archive-Lqx9VZ + Extracted 35 files to /var/tmp/nextest-archive-Lqx9VZ in 1.01s +info: experimental features enabled: setup-scripts + Starting 1 test across 1 binary (run ID: a5fc9163-9dd5-4b23-b89f-55f8f39ebbbc, nextest profile: default) + SLOW [> 60.000s] omicron-live-tests::test_nexus_add_remove test_nexus_add_remove + PASS [ 61.975s] omicron-live-tests::test_nexus_add_remove test_nexus_add_remove +------------ + Summary [ 61.983s] 1 test run: 1 passed (1 slow), 0 skipped +root@oxz_switch:~# +``` diff --git a/live-tests/build.rs b/live-tests/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/live-tests/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/live-tests/macros/Cargo.toml b/live-tests/macros/Cargo.toml new file mode 100644 index 0000000000..81d094d926 --- /dev/null +++ b/live-tests/macros/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "live-tests-macros" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lib] +proc-macro = true + +[lints] +workspace = true + +[dependencies] +quote.workspace = true +syn = { workspace = true, features = [ "fold", "parsing" ] } +omicron-workspace-hack.workspace = true diff --git a/live-tests/macros/src/lib.rs b/live-tests/macros/src/lib.rs new file mode 100644 index 0000000000..4fdd4029b5 --- /dev/null +++ b/live-tests/macros/src/lib.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Macro to wrap a live test function that automatically creates and cleans up +//! the `LiveTestContext` + +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, ItemFn}; + +/// Define a test function that uses `LiveTestContext` +/// +/// This is usable only within the `omicron-live-tests` crate. +/// +/// Similar to `nexus_test`, this macro lets you define a test function that +/// behaves like `tokio::test` except that it accepts an argument of type +/// `&LiveTestContext`. The `LiveTestContext` is cleaned up on _successful_ +/// return of the test function. On failure, debugging information is +/// deliberately left around. +/// +/// Example usage: +/// +/// ```ignore +/// #[live_test] +/// async fn test_my_test_case(lc: &LiveTestContext) { +/// assert!(true); +/// } +/// ``` +/// +/// We use this instead of implementing Drop on LiveTestContext because we want +/// the teardown to only happen when the test doesn't fail (which causes a panic +/// and unwind). 
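+///
+/// Conceptually, the expansion looks like this (a sketch, not the exact
+/// generated code):
+///
+/// ```ignore
+/// #[tokio::test]
+/// async fn test_my_test_case() {
+///     async fn test_my_test_case(lc: &LiveTestContext) {
+///         assert!(true);
+///     }
+///     let ctx = crate::common::LiveTestContext::new("test_my_test_case")
+///         .await
+///         .expect("setting up LiveTestContext");
+///     test_my_test_case(&ctx).await;
+///     ctx.cleanup_successful();
+/// }
+/// ```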
+#[proc_macro_attribute] +pub fn live_test(_attrs: TokenStream, input: TokenStream) -> TokenStream { + let input_func = parse_macro_input!(input as ItemFn); + + let mut correct_signature = true; + if input_func.sig.variadic.is_some() + || input_func.sig.inputs.len() != 1 + || input_func.sig.asyncness.is_none() + { + correct_signature = false; + } + + // Verify we're returning an empty tuple + correct_signature &= match input_func.sig.output { + syn::ReturnType::Default => true, + syn::ReturnType::Type(_, ref t) => { + if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &**t { + elems.is_empty() + } else { + false + } + } + }; + if !correct_signature { + panic!("func signature must be async fn(&LiveTestContext)"); + } + + let func_ident_string = input_func.sig.ident.to_string(); + let func_ident = input_func.sig.ident.clone(); + let new_block = quote! { + { + #input_func + + let ctx = crate::common::LiveTestContext::new( + #func_ident_string + ).await.expect("setting up LiveTestContext"); + #func_ident(&ctx).await; + ctx.cleanup_successful(); + } + }; + let mut sig = input_func.sig.clone(); + sig.inputs.clear(); + let func = ItemFn { + attrs: input_func.attrs, + vis: input_func.vis, + sig, + block: Box::new(syn::parse2(new_block).unwrap()), + }; + TokenStream::from(quote!( + #[::tokio::test] + #func + )) +} diff --git a/live-tests/tests/common/mod.rs b/live-tests/tests/common/mod.rs new file mode 100644 index 0000000000..28f677f5ed --- /dev/null +++ b/live-tests/tests/common/mod.rs @@ -0,0 +1,249 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +pub mod reconfigurator; + +use anyhow::{anyhow, ensure, Context}; +use dropshot::test_util::LogContext; +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; +use nexus_config::PostgresConfigWithUrl; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::SledFilter; +use omicron_common::address::Ipv6Subnet; +use slog::info; +use slog::o; +use std::ffi::OsStr; +use std::net::SocketAddrV6; +use std::path::Component; +use std::sync::Arc; + +/// Contains data and interfaces useful for running tests against an existing +/// deployed control plane +pub struct LiveTestContext { + logctx: LogContext, + opctx: OpContext, + resolver: Resolver, + datastore: Arc, +} + +impl LiveTestContext { + /// Make a new `LiveTestContext` for a test called `test_name`. + pub async fn new( + test_name: &'static str, + ) -> Result { + let logctx = omicron_test_utils::dev::test_setup_log(test_name); + let log = &logctx.log; + let resolver = create_resolver(log)?; + check_execution_environment(&resolver).await?; + let datastore = create_datastore(&log, &resolver).await?; + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + check_hardware_environment(&opctx, &datastore).await?; + Ok(LiveTestContext { logctx, opctx, resolver, datastore }) + } + + /// Clean up this `LiveTestContext` + /// + /// This mainly removes log files created by the test. We do this in this + /// explicit cleanup function rather than on `Drop` because we want the log + /// files preserved on test failure. 
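+    ///
+    /// (On failure, the test panics before reaching this call, so the
+    /// `LogContext` is never cleaned up and its log files survive for
+    /// debugging.)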
+    pub fn cleanup_successful(self) {
+        self.logctx.cleanup_successful();
+    }
+
+    /// Returns a logger suitable for use in the test
+    pub fn log(&self) -> &slog::Logger {
+        &self.logctx.log
+    }
+
+    /// Returns an `OpContext` suitable for use in tests
+    pub fn opctx(&self) -> &OpContext {
+        &self.opctx
+    }
+
+    /// Returns a `DataStore` pointing at this deployed system's database
+    pub fn datastore(&self) -> &DataStore {
+        &self.datastore
+    }
+
+    /// Returns a client for a Nexus internal API at the given socket address
+    pub fn specific_internal_nexus_client(
+        &self,
+        sockaddr: SocketAddrV6,
+    ) -> nexus_client::Client {
+        let url = format!("http://{}", sockaddr);
+        let log = self.logctx.log.new(o!("nexus_internal_url" => url.clone()));
+        nexus_client::Client::new(&url, log)
+    }
+
+    /// Returns a list of clients for the internal APIs for all Nexus instances
+    /// found in DNS
+    pub async fn all_internal_nexus_clients(
+        &self,
+    ) -> Result<Vec<nexus_client::Client>, anyhow::Error> {
+        Ok(self
+            .resolver
+            .lookup_all_socket_v6(ServiceName::Nexus)
+            .await
+            .context("looking up Nexus in internal DNS")?
+            .into_iter()
+            .map(|s| self.specific_internal_nexus_client(s))
+            .collect())
+    }
+}
+
+fn create_resolver(log: &slog::Logger) -> Result<Resolver, anyhow::Error> {
+    // In principle, we should look at /etc/resolv.conf to find the DNS
+    // servers.  In practice, this usually isn't populated today.  See
+    // oxidecomputer/omicron#2122.
+    //
+    // However, the address selected below should work for most existing
+    // Omicron deployments today.  That's because while the base subnet is in
+    // principle configurable in config-rss.toml, it's very uncommon to change
+    // it from the default value used here.
+    let subnet = Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap());
+    eprintln!("note: using DNS server for subnet {}", subnet.net());
+    internal_dns::resolver::Resolver::new_from_subnet(log.clone(), subnet)
+        .with_context(|| {
+            format!("creating DNS resolver for subnet {}", subnet.net())
+        })
+}
+
+/// Creates a DataStore pointing at the CockroachDB cluster that's in DNS
+async fn create_datastore(
+    log: &slog::Logger,
+    resolver: &Resolver,
+) -> Result<Arc<DataStore>, anyhow::Error> {
+    let sockaddrs = resolver
+        .lookup_all_socket_v6(ServiceName::Cockroach)
+        .await
+        .context("resolving CockroachDB")?;
+
+    let url = format!(
+        "postgresql://root@{}/omicron?sslmode=disable",
+        sockaddrs
+            .into_iter()
+            .map(|a| a.to_string())
+            .collect::<Vec<_>>()
+            .join(",")
+    )
+    .parse::<PostgresConfigWithUrl>()
+    .context("failed to parse constructed postgres URL")?;
+
+    let db_config = nexus_db_queries::db::Config { url };
+    let pool =
+        Arc::new(nexus_db_queries::db::Pool::new_single_host(log, &db_config));
+    DataStore::new_failfast(log, pool)
+        .await
+        .context("creating DataStore")
+        .map(Arc::new)
+}
+
+/// Performs quick checks to determine if the user is running these tests in
+/// the wrong place and bails out if so
+///
+/// This isn't perfect but seeks to fail fast in obviously bogus environments
+/// that someone might accidentally try to run this in.
+async fn check_execution_environment(
+    resolver: &Resolver,
+) -> Result<(), anyhow::Error> {
+    ensure!(
+        cfg!(target_os = "illumos"),
+        "live tests can only be run on deployed systems, which run illumos"
+    );
+
+    // The only real requirement for these tests is that they're run from a
+    // place with connectivity to the underlay network of a deployed control
+    // plane.  The easiest way to tell is to look up something in internal DNS.
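+    // (The internal DNS servers register themselves under
+    // `ServiceName::InternalDns`, so resolving that name succeeds only when
+    // we can actually reach the deployed system's DNS servers.)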
+    resolver.lookup_ip(ServiceName::InternalDns).await.map_err(|e| {
+        let text = format!(
+            "check_execution_environment(): failed to look up internal DNS \
+             in the internal DNS servers.\n\n \
+             Are you trying to run this in a development environment?  \
+             This test can only be run on deployed systems and only from a \
+             context with connectivity to the underlay network.\n\n \
+             raw error: {}",
+            slog_error_chain::InlineErrorChain::new(&e)
+        );
+        anyhow!("{}", textwrap::wrap(&text, 80).join("\n"))
+    })?;
+
+    // Warn the user if the temporary directory is /tmp.  This check is
+    // heuristic.  There are other ways they may have specified a tmpfs
+    // temporary directory and we don't claim to catch all of them.
+    //
+    // We could also just go ahead and use /var/tmp, but it's not clear we can
+    // reliably do that at this point (if Rust or other components have cached
+    // TMPDIR) and it would be hard to override.
+    let tmpdir = std::env::temp_dir();
+    let mut tmpdir_components = tmpdir.components().take(2);
+    if let Some(first) = tmpdir_components.next() {
+        if let Some(next) = tmpdir_components.next() {
+            if first == Component::RootDir
+                && next == Component::Normal(OsStr::new("tmp"))
+            {
+                eprintln!(
+                    "WARNING: temporary directory appears to be under /tmp, \
+                     which is generally tmpfs.  Consider setting \
+                     TMPDIR=/var/tmp to avoid runaway tests using too much \
+                     memory and swap."
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Performs additional checks to determine whether we're running in an
+/// environment that we believe is safe for running tests
+///
+/// These tests may make arbitrary modifications to the system.  We don't want
+/// to run this in dogfood or other pre-production or production environments.
+/// This function uses an allowlist of Oxide serials corresponding to test
+/// environments so that it never accidentally runs on a production system.
+///
+/// Non-Oxide hardware (e.g., PCs, a4x2, etc.) is always allowed.
+async fn check_hardware_environment(
+    opctx: &OpContext,
+    datastore: &DataStore,
+) -> Result<(), anyhow::Error> {
+    const ALLOWED_GIMLET_SERIALS: &[&str] = &[
+        // test rig: "madrid"
+        "BRM42220004",
+        "BRM42220081",
+        "BRM42220007",
+        "BRM42220046",
+        // test rig: "london"
+        "BRM42220036",
+        "BRM42220062",
+        "BRM42220030",
+        "BRM44220007",
+    ];
+
+    // Refuse to operate in an environment that might contain real Oxide
+    // hardware that's not known to be part of a test rig.  This is
+    // deliberately conservative.
+    let scary_sleds = datastore
+        .sled_list_all_batched(opctx, SledFilter::Commissioned)
+        .await
+        .context("check_environment: listing commissioned sleds")?
+        .into_iter()
+        .filter_map(|s| {
+            (s.part_number() != "i86pc"
+                && !ALLOWED_GIMLET_SERIALS.contains(&s.serial_number()))
+            .then(|| s.serial_number().to_owned())
+        })
+        .collect::<Vec<_>>();
+    if scary_sleds.is_empty() {
+        info!(&opctx.log, "environment verified");
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "refusing to operate in an environment with an unknown system: {}",
+            scary_sleds.join(", ")
+        ))
+    }
+}
diff --git a/live-tests/tests/common/reconfigurator.rs b/live-tests/tests/common/reconfigurator.rs
new file mode 100644
index 0000000000..8f2560bb49
--- /dev/null
+++ b/live-tests/tests/common/reconfigurator.rs
@@ -0,0 +1,103 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0.  If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Helpers common to Reconfigurator tests
+
+use anyhow::{ensure, Context};
+use nexus_client::types::BlueprintTargetSet;
+use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
+use nexus_types::deployment::{Blueprint, PlanningInput};
+use slog::{debug, info};
+
+/// Modify the system by editing the current target blueprint
+///
+/// More precisely, this function:
+///
+/// - fetches the current target blueprint
+/// - creates a new `BlueprintBuilder` based on it
+/// - invokes the caller's `edit_fn`, which may modify the builder however it
+///   likes
+/// - generates a new blueprint (thus based on the current target)
+/// - uploads the new blueprint
+/// - sets the new blueprint as the current target
+/// - enables the new blueprint
+///
+/// ## Errors
+///
+/// This function fails if the current target blueprint is not already enabled.
+/// That's because a disabled target blueprint means somebody doesn't want
+/// Reconfigurator running or doesn't want it using that blueprint.  We don't
+/// want the test to inadvertently override that behavior.  In a typical use
+/// case, a developer enables the initial target blueprint before running these
+/// tests and then doesn't need to think about it again for the lifetime of
+/// their test environment.
+pub async fn blueprint_edit_current_target(
+    log: &slog::Logger,
+    planning_input: &PlanningInput,
+    nexus: &nexus_client::Client,
+    edit_fn: &dyn Fn(&mut BlueprintBuilder) -> Result<(), anyhow::Error>,
+) -> Result<(Blueprint, Blueprint), anyhow::Error> {
+    // Fetch the current target configuration.
+    info!(log, "editing current target blueprint");
+    let target_blueprint = nexus
+        .blueprint_target_view()
+        .await
+        .context("fetch current target config")?
+        .into_inner();
+    debug!(log, "found current target blueprint";
+        "blueprint_id" => %target_blueprint.target_id
+    );
+    ensure!(
+        target_blueprint.enabled,
+        "refusing to modify a system with target blueprint disabled"
+    );
+
+    // Fetch the actual blueprint.
+    let blueprint1 = nexus
+        .blueprint_view(&target_blueprint.target_id)
+        .await
+        .context("fetch current target blueprint")?
+        .into_inner();
+    debug!(log, "fetched current target blueprint";
+        "blueprint_id" => %target_blueprint.target_id
+    );
+
+    // Make a new builder based on that blueprint and use `edit_fn` to edit it.
+    let mut builder = BlueprintBuilder::new_based_on(
+        log,
+        &blueprint1,
+        &planning_input,
+        "test-suite",
+    )
+    .context("creating BlueprintBuilder")?;
+
+    edit_fn(&mut builder)?;
+
+    // Assemble the new blueprint, import it, and make it the new target.
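+    // (`build()` consumes the builder; `blueprint1` is kept around so that
+    // callers can diff the before/after blueprints from the returned pair.)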
+ let blueprint2 = builder.build(); + info!(log, "assembled new blueprint based on target"; + "current_target_id" => %target_blueprint.target_id, + "new_blueprint_id" => %blueprint2.id, + ); + nexus + .blueprint_import(&blueprint2) + .await + .context("importing new blueprint")?; + debug!(log, "imported new blueprint"; + "blueprint_id" => %blueprint2.id, + ); + nexus + .blueprint_target_set(&BlueprintTargetSet { + enabled: true, + target_id: blueprint2.id, + }) + .await + .expect("setting new target"); + info!(log, "finished editing target blueprint"; + "old_target_id" => %blueprint1.id, + "new_target_id" => %blueprint2.id, + ); + + Ok((blueprint1, blueprint2)) +} diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs new file mode 100644 index 0000000000..70e55b704a --- /dev/null +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -0,0 +1,229 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; + +use anyhow::Context; +use assert_matches::assert_matches; +use common::reconfigurator::blueprint_edit_current_target; +use common::LiveTestContext; +use futures::TryStreamExt; +use live_tests_macros::live_test; +use nexus_client::types::Saga; +use nexus_client::types::SagaState; +use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; +use nexus_reconfigurator_preparation::PlanningInputFromDb; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::SledFilter; +use omicron_common::address::NEXUS_INTERNAL_PORT; +use omicron_test_utils::dev::poll::wait_for_condition; +use omicron_test_utils::dev::poll::CondCheckError; +use slog::{debug, info}; +use std::net::SocketAddrV6; +use std::time::Duration; + +// TODO-coverage This test could check other stuff: +// +// - that after adding: +// - the new Nexus appears in external DNS +// - we can _use_ the new Nexus from the outside +// (e.g., using an `oxide_client` using a custom reqwest resolver that +// points only at that one IP so that we can make sure we're always getting +// that one) +// - that after expungement, it doesn't appear in external DNS any more +// +#[live_test] +async fn test_nexus_add_remove(lc: &LiveTestContext) { + // Test setup + let log = lc.log(); + let opctx = lc.opctx(); + let datastore = lc.datastore(); + let planning_input = PlanningInputFromDb::assemble(&opctx, &datastore) + .await + .expect("planning input"); + let initial_nexus_clients = lc.all_internal_nexus_clients().await.unwrap(); + let nexus = initial_nexus_clients.first().expect("internal Nexus client"); + + // First, deploy a new Nexus zone to an arbitrary sled. + let sled_id = planning_input + .all_sled_ids(SledFilter::Commissioned) + .next() + .expect("any sled id"); + let (blueprint1, blueprint2) = blueprint_edit_current_target( + log, + &planning_input, + &nexus, + &|builder: &mut BlueprintBuilder| { + let nnexus = builder + .sled_num_running_zones_of_kind(sled_id, ZoneKind::Nexus); + let count = builder + .sled_ensure_zone_multiple_nexus(sled_id, nnexus + 1) + .context("adding Nexus zone")?; + assert_matches!( + count, + EnsureMultiple::Changed { added: 1, removed: 0 } + ); + Ok(()) + }, + ) + .await + .expect("editing blueprint to add zone"); + + // Figure out which zone is new and make a new client for it. 
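+    // The blueprint diff is keyed by sled: `diff.zones.added` maps each
+    // modified sled to the zones added there.  We expect exactly one new
+    // Nexus zone, on the sled we edited above.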
+ let diff = blueprint2.diff_since_blueprint(&blueprint1); + let new_zone = diff + .zones + .added + .values() + .next() + .expect("at least one sled with added zones") + .zones + .first() + .expect("at least one added zone on that sled"); + assert_eq!(new_zone.kind(), ZoneKind::Nexus); + let new_zone_addr = new_zone.underlay_address(); + let new_zone_sockaddr = + SocketAddrV6::new(new_zone_addr, NEXUS_INTERNAL_PORT, 0, 0); + let new_zone_client = lc.specific_internal_nexus_client(new_zone_sockaddr); + + // Wait for the new Nexus zone to show up and be usable. + let initial_sagas_list = wait_for_condition( + || async { + list_sagas(&new_zone_client).await.map_err(|e| { + debug!(log, + "waiting for new Nexus to be available: listing sagas: {e:#}" + ); + CondCheckError::<()>::NotYet + }) + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .expect("new Nexus to be usable"); + assert!(initial_sagas_list.is_empty()); + info!(log, "new Nexus is online"); + + // Create a demo saga from the new Nexus zone. We'll use this to test that + // when the zone is expunged, its saga gets moved to a different Nexus. + let demo_saga = new_zone_client + .saga_demo_create() + .await + .expect("new Nexus saga demo create"); + let saga_id = demo_saga.saga_id; + let sagas_list = + list_sagas(&new_zone_client).await.expect("new Nexus sagas_list"); + assert_eq!(sagas_list.len(), 1); + assert_eq!(sagas_list[0].id, saga_id); + info!(log, "created demo saga"; "demo_saga" => ?demo_saga); + + // Now expunge the zone we just created. + let _ = blueprint_edit_current_target( + log, + &planning_input, + &nexus, + &|builder: &mut BlueprintBuilder| { + builder + .sled_expunge_zone(sled_id, new_zone.id()) + .context("expunging zone") + }, + ) + .await + .expect("editing blueprint to expunge zone"); + + // At some point, we should be unable to reach this Nexus any more. + wait_for_condition( + || async { + match new_zone_client.saga_list(None, None, None).await { + Err(nexus_client::Error::CommunicationError(error)) => { + info!(log, "expunged Nexus no longer reachable"; + "error" => slog_error_chain::InlineErrorChain::new(&error), + ); + Ok(()) + } + Ok(_) => { + debug!(log, "expunged Nexus is still reachable"); + Err(CondCheckError::<()>::NotYet) + } + Err(error) => { + debug!(log, "expunged Nexus is still reachable"; + "error" => slog_error_chain::InlineErrorChain::new(&error), + ); + Err(CondCheckError::NotYet) + } + } + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + // Wait for some other Nexus instance to pick up the saga. + let nexus_found = wait_for_condition( + || async { + for nexus_client in &initial_nexus_clients { + assert!(nexus_client.baseurl() != new_zone_client.baseurl()); + let Ok(sagas) = list_sagas(&nexus_client).await else { + continue; + }; + + debug!(log, "found sagas (last): {:?}", sagas); + if sagas.into_iter().any(|s| s.id == saga_id) { + return Ok(nexus_client); + } + } + + return Err(CondCheckError::<()>::NotYet); + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + info!(log, "found saga in a different Nexus instance"; + "saga_id" => %saga_id, + "found_nexus" => nexus_found.baseurl(), + ); + assert!(nexus_found.baseurl() != new_zone_client.baseurl()); + + // Now, complete the demo saga on whichever instance is running it now. + // `saga_demo_complete` is not synchronous. It just unblocks the saga. + // We'll need to poll a bit to wait for it to finish. 
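+    // (Note the two IDs here: `demo_saga.demo_saga_id` names the demo saga
+    // record used to unblock it, while `demo_saga.saga_id` is the underlying
+    // saga whose state we poll for completion below.)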
+ nexus_found + .saga_demo_complete(&demo_saga.demo_saga_id) + .await + .expect("complete demo saga"); + let found = wait_for_condition( + || async { + let sagas = list_sagas(&nexus_found).await.expect("listing sagas"); + debug!(log, "found sagas (last): {:?}", sagas); + let found = sagas.into_iter().find(|s| s.id == saga_id).unwrap(); + if matches!(found.state, SagaState::Succeeded) { + Ok(found) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &Duration::from_millis(50), + &Duration::from_secs(30), + ) + .await + .unwrap(); + + assert_eq!(found.id, saga_id); + assert!(matches!(found.state, SagaState::Succeeded)); +} + +async fn list_sagas( + client: &nexus_client::Client, +) -> Result, anyhow::Error> { + client + .saga_list_stream(None, None) + .try_collect::>() + .await + .context("listing sagas") +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index d424e08b61..9e69800fed 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -27,6 +27,7 @@ use crate::db::{ error::{public_error_from_diesel, ErrorHandler}, }; use ::oximeter::types::ProducerRegistry; +use anyhow::{anyhow, bail, Context}; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::pg::Pg; use diesel::prelude::*; @@ -207,7 +208,7 @@ impl DataStore { /// Constructs a new Datastore object. /// - /// Only returns if the database schema is compatible with Nexus's known + /// Only returns when the database schema is compatible with Nexus's known /// schema version. pub async fn new( log: &Logger, @@ -241,6 +242,38 @@ impl DataStore { Ok(datastore) } + /// Constructs a new Datastore, failing if the schema version does not match + /// this program's expected version + pub async fn new_failfast( + log: &Logger, + pool: Arc, + ) -> Result { + let datastore = + Self::new_unchecked(log.new(o!("component" => "datastore")), pool) + .map_err(|e| anyhow!("{}", e))?; + const EXPECTED_VERSION: SemverVersion = nexus_db_model::SCHEMA_VERSION; + let (found_version, found_target) = datastore + .database_schema_version() + .await + .context("loading database schema version")?; + + if let Some(found_target) = found_target { + bail!( + "database schema check failed: apparently mid-upgrade \ + (found_target = {found_target})" + ); + } + + if found_version != EXPECTED_VERSION { + bail!( + "database schema check failed: \ + expected {EXPECTED_VERSION}, found {found_version}", + ); + } + + Ok(datastore) + } + pub fn register_producers(&self, registry: &ProducerRegistry) { registry .register_producer( diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index fc0e4638f8..a637241aaa 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -35,6 +35,7 @@ use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; +use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; use omicron_common::disk::DiskIdentity; use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; @@ -72,6 +73,74 @@ pub struct PlanningInputFromDb<'a> { } impl PlanningInputFromDb<'_> { + pub async fn assemble( + opctx: &OpContext, + datastore: &DataStore, + ) -> Result { + opctx.check_complex_operations_allowed()?; + let sled_rows = datastore + .sled_list_all_batched(opctx, SledFilter::Commissioned) + .await + .internal_context("fetching all 
sleds")?; + let zpool_rows = datastore + .zpool_list_all_external_batched(opctx) + .await + .internal_context("fetching all external zpool rows")?; + let ip_pool_range_rows = { + let (authz_service_ip_pool, _) = datastore + .ip_pools_service_lookup(opctx) + .await + .internal_context("fetching IP services pool")?; + datastore + .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) + .await + .internal_context("listing services IP pool ranges")? + }; + let external_ip_rows = datastore + .external_ip_list_service_all_batched(opctx) + .await + .internal_context("fetching service external IPs")?; + let service_nic_rows = datastore + .service_network_interfaces_all_list_batched(opctx) + .await + .internal_context("fetching service NICs")?; + let internal_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .internal_context("fetching internal DNS version")? + .version; + let external_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::External) + .await + .internal_context("fetching external DNS version")? + .version; + let cockroachdb_settings = datastore + .cockroachdb_settings(opctx) + .await + .internal_context("fetching cockroachdb settings")?; + + let planning_input = PlanningInputFromDb { + sled_rows: &sled_rows, + zpool_rows: &zpool_rows, + ip_pool_range_rows: &ip_pool_range_rows, + target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, + target_nexus_zone_count: NEXUS_REDUNDANCY, + target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, + target_cockroachdb_cluster_version: + CockroachDbClusterVersion::POLICY, + external_ip_rows: &external_ip_rows, + service_nic_rows: &service_nic_rows, + log: &opctx.log, + internal_dns_version, + external_dns_version, + cockroachdb_settings: &cockroachdb_settings, + } + .build() + .internal_context("assembling planning_input")?; + + Ok(planning_input) + } + pub fn build(&self) -> Result { let service_ip_pool_ranges = self.ip_pool_range_rows.iter().map(IpRange::from).collect(); @@ -195,65 +264,8 @@ pub async fn reconfigurator_state_load( datastore: &DataStore, ) -> Result { opctx.check_complex_operations_allowed()?; - let sled_rows = datastore - .sled_list_all_batched(opctx, SledFilter::Commissioned) - .await - .context("listing sleds")?; - let zpool_rows = datastore - .zpool_list_all_external_batched(opctx) - .await - .context("listing zpools")?; - let ip_pool_range_rows = { - let (authz_service_ip_pool, _) = datastore - .ip_pools_service_lookup(opctx) - .await - .context("fetching IP services pool")?; - datastore - .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) - .await - .context("listing services IP pool ranges")? - }; - let external_ip_rows = datastore - .external_ip_list_service_all_batched(opctx) - .await - .context("fetching service external IPs")?; - let service_nic_rows = datastore - .service_network_interfaces_all_list_batched(opctx) - .await - .context("fetching service NICs")?; - let internal_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::Internal) - .await - .context("fetching internal DNS version")? - .version; - let external_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::External) - .await - .context("fetching external DNS version")? 
- .version; - let cockroachdb_settings = datastore - .cockroachdb_settings(opctx) - .await - .context("fetching cockroachdb settings")?; - - let planning_input = PlanningInputFromDb { - sled_rows: &sled_rows, - zpool_rows: &zpool_rows, - ip_pool_range_rows: &ip_pool_range_rows, - target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, - target_nexus_zone_count: NEXUS_REDUNDANCY, - target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, - target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, - external_ip_rows: &external_ip_rows, - service_nic_rows: &service_nic_rows, - log: &opctx.log, - internal_dns_version, - external_dns_version, - cockroachdb_settings: &cockroachdb_settings, - } - .build() - .context("assembling planning_input")?; - + let planning_input = + PlanningInputFromDb::assemble(opctx, datastore).await?; let collection_ids = datastore .inventory_collections() .await diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 50ae332d3f..79e7a93e6d 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -4,7 +4,6 @@ //! Configuration of the deployment system -use nexus_db_model::DnsGroup; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_reconfigurator_planning::planner::Planner; @@ -13,9 +12,7 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; -use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -25,9 +22,6 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::policy::COCKROACHDB_REDUNDANCY; -use omicron_common::policy::NEXUS_REDUNDANCY; use slog_error_chain::InlineErrorChain; use uuid::Uuid; @@ -132,61 +126,8 @@ impl super::Nexus { ) -> Result { let creator = self.id.to_string(); let datastore = self.datastore(); - - let sled_rows = datastore - .sled_list_all_batched(opctx, SledFilter::Commissioned) - .await?; - let zpool_rows = - datastore.zpool_list_all_external_batched(opctx).await?; - let ip_pool_range_rows = { - let (authz_service_ip_pool, _) = - datastore.ip_pools_service_lookup(opctx).await?; - datastore - .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) - .await? - }; - let external_ip_rows = - datastore.external_ip_list_service_all_batched(opctx).await?; - let service_nic_rows = datastore - .service_network_interfaces_all_list_batched(opctx) - .await?; - - let internal_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::Internal) - .await - .internal_context( - "fetching internal DNS version for blueprint planning", - )? - .version; - let external_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::External) - .await - .internal_context( - "fetching external DNS version for blueprint planning", - )? 
- .version; - let cockroachdb_settings = - datastore.cockroachdb_settings(opctx).await.internal_context( - "fetching cockroachdb settings for blueprint planning", - )?; - - let planning_input = PlanningInputFromDb { - sled_rows: &sled_rows, - zpool_rows: &zpool_rows, - ip_pool_range_rows: &ip_pool_range_rows, - external_ip_rows: &external_ip_rows, - service_nic_rows: &service_nic_rows, - target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, - target_nexus_zone_count: NEXUS_REDUNDANCY, - target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, - target_cockroachdb_cluster_version: - CockroachDbClusterVersion::POLICY, - log: &opctx.log, - internal_dns_version, - external_dns_version, - cockroachdb_settings: &cockroachdb_settings, - } - .build()?; + let planning_input = + PlanningInputFromDb::assemble(opctx, datastore).await?; // The choice of which inventory collection to use here is not // necessarily trivial. Inventory collections may be incomplete due to From f89e1c3ff0b8944756ec56a4920fb441ca13a725 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 02:22:07 +0000 Subject: [PATCH 2/5] Bump diesel from 2.2.2 to 2.2.3 (#6424) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaabc31493..901d9c052a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1950,9 +1950,9 @@ checksum = "a7993efb860416547839c115490d4951c6d0f8ec04a3594d9dd99d50ed7ec170" [[package]] name = "diesel" -version = "2.2.2" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf97ee7261bb708fa3402fa9c17a54b70e90e3cb98afb3dc8999d5512cb03f94" +checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71" dependencies = [ "bitflags 2.6.0", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index c8df0dc17b..f44638d1d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -335,7 +335,7 @@ debug-ignore = "1.0.5" derive_more = "0.99.18" derive-where = "1.2.7" # Having the i-implement-... feature here makes diesel go away from the workspace-hack -diesel = { version = "2.2.2", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } +diesel = { version = "2.2.3", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } From 57b0900f6a41896a85a5c4036df7a18c0141dc7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Wed, 28 Aug 2024 15:45:44 +1200 Subject: [PATCH 3/5] [reconfigurator] ClickHouse XML generation types (#6412) ## Overview This commit adds a library to generate ClickHouse replica server and keeper configuration files. A lot of the code in the `clickhouse-admin/types` directory has been copied over from [clickward](https://github.com/oxidecomputer/clickward), but there are several additions and modifications: - New `new()` and `default()` methods that set default Oxide values. - File generation is per node, as opposed to all files generated in a single directory like clickward. 
## Usage To generate a replica server configuration file: ```rust let keepers = vec![ KeeperNodeConfig::new("ff::01".to_string()), KeeperNodeConfig::new("ff::02".to_string()), KeeperNodeConfig::new("ff::03".to_string()), ]; let servers = vec![ ServerNodeConfig::new("ff::08".to_string()), ServerNodeConfig::new("ff::09".to_string()), ]; let config = ClickhouseServerConfig::new( Utf8PathBuf::from(config_dir.path()), ServerId(1), Utf8PathBuf::from_str("./").unwrap(), Ipv6Addr::from_str("ff::08").unwrap(), keepers, servers, ); config.generate_xml_file().unwrap(); ``` To generate a keeper configuration file: ```rust let keepers = vec![ RaftServerConfig::new(KeeperId(1), "ff::01".to_string()), RaftServerConfig::new(KeeperId(2), "ff::02".to_string()), RaftServerConfig::new(KeeperId(3), "ff::03".to_string()), ]; let config = ClickhouseKeeperConfig::new( Utf8PathBuf::from(config_dir.path()), KeeperId(1), keepers, Utf8PathBuf::from_str("./").unwrap(), Ipv6Addr::from_str("ff::08").unwrap(), ); config.generate_xml_file().unwrap(); ``` ## Purpose As part of the work to roll out replicated ClickHouse, we'll need to dynamically generate the node configuration files via the `clickhouse-admin` service. This commit is part of the work necessary to do so. Related: https://github.com/oxidecomputer/omicron/issues/5999 , https://github.com/oxidecomputer/omicron/issues/3824 --- Cargo.lock | 17 + Cargo.toml | 2 + clickhouse-admin/Cargo.toml | 1 + clickhouse-admin/types/Cargo.toml | 20 + clickhouse-admin/types/src/config.rs | 515 ++++++++++++++++++ clickhouse-admin/types/src/lib.rs | 242 ++++++++ .../types/testutils/keeper-config.xml | 47 ++ .../types/testutils/replica-server-config.xml | 106 ++++ common/src/address.rs | 7 +- nexus/src/app/oximeter.rs | 4 +- .../src/bin/clickhouse-schema-updater.rs | 4 +- sled-agent/src/rack_setup/plan/service.rs | 6 +- sled-agent/src/services.rs | 34 +- 13 files changed, 982 insertions(+), 23 deletions(-) create mode 100644 clickhouse-admin/types/Cargo.toml create mode 100644 clickhouse-admin/types/src/config.rs create mode 100644 clickhouse-admin/types/src/lib.rs create mode 100644 clickhouse-admin/types/testutils/keeper-config.xml create mode 100644 clickhouse-admin/types/testutils/replica-server-config.xml diff --git a/Cargo.lock b/Cargo.lock index 901d9c052a..f1ae026d56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1127,6 +1127,22 @@ dependencies = [ "serde", ] +[[package]] +name = "clickhouse-admin-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "camino-tempfile", + "derive_more", + "expectorate", + "omicron-common", + "omicron-workspace-hack", + "schemars", + "serde", + "serde_json", +] + [[package]] name = "clickward" version = "0.1.0" @@ -5850,6 +5866,7 @@ dependencies = [ "chrono", "clap", "clickhouse-admin-api", + "clickhouse-admin-types", "dropshot 0.10.2-dev", "expectorate", "http 0.2.12", diff --git a/Cargo.toml b/Cargo.toml index f44638d1d6..8c24784132 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -122,6 +122,7 @@ default-members = [ "certificates", "clickhouse-admin", "clickhouse-admin/api", + "clickhouse-admin/types", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -311,6 +312,7 @@ chrono = { version = "0.4", features = [ "serde" ] } ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } clickhouse-admin-api = { path = "clickhouse-admin/api" } +clickhouse-admin-types = { path = "clickhouse-admin/types" } clickward = { git = 
"https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" } cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml index 033836dfe0..270f779d7e 100644 --- a/clickhouse-admin/Cargo.toml +++ b/clickhouse-admin/Cargo.toml @@ -10,6 +10,7 @@ camino.workspace = true chrono.workspace = true clap.workspace = true clickhouse-admin-api.workspace = true +clickhouse-admin-types.workspace = true dropshot.workspace = true http.workspace = true illumos-utils.workspace = true diff --git a/clickhouse-admin/types/Cargo.toml b/clickhouse-admin/types/Cargo.toml new file mode 100644 index 0000000000..a90a7c2e39 --- /dev/null +++ b/clickhouse-admin/types/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "clickhouse-admin-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +camino-tempfile.workspace = true +derive_more.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +serde_json.workspace = true +expectorate.workspace = true diff --git a/clickhouse-admin/types/src/config.rs b/clickhouse-admin/types/src/config.rs new file mode 100644 index 0000000000..3337d733a9 --- /dev/null +++ b/clickhouse-admin/types/src/config.rs @@ -0,0 +1,515 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::{KeeperId, ServerId, OXIMETER_CLUSTER}; +use camino::Utf8PathBuf; +use omicron_common::address::{ + CLICKHOUSE_HTTP_PORT, CLICKHOUSE_INTERSERVER_PORT, + CLICKHOUSE_KEEPER_RAFT_PORT, CLICKHOUSE_KEEPER_TCP_PORT, + CLICKHOUSE_TCP_PORT, +}; +use schemars::{ + gen::SchemaGenerator, + schema::{Schema, SchemaObject}, + JsonSchema, +}; +use serde::{Deserialize, Serialize}; +use std::{fmt::Display, net::Ipv6Addr}; + +// Used for schemars to be able to be used with camino: +// See https://github.com/camino-rs/camino/issues/91#issuecomment-2027908513 +fn path_schema(gen: &mut SchemaGenerator) -> Schema { + let mut schema: SchemaObject = ::json_schema(gen).into(); + schema.format = Some("Utf8PathBuf".to_owned()); + schema.into() +} + +/// Configuration for a ClickHouse replica server +#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)] +pub struct ReplicaConfig { + pub logger: LogConfig, + pub macros: Macros, + pub listen_host: Ipv6Addr, + pub http_port: u16, + pub tcp_port: u16, + pub interserver_http_port: u16, + pub remote_servers: RemoteServers, + pub keepers: KeeperConfigsForReplica, + #[schemars(schema_with = "path_schema")] + pub data_path: Utf8PathBuf, +} + +impl ReplicaConfig { + /// A new ClickHouse replica server configuration with default ports and directories + pub fn new( + logger: LogConfig, + macros: Macros, + listen_host: Ipv6Addr, + remote_servers: Vec, + keepers: Vec, + path: Utf8PathBuf, + ) -> Self { + let data_path = path.join("data"); + let remote_servers = RemoteServers::new(remote_servers); + let keepers = KeeperConfigsForReplica::new(keepers); + + Self { + logger, + macros, + listen_host, + http_port: CLICKHOUSE_HTTP_PORT, + tcp_port: CLICKHOUSE_TCP_PORT, + interserver_http_port: CLICKHOUSE_INTERSERVER_PORT, + remote_servers, + keepers, + data_path, + } + } + 
+ pub fn to_xml(&self) -> String { + let ReplicaConfig { + logger, + macros, + listen_host, + http_port, + tcp_port, + interserver_http_port, + remote_servers, + keepers, + data_path, + } = self; + let logger = logger.to_xml(); + let cluster = macros.cluster.clone(); + let id = macros.replica; + let macros = macros.to_xml(); + let keepers = keepers.to_xml(); + let remote_servers = remote_servers.to_xml(); + let user_files_path = data_path.clone().join("user_files"); + let format_schema_path = data_path.clone().join("format_schemas"); + format!( + " + +{logger} + {data_path} + + + + random + + + + + + + + + ::/0 + + default + default + + + + + + + 3600 + 0 + 0 + 0 + 0 + 0 + + + + + {user_files_path} + default + {format_schema_path} + {cluster}_{id} + {listen_host} + {http_port} + {tcp_port} + {interserver_http_port} + {listen_host} + + + + + 604800 + + + 60 + + + 1000 + +{macros} +{remote_servers} +{keepers} + + +" + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)] +pub struct Macros { + pub shard: u64, + pub replica: ServerId, + pub cluster: String, +} + +impl Macros { + /// A new macros configuration block with default cluster + pub fn new(replica: ServerId) -> Self { + Self { shard: 1, replica, cluster: OXIMETER_CLUSTER.to_string() } + } + + pub fn to_xml(&self) -> String { + let Macros { shard, replica, cluster } = self; + format!( + " + + {shard} + {replica} + {cluster} + " + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)] +pub struct RemoteServers { + pub cluster: String, + pub secret: String, + pub replicas: Vec, +} + +impl RemoteServers { + /// A new remote_servers configuration block with default cluster + pub fn new(replicas: Vec) -> Self { + Self { + cluster: OXIMETER_CLUSTER.to_string(), + // TODO(https://github.com/oxidecomputer/omicron/issues/3823): secret handling TBD + secret: "some-unique-value".to_string(), + replicas, + } + } + + pub fn to_xml(&self) -> String { + let RemoteServers { cluster, secret, replicas } = self; + + let mut s = format!( + " + + <{cluster}> + + {secret} + + true" + ); + + for r in replicas { + let ServerNodeConfig { host, port } = r; + s.push_str(&format!( + " + + {host} + {port} + " + )); + } + + s.push_str(&format!( + " + + + + " + )); + + s + } +} + +#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)] +pub struct KeeperConfigsForReplica { + pub nodes: Vec, +} + +impl KeeperConfigsForReplica { + pub fn new(nodes: Vec) -> Self { + Self { nodes } + } + + pub fn to_xml(&self) -> String { + let mut s = String::from(" "); + for node in &self.nodes { + let KeeperNodeConfig { host, port } = node; + + // ClickHouse servers have a small quirk, where when setting the + // keeper hosts as IPv6 addresses in the replica configuration file, + // they must be wrapped in square brackets. + // Otherwise, when running any query, a "Service not found" error + // appears. 
+            // https://github.com/ClickHouse/ClickHouse/blob/a011990fd75628c63c7995c4f15475f1d4125d10/src/Coordination/KeeperStateManager.cpp#L149
+            let parsed_host = match host.parse::<Ipv6Addr>() {
+                Ok(_) => format!("[{host}]"),
+                Err(_) => host.to_string(),
+            };
+
+            s.push_str(&format!(
+                "
+        
+            {parsed_host}
+            {port}
+        ",
+            ));
+        }
+        s.push_str("\n    ");
+        s
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperNodeConfig {
+    pub host: String,
+    pub port: u16,
+}
+
+impl KeeperNodeConfig {
+    /// A new ClickHouse keeper node configuration with default port
+    pub fn new(host: String) -> Self {
+        let port = CLICKHOUSE_KEEPER_TCP_PORT;
+        Self { host, port }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct ServerNodeConfig {
+    pub host: String,
+    pub port: u16,
+}
+
+impl ServerNodeConfig {
+    /// A new ClickHouse replica node configuration with default port
+    pub fn new(host: String) -> Self {
+        let port = CLICKHOUSE_TCP_PORT;
+        Self { host, port }
+    }
+}
+
+pub enum NodeType {
+    Server,
+    Keeper,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct LogConfig {
+    pub level: LogLevel,
+    #[schemars(schema_with = "path_schema")]
+    pub log: Utf8PathBuf,
+    #[schemars(schema_with = "path_schema")]
+    pub errorlog: Utf8PathBuf,
+    pub size: u16,
+    pub count: usize,
+}
+
+impl LogConfig {
+    /// A new logger configuration with default directories
+    pub fn new(path: Utf8PathBuf, node_type: NodeType) -> Self {
+        let prefix = match node_type {
+            NodeType::Server => "clickhouse",
+            NodeType::Keeper => "clickhouse-keeper",
+        };
+
+        let logs: Utf8PathBuf = path.join("log");
+        let log = logs.join(format!("{prefix}.log"));
+        let errorlog = logs.join(format!("{prefix}.err.log"));
+
+        Self { level: LogLevel::default(), log, errorlog, size: 100, count: 1 }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let LogConfig { level, log, errorlog, size, count } = &self;
+        format!(
+            "
+
+        {level}
+        {log}
+        {errorlog}
+        {size}M
+        {count}
+
+"
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperCoordinationSettings {
+    pub operation_timeout_ms: u32,
+    pub session_timeout_ms: u32,
+    pub raft_logs_level: LogLevel,
+}
+
+impl KeeperCoordinationSettings {
+    pub fn default() -> Self {
+        Self {
+            operation_timeout_ms: 10000,
+            session_timeout_ms: 30000,
+            raft_logs_level: LogLevel::Trace,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct RaftServers {
+    pub servers: Vec<RaftServerConfig>,
+}
+
+impl RaftServers {
+    pub fn new(servers: Vec<RaftServerConfig>) -> Self {
+        Self { servers }
+    }
+    pub fn to_xml(&self) -> String {
+        let mut s = String::new();
+        for server in &self.servers {
+            let RaftServerConfig { id, hostname, port } = server;
+            s.push_str(&format!(
+                "
+            
+                {id}
+                {hostname}
+                {port}
+            
+            "
+            ));
+        }
+
+        s
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct RaftServerConfig {
+    pub id: KeeperId,
+    pub hostname: String,
+    pub port: u16,
+}
+
+impl RaftServerConfig {
+    pub fn new(id: KeeperId, hostname: String) -> Self {
+        Self { id, hostname, port: CLICKHOUSE_KEEPER_RAFT_PORT }
+    }
+}
+
+/// Configuration for a ClickHouse keeper
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperConfig {
+    pub logger: LogConfig,
+    pub listen_host: Ipv6Addr,
+    pub tcp_port: u16,
+    pub server_id: KeeperId,
+    #[schemars(schema_with = "path_schema")]
+    pub log_storage_path: 
Utf8PathBuf, + #[schemars(schema_with = "path_schema")] + pub snapshot_storage_path: Utf8PathBuf, + pub coordination_settings: KeeperCoordinationSettings, + pub raft_config: RaftServers, +} + +impl KeeperConfig { + /// A new ClickHouse keeper node configuration with default ports and directories + pub fn new( + logger: LogConfig, + listen_host: Ipv6Addr, + server_id: KeeperId, + datastore_path: Utf8PathBuf, + raft_config: RaftServers, + ) -> Self { + let coordination_path = datastore_path.join("coordination"); + let log_storage_path = coordination_path.join("log"); + let snapshot_storage_path = coordination_path.join("snapshots"); + let coordination_settings = KeeperCoordinationSettings::default(); + Self { + logger, + listen_host, + tcp_port: CLICKHOUSE_KEEPER_TCP_PORT, + server_id, + log_storage_path, + snapshot_storage_path, + coordination_settings, + raft_config, + } + } + + pub fn to_xml(&self) -> String { + let KeeperConfig { + logger, + listen_host, + tcp_port, + server_id, + log_storage_path, + snapshot_storage_path, + coordination_settings, + raft_config, + } = self; + let logger = logger.to_xml(); + let KeeperCoordinationSettings { + operation_timeout_ms, + session_timeout_ms, + raft_logs_level, + } = coordination_settings; + let raft_servers = raft_config.to_xml(); + format!( + " + +{logger} + {listen_host} + + false + {tcp_port} + {server_id} + {log_storage_path} + {snapshot_storage_path} + + {operation_timeout_ms} + {session_timeout_ms} + {raft_logs_level} + + +{raft_servers} + + + + +" + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)] +pub enum LogLevel { + Trace, + Debug, +} + +impl LogLevel { + fn default() -> Self { + LogLevel::Trace + } +} + +impl Display for LogLevel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + LogLevel::Trace => "trace", + LogLevel::Debug => "debug", + }; + write!(f, "{s}") + } +} diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs new file mode 100644 index 0000000000..c9cc076de5 --- /dev/null +++ b/clickhouse-admin/types/src/lib.rs @@ -0,0 +1,242 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
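+
+//! Types for generating per-node ClickHouse replica server and keeper
+//! configuration files.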
+
+use anyhow::Result;
+use camino::Utf8PathBuf;
+use camino_tempfile::NamedUtf8TempFile;
+use derive_more::{Add, AddAssign, Display, From};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::fs::rename;
+use std::io::Write;
+use std::net::Ipv6Addr;
+
+pub mod config;
+use config::*;
+
+pub const OXIMETER_CLUSTER: &str = "oximeter_cluster";
+
+/// A unique ID for a ClickHouse Keeper
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    From,
+    Add,
+    AddAssign,
+    Display,
+    JsonSchema,
+    Serialize,
+    Deserialize,
+)]
+pub struct KeeperId(pub u64);
+
+/// A unique ID for a Clickhouse Server
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    From,
+    Add,
+    AddAssign,
+    Display,
+    JsonSchema,
+    Serialize,
+    Deserialize,
+)]
+pub struct ServerId(pub u64);
+
+#[derive(Debug, Clone)]
+pub struct ClickhouseServerConfig {
+    pub config_dir: Utf8PathBuf,
+    pub id: ServerId,
+    pub datastore_path: Utf8PathBuf,
+    pub listen_addr: Ipv6Addr,
+    pub keepers: Vec<KeeperNodeConfig>,
+    pub servers: Vec<ServerNodeConfig>,
+}
+
+impl ClickhouseServerConfig {
+    pub fn new(
+        config_dir: Utf8PathBuf,
+        id: ServerId,
+        datastore_path: Utf8PathBuf,
+        listen_addr: Ipv6Addr,
+        keepers: Vec<KeeperNodeConfig>,
+        servers: Vec<ServerNodeConfig>,
+    ) -> Self {
+        Self { config_dir, id, datastore_path, listen_addr, keepers, servers }
+    }
+
+    /// Generate a configuration file for a replica server node
+    pub fn generate_xml_file(&self) -> Result<()> {
+        let logger =
+            LogConfig::new(self.datastore_path.clone(), NodeType::Server);
+        let macros = Macros::new(self.id);
+
+        let config = ReplicaConfig::new(
+            logger,
+            macros,
+            self.listen_addr,
+            self.servers.clone(),
+            self.keepers.clone(),
+            self.datastore_path.clone(),
+        );
+
+        // Writing to a temporary file and then renaming it will ensure we
+        // don't end up with a partially written file after a crash
+        let mut f = NamedUtf8TempFile::new()?;
+        f.write_all(config.to_xml().as_bytes())?;
+        f.flush()?;
+        rename(f.path(), self.config_dir.join("replica-server-config.xml"))?;
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ClickhouseKeeperConfig {
+    pub config_dir: Utf8PathBuf,
+    pub id: KeeperId,
+    pub raft_servers: Vec<RaftServerConfig>,
+    pub datastore_path: Utf8PathBuf,
+    pub listen_addr: Ipv6Addr,
+}
+
+impl ClickhouseKeeperConfig {
+    pub fn new(
+        config_dir: Utf8PathBuf,
+        id: KeeperId,
+        raft_servers: Vec<RaftServerConfig>,
+        datastore_path: Utf8PathBuf,
+        listen_addr: Ipv6Addr,
+    ) -> Self {
+        ClickhouseKeeperConfig {
+            config_dir,
+            id,
+            raft_servers,
+            datastore_path,
+            listen_addr,
+        }
+    }
+
+    /// Generate a configuration file for a keeper node
+    pub fn generate_xml_file(&self) -> Result<()> {
+        let logger =
+            LogConfig::new(self.datastore_path.clone(), NodeType::Keeper);
+        let raft_config = RaftServers::new(self.raft_servers.clone());
+        let config = KeeperConfig::new(
+            logger,
+            self.listen_addr,
+            self.id,
+            self.datastore_path.clone(),
+            raft_config,
+        );
+
+        // Writing to a temporary file and then renaming it will ensure we
+        // don't end up with a partially written file after a crash
+        let mut f = NamedUtf8TempFile::new()?;
+        f.write_all(config.to_xml().as_bytes())?;
+        f.flush()?;
+        rename(f.path(), self.config_dir.join("keeper-config.xml"))?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{net::Ipv6Addr, str::FromStr};
+
+    use camino::Utf8PathBuf;
+    use camino_tempfile::Builder;
+
+    use crate::{
+        ClickhouseKeeperConfig, ClickhouseServerConfig, KeeperId,
+        KeeperNodeConfig, RaftServerConfig, ServerId, ServerNodeConfig,
+    };
+
+    #[test]
+    fn test_generate_keeper_config() {
let config_dir = Builder::new() + .tempdir_in( + Utf8PathBuf::try_from(std::env::temp_dir()).unwrap()) + .expect("Could not create directory for ClickHouse configuration generation test" + ); + + let keepers = vec![ + RaftServerConfig::new(KeeperId(1), "ff::01".to_string()), + RaftServerConfig::new(KeeperId(2), "ff::02".to_string()), + RaftServerConfig::new(KeeperId(3), "ff::03".to_string()), + ]; + + let config = ClickhouseKeeperConfig::new( + Utf8PathBuf::from(config_dir.path()), + KeeperId(1), + keepers, + Utf8PathBuf::from_str("./").unwrap(), + Ipv6Addr::from_str("ff::08").unwrap(), + ); + + config.generate_xml_file().unwrap(); + + let expected_file = Utf8PathBuf::from_str("./testutils") + .unwrap() + .join("keeper-config.xml"); + let generated_file = + Utf8PathBuf::from(config_dir.path()).join("keeper-config.xml"); + let generated_content = std::fs::read_to_string(generated_file) + .expect("Failed to read from generated ClickHouse keeper file"); + + expectorate::assert_contents(expected_file, &generated_content); + } + + #[test] + fn test_generate_replica_config() { + let config_dir = Builder::new() + .tempdir_in( + Utf8PathBuf::try_from(std::env::temp_dir()).unwrap()) + .expect("Could not create directory for ClickHouse configuration generation test" + ); + + let keepers = vec![ + KeeperNodeConfig::new("ff::01".to_string()), + KeeperNodeConfig::new("127.0.0.1".to_string()), + KeeperNodeConfig::new("we.dont.want.brackets.com".to_string()), + ]; + + let servers = vec![ + ServerNodeConfig::new("ff::08".to_string()), + ServerNodeConfig::new("ff::09".to_string()), + ]; + + let config = ClickhouseServerConfig::new( + Utf8PathBuf::from(config_dir.path()), + ServerId(1), + Utf8PathBuf::from_str("./").unwrap(), + Ipv6Addr::from_str("ff::08").unwrap(), + keepers, + servers, + ); + + config.generate_xml_file().unwrap(); + + let expected_file = Utf8PathBuf::from_str("./testutils") + .unwrap() + .join("replica-server-config.xml"); + let generated_file = Utf8PathBuf::from(config_dir.path()) + .join("replica-server-config.xml"); + let generated_content = std::fs::read_to_string(generated_file).expect( + "Failed to read from generated ClickHouse replica server file", + ); + + expectorate::assert_contents(expected_file, &generated_content); + } +} diff --git a/clickhouse-admin/types/testutils/keeper-config.xml b/clickhouse-admin/types/testutils/keeper-config.xml new file mode 100644 index 0000000000..e05cf9d954 --- /dev/null +++ b/clickhouse-admin/types/testutils/keeper-config.xml @@ -0,0 +1,47 @@ + + + + + trace + ./log/clickhouse-keeper.log + ./log/clickhouse-keeper.err.log + 100M + 1 + + + ff::8 + + false + 9181 + 1 + ./coordination/log + ./coordination/snapshots + + 10000 + 30000 + trace + + + + + 1 + ff::01 + 9234 + + + + 2 + ff::02 + 9234 + + + + 3 + ff::03 + 9234 + + + + + + diff --git a/clickhouse-admin/types/testutils/replica-server-config.xml b/clickhouse-admin/types/testutils/replica-server-config.xml new file mode 100644 index 0000000000..056fd2cc1c --- /dev/null +++ b/clickhouse-admin/types/testutils/replica-server-config.xml @@ -0,0 +1,106 @@ + + + + + trace + ./log/clickhouse.log + ./log/clickhouse.err.log + 100M + 1 + + + ./data + + + + random + + + + + + + + + ::/0 + + default + default + + + + + + + 3600 + 0 + 0 + 0 + 0 + 0 + + + + + ./data/user_files + default + ./data/format_schemas + oximeter_cluster_1 + ff::8 + 8123 + 9000 + 9009 + ff::8 + + + + + 604800 + + + 60 + + + 1000 + + + + 1 + 1 + oximeter_cluster + + + + + + some-unique-value + + true + + ff::08 + 9000 + + + ff::09 + 
9000 + + + + + + + + [ff::01] + 9181 + + + 127.0.0.1 + 9181 + + + we.dont.want.brackets.com + 9181 + + + + diff --git a/common/src/address.rs b/common/src/address.rs index c23f5c41ed..4e8d5f9239 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,8 +33,11 @@ pub const SLED_AGENT_PORT: u16 = 12345; pub const COCKROACH_PORT: u16 = 32221; pub const COCKROACH_ADMIN_PORT: u16 = 32222; pub const CRUCIBLE_PORT: u16 = 32345; -pub const CLICKHOUSE_PORT: u16 = 8123; -pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181; +pub const CLICKHOUSE_HTTP_PORT: u16 = 8123; +pub const CLICKHOUSE_INTERSERVER_PORT: u16 = 9009; +pub const CLICKHOUSE_TCP_PORT: u16 = 9000; +pub const CLICKHOUSE_KEEPER_TCP_PORT: u16 = 9181; +pub const CLICKHOUSE_KEEPER_RAFT_PORT: u16 = 9234; pub const CLICKHOUSE_ADMIN_PORT: u16 = 8888; pub const OXIMETER_PORT: u16 = 12223; pub const DENDRITE_PORT: u16 = 12224; diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 9039d1b8fa..0c7ec3a016 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -12,7 +12,7 @@ use internal_dns::ServiceName; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; use omicron_common::api::external::Error; use omicron_common::api::external::{DataPageParams, ListResultVec}; use omicron_common::api::internal::nexus::{self, ProducerEndpoint}; @@ -65,7 +65,7 @@ impl LazyTimeseriesClient { ClientSource::FromIp { address } => *address, ClientSource::FromDns { resolver } => SocketAddr::new( resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + CLICKHOUSE_HTTP_PORT, ), }; diff --git a/oximeter/collector/src/bin/clickhouse-schema-updater.rs b/oximeter/collector/src/bin/clickhouse-schema-updater.rs index 20780c37e0..8e432e87c6 100644 --- a/oximeter/collector/src/bin/clickhouse-schema-updater.rs +++ b/oximeter/collector/src/bin/clickhouse-schema-updater.rs @@ -11,7 +11,7 @@ use anyhow::Context; use camino::Utf8PathBuf; use clap::Parser; use clap::Subcommand; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; use oximeter_db::model::OXIMETER_VERSION; use oximeter_db::Client; use slog::Drain; @@ -24,7 +24,7 @@ use std::net::SocketAddrV6; const DEFAULT_HOST: SocketAddr = SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, - CLICKHOUSE_PORT, + CLICKHOUSE_HTTP_PORT, 0, 0, )); diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index a376096a87..e0959b0219 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -705,7 +705,7 @@ impl Plan { }; let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); - let port = omicron_common::address::CLICKHOUSE_PORT; + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( @@ -748,7 +748,7 @@ impl Plan { let ip = sled.addr_alloc.next().expect("Not enough addrs"); // TODO: This may need to be a different port if/when to have single node // and replicated running side by side as per stage 1 of RFD 468. 
- let port = omicron_common::address::CLICKHOUSE_PORT; + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( @@ -789,7 +789,7 @@ impl Plan { }; let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); - let port = omicron_common::address::CLICKHOUSE_KEEPER_PORT; + let port = omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 22cbb62f70..d6909174ee 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -66,8 +66,8 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, ZoneKind, }; use omicron_common::address::CLICKHOUSE_ADMIN_PORT; -use omicron_common::address::CLICKHOUSE_KEEPER_PORT; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; +use omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; use omicron_common::address::COCKROACH_PORT; use omicron_common::address::CRUCIBLE_PANTRY_PORT; use omicron_common::address::CRUCIBLE_PORT; @@ -1550,7 +1550,7 @@ impl ServiceManager { }; let listen_addr = *underlay_address; - let listen_port = &CLICKHOUSE_PORT.to_string(); + let listen_port = &CLICKHOUSE_HTTP_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( Some(&info.underlay_address), @@ -1574,9 +1574,11 @@ impl ServiceManager { .add_property_group(config), ); - let ch_address = - SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) - .to_string(); + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); let admin_address = SocketAddr::new( IpAddr::V6(listen_addr), @@ -1628,7 +1630,7 @@ impl ServiceManager { }; let listen_addr = *underlay_address; - let listen_port = CLICKHOUSE_PORT.to_string(); + let listen_port = CLICKHOUSE_HTTP_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( Some(&info.underlay_address), @@ -1653,9 +1655,11 @@ impl ServiceManager { .add_property_group(config), ); - let ch_address = - SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) - .to_string(); + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); let admin_address = SocketAddr::new( IpAddr::V6(listen_addr), @@ -1710,7 +1714,7 @@ impl ServiceManager { }; let listen_addr = *underlay_address; - let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string(); + let listen_port = &CLICKHOUSE_KEEPER_TCP_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( Some(&info.underlay_address), @@ -1735,9 +1739,11 @@ impl ServiceManager { .add_property_group(config), ); - let ch_address = - SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) - .to_string(); + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); let admin_address = SocketAddr::new( IpAddr::V6(listen_addr), From c3c5f84b5836116f4d7fd01656c660d24e5d7b96 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 28 Aug 2024 04:29:39 +0000 Subject: [PATCH 4/5] Update taiki-e/install-action digest to 0b73cec (#6463) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change 
| |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`37129d5` -> `0b73cec`](https://togithub.com/taiki-e/install-action/compare/37129d5...0b73cec) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 63752880d6..36c953a097 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@37129d5de13e9122cce55a7a5e7e49981cef514c # v2 + uses: taiki-e/install-action@0b73cec6bfb20724b64cae80024f8fa52195c902 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 648507dcdb78cd28af3590c405b63d46708d6099 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 28 Aug 2024 11:27:29 -0700 Subject: [PATCH 5/5] [sled agent] API to manage datasets explicitly (#6144) This PR exposes an API from the Sled Agent which allows Nexus to configure datasets independently from Zones. Here's an example subset of `zfs list -o name` on a deployed system, with some annotations in-line ```bash # This is the pool of an arbitrary U.2 oxp_e12f29b8-1ab8-431e-bc96-1c1298947980 # Crucible has a dataset that isn't encrypted at the ZFS layer, because it's encrypted internally... oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crucible # ... and it contains a lot of region datasets. oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crucible/regions/... # We have a dataset which uses a trust-quorum-derived encryption key. oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt # Durable datasets (e.g. Cockroach's) can be stored in here. oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/cockroachdb # The "debug" dataset has been historically created by + managed by the Sled Agent. oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/debug # Transient zone filesystems also exist here, and are encrypted. oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3 oxp_e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_crucible_a232eba2-e94f-4592-a5a6-ec23f9be3296 ``` ## History Prior to this PR, the sled agent exposed no interfaces to **explicitly** manage datasets on their own. Datasets could be created one of two ways: 1. Created and managed by the sled agent, without telling Nexus. See: the `debug` dataset. 2. Created in response to requests from Nexus to create zones. See: `crucible`, `cockroachdb`, and the `zone` filesystems above. These APIs did not provide a significant amount of control over dataset usage, and provided no mechanism for setting quotas and reservations. 
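As a concrete illustration of the new interface (described under "This PR" below), here is a rough sketch of the configuration Nexus could hand to the new `datasets_put` endpoint, built with the `DatasetsConfig` / `DatasetConfig` / `DatasetName` types this PR adds to `common/src/disk.rs`. The IDs and the zone name are made up, `ZpoolName::new_external` is the pre-existing pool-name constructor, and the quota/reservation fields are simply left unset:

```rust
use std::collections::BTreeMap;

use omicron_common::api::external::Generation;
use omicron_common::api::internal::shared::DatasetKind;
use omicron_common::disk::{
    CompressionAlgorithm, DatasetConfig, DatasetName, DatasetsConfig,
};
use omicron_common::zpool_name::ZpoolName;
use omicron_uuid_kinds::{DatasetUuid, ZpoolUuid};

fn example_datasets_put_body() -> DatasetsConfig {
    // A transient zone filesystem on some U.2 pool.
    let id = DatasetUuid::new_v4();
    let name = DatasetName::new(
        ZpoolName::new_external(ZpoolUuid::new_v4()),
        DatasetKind::Zone {
            name: "oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3"
                .to_string(),
        },
    );
    DatasetsConfig {
        // Generation 1 is reserved for "no datasets", so the first real
        // configuration starts at 2 and is bumped on every change.
        generation: Generation::new().next(),
        datasets: BTreeMap::from([(
            id,
            DatasetConfig {
                id,
                name,
                compression: CompressionAlgorithm::Off,
                quota: None,
                reservation: None,
            },
        )]),
    }
}
```

Sled Agent compares the request's `generation` against its ledgered copy and rejects anything older, so a retried `datasets_put` with the same body is harmless.
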
## This PR - Expands Nexus' notion of "dataset kind" to include the following variants: - `zone_root`, for the `crypt/zone` dataset, - `zone`, for any dataset within `crypt/zone` (e.g., `crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3`). - `debug` for the `crypt/debug` dataset. - Adds two endpoints to Sled Agent: `datasets_put`, and `datasets_get`, for setting a configuration of expected datasets. At the moment, `datasets_put` is purely additive, and does not remove any missing datasets. - This API provides a mechanism for Nexus to manage quotas and reservations, which it will do in the future. This PR is related to https://github.com/oxidecomputer/omicron/pull/6167, which provides additional tooling through the inventory for inspecting dataset state on deployed sleds. Fixes https://github.com/oxidecomputer/omicron/issues/6042, https://github.com/oxidecomputer/omicron/issues/6107 --------- Co-authored-by: Rain --- clients/sled-agent-client/src/lib.rs | 1 + common/src/api/internal/shared.rs | 188 +++++++- common/src/disk.rs | 248 ++++++++++- illumos-utils/src/zfs.rs | 108 +++-- nexus/db-model/src/dataset.rs | 14 +- nexus/db-model/src/dataset_kind.rs | 14 +- nexus/db-model/src/schema.rs | 1 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/dataset.rs | 9 +- nexus/db-queries/src/db/datastore/mod.rs | 9 +- .../output/region_allocate_distinct_sleds.sql | 5 +- .../output/region_allocate_random_sleds.sql | 5 +- ..._allocate_with_snapshot_distinct_sleds.sql | 5 +- ...on_allocate_with_snapshot_random_sleds.sql | 5 +- .../reconfigurator/execution/src/datasets.rs | 2 +- .../execution/src/omicron_physical_disks.rs | 2 +- .../tasks/decommissioned_disk_cleaner.rs | 2 +- nexus/src/app/rack.rs | 2 +- .../src/app/sagas/region_replacement_start.rs | 2 +- nexus/src/app/sled.rs | 8 +- nexus/src/lib.rs | 7 +- openapi/nexus-internal.json | 35 +- openapi/sled-agent.json | 293 +++++++++++++ .../dataset-kinds-zone-and-debug/up01.sql | 1 + .../dataset-kinds-zone-and-debug/up02.sql | 1 + .../dataset-kinds-zone-and-debug/up03.sql | 1 + .../dataset-kinds-zone-and-debug/up04.sql | 1 + .../dataset-kinds-zone-and-debug/up05.sql | 4 + schema/crdb/dbinit.sql | 15 +- schema/omicron-datasets.json | 226 ++++++++++ sled-agent/api/src/lib.rs | 24 +- sled-agent/src/backing_fs.rs | 12 +- sled-agent/src/http_entrypoints.rs | 20 +- sled-agent/src/params.rs | 17 +- sled-agent/src/rack_setup/plan/service.rs | 21 +- sled-agent/src/services.rs | 7 +- sled-agent/src/sim/http_entrypoints.rs | 19 + sled-agent/src/sim/sled_agent.rs | 17 +- sled-agent/src/sim/storage.rs | 44 ++ sled-agent/src/sled_agent.rs | 30 +- sled-storage/src/dataset.rs | 166 +------- sled-storage/src/error.rs | 12 +- sled-storage/src/manager.rs | 400 +++++++++++++++++- 43 files changed, 1706 insertions(+), 300 deletions(-) create mode 100644 schema/crdb/dataset-kinds-zone-and-debug/up01.sql create mode 100644 schema/crdb/dataset-kinds-zone-and-debug/up02.sql create mode 100644 schema/crdb/dataset-kinds-zone-and-debug/up03.sql create mode 100644 schema/crdb/dataset-kinds-zone-and-debug/up04.sql create mode 100644 schema/crdb/dataset-kinds-zone-and-debug/up05.sql create mode 100644 schema/omicron-datasets.json diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index b14cf5a96f..be19659c69 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -43,6 +43,7 @@ progenitor::generate_api!( replace = { Baseboard = 
nexus_sled_agent_shared::inventory::Baseboard,
         ByteCount = omicron_common::api::external::ByteCount,
+        DatasetKind = omicron_common::api::internal::shared::DatasetKind,
         DiskIdentity = omicron_common::disk::DiskIdentity,
         DiskVariant = omicron_common::disk::DiskVariant,
         Generation = omicron_common::api::external::Generation,
diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs
index 5945efe16d..4826292863 100644
--- a/common/src/api/internal/shared.rs
+++ b/common/src/api/internal/shared.rs
@@ -10,13 +10,14 @@ use crate::{
 };
 use oxnet::{IpNet, Ipv4Net, Ipv6Net};
 use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 use std::{
     collections::{HashMap, HashSet},
     fmt,
     net::{IpAddr, Ipv4Addr, Ipv6Addr},
     str::FromStr,
 };
+use strum::EnumCount;
 use uuid::Uuid;
 
 use super::nexus::HostIdentifier;
@@ -837,13 +838,11 @@ pub struct ResolvedVpcRouteSet {
 }
 
 /// Describes the purpose of the dataset.
-#[derive(
-    Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq,
-)]
-#[serde(rename_all = "snake_case")]
+#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount)]
 pub enum DatasetKind {
-    Crucible,
+    // Durable datasets for zones
     Cockroach,
+    Crucible,
     /// Used for single-node clickhouse deployments
     Clickhouse,
     /// Used for replicated clickhouse deployments
@@ -852,24 +851,153 @@ pub enum DatasetKind {
     ClickhouseServer,
     ExternalDns,
     InternalDns,
+
+    // Zone filesystems
+    ZoneRoot,
+    Zone {
+        name: String,
+    },
+
+    // Other datasets
+    Debug,
+}
+
+impl Serialize for DatasetKind {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(&self.to_string())
+    }
+}
+
+impl<'de> Deserialize<'de> for DatasetKind {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        s.parse().map_err(de::Error::custom)
+    }
+}
+
+impl JsonSchema for DatasetKind {
+    fn schema_name() -> String {
+        "DatasetKind".to_string()
+    }
+
+    fn json_schema(
+        gen: &mut schemars::gen::SchemaGenerator,
+    ) -> schemars::schema::Schema {
+        // The schema is a bit more complicated than this -- it's either one of
+        // the fixed values or a string starting with "zone/" -- but this is
+        // good enough for now.
+        let mut schema = <String>::json_schema(gen).into_object();
+        schema.metadata().description = Some(
+            "The kind of dataset. See the `DatasetKind` enum \
+             in omicron-common for possible values."
+                .to_owned(),
+        );
+        schema.into()
+    }
+}
+
+impl DatasetKind {
+    pub fn dataset_should_be_encrypted(&self) -> bool {
+        match self {
+            // We encrypt all datasets except Crucible.
+            //
+            // Crucible already performs encryption internally, and we
+            // avoid double-encryption.
+            DatasetKind::Crucible => false,
+            _ => true,
+        }
+    }
+
+    /// Returns true if this dataset is delegated to a non-global zone.
+    pub fn zoned(&self) -> bool {
+        use DatasetKind::*;
+        match self {
+            Cockroach | Crucible | Clickhouse | ClickhouseKeeper
+            | ClickhouseServer | ExternalDns | InternalDns => true,
+            ZoneRoot | Zone { .. } | Debug => false,
+        }
+    }
+
+    /// Returns the zone name, if this is a dataset for a zone filesystem.
+    ///
+    /// Otherwise, returns "None".
+    pub fn zone_name(&self) -> Option<&str> {
+        if let DatasetKind::Zone { name } = self {
+            Some(name)
+        } else {
+            None
+        }
+    }
 }
 
+// Be cautious updating this implementation:
+//
+// - It should align with [DatasetKind::FromStr], below
+// - The strings used here comprise the dataset name, stored durably
+// on-disk
 impl fmt::Display for DatasetKind {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use DatasetKind::*;
         let s = match self {
             Crucible => "crucible",
-            Cockroach => "cockroach",
+            Cockroach => "cockroachdb",
             Clickhouse => "clickhouse",
             ClickhouseKeeper => "clickhouse_keeper",
             ClickhouseServer => "clickhouse_server",
             ExternalDns => "external_dns",
             InternalDns => "internal_dns",
+            ZoneRoot => "zone",
+            Zone { name } => {
+                write!(f, "zone/{}", name)?;
+                return Ok(());
+            }
+            Debug => "debug",
         };
         write!(f, "{}", s)
     }
 }
 
+#[derive(Debug, thiserror::Error)]
+pub enum DatasetKindParseError {
+    #[error("Dataset unknown: {0}")]
+    UnknownDataset(String),
+}
+
+impl FromStr for DatasetKind {
+    type Err = DatasetKindParseError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use DatasetKind::*;
+        let kind = match s {
+            "cockroachdb" => Cockroach,
+            "crucible" => Crucible,
+            "clickhouse" => Clickhouse,
+            "clickhouse_keeper" => ClickhouseKeeper,
+            "clickhouse_server" => ClickhouseServer,
+            "external_dns" => ExternalDns,
+            "internal_dns" => InternalDns,
+            "zone" => ZoneRoot,
+            "debug" => Debug,
+            other => {
+                if let Some(name) = other.strip_prefix("zone/") {
+                    Zone { name: name.to_string() }
+                } else {
+                    return Err(DatasetKindParseError::UnknownDataset(
+                        s.to_string(),
+                    ));
+                }
+            }
+        };
+        Ok(kind)
+    }
+}
+
 /// Identifiers for a single sled.
 ///
 /// This is intended primarily to be used in timeseries, to identify
@@ -892,6 +1020,7 @@ pub struct SledIdentifiers {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::api::internal::shared::AllowedSourceIps;
     use oxnet::{IpNet, Ipv4Net, Ipv6Net};
     use std::net::{Ipv4Addr, Ipv6Addr};
@@ -936,4 +1065,49 @@
             serde_json::from_str(r#"{"allow":"any"}"#).unwrap(),
         );
     }
+
+    #[test]
+    fn test_dataset_kind_serialization() {
+        let kinds = [
+            DatasetKind::Cockroach,
+            DatasetKind::Crucible,
+            DatasetKind::Clickhouse,
+            DatasetKind::ClickhouseKeeper,
+            DatasetKind::ClickhouseServer,
+            DatasetKind::ExternalDns,
+            DatasetKind::InternalDns,
+            DatasetKind::ZoneRoot,
+            DatasetKind::Zone { name: String::from("myzone") },
+            DatasetKind::Debug,
+        ];
+
+        assert_eq!(kinds.len(), DatasetKind::COUNT);
+
+        for kind in &kinds {
+            // To string, from string
+            let as_str = kind.to_string();
+            let from_str =
+                DatasetKind::from_str(&as_str).unwrap_or_else(|_| {
+                    panic!("Failed to convert {kind} to and from string")
+                });
+            assert_eq!(
+                *kind, from_str,
+                "{kind} failed to convert to/from a string"
+            );
+
+            // Serialize, deserialize
+            let ser = serde_json::to_string(&kind)
+                .unwrap_or_else(|_| panic!("Failed to serialize {kind}"));
+            let de: DatasetKind = serde_json::from_str(&ser)
+                .unwrap_or_else(|_| panic!("Failed to deserialize {kind}"));
+            assert_eq!(*kind, de, "{kind} failed serialization");
+
+            // Test that serialization is equivalent to stringifying.
+            assert_eq!(
+                format!("\"{as_str}\""),
+                ser,
+                "{kind} does not match stringification/serialization"
+            );
+        }
+    }
 }
diff --git a/common/src/disk.rs b/common/src/disk.rs
index d8b4c2e0a1..ed0bf8666e 100644
--- a/common/src/disk.rs
+++ b/common/src/disk.rs
@@ -4,18 +4,23 @@
 //! 
Disk related types shared among crates
 
-use std::fmt;
-
 use anyhow::bail;
+use omicron_uuid_kinds::DatasetUuid;
 use omicron_uuid_kinds::ZpoolUuid;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::BTreeMap;
+use std::fmt;
 use uuid::Uuid;
 
 use crate::{
-    api::external::Generation, ledger::Ledgerable, zpool_name::ZpoolKind,
+    api::external::Generation,
+    ledger::Ledgerable,
+    zpool_name::{ZpoolKind, ZpoolName},
 };
 
+pub use crate::api::internal::shared::DatasetKind;
+
 #[derive(
     Clone,
     Debug,
@@ -72,6 +77,243 @@ impl OmicronPhysicalDisksConfig {
     }
 }
 
+#[derive(
+    Debug,
+    PartialEq,
+    Eq,
+    Hash,
+    Serialize,
+    Deserialize,
+    Clone,
+    JsonSchema,
+    PartialOrd,
+    Ord,
+)]
+pub struct DatasetName {
+    // A unique identifier for the Zpool on which the dataset is stored.
+    pool_name: ZpoolName,
+    // A name for the dataset within the Zpool.
+    kind: DatasetKind,
+}
+
+impl DatasetName {
+    pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self {
+        Self { pool_name, kind }
+    }
+
+    pub fn pool(&self) -> &ZpoolName {
+        &self.pool_name
+    }
+
+    pub fn dataset(&self) -> &DatasetKind {
+        &self.kind
+    }
+
+    /// Returns the full name of the dataset, as would be returned from
+    /// "zfs get" or "zfs list".
+    ///
+    /// If this dataset should be encrypted, this automatically adds the
+    /// "crypt" dataset component.
+    pub fn full_name(&self) -> String {
+        // Currently, we encrypt all datasets except Crucible.
+        //
+        // Crucible already performs encryption internally, and we
+        // avoid double-encryption.
+        if self.kind.dataset_should_be_encrypted() {
+            self.full_encrypted_name()
+        } else {
+            self.full_unencrypted_name()
+        }
+    }
+
+    fn full_encrypted_name(&self) -> String {
+        format!("{}/crypt/{}", self.pool_name, self.kind)
+    }
+
+    fn full_unencrypted_name(&self) -> String {
+        format!("{}/{}", self.pool_name, self.kind)
+    }
+}
+
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+pub struct GzipLevel(u8);
+
+// Fastest compression level
+const GZIP_LEVEL_MIN: u8 = 1;
+
+// Best compression ratio
+const GZIP_LEVEL_MAX: u8 = 9;
+
+impl GzipLevel {
+    pub const fn new<const N: u8>() -> Self {
+        assert!(N >= GZIP_LEVEL_MIN, "Compression level too small");
+        assert!(N <= GZIP_LEVEL_MAX, "Compression level too large");
+        Self(N)
+    }
+}
+
+#[derive(
+    Copy,
+    Clone,
+    Debug,
+    Default,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CompressionAlgorithm {
+    // Selects a default compression algorithm. This is dependent on both the
+    // zpool and OS version.
+    On,
+
+    // Disables compression.
+    #[default]
+    Off,
+
+    // Selects the default Gzip compression level.
+    //
+    // According to the ZFS docs, this is "gzip-6", but that's a default value,
+    // which may change with OS updates.
+    Gzip,
+
+    GzipN {
+        level: GzipLevel,
+    },
+    Lz4,
+    Lzjb,
+    Zle,
+}
+
+impl fmt::Display for CompressionAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use CompressionAlgorithm::*;
+        let s = match self {
+            On => "on",
+            Off => "off",
+            Gzip => "gzip",
+            GzipN { level } => {
+                return write!(f, "gzip-{}", level.0);
+            }
+            Lz4 => "lz4",
+            Lzjb => "lzjb",
+            Zle => "zle",
+        };
+        write!(f, "{}", s)
+    }
+}
+
+/// Configuration information necessary to request a single dataset
+#[derive(
+    Clone,
+    Debug,
+    Deserialize,
+    Serialize,
+    JsonSchema,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+)]
+pub struct DatasetConfig {
+    /// The UUID of the dataset being requested
+    pub id: DatasetUuid,
+
+    /// The dataset's name
+    pub name: DatasetName,
+
+    /// The compression mode to be used by the dataset
+    pub compression: CompressionAlgorithm,
+
+    /// The upper bound on the amount of storage used by this dataset
+    pub quota: Option,
+
+    /// The lower bound on the amount of storage usable by this dataset
+    pub reservation: Option,
+}
+
+#[derive(
+    Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,
+)]
+pub struct DatasetsConfig {
+    /// generation number of this configuration
+    ///
+    /// This generation number is owned by the control plane (i.e., RSS or
+    /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It
+    /// should not be bumped within Sled Agent.
+    ///
+    /// Sled Agent rejects attempts to set the configuration to a generation
+    /// older than the one it's currently running.
+    ///
+    /// Note that "Generation::new()", AKA, the first generation number,
+    /// is reserved for "no datasets". This is the default configuration
+    /// for a sled before any requests have been made.
+    pub generation: Generation,
+
+    pub datasets: BTreeMap<DatasetUuid, DatasetConfig>,
+}
+
+impl Default for DatasetsConfig {
+    fn default() -> Self {
+        Self { generation: Generation::new(), datasets: BTreeMap::new() }
+    }
+}
+
+impl Ledgerable for DatasetsConfig {
+    fn is_newer_than(&self, other: &Self) -> bool {
+        self.generation > other.generation
+    }
+
+    // No need to do this, the generation number is provided externally.
+    fn generation_bump(&mut self) {}
+}
+
+/// Identifies how a single dataset management operation may have succeeded or
+/// failed.
+#[derive(Debug, JsonSchema, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct DatasetManagementStatus {
+    pub dataset_name: DatasetName,
+    pub err: Option<String>,
+}
+
+/// The result from attempting to manage datasets.
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"]
+pub struct DatasetsManagementResult {
+    pub status: Vec<DatasetManagementStatus>,
+}
+
+impl DatasetsManagementResult {
+    pub fn has_error(&self) -> bool {
+        for status in &self.status {
+            if status.err.is_some() {
+                return true;
+            }
+        }
+        false
+    }
+}
+
 /// Uniquely identifies a disk. 
#[derive(
     Debug,
diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs
index 139e6fe607..5d512677f8 100644
--- a/illumos-utils/src/zfs.rs
+++ b/illumos-utils/src/zfs.rs
@@ -6,6 +6,7 @@
 
 use crate::{execute, PFEXEC};
 use camino::{Utf8Path, Utf8PathBuf};
+use omicron_common::disk::CompressionAlgorithm;
 use omicron_common::disk::DiskIdentity;
 use std::fmt;
 
@@ -203,7 +204,8 @@ pub struct EncryptionDetails {
 #[derive(Debug, Default)]
 pub struct SizeDetails {
     pub quota: Option,
-    pub compression: Option<&'static str>,
+    pub reservation: Option,
+    pub compression: CompressionAlgorithm,
 }
 
 #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))]
@@ -259,9 +261,27 @@ impl Zfs {
         Ok(())
     }
 
-    /// Creates a new ZFS filesystem named `name`, unless one already exists.
+    /// Creates a new ZFS filesystem unless one already exists.
     ///
-    /// Applies an optional quota, provided _in bytes_.
+    /// - `name`: the full path to the zfs dataset
+    /// - `mountpoint`: The expected mountpoint of this filesystem.
+    /// If the filesystem already exists, and is not mounted here, an error is
+    /// returned.
+    /// - `zoned`: identifies whether or not this filesystem should be
+    /// used in a zone. Only used when creating a new filesystem - ignored
+    /// if the filesystem already exists.
+    /// - `do_format`: if "false", prevents a new filesystem from being created,
+    /// and returns an error if it is not found.
+    /// - `encryption_details`: Ensures a filesystem is an encryption root.
+    /// For new filesystems, this supplies the key, and all datasets within this
+    /// root are implicitly encrypted. For existing filesystems, ensures that
+    /// they are mounted (and that keys are loaded), but does not verify the
+    /// input details.
+    /// - `size_details`: If supplied, sets size-related information. These
+    /// values are set on both new filesystem creation as well as when loading
+    /// existing filesystems.
+    /// - `additional_options`: Additional ZFS options, which are only set when
+    /// creating new filesystems.
     #[allow(clippy::too_many_arguments)]
     pub fn ensure_filesystem(
         name: &str,
@@ -274,10 +294,18 @@
     ) -> Result<(), EnsureFilesystemError> {
         let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?;
         if exists {
-            if let Some(SizeDetails { quota, compression }) = size_details {
+            if let Some(SizeDetails { quota, reservation, compression }) =
+                size_details
+            {
                 // apply quota and compression mode (in case they've changed across
                 // sled-agent versions since creation)
-                Self::apply_properties(name, &mountpoint, quota, compression)?;
+                Self::apply_properties(
+                    name,
+                    &mountpoint,
+                    quota,
+                    reservation,
+                    compression,
+                )?;
             }
 
             if encryption_details.is_none() {
@@ -351,42 +379,64 @@ impl Zfs {
             })?;
         }
 
-        if let Some(SizeDetails { quota, compression }) = size_details {
+        if let Some(SizeDetails { quota, reservation, compression }) =
+            size_details
+        {
             // Apply any quota and compression mode.
-            Self::apply_properties(name, &mountpoint, quota, compression)?;
+            Self::apply_properties(
+                name,
+                &mountpoint,
+                quota,
+                reservation,
+                compression,
+            )?;
         }
 
         Ok(())
     }
 
+    /// Applies the following properties to the filesystem.
+    ///
+    /// If any of the options are not supplied, a default "none" or "off"
+    /// value is supplied.
    fn apply_properties(
         name: &str,
         mountpoint: &Mountpoint,
         quota: Option,
-        compression: Option<&'static str>,
+        reservation: Option,
+        compression: CompressionAlgorithm,
     ) -> Result<(), EnsureFilesystemError> {
-        if let Some(quota) = quota {
-            if let Err(err) =
-                Self::set_value(name, "quota", &format!("{quota}"))
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        let quota = quota
+            .map(|q| q.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let reservation = reservation
+            .map(|r| r.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let compression = compression.to_string();
+
+        if let Err(err) = Self::set_value(name, "quota", &quota) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
-        if let Some(compression) = compression {
-            if let Err(err) = Self::set_value(name, "compression", compression)
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        if let Err(err) = Self::set_value(name, "reservation", &reservation) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
+        }
+        if let Err(err) = Self::set_value(name, "compression", &compression) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
         Ok(())
     }
diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs
index a9dee990b9..f896f11c5b 100644
--- a/nexus/db-model/src/dataset.rs
+++ b/nexus/db-model/src/dataset.rs
@@ -8,6 +8,7 @@
 use crate::ipv6;
 use crate::schema::{dataset, region};
 use chrono::{DateTime, Utc};
 use db_macros::Asset;
+use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind;
 use serde::{Deserialize, Serialize};
 use std::net::{Ipv6Addr, SocketAddrV6};
 use uuid::Uuid;
@@ -41,6 +42,7 @@ pub struct Dataset {
     pub kind: DatasetKind,
     pub size_used: Option<i64>,
+    zone_name: Option<String>,
 }
 
 impl Dataset {
@@ -48,12 +50,15 @@
         id: Uuid,
         pool_id: Uuid,
         addr: Option<SocketAddrV6>,
-        kind: DatasetKind,
+        api_kind: ApiDatasetKind,
     ) -> Self {
-        let size_used = match kind {
-            DatasetKind::Crucible => Some(0),
-            _ => None,
+        let kind = DatasetKind::from(&api_kind);
+        let (size_used, zone_name) = match api_kind {
+            ApiDatasetKind::Crucible => (Some(0), None),
+            ApiDatasetKind::Zone { name } => (None, Some(name)),
+            _ => (None, None),
         };
+
         Self {
             identity: DatasetIdentity::new(id),
             time_deleted: None,
@@ -63,6 +68,7 @@
             port: addr.map(|addr| addr.port().into()),
             kind,
             size_used,
+            zone_name,
         }
     }
diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs
index 4a86efaca1..40ec76ded3 100644
--- a/nexus/db-model/src/dataset_kind.rs
+++ b/nexus/db-model/src/dataset_kind.rs
@@ -23,10 +23,13 @@ impl_enum_type!(
     ClickhouseServer => b"clickhouse_server"
     ExternalDns => b"external_dns"
     InternalDns => b"internal_dns"
+    ZoneRoot => b"zone_root"
+    Zone => b"zone"
+    Debug => b"debug"
 );
 
-impl From<internal::shared::DatasetKind> for DatasetKind {
-    fn from(k: internal::shared::DatasetKind) -> Self {
+impl From<&internal::shared::DatasetKind> for DatasetKind {
+    fn from(k: &internal::shared::DatasetKind) -> Self {
         match k {
             internal::shared::DatasetKind::Crucible => DatasetKind::Crucible,
             internal::shared::DatasetKind::Cockroach => DatasetKind::Cockroach,
@@ -45,6 +48,13 @@ impl From<internal::shared::DatasetKind> for DatasetKind {
             internal::shared::DatasetKind::InternalDns => {
                 DatasetKind::InternalDns
             }
+            internal::shared::DatasetKind::ZoneRoot => DatasetKind::ZoneRoot,
+            // Enums in the database do not have associated data, so this drops
+            // the "name" of the zone and only considers the type.
+            //
+            // The zone name, if it exists, is stored in a separate column.
+            internal::shared::DatasetKind::Zone { .. } => DatasetKind::Zone,
+            internal::shared::DatasetKind::Debug => DatasetKind::Debug,
         }
     }
 }
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index f01f33c39d..5d9b3da78f 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -1023,6 +1023,7 @@ table! {
         kind -> crate::DatasetKindEnum,
         size_used -> Nullable<Int8>,
+        zone_name -> Nullable<Text>,
     }
 }
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index eaed2990c5..2438f37fba 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(92, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(93, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
     //   | leaving the first copy as an example for the next person.
     //   v
     //   KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+    KnownVersion::new(93, "dataset-kinds-zone-and-debug"),
     KnownVersion::new(92, "lldp-link-config-nullable"),
     KnownVersion::new(91, "add-management-gateway-producer-kind"),
     KnownVersion::new(90, "lookup-bgp-config-by-asn"),
diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs
index a08e346fe8..0fe1c7912e 100644
--- a/nexus/db-queries/src/db/datastore/dataset.rs
+++ b/nexus/db-queries/src/db/datastore/dataset.rs
@@ -241,6 +241,7 @@ mod test {
     use nexus_db_model::SledSystemHardware;
     use nexus_db_model::SledUpdate;
     use nexus_test_utils::db::test_setup_database;
+    use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind;
     use omicron_test_utils::dev;
 
     #[tokio::test]
@@ -291,7 +292,7 @@
                 Uuid::new_v4(),
                 zpool_id,
                 Some("[::1]:0".parse().unwrap()),
-                DatasetKind::Crucible,
+                ApiDatasetKind::Crucible,
             ))
             .await
             .expect("failed to insert dataset")
@@ -324,7 +325,7 @@
                 dataset1.id(),
                 zpool_id,
                 Some("[::1]:12345".parse().unwrap()),
-                DatasetKind::Cockroach,
+                ApiDatasetKind::Cockroach,
             ))
             .await
             .expect("failed to do-nothing insert dataset");
@@ -340,7 +341,7 @@
                 Uuid::new_v4(),
                 zpool_id,
                 Some("[::1]:0".parse().unwrap()),
-                DatasetKind::Cockroach,
+                ApiDatasetKind::Cockroach,
             ))
             .await
             .expect("failed to upsert dataset");
@@ -372,7 +373,7 @@
                 dataset1.id(),
                 zpool_id,
                 Some("[::1]:12345".parse().unwrap()),
-                DatasetKind::Cockroach,
+                ApiDatasetKind::Cockroach,
             ))
             .await
             .expect("failed to do-nothing insert dataset");
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 9e69800fed..5b1163dc8b 100644
--- 
a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -431,10 +431,10 @@ mod test { use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ - BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, - PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, - Project, Rack, Region, SiloUser, SledBaseboard, SledSystemHardware, - SledUpdate, SshKey, Zpool, + BlockSize, ConsoleSession, Dataset, ExternalIp, PhysicalDisk, + PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, Project, Rack, + Region, SiloUser, SledBaseboard, SledSystemHardware, SledUpdate, + SshKey, Zpool, }; use crate::db::queries::vpc_subnet::InsertVpcSubnetQuery; use chrono::{Duration, Utc}; @@ -450,6 +450,7 @@ mod test { use omicron_common::api::external::{ ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; + use omicron_common::api::internal::shared::DatasetKind; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 6331770ef5..4e7dde244b 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -270,7 +270,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -284,6 +285,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -310,6 +312,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index e713121d34..b2c164a6d9 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -268,7 +268,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -282,6 +283,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -308,6 +310,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 0b8dc4fca6..97ee23f82e 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -281,7 +281,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -295,6 +296,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -321,6 +323,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, 
inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index 9ac945f71d..a1cc103594 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -279,7 +279,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -293,6 +294,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -319,6 +321,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 6444934ba6..2f84378a13 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -67,7 +67,7 @@ pub(crate) async fn ensure_dataset_records_exist( id.into_untyped_uuid(), pool_id.into_untyped_uuid(), Some(address), - kind.into(), + kind.clone(), ); let maybe_inserted = datastore .dataset_insert_if_not_exists(dataset) diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index af95eb8e77..d94bbe2e27 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -135,7 +135,6 @@ mod test { use httptest::responders::status_code; use httptest::Expectation; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; @@ -153,6 +152,7 @@ mod test { use nexus_types::identity::Asset; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; + use omicron_common::api::internal::shared::DatasetKind; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index 602f3f85e8..6e49ddc7f0 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -179,13 +179,13 @@ mod tests { use diesel::ExpressionMethods; use diesel::QueryDsl; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::Region; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::{ DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, }; diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index f3c0031327..835541c2ea 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -147,7 +147,7 @@ impl super::Nexus { dataset.dataset_id, dataset.zpool_id, Some(dataset.request.address), - dataset.request.kind.into(), + dataset.request.kind, ) }) .collect(); diff --git 
a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index 86aab2ac22..1bc1491468 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -747,7 +747,6 @@ pub(crate) mod test { }; use chrono::Utc; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::Region; use nexus_db_model::RegionReplacement; use nexus_db_model::RegionReplacementState; @@ -758,6 +757,7 @@ pub(crate) mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils_macros::nexus_test; use nexus_types::identity::Asset; + use omicron_common::api::internal::shared::DatasetKind; use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 261045670e..9c21ca73a1 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -12,7 +12,6 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; -use nexus_db_queries::db::model::DatasetKind; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::SledFilter; @@ -23,6 +22,7 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; +use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::{GenericUuid, SledUuid}; use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddrV6; @@ -292,13 +292,12 @@ impl super::Nexus { // Datasets (contained within zpools) - /// Upserts a dataset into the database, updating it if it already exists. - pub(crate) async fn upsert_dataset( + /// Upserts a crucible dataset into the database, updating it if it already exists. 
+ pub(crate) async fn upsert_crucible_dataset( &self, id: Uuid, zpool_id: Uuid, address: SocketAddrV6, - kind: DatasetKind, ) -> Result<(), Error> { info!( self.log, @@ -307,6 +306,7 @@ impl super::Nexus { "dataset_id" => id.to_string(), "address" => address.to_string() ); + let kind = DatasetKind::Crucible; let dataset = db::model::Dataset::new(id, zpool_id, Some(address), kind); self.db_datastore.dataset_upsert(dataset).await?; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index d5c853b15b..284e8de2ea 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -384,12 +384,7 @@ impl nexus_test_interface::NexusServer for Server { self.apictx .context .nexus - .upsert_dataset( - dataset_id, - zpool_id, - address, - nexus_db_queries::db::model::DatasetKind::Crucible, - ) + .upsert_crucible_dataset(dataset_id, zpool_id, address) .await .unwrap(); } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 619a2187b5..da8bbacf8b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2711,39 +2711,8 @@ ] }, "DatasetKind": { - "description": "Describes the purpose of the dataset.", - "oneOf": [ - { - "type": "string", - "enum": [ - "crucible", - "cockroach", - "external_dns", - "internal_dns" - ] - }, - { - "description": "Used for single-node clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_server" - ] - } - ] + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" }, "DatasetPutRequest": { "description": "Describes a dataset within a pool.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ec2a8bfc4d..bb8e4e0b87 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -176,6 +176,62 @@ } } }, + "/datasets": { + "get": { + "summary": "Lists the datasets that this sled is configured to use", + "operationId": "datasets_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Configures datasets to be used on this sled", + "operationId": "datasets_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsConfig" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsManagementResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", @@ -2005,6 +2061,112 @@ } ] }, + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + 
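The CompressionAlgorithm schema being spelled out here is an internally tagged enum. A sketch of the Rust shape that would generate it (the real definition lives in omicron-common's disk module, and its level field is a GzipLevel newtype; a plain u8 stands in for it below):

use serde::{Deserialize, Serialize};

// Sketch of the enum behind the schema above; `level` is a plain u8 here,
// whereas omicron-common wraps it in a GzipLevel newtype.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum CompressionAlgorithm {
    On,
    Off,
    Gzip,
    GzipN { level: u8 },
    Lz4,
    Lzjb,
    Zle,
}

fn main() {
    let c = CompressionAlgorithm::GzipN { level: 9 };
    // Prints: {"type":"gzip_n","level":9}
    println!("{}", serde_json::to_string(&c).unwrap());
}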
"enum": [ + "gzip" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "level": { + "$ref": "#/components/schemas/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + }, + "required": [ + "level", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "CrucibleOpts": { "description": "CrucibleOpts\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"lossy\", \"read_only\", \"target\" ], \"properties\": { \"cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"control\": { \"type\": [ \"string\", \"null\" ] }, \"flush_timeout\": { \"type\": [ \"number\", \"null\" ], \"format\": \"float\" }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"key\": { \"type\": [ \"string\", \"null\" ] }, \"key_pem\": { \"type\": [ \"string\", \"null\" ] }, \"lossy\": { \"type\": \"boolean\" }, \"read_only\": { \"type\": \"boolean\" }, \"root_cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"target\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } } } } ```
", "type": "object", @@ -2058,6 +2220,128 @@ "target" ] }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/components/schemas/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetName" + } + ] + }, + "quota": { + "nullable": true, + "description": "The upper bound on the amount of storage used by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "reservation": { + "nullable": true, + "description": "The lower bound on the amount of storage usable by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "compression", + "id", + "name" + ] + }, + "DatasetKind": { + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetManagementStatus": { + "description": "Identifies how a single dataset management operation may have succeeded or failed.", + "type": "object", + "properties": { + "dataset_name": { + "$ref": "#/components/schemas/DatasetName" + }, + "err": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "dataset_name" + ] + }, + "DatasetName": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/DatasetKind" + }, + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "kind", + "pool_name" + ] + }, + "DatasetsConfig": { + "type": "object", + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/DatasetConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "datasets", + "generation" + ] + }, + "DatasetsManagementResult": { + "description": "The result from attempting to manage datasets.", + "type": "object", + "properties": { + "status": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetManagementStatus" + } + } + }, + "required": [ + "status" + ] + }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -2700,6 +2984,11 @@ "format": "uint64", "minimum": 0 }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "HostIdentifier": { "description": "A `HostIdentifier` represents either an IP host or network (v4 or v6), or an entire VPC (identified by its VNI). 
It is used in firewall rule host filters.", "oneOf": [ @@ -4762,6 +5051,10 @@ "sync" ] }, + "TypedUuidForDatasetKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForInstanceKind": { "type": "string", "format": "uuid" diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up01.sql b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql new file mode 100644 index 0000000000..1cfe718d00 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone_root' AFTER 'internal_dns'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up02.sql b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql new file mode 100644 index 0000000000..93178e3685 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone' AFTER 'zone_root'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up03.sql b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql new file mode 100644 index 0000000000..58d215d177 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'debug' AFTER 'zone'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up04.sql b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql new file mode 100644 index 0000000000..b92bce1b6c --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ADD COLUMN IF NOT EXISTS zone_name TEXT; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up05.sql b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql new file mode 100644 index 0000000000..3f33b79c72 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.dataset ADD CONSTRAINT IF NOT EXISTS zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) +) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index d531672832..e851d2ed6b 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -509,7 +509,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM ( 'clickhouse_keeper', 'clickhouse_server', 'external_dns', - 'internal_dns' + 'internal_dns', + 'zone_root', + 'zone', + 'debug' ); /* @@ -535,6 +538,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( /* An upper bound on the amount of space that might be in-use */ size_used INT, + /* Only valid if kind = zone -- the name of this zone */ + zone_name TEXT, + /* Crucible must make use of 'size_used'; other datasets manage their own storage */ CONSTRAINT size_used_column_set_for_crucible CHECK ( (kind != 'crucible') OR @@ -544,6 +550,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( CONSTRAINT ip_and_port_set_for_crucible CHECK ( (kind != 'crucible') OR (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL) + ), + + CONSTRAINT zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) ) ); @@ -4214,7 +4225,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '92.0.0', NULL) + (TRUE, NOW(), NOW(), '93.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/omicron-datasets.json b/schema/omicron-datasets.json new file mode 100644 index 0000000000..07fc2cfb13 --- /dev/null +++ b/schema/omicron-datasets.json @@ -0,0 +1,226 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": 
"DatasetsConfig", + "type": "object", + "required": [ + "datasets", + "generation" + ], + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/DatasetConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + }, + "definitions": { + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "gzip" + ] + } + } + }, + { + "type": "object", + "required": [ + "level", + "type" + ], + "properties": { + "level": { + "$ref": "#/definitions/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + } + } + ] + }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "required": [ + "compression", + "id", + "name" + ], + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/definitions/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/definitions/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/definitions/DatasetName" + } + ] + }, + "quota": { + "description": "The upper bound on the amount of storage used by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "reservation": { + "description": "The lower bound on the amount of storage usable by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + } + } + }, + "DatasetKind": { + "description": "The kind of dataset. 
See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetName": { + "type": "object", + "required": [ + "kind", + "pool_name" + ], + "properties": { + "kind": { + "$ref": "#/definitions/DatasetKind" + }, + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "TypedUuidForDatasetKind": { + "type": "string", + "format": "uuid" + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 410747bf46..d9e49a5c56 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -21,7 +21,10 @@ use omicron_common::{ SwitchPorts, VirtualNetworkInterfaceHost, }, }, - disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig}, + disk::{ + DatasetsConfig, DatasetsManagementResult, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, + }, }; use omicron_uuid_kinds::{PropolisUuid, ZpoolUuid}; use schemars::JsonSchema; @@ -168,6 +171,25 @@ pub trait SledAgentApi { body: TypedBody, ) -> Result; + /// Configures datasets to be used on this sled + #[endpoint { + method = PUT, + path = "/datasets", + }] + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + /// Lists the datasets that this sled is configured to use + #[endpoint { + method = GET, + path = "/datasets", + }] + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + #[endpoint { method = GET, path = "/omicron-physical-disks", diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 2e9ea4c8d9..a0f7826db3 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -25,6 +25,7 @@ use camino::Utf8PathBuf; use illumos_utils::zfs::{ EnsureFilesystemError, GetValueError, Mountpoint, SizeDetails, Zfs, }; +use omicron_common::disk::CompressionAlgorithm; use std::io; #[derive(Debug, thiserror::Error)] @@ -50,7 +51,7 @@ struct BackingFs<'a> { // Optional quota, in _bytes_ quota: Option, // Optional compression mode - compression: Option<&'static str>, + compression: CompressionAlgorithm, // Linked service service: Option<&'static str>, // Subdirectories to ensure @@ -63,7 +64,7 @@ impl<'a> BackingFs<'a> { name, mountpoint: "legacy", quota: None, - compression: None, + compression: CompressionAlgorithm::Off, service: None, subdirs: None, } @@ -79,8 +80,8 @@ impl<'a> BackingFs<'a> { self } - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); + const fn compression(mut self, compression: CompressionAlgorithm) -> Self { + self.compression = compression; self } @@ -101,7 +102,7 @@ const BACKING_FMD_SUBDIRS: [&'static str; 3] = ["rsrc", "ckpt", "xprt"]; const BACKING_FMD_SERVICE: &'static str = "svc:/system/fmd:default"; const BACKING_FMD_QUOTA: usize = 500 * (1 << 20); // 500 MiB -const BACKING_COMPRESSION: &'static str = "on"; +const BACKING_COMPRESSION: CompressionAlgorithm = CompressionAlgorithm::On; const 
BACKINGFS_COUNT: usize = 1; static BACKINGFS: [BackingFs; BACKINGFS_COUNT] = @@ -137,6 +138,7 @@ pub(crate) fn ensure_backing_fs( let size_details = Some(SizeDetails { quota: bfs.quota, + reservation: None, compression: bfs.compression, }); diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 221224a2e9..1d61d97675 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -28,7 +28,8 @@ use omicron_common::api::internal::shared::{ VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskVariant, + DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, }; use sled_agent_api::*; use sled_agent_types::boot_disk::{ @@ -219,6 +220,23 @@ impl SledAgentApi for SledAgentImpl { .map_err(HttpError::from) } + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } + + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } + async fn zone_bundle_cleanup( rqctx: RequestContext, ) -> Result>, HttpError> diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 419e897d75..de0b086752 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -3,9 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; +use omicron_common::disk::{DatasetKind, DatasetName}; pub use sled_hardware::DendriteAsic; -use sled_storage::dataset::DatasetName; -use sled_storage::dataset::DatasetType; use std::net::SocketAddrV6; /// Extension trait for `OmicronZoneConfig`. @@ -49,25 +48,25 @@ pub(crate) trait OmicronZoneTypeExt { | OmicronZoneType::Oximeter { .. } | OmicronZoneType::CruciblePantry { .. } => None, OmicronZoneType::Clickhouse { dataset, address, .. } => { - Some((dataset, DatasetType::Clickhouse, address)) + Some((dataset, DatasetKind::Clickhouse, address)) } OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => { - Some((dataset, DatasetType::ClickhouseKeeper, address)) + Some((dataset, DatasetKind::ClickhouseKeeper, address)) } OmicronZoneType::ClickhouseServer { dataset, address, .. } => { - Some((dataset, DatasetType::ClickhouseServer, address)) + Some((dataset, DatasetKind::ClickhouseServer, address)) } OmicronZoneType::CockroachDb { dataset, address, .. } => { - Some((dataset, DatasetType::CockroachDb, address)) + Some((dataset, DatasetKind::Cockroach, address)) } OmicronZoneType::Crucible { dataset, address, .. } => { - Some((dataset, DatasetType::Crucible, address)) + Some((dataset, DatasetKind::Crucible, address)) } OmicronZoneType::ExternalDns { dataset, http_address, .. } => { - Some((dataset, DatasetType::ExternalDns, http_address)) + Some((dataset, DatasetKind::ExternalDns, http_address)) } OmicronZoneType::InternalDns { dataset, http_address, .. 
} => { - Some((dataset, DatasetType::InternalDns, http_address)) + Some((dataset, DatasetKind::InternalDns, http_address)) } }?; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index e0959b0219..7bf3a7a875 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -32,7 +32,8 @@ use omicron_common::backoff::{ retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError, }; use omicron_common::disk::{ - DiskVariant, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, + DatasetKind, DatasetName, DiskVariant, OmicronPhysicalDiskConfig, + OmicronPhysicalDisksConfig, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_common::policy::{ @@ -50,7 +51,7 @@ use sled_agent_client::{ }; use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_agent_types::sled::StartSledAgentRequest; -use sled_storage::dataset::{DatasetName, DatasetType, CONFIG_DATASET}; +use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; @@ -497,7 +498,7 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::InternalDns)?; + sled.alloc_dataset_from_u2s(DatasetKind::InternalDns)?; let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { @@ -539,7 +540,7 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::CockroachDb)?; + sled.alloc_dataset_from_u2s(DatasetKind::Cockroach)?; let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, @@ -587,7 +588,7 @@ impl Plan { let dns_address = from_sockaddr_to_external_floating_addr( SocketAddr::new(external_ip, dns_port), ); - let dataset_kind = DatasetType::ExternalDns; + let dataset_kind = DatasetKind::ExternalDns; let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?; let filesystem_pool = Some(dataset_name.pool().clone()); @@ -716,7 +717,7 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::Clickhouse)?; + sled.alloc_dataset_from_u2s(DatasetKind::Clickhouse)?; let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, @@ -759,7 +760,7 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::ClickhouseServer)?; + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseServer)?; let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, @@ -800,7 +801,7 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::ClickhouseKeeper)?; + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseKeeper)?; let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, @@ -1034,7 +1035,7 @@ pub struct SledInfo { u2_zpools: Vec, /// spreads components across a Sled's zpools u2_zpool_allocators: - HashMap + Send + Sync>>, + HashMap + Send + Sync>>, /// whether this Sled is a scrimlet is_scrimlet: bool, /// allocator for addresses in this Sled's subnet @@ -1075,7 +1076,7 @@ impl SledInfo { /// this Sled fn alloc_dataset_from_u2s( &mut self, - kind: DatasetType, + 
kind: DatasetKind, ) -> Result { // We have two goals here: // diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d6909174ee..7677dfbd8a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -90,6 +90,7 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; +use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use once_cell::sync::OnceCell; @@ -103,9 +104,7 @@ use sled_hardware::underlay; use sled_hardware::SledMode; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{ - DatasetName, DatasetType, CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET, -}; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; @@ -1881,7 +1880,7 @@ impl ServiceManager { let dataset_name = DatasetName::new( dataset.pool_name.clone(), - DatasetType::Crucible, + DatasetKind::Crucible, ) .full_name(); let uuid = &Uuid::new_v4().to_string(); diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index aead47658f..ac583a1a74 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -30,6 +30,8 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use sled_agent_api::*; @@ -299,6 +301,23 @@ impl SledAgentApi for SledAgentSimImpl { )) } + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } + + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } + async fn omicron_physical_disks_put( rqctx: RequestContext, body: TypedBody, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 7292b3dee1..aaac7f63d0 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -35,8 +35,8 @@ use omicron_common::api::internal::shared::{ VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, - OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskIdentity, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -868,6 +868,19 @@ impl SledAgent { }) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result { + self.storage.lock().await.datasets_ensure(config).await + } + + pub async fn datasets_config_list( + &self, + ) -> Result { + self.storage.lock().await.datasets_config_list().await + } + pub async fn omicron_physical_disks_list( &self, ) -> Result { diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 
ac8f80069b..144fb48aa9 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -18,6 +18,9 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; +use omicron_common::disk::DatasetManagementStatus; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; @@ -555,6 +558,7 @@ pub struct Storage { sled_id: Uuid, log: Logger, config: Option<OmicronPhysicalDisksConfig>, + dataset_config: Option<DatasetsConfig>, physical_disks: HashMap, next_disk_slot: i64, zpools: HashMap, @@ -568,6 +572,7 @@ impl Storage { sled_id, log, config: None, + dataset_config: None, physical_disks: HashMap::new(), next_disk_slot: 0, zpools: HashMap::new(), @@ -581,6 +586,45 @@ impl Storage { &self.physical_disks } + pub async fn datasets_config_list( + &self, + ) -> Result<DatasetsConfig, HttpError> { + let Some(config) = self.dataset_config.as_ref() else { + return Err(HttpError::for_not_found( + None, + "No control plane datasets".into(), + )); + }; + Ok(config.clone()) + } + + pub async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, HttpError> { + if let Some(stored_config) = self.dataset_config.as_ref() { + if config.generation < stored_config.generation { + return Err(HttpError::for_client_error( + None, + http::StatusCode::BAD_REQUEST, + "Generation number too old".to_string(), + )); + } + } + self.dataset_config.replace(config.clone()); + + Ok(DatasetsManagementResult { + status: config + .datasets + .values() + .map(|config| DatasetManagementStatus { + dataset_name: config.name.clone(), + err: None, + }) + .collect(), + }) + } + + pub async fn omicron_physical_disks_list( + &mut self, + ) -> Result<OmicronPhysicalDisksConfig, HttpError> { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d69ccedb7d..f13d8caccf 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -51,7 +51,10 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::{DisksManagementResult, OmicronPhysicalDisksConfig}; +use omicron_common::disk::{ + DatasetsConfig, DatasetsManagementResult, DisksManagementResult, + OmicronPhysicalDisksConfig, +}; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use sled_agent_api::Zpool; @@ -808,6 +811,29 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } + pub async fn datasets_config_list(&self) -> Result<DatasetsConfig, Error> { + Ok(self.storage().datasets_config_list().await?) + } + + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + info!(self.log, "datasets ensure"); + let datasets_result = self.storage().datasets_ensure(config).await?; + info!(self.log, "datasets ensure: Updated storage"); + + // TODO(https://github.com/oxidecomputer/omicron/issues/6177): + // At the moment, we don't actually remove any datasets -- this function + // just adds new datasets. + // + // Once we start removing old datasets, we should probably ensure that + // they are no longer in-use before returning (similar to + // omicron_physical_disks_ensure). + + Ok(datasets_result) + } + /// Requests the set of physical disks currently managed by the Sled Agent.
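Note that both the simulated and real implementations report failures per dataset rather than failing the whole PUT: DatasetsManagementResult carries one DatasetManagementStatus per requested dataset. A small illustrative helper (not in the patch) for a consumer of that result:

use omicron_common::disk::DatasetsManagementResult;

// Illustrative only: pick out the first per-dataset error, if any. The
// patch's own tests use the analogous has_error() helper instead.
fn first_failure(result: &DatasetsManagementResult) -> Option<&str> {
    result.status.iter().find_map(|status| status.err.as_deref())
}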
/// /// This should be contrasted by the set of disks in the inventory, which @@ -896,7 +922,7 @@ impl SledAgent { &self, requested_zones: OmicronZonesConfig, ) -> Result<(), Error> { - // TODO: + // TODO(https://github.com/oxidecomputer/omicron/issues/6043): // - If these are the set of filesystems, we should also consider // removing the ones which are not listed here. // - It's probably worth sending a bulk request to the storage system, diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 74f2be782f..e2b024db11 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -15,10 +15,10 @@ use illumos_utils::zfs::{ use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::api::internal::shared::DatasetKind; -use omicron_common::disk::{DiskIdentity, DiskVariant}; +use omicron_common::disk::{ + CompressionAlgorithm, DatasetName, DiskIdentity, DiskVariant, GzipLevel, +}; use rand::distributions::{Alphanumeric, DistString}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use slog::{debug, info, Logger}; use std::process::Stdio; use std::str::FromStr; @@ -45,7 +45,8 @@ cfg_if! { // tuned as needed. pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); // passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; +pub const DUMP_DATASET_COMPRESSION: CompressionAlgorithm = + CompressionAlgorithm::GzipN { level: GzipLevel::new::<9>() }; // U.2 datasets live under the encrypted dataset and inherit encryption pub const ZONE_DATASET: &'static str = "crypt/zone"; @@ -102,12 +103,17 @@ struct ExpectedDataset { // Identifies if the dataset should be deleted on boot wipe: bool, // Optional compression mode - compression: Option<&'static str>, + compression: CompressionAlgorithm, } impl ExpectedDataset { const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } + ExpectedDataset { + name, + quota: None, + wipe: false, + compression: CompressionAlgorithm::Off, + } } const fn quota(mut self, quota: usize) -> Self { @@ -120,151 +126,12 @@ impl ExpectedDataset { self } - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); + const fn compression(mut self, compression: CompressionAlgorithm) -> Self { + self.compression = compression; self } } -/// The type of a dataset, and an auxiliary information necessary to -/// successfully launch a zone managing the associated data. -/// -/// There is currently no auxiliary data here, but there's a separation from -/// omicron-common's `DatasetKind` in case there might be some in the future. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetType { - // TODO: `DatasetKind` uses `Cockroach`, not `CockroachDb`, for historical - // reasons. It may be worth using the same name for both. - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ClickhouseServer, - ExternalDns, - InternalDns, -} - -impl DatasetType { - pub fn dataset_should_be_encrypted(&self) -> bool { - match self { - // We encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption. 
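DUMP_DATASET_COMPRESSION now carries a typed value in place of the old "gzip-9" string; when it is finally handed to zfs set compression=..., the Display impl is expected to render the identical string. A sketch of that rendering, assuming the Display behavior in omicron-common:

// Sketch, assuming omicron-common's Display impl: typed compression values
// render back into the strings the `zfs` CLI accepts.
fn zfs_compression_value(gzip_level: Option<u8>) -> String {
    match gzip_level {
        Some(n) => format!("gzip-{n}"), // GzipN { level: 9 } -> "gzip-9"
        None => String::from("gzip"),   // plain Gzip -> ZFS's default level
    }
}

fn main() {
    assert_eq!(zfs_compression_value(Some(9)), "gzip-9");
}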
- DatasetType::Crucible => false, - _ => true, - } - } - - pub fn kind(&self) -> DatasetKind { - match self { - Self::Crucible => DatasetKind::Crucible, - Self::CockroachDb => DatasetKind::Cockroach, - Self::Clickhouse => DatasetKind::Clickhouse, - Self::ClickhouseKeeper => DatasetKind::ClickhouseKeeper, - Self::ClickhouseServer => DatasetKind::ClickhouseServer, - Self::ExternalDns => DatasetKind::ExternalDns, - Self::InternalDns => DatasetKind::InternalDns, - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum DatasetKindParseError { - #[error("Dataset unknown: {0}")] - UnknownDataset(String), -} - -impl FromStr for DatasetType { - type Err = DatasetKindParseError; - - fn from_str(s: &str) -> Result { - use DatasetType::*; - let kind = match s { - "crucible" => Crucible, - "cockroachdb" => CockroachDb, - "clickhouse" => Clickhouse, - "clickhouse_keeper" => ClickhouseKeeper, - "external_dns" => ExternalDns, - "internal_dns" => InternalDns, - _ => { - return Err(DatasetKindParseError::UnknownDataset( - s.to_string(), - )) - } - }; - Ok(kind) - } -} - -impl std::fmt::Display for DatasetType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetType::*; - let s = match self { - Crucible => "crucible", - CockroachDb => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ClickhouseServer => "clickhouse_server", - ExternalDns => "external_dns", - InternalDns => "internal_dns", - }; - write!(f, "{}", s) - } -} - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. - kind: DatasetType, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetType) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetType { - &self.kind - } - - /// Returns the full name of the dataset, as would be returned from - /// "zfs get" or "zfs list". - /// - /// If this dataset should be encrypted, this automatically adds the - /// "crypt" dataset component. - pub fn full_name(&self) -> String { - // Currently, we encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption. 
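The dataset_should_be_encrypted/full_name logic being deleted here survives in omicron-common's DatasetName; its naming rule is worth keeping in mind when reading the ZFS paths elsewhere in this patch. A sketch of that rule:

// Sketch of the naming rule (now in omicron-common): every dataset except
// Crucible is placed under the encrypted "crypt" child of its zpool, since
// Crucible encrypts its own data and double-encryption is avoided.
fn full_name(pool: &str, kind: &str, encrypted: bool) -> String {
    if encrypted {
        format!("{pool}/crypt/{kind}") // e.g. "oxp_<uuid>/crypt/external_dns"
    } else {
        format!("{pool}/{kind}")       // e.g. "oxp_<uuid>/crucible"
    }
}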
- if self.kind.dataset_should_be_encrypted() { - self.full_encrypted_name() - } else { - self.full_unencrypted_name() - } - } - - fn full_encrypted_name(&self) -> String { - format!("{}/crypt/{}", self.pool_name, self.kind) - } - - fn full_unencrypted_name(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - #[derive(Debug, thiserror::Error)] pub enum DatasetError { #[error("Cannot open {path} due to {error}")] @@ -431,6 +298,7 @@ pub(crate) async fn ensure_zpool_has_datasets( let encryption_details = None; let size_details = Some(SizeDetails { quota: dataset.quota, + reservation: None, compression: dataset.compression, }); Zfs::ensure_filesystem( @@ -577,7 +445,7 @@ async fn ensure_zpool_dataset_is_encrypted( zpool_name: &ZpoolName, unencrypted_dataset: &str, ) -> Result<(), DatasetEncryptionMigrationError> { - let Ok(kind) = DatasetType::from_str(&unencrypted_dataset) else { + let Ok(kind) = DatasetKind::from_str(&unencrypted_dataset) else { info!(log, "Unrecognized dataset kind"); return Ok(()); }; @@ -818,7 +686,7 @@ mod test { #[test] fn serialize_dataset_name() { let pool = ZpoolName::new_internal(ZpoolUuid::new_v4()); - let kind = DatasetType::Crucible; + let kind = DatasetKind::Crucible; let name = DatasetName::new(pool, kind); serde_json::to_string(&name).unwrap(); } diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 4c5582fd79..988f7f363a 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -4,11 +4,12 @@ //! Storage related errors -use crate::dataset::{DatasetError, DatasetName}; +use crate::dataset::DatasetError; use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; use omicron_common::api::external::Generation; +use omicron_common::disk::DatasetName; use uuid::Uuid; #[derive(thiserror::Error, Debug)] @@ -83,6 +84,15 @@ pub enum Error { current: Generation, }, + #[error("Invalid configuration (UUID mismatch in arguments)")] + ConfigUuidMismatch, + + #[error("Dataset configuration out-of-date (asked for {requested}, but latest is {current})")] + DatasetConfigurationOutdated { requested: Generation, current: Generation }, + + #[error("Dataset configuration changed for the same generation number: {generation}")] + DatasetConfigurationChanged { generation: Generation }, + #[error("Failed to update ledger in internal storage")] Ledger(#[from] omicron_common::ledger::Error), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 3cbf00530a..88e1bbaa34 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -7,7 +7,7 @@ use std::collections::HashSet; use crate::config::MountConfig; -use crate::dataset::{DatasetName, CONFIG_DATASET}; +use crate::dataset::CONFIG_DATASET; use crate::disk::RawDisk; use crate::error::Error; use crate::resources::{AllDisks, StorageResources}; @@ -18,11 +18,14 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, + DatasetConfig, DatasetManagementStatus, DatasetName, DatasetsConfig, + DatasetsManagementResult, DiskIdentity, DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; -use slog::{info, o, warn, Logger}; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; +use slog::{error, info, o, warn, Logger}; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use 
tokio::time::{interval, Duration, MissedTickBehavior}; @@ -62,6 +65,9 @@ const SYNCHRONIZE_INTERVAL: Duration = Duration::from_secs(10); // The filename of the ledger storing physical disk info const DISKS_LEDGER_FILENAME: &str = "omicron-physical-disks.json"; +// The filename of the ledger storing dataset info +const DATASETS_LEDGER_FILENAME: &str = "omicron-datasets.json"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum StorageManagerState { // We know that any attempts to manage disks will fail, as the key manager @@ -114,6 +120,16 @@ pub(crate) enum StorageRequest { tx: DebugIgnore>>, }, + DatasetsEnsure { + config: DatasetsConfig, + tx: DebugIgnore< + oneshot::Sender>, + >, + }, + DatasetsList { + tx: DebugIgnore>>, + }, + // Requests to explicitly manage or stop managing a set of devices OmicronPhysicalDisksEnsure { config: OmicronPhysicalDisksConfig, @@ -240,6 +256,31 @@ impl StorageHandle { rx.map(|result| result.unwrap()) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsEnsure { config, tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + + /// Reads the last value written to storage by + /// [Self::datasets_ensure]. + pub async fn datasets_config_list(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsList { tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + pub async fn omicron_physical_disks_ensure( &self, config: OmicronPhysicalDisksConfig, @@ -322,6 +363,10 @@ impl StorageHandle { rx.await.unwrap() } + // TODO(https://github.com/oxidecomputer/omicron/issues/6043): + // + // Deprecate usage of this function, prefer to call "datasets_ensure" + // and ask for the set of all datasets from Nexus. pub async fn upsert_filesystem( &self, dataset_id: Uuid, @@ -428,6 +473,12 @@ impl StorageManager { self.ensure_using_exactly_these_disks(raw_disks).await; let _ = tx.0.send(Ok(())); } + StorageRequest::DatasetsEnsure { config, tx } => { + let _ = tx.0.send(self.datasets_ensure(config).await); + } + StorageRequest::DatasetsList { tx } => { + let _ = tx.0.send(self.datasets_config_list().await); + } StorageRequest::OmicronPhysicalDisksEnsure { config, tx } => { let _ = tx.0.send(self.omicron_physical_disks_ensure(config).await); @@ -485,6 +536,10 @@ impl StorageManager { ); } + // Sled Agents can remember which disks they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. async fn all_omicron_disk_ledgers(&self) -> Vec { self.resources .disks() @@ -494,6 +549,19 @@ impl StorageManager { .collect() } + // Sled Agents can remember which datasets they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. + async fn all_omicron_dataset_ledgers(&self) -> Vec { + self.resources + .disks() + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(DATASETS_LEDGER_FILENAME)) + .collect() + } + // Manages a newly detected disk that has been attached to this sled. // // For U.2s: we update our inventory. 
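Like omicron-physical-disks.json above it, the datasets ledger lives on the CONFIG_DATASET of each M.2, letting a sled replay the last accepted configuration across reboots. A hypothetical instance of what omicron-datasets.json could hold (every identifier below is made up for illustration):

use serde_json::json;

fn main() {
    // Hypothetical ledger contents; generation 2 is the first "real" config,
    // since generation 1 is reserved for "no datasets".
    let ledger = json!({
        "generation": 2,
        "datasets": {
            "3f8b0a2e-5c1d-4e6f-8a9b-0c1d2e3f4a5b": {
                "id": "3f8b0a2e-5c1d-4e6f-8a9b-0c1d2e3f4a5b",
                "name": {
                    "pool_name": "oxp_2a07f4f0-6f39-4c3a-9b5f-91d6e3b7d2a1",
                    "kind": "crucible"
                },
                "compression": { "type": "off" },
                "quota": null,
                "reservation": null
            }
        }
    });
    println!("{}", serde_json::to_string_pretty(&ledger).unwrap());
}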
@@ -545,9 +613,11 @@ impl StorageManager { self.resources.insert_or_update_disk(raw_disk).await } - async fn load_ledger(&self) -> Option<Ledger<OmicronPhysicalDisksConfig>> { + async fn load_disks_ledger( + &self, + ) -> Option<Ledger<OmicronPhysicalDisksConfig>> { let ledger_paths = self.all_omicron_disk_ledgers().await; - let log = self.log.new(o!("request" => "load_ledger")); + let log = self.log.new(o!("request" => "load_disks_ledger")); let maybe_ledger = Ledger::<OmicronPhysicalDisksConfig>::new( &log, ledger_paths.clone(), @@ -579,7 +649,7 @@ // Now that we're actually able to unpack U.2s, attempt to load the // set of disks which we previously stored in the ledger, if one // existed. - let ledger = self.load_ledger().await; + let ledger = self.load_disks_ledger().await; if let Some(ledger) = ledger { info!(self.log, "Setting StorageResources state to match ledger"); @@ -591,9 +661,160 @@ info!(self.log, "KeyManager ready, but no ledger detected"); } + // We don't load any configuration for datasets, since we aren't + // currently storing any dataset information in-memory. + // + // If we ever wanted to do so, however, we could load that information + // here. + Ok(()) } + async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + let log = self.log.new(o!("request" => "datasets_ensure")); + + // As a small input-check, confirm that the UUID of the map of inputs + // matches the DatasetConfig. + // + // The dataset configs are sorted by UUID so they always appear in the + // same order, but this check prevents adding an entry of: + // - (UUID: X, Config(UUID: Y)), for X != Y + if !config.datasets.iter().all(|(id, config)| *id == config.id) { + return Err(Error::ConfigUuidMismatch); + } + + // We rely on the schema being stable across reboots -- observe + // "test_datasets_schema" below for that property guarantee. + let ledger_paths = self.all_omicron_dataset_ledgers().await; + let maybe_ledger = + Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await; + + let mut ledger = match maybe_ledger { + Some(ledger) => { + info!( + log, + "Comparing 'requested datasets' to ledger on internal storage" + ); + let ledger_data = ledger.data(); + if config.generation < ledger_data.generation { + warn!( + log, + "Request looks out-of-date compared to prior request"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + return Err(Error::DatasetConfigurationOutdated { + requested: config.generation, + current: ledger_data.generation, + }); + } else if config.generation == ledger_data.generation { + info!( + log, + "Requested generation number matches prior request", + ); + + if ledger_data != &config { + error!( + log, + "Requested configuration changed (with the same generation)"; + "generation" => ?config.generation + ); + return Err(Error::DatasetConfigurationChanged { + generation: config.generation, + }); + } + } else { + info!( + log, + "Request looks newer than prior requests"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + } + ledger + } + None => { + info!(log, "No previously-stored 'requested datasets', creating new ledger"); + Ledger::<DatasetsConfig>::new_with( + &log, + ledger_paths.clone(), + DatasetsConfig::default(), + ) + } + }; + + let result = self.datasets_ensure_internal(&log, &config).await; + + let ledger_data = ledger.data_mut(); + if *ledger_data == config { + return Ok(result); + } + *ledger_data = config; + ledger.commit().await?; + + Ok(result) + } + + // Attempts to ensure that each dataset exists.
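The ledger comparison above boils down to three cases; a compact restatement, with generations as bare integers for illustration:

// Illustrative restatement of datasets_ensure's ledger check.
fn evaluate(incoming: u64, ledgered: u64, same_contents: bool) -> Result<(), &'static str> {
    if incoming < ledgered {
        Err("DatasetConfigurationOutdated") // strictly older: always rejected
    } else if incoming == ledgered && !same_contents {
        Err("DatasetConfigurationChanged") // same generation must be identical
    } else {
        Ok(()) // identical re-send (idempotent) or strictly newer
    }
}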
+    // Attempts to ensure that each dataset exists.
+    //
+    // Does not return an error, because the [DatasetsManagementResult] type
+    // includes details about all possible errors that may occur at
+    // a per-dataset granularity.
+    async fn datasets_ensure_internal(
+        &mut self,
+        log: &Logger,
+        config: &DatasetsConfig,
+    ) -> DatasetsManagementResult {
+        let mut status = vec![];
+        for dataset in config.datasets.values() {
+            status.push(self.dataset_ensure_internal(log, dataset).await);
+        }
+        DatasetsManagementResult { status }
+    }
+
+    async fn dataset_ensure_internal(
+        &mut self,
+        log: &Logger,
+        config: &DatasetConfig,
+    ) -> DatasetManagementStatus {
+        let log = log.new(o!("name" => config.name.full_name()));
+        info!(log, "Ensuring dataset");
+        let mut status = DatasetManagementStatus {
+            dataset_name: config.name.clone(),
+            err: None,
+        };
+
+        if let Err(err) = self.ensure_dataset(config).await {
+            warn!(log, "Failed to ensure dataset"; "dataset" => ?status.dataset_name, "err" => ?err);
+            status.err = Some(err.to_string());
+        };
+
+        status
+    }
+
+    // Lists datasets that this sled is configured to use.
+    async fn datasets_config_list(&mut self) -> Result<DatasetsConfig, Error> {
+        let log = self.log.new(o!("request" => "datasets_config_list"));
+
+        let ledger_paths = self.all_omicron_dataset_ledgers().await;
+        let maybe_ledger =
+            Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await;
+
+        match maybe_ledger {
+            Some(ledger) => {
+                info!(log, "Found ledger on internal storage");
+                return Ok(ledger.data().clone());
+            }
+            None => {
+                info!(log, "No ledger detected on internal storage");
+                return Err(Error::LedgerNotFound);
+            }
+        }
+    }
+
     // Makes a U.2 disk managed by the control plane within [`StorageResources`].
     async fn omicron_physical_disks_ensure(
         &mut self,
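
[Editorial note: `datasets_ensure_internal` deliberately never
short-circuits: a failure on one dataset is recorded in its
`DatasetManagementStatus` and the loop continues, so a single bad zpool
cannot block configuration of the others. A rough sketch of that
aggregate-status pattern, using hypothetical stand-in types rather than the
ones above.]

    // Hypothetical stand-ins for DatasetConfig and DatasetManagementStatus.
    struct Dataset {
        name: String,
    }

    struct Status {
        name: String,
        err: Option<String>,
    }

    fn ensure_one(d: &Dataset) -> Result<(), String> {
        // Real code would create or verify the dataset here.
        if d.name.is_empty() {
            Err("empty name".to_string())
        } else {
            Ok(())
        }
    }

    // Record each failure and keep going, so the caller sees one status
    // entry per dataset rather than only the first error.
    fn ensure_all(datasets: &[Dataset]) -> Vec<Status> {
        datasets
            .iter()
            .map(|d| Status { name: d.name.clone(), err: ensure_one(d).err() })
            .collect()
    }
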
@@ -765,6 +986,77 @@ impl StorageManager {
         }
     }
 
+    // Ensures a dataset exists within a zpool, according to `config`.
+    async fn ensure_dataset(
+        &mut self,
+        config: &DatasetConfig,
+    ) -> Result<(), Error> {
+        info!(self.log, "ensure_dataset"; "config" => ?config);
+
+        // We can only place datasets within managed disks.
+        // If a disk is attached to this sled, but not a part of the Control
+        // Plane, it is treated as "not found" for dataset placement.
+        if !self
+            .resources
+            .disks()
+            .iter_managed()
+            .any(|(_, disk)| disk.zpool_name() == config.name.pool())
+        {
+            return Err(Error::ZpoolNotFound(format!(
+                "{}",
+                config.name.pool(),
+            )));
+        }
+
+        let zoned = config.name.dataset().zoned();
+        let mountpoint_path = if zoned {
+            Utf8PathBuf::from("/data")
+        } else {
+            config.name.pool().dataset_mountpoint(
+                &Utf8PathBuf::from("/"),
+                &config.name.dataset().to_string(),
+            )
+        };
+        let mountpoint = Mountpoint::Path(mountpoint_path);
+
+        let fs_name = &config.name.full_name();
+        let do_format = true;
+
+        // The "crypt" dataset needs these details, but should already exist
+        // by the time we're creating datasets inside.
+        let encryption_details = None;
+        let size_details = Some(illumos_utils::zfs::SizeDetails {
+            quota: config.quota,
+            reservation: config.reservation,
+            compression: config.compression,
+        });
+        Zfs::ensure_filesystem(
+            fs_name,
+            mountpoint,
+            zoned,
+            do_format,
+            encryption_details,
+            size_details,
+            None,
+        )?;
+        // Ensure the dataset has a usable UUID.
+        if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") {
+            if let Ok(id) = id_str.parse::<DatasetUuid>() {
+                if id != config.id {
+                    return Err(Error::UuidMismatch {
+                        name: Box::new(config.name.clone()),
+                        old: id.into_untyped_uuid(),
+                        new: config.id.into_untyped_uuid(),
+                    });
+                }
+                return Ok(());
+            }
+        }
+        Zfs::set_oxide_value(&fs_name, "uuid", &config.id.to_string())?;
+
+        Ok(())
+    }
+
     // Attempts to add a dataset within a zpool, according to `request`.
     async fn add_dataset(
         &mut self,
@@ -824,16 +1116,19 @@ impl StorageManager {
 /// systems.
 #[cfg(all(test, target_os = "illumos"))]
 mod tests {
-    use crate::dataset::DatasetType;
     use crate::disk::RawSyntheticDisk;
     use crate::manager_test_harness::StorageManagerTestHarness;
     use super::*;
 
     use camino_tempfile::tempdir_in;
+    use omicron_common::api::external::Generation;
+    use omicron_common::disk::CompressionAlgorithm;
+    use omicron_common::disk::DatasetKind;
     use omicron_common::disk::DiskManagementError;
     use omicron_common::ledger;
     use omicron_test_utils::dev::test_setup_log;
     use sled_hardware::DiskFirmware;
+    use std::collections::BTreeMap;
     use std::sync::atomic::Ordering;
     use uuid::Uuid;
 
@@ -1299,7 +1594,7 @@ mod tests {
         let dataset_id = Uuid::new_v4();
         let zpool_name = ZpoolName::new_external(config.disks[0].pool_id);
         let dataset_name =
-            DatasetName::new(zpool_name.clone(), DatasetType::Crucible);
+            DatasetName::new(zpool_name.clone(), DatasetKind::Crucible);
         harness
             .handle()
             .upsert_filesystem(dataset_id, dataset_name)
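
[Editorial note: the UUID logic at the end of `ensure_dataset` is a
read-or-stamp identity check: a matching `uuid` property makes the call a
no-op, a conflicting one is an error, and a missing one is claimed. Sketched
below with hypothetical property helpers standing in for
`Zfs::get_oxide_value`/`Zfs::set_oxide_value`.]

    use uuid::Uuid;

    // Hypothetical stand-ins for reading/writing a ZFS user property.
    fn get_property(_dataset: &str) -> Option<String> { None }
    fn set_property(_dataset: &str, _value: &str) { /* zfs set ... */ }

    enum StampError {
        // The dataset already belongs to a different config.
        Mismatch { old: Uuid, new: Uuid },
    }

    // Idempotently assign `expected` as the dataset's identity.
    fn stamp_uuid(dataset: &str, expected: Uuid) -> Result<(), StampError> {
        if let Some(s) = get_property(dataset) {
            if let Ok(found) = s.parse::<Uuid>() {
                if found != expected {
                    return Err(StampError::Mismatch {
                        old: found,
                        new: expected,
                    });
                }
                // Already stamped with the expected identity: nothing to do.
                return Ok(());
            }
        }
        // Unset (or unparseable) property: claim the dataset.
        set_property(dataset, &expected.to_string());
        Ok(())
    }
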
@@ -1309,6 +1604,86 @@ mod tests {
         harness.cleanup().await;
         logctx.cleanup_successful();
     }
+
+    #[tokio::test]
+    async fn ensure_datasets() {
+        illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst);
+        let logctx = test_setup_log("ensure_datasets");
+        let mut harness = StorageManagerTestHarness::new(&logctx.log).await;
+
+        // Test setup: Add a U.2 and M.2, adopt them into the "control plane"
+        // for usage.
+        harness.handle().key_manager_ready().await;
+        let raw_disks =
+            harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await;
+        let config = harness.make_config(1, &raw_disks);
+        let result = harness
+            .handle()
+            .omicron_physical_disks_ensure(config.clone())
+            .await
+            .expect("Ensuring disks should work after key manager is ready");
+        assert!(!result.has_error(), "{:?}", result);
+
+        // Create a dataset on the newly formatted U.2
+        let id = DatasetUuid::new_v4();
+        let zpool_name = ZpoolName::new_external(config.disks[0].pool_id);
+        let name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible);
+        let datasets = BTreeMap::from([(
+            id,
+            DatasetConfig {
+                id,
+                name,
+                compression: CompressionAlgorithm::Off,
+                quota: None,
+                reservation: None,
+            },
+        )]);
+        // "Generation = 1" is reserved as "no requests seen yet", so we jump
+        // past it.
+        let generation = Generation::new().next();
+        let mut config = DatasetsConfig { generation, datasets };
+
+        let status =
+            harness.handle().datasets_ensure(config.clone()).await.unwrap();
+        assert!(!status.has_error());
+
+        // List datasets, expect to see what we just created
+        let observed_config =
+            harness.handle().datasets_config_list().await.unwrap();
+        assert_eq!(config, observed_config);
+
+        // Calling "datasets_ensure" with the same input should succeed.
+        let status =
+            harness.handle().datasets_ensure(config.clone()).await.unwrap();
+        assert!(!status.has_error());
+
+        let current_config_generation = config.generation;
+        let next_config_generation = config.generation.next();
+
+        // Calling "datasets_ensure" with an old generation should fail
+        config.generation = Generation::new();
+        let err =
+            harness.handle().datasets_ensure(config.clone()).await.unwrap_err();
+        assert!(matches!(err, Error::DatasetConfigurationOutdated { .. }));
+
+        // Similarly, calling it with a different input and the same
+        // generation number should fail.
+        config.generation = current_config_generation;
+        config.datasets.values_mut().next().unwrap().reservation = Some(1024);
+        let err =
+            harness.handle().datasets_ensure(config.clone()).await.unwrap_err();
+        assert!(matches!(err, Error::DatasetConfigurationChanged { .. }));
+
+        // If we bump the generation number while making a change, updated
+        // configs will work.
+        config.generation = next_config_generation;
+        let status =
+            harness.handle().datasets_ensure(config.clone()).await.unwrap();
+        assert!(!status.has_error());
+
+        harness.cleanup().await;
+        logctx.cleanup_successful();
+    }
 }
 
 #[cfg(test)]
@@ -1322,4 +1697,13 @@ mod test {
             &serde_json::to_string_pretty(&schema).unwrap(),
         );
     }
+
+    #[test]
+    fn test_datasets_schema() {
+        let schema = schemars::schema_for!(DatasetsConfig);
+        expectorate::assert_contents(
+            "../schema/omicron-datasets.json",
+            &serde_json::to_string_pretty(&schema).unwrap(),
+        );
+    }
 }
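
[Editorial note: the `ensure_datasets` test doubles as documentation of the
caller's contract: fetch the ledgered config, mutate it, and resend the whole
desired set with a strictly newer generation. A hedged sketch of what a
client of `StorageHandle` might do under that contract; this is not code from
the patch, and it assumes the types defined above.]

    // Sketch only: applies one configuration change through the handle API
    // added in this patch. A first-time caller would need to handle
    // Error::LedgerNotFound instead of propagating it with `?`.
    async fn apply_update(
        handle: &StorageHandle,
        mutate: impl FnOnce(&mut DatasetsConfig),
    ) -> Result<DatasetsManagementResult, Error> {
        // Start from the last ledgered config so we never roll back state.
        let mut config = handle.datasets_config_list().await?;
        mutate(&mut config);
        // Changed contents must ship with a bumped generation, or the sled
        // agent rejects the request as DatasetConfigurationChanged.
        config.generation = config.generation.next();
        handle.datasets_ensure(config).await
    }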