diff --git a/.config/nextest.toml b/.config/nextest.toml index ba07186c8a..79774e3658 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -18,3 +18,15 @@ fail-fast = false [script.crdb-seed] command = 'cargo run -p crdb-seed' + +# The ClickHouse cluster tests currently rely on a hard-coded set of ports for +# the nodes in the cluster. We would like to relax this in the future, at which +# point this test-group configuration can be removed or at least loosened to +# support testing in parallel. For now, enforce strict serialization for all +# tests with `replicated` in the name. +[test-groups] +clickhouse-cluster = { max-threads = 1 } + +[[profile.default.overrides]] +filter = 'package(oximeter-db) and test(replicated)' +test-group = 'clickhouse-cluster' diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index d8de288239..6fda8bb8d7 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -51,6 +51,7 @@ export RUSTFLAGS="-D warnings" export RUSTDOCFLAGS="-D warnings" export TMPDIR=$TEST_TMPDIR export RUST_BACKTRACE=1 +export CARGO_INCREMENTAL=0 ptime -m cargo test --locked --verbose --no-run # diff --git a/.github/buildomat/jobs/ci-tools.sh b/.github/buildomat/jobs/ci-tools.sh index 702561a951..07a63af30c 100755 --- a/.github/buildomat/jobs/ci-tools.sh +++ b/.github/buildomat/jobs/ci-tools.sh @@ -28,6 +28,7 @@ banner end-to-end-tests # export CARGO_PROFILE_DEV_DEBUG=1 export CARGO_PROFILE_TEST_DEBUG=1 +export CARGO_INCREMENTAL=0 ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \ --message-format json-render-diagnostics >/tmp/output.end-to-end.json diff --git a/.github/buildomat/jobs/clippy.sh b/.github/buildomat/jobs/clippy.sh index 5fd31adb76..abbcda2150 100755 --- a/.github/buildomat/jobs/clippy.sh +++ b/.github/buildomat/jobs/clippy.sh @@ -28,5 +28,6 @@ banner prerequisites ptime -m bash ./tools/install_builder_prerequisites.sh -y banner clippy +export 
CARGO_INCREMENTAL=0 ptime -m cargo xtask clippy ptime -m cargo doc diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 86937d1908..0605ab6883 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -44,6 +44,7 @@ ptime -m ./tools/install_builder_prerequisites.sh -yp ptime -m ./tools/ci_download_softnpu_machinery # Build the test target +export CARGO_INCREMENTAL=0 ptime -m cargo run --locked --release --bin omicron-package -- \ -t test target create -i standard -m non-gimlet -s softnpu -r single-sled ptime -m cargo run --locked --release --bin omicron-package -- \ diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1b3a892338..f2581845d9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,6 +19,8 @@ jobs: check-omicron-deployment: runs-on: ${{ matrix.os }} + env: + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -49,6 +51,8 @@ jobs: # of our code. clippy-lint: runs-on: ubuntu-22.04 + env: + CARGO_INCREMENTAL: 0 steps: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo @@ -75,6 +79,8 @@ jobs: # the separate "rustdocs" repo. 
build-docs: runs-on: ubuntu-22.04 + env: + CARGO_INCREMENTAL: 0 steps: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo diff --git a/Cargo.lock b/Cargo.lock index 009ac21d79..58d0653728 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,21 +52,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "aes-gcm-siv" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae0784134ba9375416d469ec31e7c5f9fa94405049cf08c5ce5b4698be673e0d" -dependencies = [ - "aead", - "aes", - "cipher", - "ctr", - "polyval", - "subtle", - "zeroize", -] - [[package]] name = "ahash" version = "0.8.3" @@ -87,12 +72,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -190,16 +169,6 @@ dependencies = [ "syn 2.0.32", ] -[[package]] -name = "api_identity" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.32", -] - [[package]] name = "approx" version = "0.5.1" @@ -248,12 +217,6 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" -[[package]] -name = "ascii" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" - [[package]] name = "ascii-canvas" version = "3.0.0" @@ -296,17 +259,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-recursion" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.32", -] - [[package]] name = "async-stream" version = "0.3.5" @@ -495,7 +447,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "bhyve_api_sys", "libc", @@ -505,21 +457,12 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "libc", "strum", ] -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - [[package]] name = "bindgen" version = "0.65.1" @@ -558,12 +501,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" -[[package]] -name = "bit_field" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" - [[package]] name = "bitfield" version = "0.14.0" @@ -585,26 +522,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bitstruct" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b10c3912af09af44ea1dafe307edb5ed374b2a32658eb610e372270c9017b4" -dependencies = [ - 
"bitstruct_derive", -] - -[[package]] -name = "bitstruct_derive" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35fd19022c2b750d14eb9724c204d08ab7544570105b3b466d8a9f2f3feded27" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "bitvec" version = "1.0.1" @@ -678,7 +595,7 @@ dependencies = [ "derive_more", "hex", "hkdf", - "omicron-common 0.1.0", + "omicron-common", "omicron-rpaths", "omicron-test-utils", "omicron-workspace-hack", @@ -704,10 +621,8 @@ dependencies = [ name = "bootstrap-agent-client" version = "0.1.0" dependencies = [ - "async-trait", - "chrono", "ipnetwork", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "progenitor", "regress", @@ -909,7 +824,6 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ - "jobserver", "libc", ] @@ -1114,26 +1028,6 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" -[[package]] -name = "const_format" -version = "0.2.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c990efc7a285731f9a4378d81aff2f0e85a2c8781a05ef0f8baa8dac54d0ff48" -dependencies = [ - "const_format_proc_macros", -] - -[[package]] -name = "const_format_proc_macros" -version = "0.2.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e026b6ce194a874cb9cf32cd5772d1ef9767cc8fcb5765948d74f37a9d8b2bf6" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - [[package]] name = "constant_time_eq" version = "0.2.6" @@ -1193,18 +1087,6 @@ dependencies = [ "libc", ] -[[package]] -name = "cpuid_profile_config" -version = "0.0.0" -source = 
"git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "propolis", - "serde", - "serde_derive", - "thiserror", - "toml 0.7.8", -] - [[package]] name = "crc" version = "3.0.1" @@ -1385,55 +1267,10 @@ dependencies = [ "winapi", ] -[[package]] -name = "crucible" -version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" -dependencies = [ - "aes-gcm-siv", - "anyhow", - "async-recursion", - "async-trait", - "base64 0.21.5", - "bytes", - "chrono", - "crucible-client-types", - "crucible-common", - "crucible-protocol", - "crucible-workspace-hack", - "dropshot", - "futures", - "futures-core", - "itertools 0.11.0", - "libc", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter-producer 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "rand 0.8.5", - "rand_chacha 0.3.1", - "reqwest", - "ringbuffer", - "schemars", - "serde", - "serde_json", - "slog", - "slog-async", - "slog-dtrace", - "slog-term", - "tokio", - "tokio-rustls", - "tokio-util", - "toml 0.8.8", - "tracing", - "usdt", - "uuid", - "version_check", -] - [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "anyhow", "chrono", @@ -1446,51 +1283,10 @@ dependencies = [ "serde_json", ] -[[package]] -name = "crucible-client-types" -version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" -dependencies = [ - "base64 0.21.5", - "crucible-workspace-hack", - "schemars", - "serde", - "serde_json", - "uuid", -] - -[[package]] -name = "crucible-common" -version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" -dependencies = [ - "anyhow", - "atty", - "crucible-workspace-hack", - "nix 0.26.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rusqlite", - "rustls-pemfile", - "schemars", - "serde", - "serde_json", - "slog", - "slog-async", - "slog-bunyan", - "slog-dtrace", - "slog-term", - "tempfile", - "thiserror", - "tokio-rustls", - "toml 0.8.8", - "twox-hash", - "uuid", - "vergen", -] - [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "anyhow", "chrono", @@ -1504,27 +1300,10 @@ dependencies = [ "uuid", ] -[[package]] -name = "crucible-protocol" -version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" -dependencies = [ - "anyhow", - "bincode", - "bytes", - "crucible-common", - "crucible-workspace-hack", - "num_enum 0.7.0", - "schemars", - "serde", - "tokio-util", - "uuid", -] - [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = 
"git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "crucible-workspace-hack", "libc", @@ -1737,7 +1516,7 @@ version = "0.1.0" dependencies = [ "anyhow", "either", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "omicron-zone-package", "progenitor", @@ -1972,15 +1751,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f77af9e75578c1ab34f5f04545a8b05be0c36fbd7a9bb3cf2d2a971e435fdbb9" -[[package]] -name = "dladm" -version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "libc", - "strum", -] - [[package]] name = "dlpi" version = "0.2.0" @@ -1988,7 +1758,7 @@ source = "git+https://github.com/oxidecomputer/dlpi-sys#1d587ea98cf2d36f1b1624b0 dependencies = [ "libc", "libdlpi-sys", - "num_enum 0.5.11", + "num_enum", "pretty-hex 0.2.1", "thiserror", "tokio", @@ -2002,14 +1772,14 @@ dependencies = [ "camino", "chrono", "clap 4.4.3", - "dns-service-client 0.1.0", + "dns-service-client", "dropshot", "expectorate", "http", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "pretty-hex 0.3.0", "schemars", "serde", @@ -2043,25 +1813,7 @@ dependencies = [ "reqwest", "schemars", "serde", - "serde_json", "slog", - "uuid", -] - -[[package]] -name = "dns-service-client" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "chrono", - "http", - "progenitor", - "reqwest", - "schemars", - "serde", - "serde_json", - "slog", - "uuid", ] [[package]] @@ -2115,7 +1867,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" +source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" dependencies = [ "async-stream", "async-trait", @@ -2132,7 +1884,7 @@ dependencies = [ "hyper", "indexmap 2.1.0", "multer", - "openapiv3", + "openapiv3 2.0.0-rc.1", "paste", "percent-encoding", "proc-macro2", @@ -2151,7 +1903,7 @@ dependencies = [ "slog-term", "tokio", "tokio-rustls", - "toml 0.7.8", + "toml 0.8.8", "usdt", "uuid", "version_check", @@ -2161,7 +1913,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#fa728d07970824fd5f3bd57a3d4dc0fdbea09bfd" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" dependencies = [ "proc-macro2", "quote", @@ -2275,9 +2027,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.21.5", - "camino", "chrono", - "futures", "http", "omicron-sled-agent", "omicron-test-utils", @@ -2287,11 +2037,9 @@ dependencies = [ "reqwest", "russh", "russh-keys", - "serde_json", "tokio", "toml 0.8.8", "trust-dns-resolver", - "uuid", ] [[package]] @@ -2312,19 +2060,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - [[package]] name = "env_logger" version = "0.10.0" @@ -2342,15 +2077,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" -[[package]] -name = "erased-serde" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "837c0466252947ada828b975e12daf82e18bb5444e4df87be6038d4469e2a3d2" -dependencies = [ - "serde", -] - [[package]] name = "errno" version = "0.3.2" @@ 
-2389,12 +2115,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "fastrand" version = "2.0.0" @@ -2699,9 +2419,7 @@ dependencies = [ "futures", "gateway-client", "gateway-messages", - "hex", - "libc", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "reqwest", "serde", @@ -2763,7 +2481,7 @@ dependencies = [ "hubpack 0.1.2", "hubtools", "lru-cache", - "nix 0.26.2 (git+https://github.com/jgallagher/nix?branch=r0.26-illumos)", + "nix", "once_cell", "paste", "serde", @@ -2856,19 +2574,6 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" -[[package]] -name = "git2" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b989d6a7ca95a362cf2cfc5ad688b3a467be1f87e480b8dad07fee8c79b0044" -dependencies = [ - "bitflags 1.3.2", - "libc", - "libgit2-sys", - "log", - "url", -] - [[package]] name = "glob" version = "0.3.1" @@ -2953,19 +2658,6 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "hashlink" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f" -dependencies = [ - "hashbrown 0.14.2", -] [[package]] name = "headers" @@ -3349,7 +3041,7 @@ source = "git+https://github.com/oxidecomputer/illumos-devinfo?branch=main#4323b dependencies = [ 
"anyhow", "libc", - "num_enum 0.5.11", + "num_enum", ] [[package]] @@ -3373,7 +3065,7 @@ dependencies = [ "libc", "macaddr", "mockall", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "opte-ioctl", "oxide-vpc", @@ -3482,17 +3174,15 @@ dependencies = [ "ipcc-key-value", "itertools 0.11.0", "libc", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "once_cell", "partial-io", - "progenitor-client", "proptest", "reqwest", - "serde", "sha2", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-envlogger", @@ -3503,7 +3193,6 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "toml 0.8.8", "tufaceous-lib", "update-engine", "uuid", @@ -3532,23 +3221,21 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "camino", "clap 4.4.3", "dropshot", "expectorate", "hyper", "installinator-common", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "schemars", "serde", "serde_json", "slog", "subprocess", - "tokio", "uuid", ] @@ -3559,7 +3246,6 @@ dependencies = [ "anyhow", "camino", "illumos-utils", - "omicron-common 0.1.0", "omicron-workspace-hack", "schemars", "serde", @@ -3586,12 +3272,12 @@ dependencies = [ "assert_matches", "chrono", "dns-server", - "dns-service-client 0.1.0", + "dns-service-client", "dropshot", "expectorate", "futures", "hyper", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "progenitor", @@ -3603,26 +3289,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-proto", - "trust-dns-resolver", - "uuid", -] - -[[package]] -name = "internal-dns" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "anyhow", - "chrono", - "dns-service-client 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "futures", - 
"hyper", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "reqwest", - "slog", - "thiserror", - "trust-dns-proto", "trust-dns-resolver", "uuid", ] @@ -3634,8 +3300,8 @@ dependencies = [ "anyhow", "clap 4.4.3", "dropshot", - "internal-dns 0.1.0", - "omicron-common 0.1.0", + "internal-dns", + "omicron-common", "omicron-workspace-hack", "slog", "tokio", @@ -3659,7 +3325,7 @@ version = "0.1.0" dependencies = [ "ciborium", "libc", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "proptest", "serde", @@ -3737,15 +3403,6 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" -[[package]] -name = "jobserver" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.64" @@ -3770,7 +3427,7 @@ version = "0.1.0" dependencies = [ "async-trait", "hkdf", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "secrecy", "sha3", @@ -3870,18 +3527,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b024e211b1b371da58cd69e4fb8fa4ed16915edcc0e2e1fb04ac4bad61959f25" -[[package]] -name = "libgit2-sys" -version = "0.15.2+1.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a80df2e11fb4a61f4ba2ab42dbe7f74468da143f1a75c74e11dee7c813f694fa" -dependencies = [ - "cc", - "libc", - "libz-sys", - "pkg-config", -] - [[package]] name = "libloading" version = "0.7.4" @@ -3908,7 +3553,7 @@ dependencies = [ "colored", "dlpi", "libc", - "num_enum 0.5.11", + "num_enum", "nvpair", "nvpair-sys", "rusty-doors", @@ -3917,16 +3562,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "libsqlite3-sys" -version = "0.26.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" -dependencies = [ - "pkg-config", - "vcpkg", -] - [[package]] name = "libsw" version = "3.3.0" @@ -3958,18 +3593,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "libz-sys" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linear-map" version = "1.2.0" @@ -4030,7 +3653,7 @@ dependencies = [ "const-oid", "crc-any", "der", - "env_logger 0.10.0", + "env_logger", "hex", "log", "lpc55_areas", @@ -4139,7 +3762,7 @@ version = "0.1.0" dependencies = [ "anyhow", "either", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "omicron-zone-package", "progenitor", @@ -4302,8 +3925,8 @@ dependencies = [ "chrono", "futures", "ipnetwork", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-workspace-hack", "progenitor", "regress", @@ -4315,26 +3938,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "nexus-client" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "chrono", - "futures", - "ipnetwork", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "omicron-passwords 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "progenitor", - "regress", - "reqwest", - "schemars", - "serde", - "serde_json", - "slog", - "uuid", -] - [[package]] name = "nexus-db-model" version = "0.1.0" @@ -4351,11 +3954,11 @@ dependencies = [ "nexus-defaults", "nexus-types", "omicron-certificates", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-rpaths", "omicron-workspace-hack", - 
"parse-display 0.8.2", + "parse-display", "pq-sys", "rand 0.8.5", "ref-cast", @@ -4366,7 +3969,6 @@ dependencies = [ "sled-agent-client", "steno", "strum", - "thiserror", "uuid", ] @@ -4392,44 +3994,36 @@ dependencies = [ "futures", "gateway-client", "headers", - "hex", "http", "hyper", "hyper-rustls", - "internal-dns 0.1.0", + "internal-dns", "ipnetwork", "itertools 0.11.0", "lazy_static", "macaddr", "newtype_derive", "nexus-db-model", - "nexus-defaults", "nexus-inventory", "nexus-test-utils", "nexus-types", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-rpaths", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", - "once_cell", - "openapiv3", + "openapiv3 1.0.3", "openssl", - "openssl-probe", - "openssl-sys", "oso", - "oximeter 0.1.0", + "oximeter", "paste", "pem 1.1.1", "petgraph", "pq-sys", - "rand 0.8.5", "rcgen", "ref-cast", "regex", - "reqwest", - "ring 0.16.20", "rustls", "samael", "serde", @@ -4446,8 +4040,6 @@ dependencies = [ "term", "thiserror", "tokio", - "tokio-postgres", - "toml 0.8.8", "usdt", "uuid", ] @@ -4458,7 +4050,7 @@ version = "0.1.0" dependencies = [ "ipnetwork", "lazy_static", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "rand 0.8.5", "serde_json", @@ -4471,7 +4063,6 @@ dependencies = [ "anyhow", "chrono", "expectorate", - "futures", "gateway-client", "gateway-messages", "gateway-test-utils", @@ -4489,10 +4080,8 @@ name = "nexus-test-interface" version = "0.1.0" dependencies = [ "async-trait", - "dropshot", - "internal-dns 0.1.0", "nexus-types", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "slog", "uuid", @@ -4509,33 +4098,29 @@ dependencies = [ "chrono", "crucible-agent-client", "dns-server", - "dns-service-client 0.1.0", + "dns-service-client", "dropshot", "gateway-messages", "gateway-test-utils", "headers", "http", "hyper", - "internal-dns 0.1.0", + "internal-dns", "nexus-db-queries", 
"nexus-test-interface", "nexus-types", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", - "oximeter 0.1.0", - "oximeter-client", + "oximeter", "oximeter-collector", - "oximeter-producer 0.1.0", - "parse-display 0.8.2", + "oximeter-producer", "serde", "serde_json", "serde_urlencoded", "slog", - "tempfile", - "trust-dns-proto", "trust-dns-resolver", "uuid", ] @@ -4545,7 +4130,6 @@ name = "nexus-test-utils-macros" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "proc-macro2", "quote", "syn 2.0.32", ] @@ -4555,20 +4139,17 @@ name = "nexus-types" version = "0.1.0" dependencies = [ "anyhow", - "api_identity 0.1.0", + "api_identity", "base64 0.21.5", "chrono", - "dns-service-client 0.1.0", + "dns-service-client", "futures", "gateway-client", - "newtype_derive", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-workspace-hack", "openssl", - "openssl-probe", - "openssl-sys", - "parse-display 0.8.2", + "parse-display", "schemars", "serde", "serde_json", @@ -4586,20 +4167,6 @@ dependencies = [ "smallvec 1.11.0", ] -[[package]] -name = "nix" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" -dependencies = [ - "bitflags 1.3.2", - "cfg-if 1.0.0", - "libc", - "memoffset 0.7.1", - "pin-utils", - "static_assertions", -] - [[package]] name = "nix" version = "0.26.2" @@ -4765,16 +4332,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" dependencies = [ - "num_enum_derive 0.5.11", -] - -[[package]] -name = "num_enum" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"70bf6736f74634d299d00086f02986875b3c2d924781a6a2cb6c201e73da0ceb" -dependencies = [ - "num_enum_derive 0.7.0", + "num_enum_derive", ] [[package]] @@ -4789,18 +4347,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "num_enum_derive" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ea360eafe1022f7cc56cd7b869ed57330fb2453d0c7831d99b74c65d2f5597" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.32", -] - [[package]] name = "num_threads" version = "0.1.6" @@ -4869,7 +4415,7 @@ version = "0.1.0" dependencies = [ "display-error-chain", "foreign-types 0.3.2", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "openssl", @@ -4883,7 +4429,7 @@ name = "omicron-common" version = "0.1.0" dependencies = [ "anyhow", - "api_identity 0.1.0", + "api_identity", "async-trait", "backoff", "camino", @@ -4894,23 +4440,20 @@ dependencies = [ "futures", "hex", "http", - "hyper", "ipnetwork", "lazy_static", "libc", "macaddr", "omicron-workspace-hack", - "parse-display 0.8.2", + "parse-display", "progenitor", "proptest", "rand 0.8.5", "regress", "reqwest", - "ring 0.16.20", "schemars", "semver 1.0.20", "serde", - "serde_derive", "serde_human_bytes", "serde_json", "serde_urlencoded", @@ -4925,46 +4468,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "omicron-common" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "anyhow", - "api_identity 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "async-trait", - "backoff", - "camino", - "chrono", - "dropshot", - "futures", - "hex", - "http", - "hyper", - "ipnetwork", - "lazy_static", - "macaddr", - "parse-display 0.7.0", - "progenitor", - "rand 0.8.5", - "reqwest", - "ring 0.16.20", - "schemars", - "semver 1.0.20", - "serde", - "serde_derive", - "serde_human_bytes", - "serde_json", - 
"serde_with", - "slog", - "strum", - "thiserror", - "tokio", - "tokio-postgres", - "toml 0.7.8", - "uuid", -] - [[package]] name = "omicron-deploy" version = "0.1.0" @@ -4977,7 +4480,6 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.8.8", ] [[package]] @@ -4996,10 +4498,9 @@ dependencies = [ "libc", "nexus-test-interface", "nexus-test-utils", - "omicron-common 0.1.0", + "omicron-common", "omicron-nexus", "omicron-rpaths", - "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", "openssl", @@ -5019,9 +4520,7 @@ name = "omicron-gateway" version = "0.1.0" dependencies = [ "anyhow", - "async-trait", "base64 0.21.5", - "ciborium", "clap 4.4.3", "dropshot", "expectorate", @@ -5034,15 +4533,14 @@ dependencies = [ "hyper", "illumos-utils", "ipcc-key-value", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "schemars", "serde", - "serde_human_bytes", "serde_json", "signal-hook", "signal-hook-tokio", @@ -5054,7 +4552,6 @@ dependencies = [ "tokio", "tokio-stream", "tokio-tungstenite 0.18.0", - "tokio-util", "toml 0.8.8", "uuid", ] @@ -5068,18 +4565,16 @@ dependencies = [ "async-bb8-diesel", "async-trait", "base64 0.21.5", - "bb8", "camino", "cancel-safe-futures", "chrono", "clap 4.4.3", - "cookie", "criterion", "crucible-agent-client", "crucible-pantry-client", "diesel", "dns-server", - "dns-service-client 0.1.0", + "dns-service-client", "dpd-client", "dropshot", "expectorate", @@ -5095,14 +4590,13 @@ dependencies = [ "hubtools", "hyper", "hyper-rustls", - "internal-dns 0.1.0", + "internal-dns", "ipnetwork", "itertools 0.11.0", "lazy_static", "macaddr", "mg-admin-client", "mime_guess", - "newtype_derive", "nexus-db-model", "nexus-db-queries", "nexus-defaults", @@ -5112,26 +4606,23 @@ dependencies = [ "nexus-test-utils-macros", "nexus-types", "num-integer", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + 
"omicron-passwords", "omicron-rpaths", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "openssl", - "openssl-probe", - "openssl-sys", - "oso", "oxide-client", - "oximeter 0.1.0", + "oximeter", "oximeter-client", "oximeter-db", "oximeter-instruments", - "oximeter-producer 0.1.0", - "parse-display 0.8.2", + "oximeter-producer", + "parse-display", "paste", "pem 1.1.1", "petgraph", @@ -5168,10 +4659,8 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.8.8", "tough", "trust-dns-resolver", - "usdt", "uuid", ] @@ -5191,15 +4680,15 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "humantime", - "internal-dns 0.1.0", + "internal-dns", "ipnetwork", - "nexus-client 0.1.0", + "nexus-client", "nexus-db-model", "nexus-db-queries", "nexus-test-utils", "nexus-test-utils-macros", "nexus-types", - "omicron-common 0.1.0", + "omicron-common", "omicron-nexus", "omicron-rpaths", "omicron-test-utils", @@ -5230,7 +4719,6 @@ dependencies = [ "hex", "illumos-utils", "indicatif", - "omicron-common 0.1.0", "omicron-workspace-hack", "omicron-zone-package", "petgraph", @@ -5239,7 +4727,6 @@ dependencies = [ "ring 0.16.20", "semver 1.0.20", "serde", - "serde_derive", "sled-hardware", "slog", "slog-async", @@ -5248,7 +4735,6 @@ dependencies = [ "strum", "swrite", "tar", - "tempfile", "thiserror", "tokio", "toml 0.8.8", @@ -5271,19 +4757,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "omicron-passwords" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "argon2", - "rand 0.8.5", - "schemars", - "serde", - "serde_with", - "thiserror", -] - [[package]] name = "omicron-rpaths" version = "0.1.0" @@ -5299,7 +4772,6 @@ dependencies = [ "assert_matches", "async-trait", "base64 0.21.5", - "bincode", "bootstore", "bootstrap-agent-client", "bytes", @@ -5310,11 +4782,10 @@ 
dependencies = [ "chrono", "clap 4.4.3", "crucible-agent-client", - "crucible-client-types", "ddm-admin-client", "derive_more", "dns-server", - "dns-service-client 0.1.0", + "dns-service-client", "dpd-client", "dropshot", "expectorate", @@ -5326,28 +4797,27 @@ dependencies = [ "hyper", "hyper-staticfile", "illumos-utils", - "internal-dns 0.1.0", + "internal-dns", "ipnetwork", "itertools 0.11.0", "key-manager", "libc", "macaddr", - "nexus-client 0.1.0", - "omicron-common 0.1.0", + "mg-admin-client", + "nexus-client", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "once_cell", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "opte-ioctl", - "oximeter 0.1.0", + "oximeter", "oximeter-instruments", - "oximeter-producer 0.1.0", - "percent-encoding", + "oximeter-producer", "pretty_assertions", - "progenitor", "propolis-client", - "propolis-server", + "propolis-mock-server", "rand 0.8.5", "rcgen", "reqwest", @@ -5359,6 +4829,7 @@ dependencies = [ "sha3", "sled-agent-client", "sled-hardware", + "sled-storage", "slog", "slog-async", "slog-dtrace", @@ -5371,7 +4842,6 @@ dependencies = [ "thiserror", "tofino", "tokio", - "tokio-tungstenite 0.18.0", "toml 0.8.8", "usdt", "uuid", @@ -5390,12 +4860,11 @@ dependencies = [ "dropshot", "expectorate", "filetime", - "futures", "headers", "hex", "http", "libc", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "pem 1.1.1", "rcgen", @@ -5422,12 +4891,10 @@ dependencies = [ "bit-vec", "bitflags 1.3.2", "bitflags 2.4.0", - "bitvec", "bstr 0.2.17", "bstr 1.6.0", "byteorder", "bytes", - "cc", "chrono", "cipher", "clap 4.4.3", @@ -5453,7 +4920,6 @@ dependencies = [ "generic-array", "getrandom 0.2.10", "hashbrown 0.13.2", - "hashbrown 0.14.2", "hex", "hyper", "hyper-rustls", @@ -5473,16 +4939,16 @@ dependencies = [ "num-iter", "num-traits", "once_cell", - "openapiv3", + "openapiv3 2.0.0-rc.1", "petgraph", "postgres-types", "ppv-lite86", "predicates 3.0.4", + "proc-macro2", "rand 0.8.5", 
"rand_chacha 0.3.1", "regex", "regex-automata 0.4.3", - "regex-syntax 0.6.29", "regex-syntax 0.8.2", "reqwest", "ring 0.16.20", @@ -5513,7 +4979,6 @@ dependencies = [ "trust-dns-proto", "unicode-bidi", "unicode-normalization", - "unicode-xid", "usdt", "uuid", "yasna", @@ -5571,7 +5036,7 @@ dependencies = [ "heck 0.4.1", "indexmap 2.1.0", "lazy_static", - "openapiv3", + "openapiv3 1.0.3", "regex", ] @@ -5586,6 +5051,17 @@ dependencies = [ "serde_json", ] +[[package]] +name = "openapiv3" +version = "2.0.0-rc.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25316406f0191559189c56d99731b63130775de7284d98df5e976ce67882ca8a" +dependencies = [ + "indexmap 2.1.0", + "serde", + "serde_json", +] + [[package]] name = "openssl" version = "0.10.57" @@ -5751,35 +5227,19 @@ dependencies = [ name = "oximeter" version = "0.1.0" dependencies = [ - "approx", - "bytes", - "chrono", - "num", - "omicron-common 0.1.0", - "omicron-workspace-hack", - "oximeter-macro-impl 0.1.0", - "rstest", - "schemars", - "serde", - "strum", - "thiserror", - "trybuild", - "uuid", -] - -[[package]] -name = "oximeter" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ + "approx", "bytes", "chrono", - "num-traits", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter-macro-impl 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "num", + "omicron-common", + "omicron-workspace-hack", + "oximeter-macro-impl", + "rstest", "schemars", "serde", + "strum", "thiserror", + "trybuild", "uuid", ] @@ -5789,7 +5249,7 @@ version = "0.1.0" dependencies = [ "chrono", "futures", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "progenitor", "reqwest", @@ -5803,19 +5263,22 @@ name = "oximeter-collector" version = "0.1.0" dependencies = [ "anyhow", + "camino", + "chrono", "clap 4.4.3", "dropshot", "expectorate", 
"futures", - "internal-dns 0.1.0", - "nexus-client 0.1.0", + "hyper", + "internal-dns", + "nexus-client", "nexus-types", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3", - "oximeter 0.1.0", + "openapiv3 1.0.3", + "oximeter", "oximeter-client", "oximeter-db", "rand 0.8.5", @@ -5827,6 +5290,7 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-term", + "strum", "subprocess", "thiserror", "tokio", @@ -5842,15 +5306,17 @@ dependencies = [ "async-trait", "bcs", "bytes", + "camino", "chrono", "clap 4.4.3", "dropshot", "expectorate", "highway", "itertools 0.11.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "oximeter 0.1.0", + "oximeter", "regex", "reqwest", "schemars", @@ -5861,6 +5327,7 @@ dependencies = [ "slog-dtrace", "slog-term", "strum", + "tempfile", "thiserror", "tokio", "usdt", @@ -5878,7 +5345,7 @@ dependencies = [ "http", "kstat-rs", "omicron-workspace-hack", - "oximeter 0.1.0", + "oximeter", "rand 0.8.5", "slog", "slog-async", @@ -5898,16 +5365,6 @@ dependencies = [ "syn 2.0.32", ] -[[package]] -name = "oximeter-macro-impl" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.32", -] - [[package]] name = "oximeter-producer" version = "0.1.0" @@ -5916,31 +5373,10 @@ dependencies = [ "chrono", "clap 4.4.3", "dropshot", - "nexus-client 0.1.0", - "omicron-common 0.1.0", + "nexus-client", + "omicron-common", "omicron-workspace-hack", - "oximeter 0.1.0", - "reqwest", - "schemars", - "serde", - "slog", - "slog-dtrace", - "thiserror", - "tokio", - "uuid", -] - -[[package]] -name = "oximeter-producer" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#3dcc8d2eb648c87b42454882a2ce024b409cbb8c" -dependencies = [ - "chrono", - "dropshot", - "nexus-client 0.1.0 
(git+https://github.com/oxidecomputer/omicron?branch=main)", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "reqwest", + "oximeter", "schemars", "serde", "slog", @@ -6031,17 +5467,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "parse-display" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6b32f6c8212838b74c0f5ba412194e88897923020810d9bec72d3594c2588d" -dependencies = [ - "once_cell", - "parse-display-derive 0.7.0", - "regex", -] - [[package]] name = "parse-display" version = "0.8.2" @@ -6049,25 +5474,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6509d08722b53e8dafe97f2027b22ccbe3a5db83cb352931e9716b0aa44bc5c" dependencies = [ "once_cell", - "parse-display-derive 0.8.2", + "parse-display-derive", "regex", ] -[[package]] -name = "parse-display-derive" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6ec9ab2477935d04fcdf7c51c9ee94a1be988938886de3239aed40980b7180" -dependencies = [ - "once_cell", - "proc-macro2", - "quote", - "regex", - "regex-syntax 0.6.29", - "structmeta 0.1.6", - "syn 1.0.109", -] - [[package]] name = "parse-display-derive" version = "0.8.2" @@ -6079,7 +5489,7 @@ dependencies = [ "quote", "regex", "regex-syntax 0.7.5", - "structmeta 0.2.0", + "structmeta", "syn 2.0.32", ] @@ -6611,17 +6021,17 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] [[package]] name = "progenitor" -version = "0.3.0" -source = 
"git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" +version = "0.4.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6631,8 +6041,8 @@ dependencies = [ [[package]] name = "progenitor-client" -version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" +version = "0.4.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ "bytes", "futures-core", @@ -6645,14 +6055,14 @@ dependencies = [ [[package]] name = "progenitor-impl" -version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" +version = "0.4.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ "getopts", "heck 0.4.1", "http", "indexmap 2.1.0", - "openapiv3", + "openapiv3 2.0.0-rc.1", "proc-macro2", "quote", "regex", @@ -6667,10 +6077,10 @@ dependencies = [ [[package]] name = "progenitor-macro" -version = "0.3.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#5c941c0b41b0235031f3ade33a9c119945f1fd51" +version = "0.4.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ - "openapiv3", + "openapiv3 2.0.0-rc.1", "proc-macro2", "progenitor-impl", "quote", @@ -6682,53 +6092,17 @@ dependencies = [ "syn 2.0.32", ] -[[package]] -name = "propolis" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "anyhow", - "bhyve_api", - "bitflags 2.4.0", - "bitstruct", - "byteorder", - "crucible", - "crucible-client-types", - 
"dladm", - "erased-serde", - "futures", - "lazy_static", - "libc", - "nexus-client 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "propolis_types", - "rfb", - "serde", - "serde_arrays", - "serde_json", - "slog", - "strum", - "thiserror", - "tokio", - "usdt", - "uuid", - "viona_api", -] - [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "async-trait", "base64 0.21.5", - "crucible-client-types", "futures", "progenitor", - "propolis_types", "rand 0.8.5", "reqwest", - "ring 0.16.20", "schemars", "serde", "serde_json", @@ -6740,73 +6114,39 @@ dependencies = [ ] [[package]] -name = "propolis-server" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +name = "propolis-mock-server" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "anyhow", - "async-trait", "atty", "base64 0.21.5", - "bit_field", - "bitvec", - "bytes", - "cfg-if 1.0.0", - "chrono", "clap 4.4.3", - "const_format", - "crucible-client-types", "dropshot", - "erased-serde", "futures", - "http", "hyper", - "internal-dns 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "lazy_static", - "nexus-client 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", - "oximeter-producer 0.1.0 
(git+https://github.com/oxidecomputer/omicron?branch=main)", - "propolis", - "propolis-client", - "propolis-server-config", - "rfb", - "ron 0.7.1", + "progenitor", + "propolis_types", + "rand 0.8.5", + "reqwest", "schemars", "serde", - "serde_derive", "serde_json", "slog", "slog-async", "slog-bunyan", "slog-dtrace", "slog-term", - "strum", "thiserror", "tokio", "tokio-tungstenite 0.20.1", - "tokio-util", - "toml 0.7.8", - "usdt", "uuid", ] -[[package]] -name = "propolis-server-config" -version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "cpuid_profile_config", - "serde", - "serde_derive", - "thiserror", - "toml 0.7.8", -] - [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "schemars", "serde", @@ -7199,9 +6539,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.20" +version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" dependencies = [ "base64 0.21.5", "bytes", @@ -7227,6 +6567,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "system-configuration", "tokio", "tokio-native-tls", "tokio-rustls", @@ -7251,21 +6592,6 @@ dependencies = [ "quick-error", ] -[[package]] -name = "rfb" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/rfb?rev=0cac8d9c25eb27acfa35df80f3b9d371de98ab3b#0cac8d9c25eb27acfa35df80f3b9d371de98ab3b" -dependencies = [ - "ascii", - "async-trait", - "bitflags 1.3.2", - "env_logger 
0.9.3", - "futures", - "log", - "thiserror", - "tokio", -] - [[package]] name = "ring" version = "0.16.20" @@ -7295,23 +6621,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "ringbuffer" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df6368f71f205ff9c33c076d170dd56ebf68e8161c733c0caa07a7a5509ed53" - -[[package]] -name = "ron" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88073939a61e5b7680558e6be56b419e208420c2adb92be54921fa6b72283f1a" -dependencies = [ - "base64 0.13.1", - "bitflags 1.3.2", - "serde", -] - [[package]] name = "ron" version = "0.8.1" @@ -7398,20 +6707,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "rusqlite" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" -dependencies = [ - "bitflags 2.4.0", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "smallvec 1.11.0", -] - [[package]] name = "russh" version = "0.39.0" @@ -7834,9 +7129,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.188" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] @@ -7870,20 +7165,11 @@ dependencies = [ "smallvec 0.6.14", ] -[[package]] -name = "serde_arrays" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38636132857f68ec3d5f3eb121166d2af33cb55174c4d5ff645db6165cbef0fd" -dependencies = [ - "serde", -] - [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2", "quote", @@ -8063,9 +7349,9 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if 1.0.0", "cpufeatures", @@ -8221,12 +7507,13 @@ dependencies = [ "async-trait", "chrono", "ipnetwork", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "progenitor", "regress", "reqwest", "serde", + "sled-storage", "slog", "uuid", ] @@ -8241,12 +7528,10 @@ dependencies = [ "futures", "illumos-devinfo", "illumos-utils", - "key-manager", "libc", "libefi-illumos", "macaddr", - "nexus-client 0.1.0", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "rand 0.8.5", @@ -8260,6 +7545,32 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-storage" +version = "0.1.0" +dependencies = [ + "async-trait", + "camino", + "camino-tempfile", + "cfg-if 1.0.0", + "derive_more", + "glob", + "illumos-utils", + "key-manager", + "omicron-common", + "omicron-test-utils", + "omicron-workspace-hack", + "rand 0.8.5", + "schemars", + "serde", + "serde_json", + "sled-hardware", + "slog", + "thiserror", + "tokio", + "uuid", +] + [[package]] name = "slog" version = "2.7.0" @@ -8473,7 +7784,7 @@ dependencies = [ "futures", "gateway-messages", "hex", - "omicron-common 0.1.0", + "omicron-common", "omicron-gateway", "omicron-workspace-hack", "serde", @@ -8616,18 +7927,6 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "structmeta" -version = "0.1.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "104842d6278bf64aa9d2f182ba4bde31e8aec7a131d29b7f444bb9b344a09e2a" -dependencies = [ - "proc-macro2", - "quote", - "structmeta-derive 0.1.6", - "syn 1.0.109", -] - [[package]] name = "structmeta" version = "0.2.0" @@ -8636,21 +7935,10 @@ checksum = "78ad9e09554f0456d67a69c1584c9798ba733a5b50349a6c0d0948710523922d" dependencies = [ "proc-macro2", "quote", - "structmeta-derive 0.2.0", + "structmeta-derive", "syn 2.0.32", ] -[[package]] -name = "structmeta-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24420be405b590e2d746d83b01f09af673270cf80e9b003a5fa7b651c58c7d93" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "structmeta-derive" version = "0.2.0" @@ -8787,6 +8075,27 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tabled" version = "0.14.0" @@ -8910,7 +8219,7 @@ checksum = "b8361c808554228ad09bfed70f5c823caf8a3450b6881cc3a38eb57e8c08c1d9" dependencies = [ "proc-macro2", "quote", - "structmeta 0.2.0", + "structmeta", "syn 2.0.32", ] @@ -9093,7 +8402,7 @@ name = "tlvc-text" version = "0.3.0" source = "git+https://github.com/oxidecomputer/tlvc.git#e644a21a7ca973ed31499106ea926bd63ebccc6f" dependencies = [ - "ron 0.8.1", + "ron", "serde", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git)", "zerocopy 0.6.4", @@ -9510,7 +8819,7 
@@ dependencies = [ "datatest-stable", "fs-err", "humantime", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "predicates 3.0.4", @@ -9539,7 +8848,7 @@ dependencies = [ "hex", "hubtools", "itertools 0.11.0", - "omicron-common 0.1.0", + "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "rand 0.8.5", @@ -9604,17 +8913,6 @@ dependencies = [ "utf-8", ] -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if 1.0.0", - "rand 0.8.5", - "static_assertions", -] - [[package]] name = "typenum" version = "1.16.0" @@ -9623,8 +8921,8 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" +version = "0.0.14" +source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" dependencies = [ "typify-impl", "typify-macro", @@ -9632,8 +8930,8 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" +version = "0.0.14" +source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" dependencies = [ "heck 0.4.1", "log", @@ -9649,8 +8947,8 @@ dependencies = [ [[package]] name = "typify-macro" -version = "0.0.13" -source = "git+https://github.com/oxidecomputer/typify#de16c4238a2b34400d0fece086a6469951c3236b" +version = "0.0.14" +source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" dependencies = [ "proc-macro2", "quote", @@ -9898,42 +9196,12 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" -[[package]] -name = "vergen" -version = "8.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbc5ad0d9d26b2c49a5ab7da76c3e79d3ee37e7821799f8223fcb8f2f391a2e7" -dependencies = [ - "anyhow", - "git2", - "rustc_version 0.4.0", - "rustversion", - "time", -] - [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "viona_api" -version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "libc", - "viona_api_sys", -] - -[[package]] -name = "viona_api_sys" -version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=4019eb10fc2f4ba9bf210d0461dc6292b68309c2#4019eb10fc2f4ba9bf210d0461dc6292b68309c2" -dependencies = [ - "libc", -] - [[package]] name = "vsss-rs" version = "3.3.1" @@ -10148,15 +9416,13 @@ dependencies = [ "ciborium", "clap 4.4.3", "crossterm", - "debug-ignore", "futures", - "hex", "humantime", "indexmap 2.1.0", "indicatif", "itertools 0.11.0", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-workspace-hack", "once_cell", "owo-colors", @@ -10164,7 +9430,6 @@ dependencies = [ "ratatui", "reqwest", "rpassword", - "semver 1.0.20", "serde", "serde_json", "shell-words", @@ -10182,7 +9447,6 @@ dependencies = [ "tui-tree-widget", "unicode-width", "update-engine", - "uuid", "wicket-common", "wicketd-client", "zeroize", @@ -10194,7 +9458,7 @@ version = "0.1.0" dependencies = [ "anyhow", "gateway-client", - "omicron-common 0.1.0", + "omicron-common", "omicron-workspace-hack", "schemars", "serde", @@ -10214,7 +9478,6 @@ dependencies = [ "clap 4.4.3", "crossterm", "omicron-workspace-hack", - "ratatui", "reedline", 
"serde", "slog", @@ -10260,17 +9523,17 @@ dependencies = [ "installinator-artifact-client", "installinator-artifactd", "installinator-common", - "internal-dns 0.1.0", + "internal-dns", "ipnetwork", "itertools 0.11.0", "maplit", "omicron-certificates", - "omicron-common 0.1.0", - "omicron-passwords 0.1.0", + "omicron-common", + "omicron-passwords", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", - "openapiv3", + "openapiv3 1.0.3", "rand 0.8.5", "reqwest", "schemars", diff --git a/Cargo.toml b/Cargo.toml index 0e13946533..82bca496a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,7 @@ members = [ "rpaths", "sled-agent", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous-lib", @@ -122,6 +123,7 @@ default-members = [ "rpaths", "sled-agent", "sled-hardware", + "sled-storage", "sp-sim", "test-utils", "tufaceous-lib", @@ -167,10 +169,9 @@ cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } -crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } curve25519-dalek = "4" datatest-stable = "0.2.3" 
display-error-chain = "0.2.0" @@ -289,9 +290,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2", default-features = false, features = ["mock-only"] } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } proptest = "1.3.1" quote = "1.0" rand = "0.8.5" @@ -329,6 +330,7 @@ similar-asserts = "1.5.0" sled = "0.34" sled-agent-client = { path = "clients/sled-agent-client" } sled-hardware = { path = "sled-hardware" } +sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" slog-dtrace = "0.2" @@ -544,9 +546,9 @@ opt-level = 3 #steno = { path = "../steno" } #[patch."https://github.com/oxidecomputer/propolis"] #propolis-client = { path = "../propolis/lib/propolis-client" } +#propolis-mock-server = { path = "../propolis/bin/mock-server" } #[patch."https://github.com/oxidecomputer/crucible"] #crucible-agent-client = { path = "../crucible/agent-client" } -#crucible-client-types = { path = "../crucible/crucible-client-types" } #crucible-pantry-client = { path = "../crucible/pantry-client" } 
#crucible-smf = { path = "../crucible/smf" } #[patch.crates-io] diff --git a/clients/bootstrap-agent-client/Cargo.toml b/clients/bootstrap-agent-client/Cargo.toml index 42ae59b7aa..3474c5814a 100644 --- a/clients/bootstrap-agent-client/Cargo.toml +++ b/clients/bootstrap-agent-client/Cargo.toml @@ -5,8 +5,6 @@ edition = "2021" license = "MPL-2.0" [dependencies] -async-trait.workspace = true -chrono.workspace = true omicron-common.workspace = true progenitor.workspace = true ipnetwork.workspace = true diff --git a/clients/dns-service-client/Cargo.toml b/clients/dns-service-client/Cargo.toml index 681c06672f..6132222b8a 100644 --- a/clients/dns-service-client/Cargo.toml +++ b/clients/dns-service-client/Cargo.toml @@ -11,7 +11,5 @@ progenitor.workspace = true reqwest = { workspace = true, features = ["json", "rustls-tls", "stream"] } schemars.workspace = true serde.workspace = true -serde_json.workspace = true slog.workspace = true -uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index b2ed07caba..e2cc737e70 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -14,5 +14,6 @@ regress.workspace = true reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] } serde.workspace = true slog.workspace = true +sled-storage.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0df21d894e..30b554a021 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -6,6 +6,7 @@ use async_trait::async_trait; use std::convert::TryFrom; +use std::str::FromStr; use uuid::Uuid; progenitor::generate_api!( @@ -528,3 +529,27 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } } + +impl From for types::DatasetKind { + fn from(k: 
sled_storage::dataset::DatasetKind) -> Self { + use sled_storage::dataset::DatasetKind::*; + match k { + CockroachDb => Self::CockroachDb, + Crucible => Self::Crucible, + Clickhouse => Self::Clickhouse, + ClickhouseKeeper => Self::ClickhouseKeeper, + ExternalDns => Self::ExternalDns, + InternalDns => Self::InternalDns, + } + } +} + +impl From for types::DatasetName { + fn from(n: sled_storage::dataset::DatasetName) -> Self { + Self { + pool_name: types::ZpoolName::from_str(&n.pool().to_string()) + .unwrap(), + kind: n.dataset().clone().into(), + } + } +} diff --git a/common/Cargo.toml b/common/Cargo.toml index 75c1efab55..49997e619c 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -15,18 +15,15 @@ dropshot.workspace = true futures.workspace = true hex.workspace = true http.workspace = true -hyper.workspace = true ipnetwork.workspace = true macaddr.workspace = true lazy_static.workspace = true proptest = { workspace = true, optional = true } rand.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } -ring.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } semver.workspace = true serde.workspace = true -serde_derive.workspace = true serde_human_bytes.workspace = true serde_json.workspace = true serde_with.workspace = true diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 784da8fcc6..155fbf971b 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -103,6 +103,17 @@ pub struct BgpPeerConfig { pub port: String, /// Address of the peer. pub addr: Ipv4Addr, + /// How long to keep a session alive without a keepalive in seconds. + /// Defaults to 6. + pub hold_time: Option, + /// How long to keep a peer in idle after a state machine reset in seconds. + pub idle_hold_time: Option, + /// How long to delay sending open messages to a peer. In seconds. 
+ pub delay_open: Option, + /// The interval in seconds between peer connection retry attempts. + pub connect_retry: Option, + /// The interval to send keepalive messages at. + pub keepalive: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/common/src/disk.rs b/common/src/disk.rs index 3ea8091326..3ae9c31e01 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -5,7 +5,7 @@ //! Disk related types shared among crates /// Uniquely identifies a disk. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct DiskIdentity { pub vendor: String, pub serial: String, diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index ec7cafb559..ce9a6ac32d 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -21,7 +21,6 @@ nexus-test-interface.workspace = true omicron-common.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true -omicron-sled-agent.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" rcgen.workspace = true diff --git a/dev-tools/thing-flinger/Cargo.toml b/dev-tools/thing-flinger/Cargo.toml index 1a6c05a546..2acbaf5659 100644 --- a/dev-tools/thing-flinger/Cargo.toml +++ b/dev-tools/thing-flinger/Cargo.toml @@ -13,7 +13,6 @@ omicron-package.workspace = true serde.workspace = true serde_derive.workspace = true thiserror.workspace = true -toml.workspace = true omicron-workspace-hack.workspace = true [[bin]] diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 87f63ea1df..e78a8792d3 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -8,9 +8,7 @@ license = "MPL-2.0" anyhow = { workspace = true, features = ["backtrace"] } async-trait.workspace = true base64.workspace = true -camino.workspace = true chrono.workspace = true -futures.workspace = true http.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true @@ -19,9 +17,7 @@ rand.workspace = true reqwest.workspace = true russh = "0.39.0" russh-keys = "0.38.0" -serde_json.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true trust-dns-resolver.workspace = true -uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/gateway-cli/Cargo.toml b/gateway-cli/Cargo.toml index ba66fa4c4f..2412bf950f 100644 --- a/gateway-cli/Cargo.toml +++ b/gateway-cli/Cargo.toml @@ -8,9 +8,7 @@ license = "MPL-2.0" anyhow.workspace = true clap.workspace = true futures.workspace = true -hex.workspace = true omicron-common.workspace = true -libc.workspace = true reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 9cf41f6c2e..75c31e9977 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -6,9 +6,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true -async-trait.workspace = true base64.workspace = true -ciborium.workspace = true clap.workspace = true 
dropshot.workspace = true futures.workspace = true @@ -23,7 +21,6 @@ omicron-common.workspace = true once_cell.workspace = true schemars.workspace = true serde.workspace = true -serde_human_bytes.workspace = true signal-hook.workspace = true signal-hook-tokio.workspace = true slog.workspace = true @@ -32,7 +29,6 @@ thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tokio-stream.workspace = true tokio-tungstenite.workspace = true -tokio-util.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index a291a15e78..497454e047 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -44,3 +44,6 @@ toml.workspace = true [features] # Enable to generate MockZones testing = ["mockall"] +# Useful for tests that want real functionality and ability to run without +# pfexec +tmp_keypath = [] diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 345f097ae2..1faa4c5c37 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -4,6 +4,9 @@ //! Wrappers around illumos-specific commands. +#[allow(unused)] +use std::sync::atomic::{AtomicBool, Ordering}; + use cfg_if::cfg_if; pub mod addrobj; @@ -93,7 +96,7 @@ mod inner { // Helper function for starting the process and checking the // exit code result. - pub fn execute( + pub fn execute_helper( command: &mut std::process::Command, ) -> Result { let output = command.output().map_err(|err| { @@ -108,6 +111,34 @@ mod inner { } } +// Due to feature unification, the `testing` feature is enabled when some tests +// don't actually want to use it. We allow them to opt out of the use of the +// free function here. We also explicitly opt-in where mocks are used. +// +// Note that this only works if the tests that use mocks and those that don't +// are run sequentially. However, this is how we do things in CI with nextest, +// so there is no problem currently. 
+// +// We can remove all this when we get rid of the mocks. +#[cfg(any(test, feature = "testing"))] +pub static USE_MOCKS: AtomicBool = AtomicBool::new(false); + +pub fn execute( + command: &mut std::process::Command, +) -> Result { + cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + if USE_MOCKS.load(Ordering::SeqCst) { + mock_inner::execute_helper(command) + } else { + inner::execute_helper(command) + } + } else { + inner::execute_helper(command) + } + } +} + cfg_if! { if #[cfg(any(test, feature = "testing"))] { pub use mock_inner::*; diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index a6af997619..e9554100af 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -20,7 +20,16 @@ pub const ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_RAMDISK_DATASET: &str = "rpool/zone"; pub const ZFS: &str = "/usr/sbin/zfs"; + +/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior +/// and to ensure it goes away on power off. +/// +/// We want to minimize the time the key files are in memory, and so we rederive +/// the keys and recreate the files on demand when creating and mounting +/// encrypted filesystems. We then zero them and unlink them. pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; +// Use /tmp so we don't have to worry about running tests with pfexec +pub const TEST_KEYPATH_ROOT: &str = "/tmp"; /// Error returned by [`Zfs::list_datasets`].
#[derive(thiserror::Error, Debug)] @@ -158,19 +167,27 @@ impl fmt::Display for Keypath { } } +#[cfg(not(feature = "tmp_keypath"))] +impl From<&DiskIdentity> for Keypath { + fn from(id: &DiskIdentity) -> Self { + build_keypath(id, KEYPATH_ROOT) + } +} + +#[cfg(feature = "tmp_keypath")] impl From<&DiskIdentity> for Keypath { fn from(id: &DiskIdentity) -> Self { - let filename = format!( - "{}-{}-{}-zfs-aes-256-gcm.key", - id.vendor, id.serial, id.model - ); - let mut path = Utf8PathBuf::new(); - path.push(KEYPATH_ROOT); - path.push(filename); - Keypath(path) + build_keypath(id, TEST_KEYPATH_ROOT) } } +fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath { + let filename = + format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model); + let path: Utf8PathBuf = [root, &filename].iter().collect(); + Keypath(path) +} + #[derive(Debug)] pub struct EncryptionDetails { pub keypath: Keypath, diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 81ded2655e..f2c395e22b 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -39,6 +39,13 @@ pub struct CreateError { err: Error, } +#[derive(thiserror::Error, Debug)] +#[error("Failed to destroy zpool: {err}")] +pub struct DestroyError { + #[from] + err: Error, +} + #[derive(thiserror::Error, Debug)] #[error("Failed to list zpools: {err}")] pub struct ListError { @@ -89,7 +96,7 @@ impl FromStr for ZpoolHealth { } /// Describes a Zpool. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct ZpoolInfo { name: String, size: u64, @@ -121,6 +128,17 @@ impl ZpoolInfo { pub fn health(&self) -> ZpoolHealth { self.health } + + #[cfg(any(test, feature = "testing"))] + pub fn new_hardcoded(name: String) -> ZpoolInfo { + ZpoolInfo { + name, + size: 1024 * 1024 * 64, + allocated: 1024, + free: 1024 * 1023 * 64, + health: ZpoolHealth::Online, + } + } } impl FromStr for ZpoolInfo { @@ -167,7 +185,10 @@ pub struct Zpool {} #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] impl Zpool { - pub fn create(name: ZpoolName, vdev: &Utf8Path) -> Result<(), CreateError> { + pub fn create( + name: &ZpoolName, + vdev: &Utf8Path, + ) -> Result<(), CreateError> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); @@ -189,7 +210,17 @@ impl Zpool { Ok(()) } - pub fn import(name: ZpoolName) -> Result<(), Error> { + pub fn destroy(name: &ZpoolName) -> Result<(), DestroyError> { + let mut cmd = std::process::Command::new(PFEXEC); + cmd.env_clear(); + cmd.env("LC_ALL", "C.UTF-8"); + cmd.arg(ZPOOL).arg("destroy"); + cmd.arg(&name.to_string()); + execute(&mut cmd).map_err(Error::from)?; + Ok(()) + } + + pub fn import(name: &ZpoolName) -> Result<(), Error> { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); diff --git a/installinator-artifactd/Cargo.toml b/installinator-artifactd/Cargo.toml index b14ca4002f..e9ddc222cd 100644 --- a/installinator-artifactd/Cargo.toml +++ b/installinator-artifactd/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true async-trait.workspace = true -camino.workspace = true clap.workspace = true dropshot.workspace = true hyper.workspace = true @@ -15,7 +14,6 @@ schemars.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true -tokio.workspace = true uuid.workspace = true installinator-common.workspace 
= true diff --git a/installinator-common/Cargo.toml b/installinator-common/Cargo.toml index 8fea234e20..4381de74eb 100644 --- a/installinator-common/Cargo.toml +++ b/installinator-common/Cargo.toml @@ -8,7 +8,6 @@ license = "MPL-2.0" anyhow.workspace = true camino.workspace = true illumos-utils.workspace = true -omicron-common.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index a4f170ddba..d489e73ec1 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -23,13 +23,11 @@ installinator-common.workspace = true ipcc-key-value.workspace = true itertools.workspace = true libc.workspace = true -once_cell.workspace = true omicron-common.workspace = true -progenitor-client.workspace = true reqwest.workspace = true -serde.workspace = true sha2.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-envlogger.workspace = true @@ -38,7 +36,6 @@ smf.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } -toml.workspace = true tufaceous-lib.workspace = true update-engine.workspace = true uuid.workspace = true diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index ffa0b74739..b037384cbe 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -6,10 +6,11 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; use anyhow::Result; -use sled_hardware::Disk; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; use sled_hardware::SledMode; +use sled_storage::disk::Disk; +use sled_storage::disk::RawDisk; use slog::info; use slog::Logger; @@ -28,7 +29,8 @@ impl Hardware { anyhow!("failed to create HardwareManager: {err}") })?; - let disks = hardware.disks(); + let disks: Vec = + hardware.disks().into_iter().map(|disk| disk.into()).collect(); 
info!( log, "found gimlet hardware"; diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 6c0c1f63c7..22dd2adbf6 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -122,8 +122,9 @@ impl WriteDestination { ); let zpool_name = disk.zpool_name().clone(); - let control_plane_dir = zpool_name - .dataset_mountpoint(sled_hardware::INSTALL_DATASET); + let control_plane_dir = zpool_name.dataset_mountpoint( + sled_storage::dataset::INSTALL_DATASET, + ); match drives.entry(slot) { Entry::Vacant(entry) => { diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index ecb2d48bda..96993ce6a2 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -14,7 +14,6 @@ omicron-common.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true -trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns/tests/output/test-server.json b/internal-dns/tests/output/test-server.json index 5f4d6d155e..5720dec19f 100644 --- a/internal-dns/tests/output/test-server.json +++ b/internal-dns/tests/output/test-server.json @@ -33,18 +33,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "Error": { "description": "Error information from a response.", @@ -65,6 +53,18 @@ "request_id" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 3add4ad559..4fc13a31d8 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -12,12 +12,10 @@ anyhow.workspace = true assert_matches.workspace = true async-trait.workspace = 
true base64.workspace = true -bb8.workspace = true cancel-safe-futures.workspace = true camino.workspace = true clap.workspace = true chrono.workspace = true -cookie.workspace = true crucible-agent-client.workspace = true crucible-pantry-client.workspace = true dns-service-client.workspace = true @@ -36,16 +34,12 @@ ipnetwork.workspace = true lazy_static.workspace = true macaddr.workspace = true mime_guess.workspace = true -newtype_derive.workspace = true # Not under "dev-dependencies"; these also need to be implemented for # integration tests. nexus-test-interface.workspace = true num-integer.workspace = true once_cell.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true -oso.workspace = true oximeter-client.workspace = true oximeter-db.workspace = true parse-display.workspace = true @@ -75,11 +69,9 @@ tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } -toml.workspace = true tough.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true -usdt.workspace = true nexus-defaults.workspace = true nexus-db-model.workspace = true diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index a5cb9a06be..b7514c4806 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -20,7 +20,6 @@ parse-display.workspace = true pq-sys = "*" rand.workspace = true ref-cast.workspace = true -thiserror.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } semver.workspace = true serde.workspace = true diff --git a/nexus/db-model/src/oximeter_info.rs b/nexus/db-model/src/oximeter_info.rs index ac30384c59..39bde98ea8 100644 --- a/nexus/db-model/src/oximeter_info.rs +++ b/nexus/db-model/src/oximeter_info.rs @@ -8,7 +8,7 @@ use chrono::{DateTime, Utc}; use nexus_types::internal_api; use uuid::Uuid; -/// Message used to notify Nexus that this oximeter 
instance is up and running. +/// A record representing a registered `oximeter` collector. #[derive(Queryable, Insertable, Debug, Clone, Copy)] #[diesel(table_name = oximeter)] pub struct OximeterInfo { @@ -18,8 +18,9 @@ pub struct OximeterInfo { pub time_created: DateTime, /// When this resource was last modified. pub time_modified: DateTime, - /// The address on which this oximeter instance listens for requests + /// The address on which this `oximeter` instance listens for requests. pub ip: ipnetwork::IpNetwork, + /// The port on which this `oximeter` instance listens for requests. pub port: SqlU16, } diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 5edf4f1e89..b1b8f3b28f 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -21,25 +21,18 @@ diesel-dtrace.workspace = true dropshot.workspace = true futures.workspace = true headers.workspace = true -hex.workspace = true http.workspace = true hyper.workspace = true ipnetwork.workspace = true lazy_static.workspace = true macaddr.workspace = true newtype_derive.workspace = true -once_cell.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true oso.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" -rand.workspace = true ref-cast.workspace = true -reqwest = { workspace = true, features = [ "json" ] } -ring.workspace = true samael.workspace = true serde.workspace = true serde_json.workspace = true @@ -51,14 +44,11 @@ static_assertions.workspace = true steno.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } -tokio-postgres = { workspace = true, features = [ "with-serde_json-1" ] } -toml.workspace = true uuid.workspace = true usdt.workspace = true authz-macros.workspace = true db-macros.workspace = true -nexus-defaults.workspace = true nexus-db-model.workspace = true nexus-types.workspace = true omicron-common.workspace = true diff --git a/nexus/db-queries/src/db/datastore/oximeter.rs b/nexus/db-queries/src/db/datastore/oximeter.rs index c9b3a59b05..55b650ea53 100644 --- a/nexus/db-queries/src/db/datastore/oximeter.rs +++ b/nexus/db-queries/src/db/datastore/oximeter.rs @@ -21,7 +21,20 @@ use omicron_common::api::external::ResourceType; use uuid::Uuid; impl DataStore { - // Create a record for a new Oximeter instance + /// Lookup an oximeter instance by its ID. + pub async fn oximeter_lookup( + &self, + id: &Uuid, + ) -> Result { + use db::schema::oximeter::dsl; + dsl::oximeter + .find(*id) + .first_async(&*self.pool_connection_unauthorized().await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Create a record for a new Oximeter instance pub async fn oximeter_create( &self, info: &OximeterInfo, @@ -55,7 +68,7 @@ impl DataStore { Ok(()) } - // List the oximeter collector instances + /// List the oximeter collector instances pub async fn oximeter_list( &self, page_params: &DataPageParams<'_, Uuid>, @@ -69,7 +82,7 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - // Create a record for a new producer endpoint + /// Create a record for a new producer endpoint pub async fn producer_endpoint_create( &self, producer: &ProducerEndpoint, @@ -102,7 +115,27 @@ impl DataStore { Ok(()) } - // List the producer endpoint records by the oximeter instance to which they're assigned. + /// Delete a record for a producer endpoint, by its ID. + /// + /// This is idempotent, and deleting a record that is already removed is a + /// no-op. If the record existed, then the ID of the `oximeter` collector is + /// returned. If there was no record, `None` is returned. + pub async fn producer_endpoint_delete( + &self, + id: &Uuid, + ) -> Result, Error> { + use db::schema::metric_producer::dsl; + diesel::delete(dsl::metric_producer.find(*id)) + .returning(dsl::oximeter_id) + .get_result_async::( + &*self.pool_connection_unauthorized().await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List the producer endpoint records by the oximeter instance to which they're assigned. 
pub async fn producers_list_by_oximeter_id( &self, oximeter_id: Uuid, diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 965ff3f02a..202aff49b2 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true chrono.workspace = true -futures.workspace = true gateway-client.workspace = true gateway-messages.workspace = true nexus-types.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index d40b09d2be..1676f44083 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -280,6 +280,15 @@ mod test { let message = regex::Regex::new(r"os error \d+") .unwrap() .replace_all(&e, "os error <>"); + // Communication errors differ based on the configuration of the + // machine running the test. For example whether or not the machine + // has IPv6 configured will determine if an error is network + // unreachable or a timeout due to sending a packet to a known + // discard prefix. So just key in on the communication error in a + // general sense. 
+ let message = regex::Regex::new(r"Communication Error.*") + .unwrap() + .replace_all(&message, "Communication Error <>"); write!(&mut s, "error: {}\n", message).unwrap(); } diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index f231cc7d97..4404046253 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -41,4 +41,4 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" errors: -error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error: error sending request for url (http://[100::1]:12345/ignition): error trying to connect: tcp connect error: Network is unreachable (os error <>): error sending request for url (http://[100::1]:12345/ignition): error trying to connect: tcp connect error: Network is unreachable (os error <>): error trying to connect: tcp connect error: Network is unreachable (os error <>): tcp connect error: Network is unreachable (os error <>): Network is unreachable (os error <>) +error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error <> diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 17d033c5a0..923bb1777e 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1320,7 +1320,9 @@ impl super::Nexus { .await?; // If the supplied instance state indicates that the instance no longer - // has an active VMM, attempt to delete the virtual provisioning record + // has an active VMM, attempt to delete the virtual provisioning record, + // and the assignment of the Propolis metric producer to an oximeter + // collector. 
// // As with updating networking state, this must be done before // committing the new runtime state to the database: once the DB is @@ -1338,6 +1340,21 @@ impl super::Nexus { (&new_runtime_state.instance_state.gen).into(), ) .await?; + + // TODO-correctness: The `notify_instance_updated` method can run + // concurrently with itself in some situations, such as where a + // sled-agent attempts to update Nexus about a stopped instance; + // that times out; and it makes another request to a different + // Nexus. The call to `unassign_producer` is racy in those + // situations, and we may end with instances with no metrics. + // + // This unfortunate case should be handled as part of + // instance-lifecycle improvements, notably using a reliable + // persistent workflow to correctly update the oximeter assignment as + // an instance's state changes. + // + // Tracked in https://github.com/oxidecomputer/omicron/issues/3742. + self.unassign_producer(instance_id).await?; } // Write the new instance and VMM states back to CRDB. This needs to be diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index bc947cf4bc..7dfa2fb68b 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -87,32 +87,43 @@ impl super::Nexus { "address" => oximeter_info.address, ); - // Regardless, notify the collector of any assigned metric producers. This should be empty - // if this Oximeter collector is registering for the first time, but may not be if the - // service is re-registering after failure. - let pagparams = DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(100).unwrap(), - }; - let producers = self - .db_datastore - .producers_list_by_oximeter_id( - oximeter_info.collector_id, - &pagparams, - ) - .await?; - if !producers.is_empty() { + // Regardless, notify the collector of any assigned metric producers.
+ // + // This should be empty if this Oximeter collector is registering for + // the first time, but may not be if the service is re-registering after + // failure. + let client = self.build_oximeter_client( + &oximeter_info.collector_id, + oximeter_info.address, + ); + let mut last_producer_id = None; + loop { + let pagparams = DataPageParams { + marker: last_producer_id.as_ref(), + direction: PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(100).unwrap(), + }; + let producers = self + .db_datastore + .producers_list_by_oximeter_id( + oximeter_info.collector_id, + &pagparams, + ) + .await?; + if producers.is_empty() { + return Ok(()); + } debug!( self.log, - "registered oximeter collector that is already assigned producers, re-assigning them to the collector"; + "re-assigning existing metric producers to a collector"; "n_producers" => producers.len(), "collector_id" => ?oximeter_info.collector_id, ); - let client = self.build_oximeter_client( - &oximeter_info.collector_id, - oximeter_info.address, - ); + // Be sure to continue paginating from the last producer. + // + // Safety: We check just above if the list is empty, so there is a + // last element. + last_producer_id.replace(producers.last().unwrap().id()); for producer in producers.into_iter() { let producer_info = oximeter_client::types::ProducerEndpoint { id: producer.id(), @@ -132,7 +143,6 @@ impl super::Nexus { .map_err(Error::from)?; } } - Ok(()) } /// Register as a metric producer with the oximeter metric collection server. @@ -187,6 +197,58 @@ impl super::Nexus { Ok(()) } + /// Idempotently un-assign a producer from an oximeter collector. + pub(crate) async fn unassign_producer( + &self, + id: &Uuid, + ) -> Result<(), Error> { + if let Some(collector_id) = + self.db_datastore.producer_endpoint_delete(id).await? 
+ { + debug!( + self.log, + "deleted metric producer assignment"; + "producer_id" => %id, + "collector_id" => %collector_id, + ); + let oximeter_info = + self.db_datastore.oximeter_lookup(&collector_id).await?; + let address = + SocketAddr::new(oximeter_info.ip.ip(), *oximeter_info.port); + let client = self.build_oximeter_client(&id, address); + if let Err(e) = client.producer_delete(&id).await { + error!( + self.log, + "failed to delete producer from collector"; + "producer_id" => %id, + "collector_id" => %collector_id, + "address" => %address, + "error" => ?e, + ); + return Err(Error::internal_error( + format!("failed to delete producer from collector: {e:?}") + .as_str(), + )); + } else { + debug!( + self.log, + "successfully deleted producer from collector"; + "producer_id" => %id, + "collector_id" => %collector_id, + "address" => %address, + ); + Ok(()) + } + } else { + trace!( + self.log, + "un-assigned non-existent metric producer"; + "producer_id" => %id, + ); + Ok(()) + } + } + /// Returns a results from the timeseries DB based on the provided query /// parameters. 
/// diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index bed690f839..163f3bd5bb 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -675,10 +675,15 @@ impl super::Nexus { addresses: info.addresses.iter().map(|a| a.address).collect(), bgp_peers: peer_info .iter() - .map(|(_p, asn, addr)| BgpPeerConfig { + .map(|(p, asn, addr)| BgpPeerConfig { addr: *addr, asn: *asn, port: port.port_name.clone(), + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: Some(p.idle_hold_time.0.into()), + keepalive: Some(p.keepalive.0.into()), }) .collect(), switch: port.switch_location.parse().unwrap(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 830792826e..0c06d6ff83 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -962,6 +962,11 @@ pub(crate) async fn bootstore_update( asn: *asn, port: switch_port_name.into(), addr, + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: Some(p.idle_hold_time.0.into()), + keepalive: Some(p.keepalive.0.into()), }), IpAddr::V6(_) => { warn!(opctx.log, "IPv6 peers not yet supported"); diff --git a/nexus/src/bin/nexus.rs b/nexus/src/bin/nexus.rs index 24fef5c8d2..b67085db2c 100644 --- a/nexus/src/bin/nexus.rs +++ b/nexus/src/bin/nexus.rs @@ -27,6 +27,7 @@ struct Args { short = 'O', long = "openapi", help = "Print the external OpenAPI Spec document and exit", + conflicts_with = "openapi_internal", action )] openapi: bool, @@ -40,7 +41,7 @@ struct Args { openapi_internal: bool, #[clap(name = "CONFIG_FILE_PATH", action)] - config_file_path: PathBuf, + config_file_path: Option, } #[tokio::main] @@ -53,14 +54,25 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = 
Args::parse(); - let config = Config::from_file(args.config_file_path) - .map_err(|e| CmdError::Failure(anyhow!(e)))?; - if args.openapi { run_openapi_external().map_err(|err| CmdError::Failure(anyhow!(err))) } else if args.openapi_internal { run_openapi_internal().map_err(|err| CmdError::Failure(anyhow!(err))) } else { + let config_path = match args.config_file_path { + Some(path) => path, + None => { + use clap::CommandFactory; + + eprintln!("{}", Args::command().render_help()); + return Err(CmdError::Usage( + "CONFIG_FILE_PATH is required".to_string(), + )); + } + }; + let config = Config::from_file(config_path) + .map_err(|e| CmdError::Failure(anyhow!(e)))?; + run_server(&config).await.map_err(|err| CmdError::Failure(anyhow!(err))) } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 48be2de6b0..eba97a88ec 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -3992,6 +3992,7 @@ async fn vpc_firewall_rules_update( method = GET, path = "/v1/vpc-routers", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_list( rqctx: RequestContext>, @@ -4027,6 +4028,7 @@ async fn vpc_router_list( method = GET, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_view( rqctx: RequestContext>, @@ -4056,6 +4058,7 @@ async fn vpc_router_view( method = POST, path = "/v1/vpc-routers", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_create( rqctx: RequestContext>, @@ -4087,6 +4090,7 @@ async fn vpc_router_create( method = DELETE, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_delete( rqctx: RequestContext>, @@ -4116,6 +4120,7 @@ async fn vpc_router_delete( method = PUT, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_update( rqctx: RequestContext>, @@ -4151,6 +4156,7 @@ async fn vpc_router_update( method = GET, 
path = "/v1/vpc-router-routes", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_route_list( rqctx: RequestContext>, @@ -4188,6 +4194,7 @@ async fn vpc_router_route_list( method = GET, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_route_view( rqctx: RequestContext>, @@ -4220,6 +4227,7 @@ async fn vpc_router_route_view( method = POST, path = "/v1/vpc-router-routes", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_route_create( rqctx: RequestContext>, @@ -4251,6 +4259,7 @@ async fn vpc_router_route_create( method = DELETE, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_route_delete( rqctx: RequestContext>, @@ -4282,6 +4291,7 @@ async fn vpc_router_route_delete( method = PUT, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], + unpublished = true, }] async fn vpc_router_route_update( rqctx: RequestContext>, diff --git a/nexus/test-interface/Cargo.toml b/nexus/test-interface/Cargo.toml index 0071ffaa28..b96afa6dbf 100644 --- a/nexus/test-interface/Cargo.toml +++ b/nexus/test-interface/Cargo.toml @@ -6,8 +6,6 @@ license = "MPL-2.0" [dependencies] async-trait.workspace = true -dropshot.workspace = true -internal-dns.workspace = true nexus-types.workspace = true omicron-common.workspace = true slog.workspace = true diff --git a/nexus/test-utils-macros/Cargo.toml b/nexus/test-utils-macros/Cargo.toml index d3d28a7640..5ed57b9c4a 100644 --- a/nexus/test-utils-macros/Cargo.toml +++ b/nexus/test-utils-macros/Cargo.toml @@ -8,7 +8,6 @@ license = "MPL-2.0" proc-macro = true [dependencies] -proc-macro2.workspace = true quote.workspace = true syn = { workspace = true, features = [ "fold", "parsing" ] } omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 56cee27b37..024cba958b 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -28,16 +28,12 @@ 
omicron-passwords.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true oximeter.workspace = true -oximeter-client.workspace = true oximeter-collector.workspace = true oximeter-producer.workspace = true -parse-display.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true slog.workspace = true -tempfile.workspace = true -trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index 2cda594e18..65aaa18642 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -4,11 +4,17 @@ //! Integration tests for oximeter collectors and producers. +use dropshot::Method; +use http::StatusCode; use nexus_test_interface::NexusServer; use nexus_test_utils_macros::nexus_test; +use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oximeter_db::DbWrite; +use std::collections::BTreeSet; use std::net; +use std::net::Ipv6Addr; +use std::net::SocketAddr; use std::time::Duration; use uuid::Uuid; @@ -332,3 +338,87 @@ async fn test_oximeter_reregistration() { ); context.teardown().await; } + +// A regression test for https://github.com/oxidecomputer/omicron/issues/4498 +#[tokio::test] +async fn test_oximeter_collector_reregistration_gets_all_assignments() { + let mut context = nexus_test_utils::test_setup::( + "test_oximeter_collector_reregistration_gets_all_assignments", + ) + .await; + let oximeter_id = nexus_test_utils::OXIMETER_UUID.parse().unwrap(); + + // Create a bunch of producer records. + // + // Note that the actual count is arbitrary, but it should be larger than the + // internal pagination limit used in `Nexus::upsert_oximeter_collector()`, + // which is currently 100. 
+ const N_PRODUCERS: usize = 150; + let mut ids = BTreeSet::new(); + for _ in 0..N_PRODUCERS { + let id = Uuid::new_v4(); + ids.insert(id); + let info = ProducerEndpoint { + id, + address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 12345), + base_route: String::from("/collect"), + interval: Duration::from_secs(1), + }; + context + .internal_client + .make_request( + Method::POST, + "/metrics/producers", + Some(&info), + StatusCode::NO_CONTENT, + ) + .await + .expect("failed to register test producer"); + } + + // Check that `oximeter` has these registered. + let producers = + context.oximeter.list_producers(None, N_PRODUCERS * 2).await; + let actual_ids: BTreeSet<_> = + producers.iter().map(|info| info.id).collect(); + + // There is an additional producer that's created as part of the normal test + // setup, so we'll check that all of the new producers exist, and that + // there's exactly 1 additional one. + assert!( + ids.is_subset(&actual_ids), + "oximeter did not get the right set of producers" + ); + assert_eq!( + ids.len(), + actual_ids.len() - 1, + "oximeter did not get the right set of producers" + ); + + // Drop and restart oximeter, which should result in the exact same set of + // producers again. 
+ drop(context.oximeter); + context.oximeter = nexus_test_utils::start_oximeter( + context.logctx.log.new(o!("component" => "oximeter")), + context.server.get_http_server_internal_address().await, + context.clickhouse.port(), + oximeter_id, + ) + .await + .expect("failed to restart oximeter"); + + let producers = + context.oximeter.list_producers(None, N_PRODUCERS * 2).await; + let actual_ids: BTreeSet<_> = + producers.iter().map(|info| info.id).collect(); + assert!( + ids.is_subset(&actual_ids), + "oximeter did not get the right set of producers after re-registering" + ); + assert_eq!( + ids.len(), + actual_ids.len() - 1, + "oximeter did not get the right set of producers after re-registering" + ); + context.teardown().await; +} diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index d79dd09fc1..213e7f9e4f 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -168,13 +168,15 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String { // // Note that for the purposes of schema comparisons, we don't care about parsing // the contents of the database, merely the schema and equality of contained data. -#[derive(Eq, PartialEq, Clone, Debug)] +#[derive(PartialEq, Clone, Debug)] enum AnySqlType { DateTime, String(String), Bool(bool), Uuid(Uuid), Int8(i64), + Float4(f32), + TextArray(Vec), // TODO: This isn't exhaustive, feel free to add more. // // These should only be necessary for rows where the database schema changes also choose to @@ -213,6 +215,14 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType { if i64::accepts(ty) { return Ok(AnySqlType::Int8(i64::from_sql(ty, raw)?)); } + if f32::accepts(ty) { + return Ok(AnySqlType::Float4(f32::from_sql(ty, raw)?)); + } + if Vec::::accepts(ty) { + return Ok(AnySqlType::TextArray(Vec::::from_sql( + ty, raw, + )?)); + } Err(anyhow::anyhow!( "Cannot parse type {ty}. 
If you're trying to use this type in a table which is populated \ during a schema migration, consider adding it to `AnySqlType`." @@ -224,7 +234,7 @@ during a schema migration, consider adding it to `AnySqlType`." } } -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct NamedSqlValue { // It's a little redunant to include the column name alongside each value, // but it results in a prettier diff. @@ -240,7 +250,7 @@ impl NamedSqlValue { } // A generic representation of a row of SQL data -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct Row { values: Vec, } @@ -262,19 +272,7 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> { } } -async fn crdb_show_constraints( - crdb: &CockroachInstance, - table: &str, -) -> Vec { - let client = crdb.connect().await.expect("failed to connect"); - - let sql = format!("SHOW CONSTRAINTS FROM {table}"); - let rows = client - .query(&sql, &[]) - .await - .unwrap_or_else(|_| panic!("failed to query {table}")); - client.cleanup().await.expect("cleaning up after wipe"); - +fn process_rows(rows: &Vec) -> Vec { let mut result = vec![]; for row in rows { let mut row_result = Row::new(); @@ -290,6 +288,22 @@ async fn crdb_show_constraints( result } +async fn crdb_show_constraints( + crdb: &CockroachInstance, + table: &str, +) -> Vec { + let client = crdb.connect().await.expect("failed to connect"); + + let sql = format!("SHOW CONSTRAINTS FROM {table}"); + let rows = client + .query(&sql, &[]) + .await + .unwrap_or_else(|_| panic!("failed to query {table}")); + client.cleanup().await.expect("cleaning up after wipe"); + + process_rows(&rows) +} + async fn crdb_select( crdb: &CockroachInstance, columns: ColumnSelector<'_>, @@ -324,19 +338,20 @@ async fn crdb_select( .unwrap_or_else(|_| panic!("failed to query {table}")); client.cleanup().await.expect("cleaning up after wipe"); - let mut result = vec![]; - for row in rows { - let mut row_result = Row::new(); - for i in 0..row.len() { - let column_name 
= row.columns()[i].name(); - row_result.values.push(NamedSqlValue { - column: column_name.to_string(), - value: row.get(i), - }); - } - result.push(row_result); - } - result + process_rows(&rows) +} + +async fn crdb_list_enums(crdb: &CockroachInstance) -> Vec { + let client = crdb.connect().await.expect("failed to connect"); + + // https://www.cockroachlabs.com/docs/stable/show-enums + let rows = client + .query("show enums;", &[]) + .await + .unwrap_or_else(|_| panic!("failed to list enums")); + client.cleanup().await.expect("cleaning up after wipe"); + + process_rows(&rows) } async fn read_all_schema_versions() -> BTreeSet { @@ -569,10 +584,11 @@ const PG_INDEXES: [&'static str; 5] = const TABLES: [&'static str; 4] = ["table_catalog", "table_schema", "table_name", "table_type"]; -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct InformationSchema { columns: Vec, constraint_column_usage: Vec, + enums: Vec, key_column_usage: Vec, referential_constraints: Vec, views: Vec, @@ -589,6 +605,13 @@ impl InformationSchema { // the columns diff especially needs this: it can be 20k lines otherwise similar_asserts::assert_eq!(self.tables, other.tables); similar_asserts::assert_eq!(self.columns, other.columns); + similar_asserts::assert_eq!( + self.enums, + other.enums, + "Enums did not match. Members must have the same order in dbinit.sql and \ + migrations. If a migration adds a member, it should use BEFORE or AFTER \ + to add it in the same order as dbinit.sql." 
+ ); similar_asserts::assert_eq!(self.views, other.views); similar_asserts::assert_eq!( self.table_constraints, @@ -624,6 +647,8 @@ impl InformationSchema { ) .await; + let enums = crdb_list_enums(crdb).await; + let constraint_column_usage = crdb_select( crdb, CONSTRAINT_COLUMN_USAGE.as_slice().into(), @@ -694,6 +719,7 @@ impl InformationSchema { Self { columns, constraint_column_usage, + enums, key_column_usage, referential_constraints, views, diff --git a/nexus/tests/output/cmd-nexus-noargs-stderr b/nexus/tests/output/cmd-nexus-noargs-stderr index f371553325..8dff679340 100644 --- a/nexus/tests/output/cmd-nexus-noargs-stderr +++ b/nexus/tests/output/cmd-nexus-noargs-stderr @@ -1,6 +1,13 @@ -error: the following required arguments were not provided: - +See README.adoc for more information -Usage: nexus +Usage: nexus [OPTIONS] [CONFIG_FILE_PATH] -For more information, try '--help'. +Arguments: + [CONFIG_FILE_PATH] + +Options: + -O, --openapi Print the external OpenAPI Spec document and exit + -I, --openapi-internal Print the internal OpenAPI Spec document and exit + -h, --help Print help + +nexus: CONFIG_FILE_PATH is required diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index e55eaa4df6..8c5fe953e3 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -191,16 +191,6 @@ vpc_delete DELETE /v1/vpcs/{vpc} vpc_firewall_rules_update PUT /v1/vpc-firewall-rules vpc_firewall_rules_view GET /v1/vpc-firewall-rules vpc_list GET /v1/vpcs -vpc_router_create POST /v1/vpc-routers -vpc_router_delete DELETE /v1/vpc-routers/{router} -vpc_router_list GET /v1/vpc-routers -vpc_router_route_create POST /v1/vpc-router-routes -vpc_router_route_delete DELETE /v1/vpc-router-routes/{route} -vpc_router_route_list GET /v1/vpc-router-routes -vpc_router_route_update PUT /v1/vpc-router-routes/{route} -vpc_router_route_view GET /v1/vpc-router-routes/{route} -vpc_router_update PUT /v1/vpc-routers/{router} -vpc_router_view GET 
/v1/vpc-routers/{router} vpc_subnet_create POST /v1/vpc-subnets vpc_subnet_delete DELETE /v1/vpc-subnets/{subnet} vpc_subnet_list GET /v1/vpc-subnets diff --git a/nexus/tests/output/unexpected-authz-endpoints.txt b/nexus/tests/output/unexpected-authz-endpoints.txt index b034ac3869..1cd87a75e5 100644 --- a/nexus/tests/output/unexpected-authz-endpoints.txt +++ b/nexus/tests/output/unexpected-authz-endpoints.txt @@ -1,4 +1,14 @@ API endpoints tested by unauthorized.rs but not found in the OpenAPI spec: +GET "/v1/vpc-routers?project=demo-project&vpc=demo-vpc" +POST "/v1/vpc-routers?project=demo-project&vpc=demo-vpc" +GET "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" +PUT "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" +DELETE "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" +GET "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" +POST "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" +GET "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" +PUT "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" +DELETE "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" POST "/v1/system/update/refresh" GET "/v1/system/update/version" GET "/v1/system/update/components" diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 5722b065cf..9cb94a8484 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,10 +9,7 @@ anyhow.workspace = true chrono.workspace = true base64.workspace = true futures.workspace = true -newtype_derive.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true diff --git a/openapi/bootstrap-agent.json 
b/openapi/bootstrap-agent.json index 6dcf756737..2c7ffbc337 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -160,18 +160,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "Baseboard": { "description": "Describes properties that should uniquely identify a Gimlet.", @@ -277,6 +265,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" @@ -942,6 +965,18 @@ "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", "type": "string" } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/dns-server.json b/openapi/dns-server.json index 7ffd21eb24..41b351d4c1 100644 --- a/openapi/dns-server.json +++ b/openapi/dns-server.json @@ -54,18 +54,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "DnsConfig": { "type": "object", @@ -251,6 +239,18 @@ "weight" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/gateway.json b/openapi/gateway.json index 97cb7994aa..9eacbe122d 100644 --- a/openapi/gateway.json +++ b/openapi/gateway.json @@ -1393,18 +1393,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "Duration": { "type": "object", @@ -3109,6 +3097,18 @@ "power_reset" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/installinator-artifactd.json b/openapi/installinator-artifactd.json index 3132af6ff6..136e60a8c4 100644 --- a/openapi/installinator-artifactd.json +++ b/openapi/installinator-artifactd.json @@ -95,18 +95,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, 
"schemas": { "Duration": { "type": "object", @@ -2323,6 +2311,18 @@ "slots_written" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 888ee03c89..fcb285d9eb 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -730,18 +730,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "ActivationReason": { "description": "Describes why a background task was activated\n\nThis is only used for debugging. This is deliberately not made available to the background task itself. See \"Design notes\" in the module-level documentation for details.", @@ -854,6 +842,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" @@ -5479,6 +5502,18 @@ } ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/nexus.json b/openapi/nexus.json index f1bfa4351f..74162a9b2b 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -6582,14 +6582,13 @@ } } }, - "/v1/vpc-router-routes": { + "/v1/vpc-subnets": { "get": { "tags": [ "vpcs" ], - "summary": "List routes", - "description": "List the routes associated with a router in a particular VPC.", - "operationId": "vpc_router_route_list", + "summary": "List subnets", + "operationId": "vpc_subnet_list", "parameters": [ { "in": "query", @@ -6619,14 +6618,6 @@ "$ref": "#/components/schemas/NameOrId" } }, - { - "in": "query", - "name": "router", - "description": "Name or ID of the router", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "sort_by", @@ -6637,7 +6628,7 @@ { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -6649,7 +6640,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRouteResultsPage" + "$ref": "#/components/schemas/VpcSubnetResultsPage" } } } @@ -6663,7 +6654,7 @@ }, "x-dropshot-pagination": { 
"required": [ - "router" + "vpc" ] } }, @@ -6671,8 +6662,8 @@ "tags": [ "vpcs" ], - "summary": "Create a router", - "operationId": "vpc_router_route_create", + "summary": "Create a subnet", + "operationId": "vpc_subnet_create", "parameters": [ { "in": "query", @@ -6682,19 +6673,11 @@ "$ref": "#/components/schemas/NameOrId" } }, - { - "in": "query", - "name": "router", - "description": "Name or ID of the router", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "description": "Name or ID of the VPC", + "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -6704,7 +6687,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRouteCreate" + "$ref": "#/components/schemas/VpcSubnetCreate" } } }, @@ -6716,7 +6699,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRoute" + "$ref": "#/components/schemas/VpcSubnet" } } } @@ -6730,18 +6713,18 @@ } } }, - "/v1/vpc-router-routes/{route}": { + "/v1/vpc-subnets/{subnet}": { "get": { "tags": [ "vpcs" ], - "summary": "Fetch a route", - "operationId": "vpc_router_route_view", + "summary": "Fetch a subnet", + "operationId": "vpc_subnet_view", "parameters": [ { "in": "path", - "name": "route", - "description": "Name or ID of the route", + "name": "subnet", + "description": "Name or ID of the subnet", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -6755,19 +6738,10 @@ "$ref": "#/components/schemas/NameOrId" } }, - { - "in": "query", - "name": "router", - "description": "Name or ID of the router", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "description": "Name or ID of the VPC", "schema": { 
"$ref": "#/components/schemas/NameOrId" } @@ -6779,7 +6753,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRoute" + "$ref": "#/components/schemas/VpcSubnet" } } } @@ -6796,13 +6770,13 @@ "tags": [ "vpcs" ], - "summary": "Update a route", - "operationId": "vpc_router_route_update", + "summary": "Update a subnet", + "operationId": "vpc_subnet_update", "parameters": [ { "in": "path", - "name": "route", - "description": "Name or ID of the route", + "name": "subnet", + "description": "Name or ID of the subnet", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -6816,18 +6790,10 @@ "$ref": "#/components/schemas/NameOrId" } }, - { - "in": "query", - "name": "router", - "description": "Name or ID of the router", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -6837,7 +6803,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRouteUpdate" + "$ref": "#/components/schemas/VpcSubnetUpdate" } } }, @@ -6849,7 +6815,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RouterRoute" + "$ref": "#/components/schemas/VpcSubnet" } } } @@ -6866,13 +6832,13 @@ "tags": [ "vpcs" ], - "summary": "Delete a route", - "operationId": "vpc_router_route_delete", + "summary": "Delete a subnet", + "operationId": "vpc_subnet_delete", "parameters": [ { "in": "path", - "name": "route", - "description": "Name or ID of the route", + "name": "subnet", + "description": "Name or ID of the subnet", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -6886,18 +6852,10 @@ "$ref": "#/components/schemas/NameOrId" } }, - { - "in": "query", - "name": "router", - "description": "Name or ID of the router", - "schema": { - 
"$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -6916,14 +6874,23 @@ } } }, - "/v1/vpc-routers": { + "/v1/vpc-subnets/{subnet}/network-interfaces": { "get": { "tags": [ "vpcs" ], - "summary": "List routers", - "operationId": "vpc_router_list", + "summary": "List network interfaces", + "operationId": "vpc_subnet_list_network_interfaces", "parameters": [ + { + "in": "path", + "name": "subnet", + "description": "Name or ID of the subnet", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, { "in": "query", "name": "limit", @@ -6974,7 +6941,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouterResultsPage" + "$ref": "#/components/schemas/InstanceNetworkInterfaceResultsPage" } } } @@ -6987,30 +6954,89 @@ } }, "x-dropshot-pagination": { - "required": [ - "vpc" - ] + "required": [] } - }, - "post": { + } + }, + "/v1/vpcs": { + "get": { "tags": [ "vpcs" ], - "summary": "Create a VPC router", - "operationId": "vpc_router_create", + "summary": "List VPCs", + "operationId": "vpc_list", "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, { "in": "query", "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "description": "Name or ID of the project", "schema": { "$ref": "#/components/schemas/NameOrId" } }, { "in": "query", - "name": "vpc", - "description": "Name or 
ID of the VPC", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "project" + ] + } + }, + "post": { + "tags": [ + "vpcs" + ], + "summary": "Create a VPC", + "operationId": "vpc_create", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -7021,7 +7047,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouterCreate" + "$ref": "#/components/schemas/VpcCreate" } } }, @@ -7033,7 +7059,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouter" + "$ref": "#/components/schemas/Vpc" } } } @@ -7047,18 +7073,18 @@ } } }, - "/v1/vpc-routers/{router}": { + "/v1/vpcs/{vpc}": { "get": { "tags": [ "vpcs" ], - "summary": "Fetch a router", - "operationId": "vpc_router_view", + "summary": "Fetch a VPC", + "operationId": "vpc_view", "parameters": [ { "in": "path", - "name": "router", - "description": "Name or ID of the router", + "name": "vpc", + "description": "Name or ID of the VPC", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -7067,15 +7093,7 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the project", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -7087,7 +7105,7 @@ "content": { 
"application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouter" + "$ref": "#/components/schemas/Vpc" } } } @@ -7104,13 +7122,13 @@ "tags": [ "vpcs" ], - "summary": "Update a router", - "operationId": "vpc_router_update", + "summary": "Update a VPC", + "operationId": "vpc_update", "parameters": [ { "in": "path", - "name": "router", - "description": "Name or ID of the router", + "name": "vpc", + "description": "Name or ID of the VPC", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -7119,15 +7137,7 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the project", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -7137,7 +7147,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouterUpdate" + "$ref": "#/components/schemas/VpcUpdate" } } }, @@ -7149,7 +7159,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcRouter" + "$ref": "#/components/schemas/Vpc" } } } @@ -7166,13 +7176,13 @@ "tags": [ "vpcs" ], - "summary": "Delete a router", - "operationId": "vpc_router_delete", + "summary": "Delete a VPC", + "operationId": "vpc_delete", "parameters": [ { "in": "path", - "name": "router", - "description": "Name or ID of the router", + "name": "vpc", + "description": "Name or ID of the VPC", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -7181,15 +7191,7 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the project", "schema": { 
"$ref": "#/components/schemas/NameOrId" } @@ -7207,661 +7209,21 @@ } } } - }, - "/v1/vpc-subnets": { - "get": { - "tags": [ - "vpcs" - ], - "summary": "List subnets", - "operationId": "vpc_subnet_list", - "parameters": [ - { - "in": "query", - "name": "limit", - "description": "Maximum number of items returned by a single call", - "schema": { - "nullable": true, - "type": "integer", - "format": "uint32", - "minimum": 1 - } - }, - { - "in": "query", - "name": "page_token", - "description": "Token returned by previous call to retrieve the subsequent page", - "schema": { - "nullable": true, - "type": "string" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "sort_by", - "schema": { - "$ref": "#/components/schemas/NameOrIdSortMode" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnetResultsPage" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - }, - "x-dropshot-pagination": { - "required": [ - "vpc" - ] - } - }, - "post": { - "tags": [ - "vpcs" - ], - "summary": "Create a subnet", - "operationId": "vpc_subnet_create", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - 
"application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnetCreate" - } - } - }, - "required": true - }, - "responses": { - "201": { - "description": "successful creation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnet" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/v1/vpc-subnets/{subnet}": { - "get": { - "tags": [ - "vpcs" - ], - "summary": "Fetch a subnet", - "operationId": "vpc_subnet_view", - "parameters": [ - { - "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnet" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "put": { - "tags": [ - "vpcs" - ], - "summary": "Update a subnet", - "operationId": "vpc_subnet_update", - "parameters": [ - { - "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "schema": { - 
"$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnetUpdate" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcSubnet" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "delete": { - "tags": [ - "vpcs" - ], - "summary": "Delete a subnet", - "operationId": "vpc_subnet_delete", - "parameters": [ - { - "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "204": { - "description": "successful deletion" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/v1/vpc-subnets/{subnet}/network-interfaces": { - "get": { - "tags": [ - "vpcs" - ], - "summary": "List network interfaces", - "operationId": "vpc_subnet_list_network_interfaces", - "parameters": [ - { - "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "limit", - "description": "Maximum number of items returned by a single call", - "schema": { - "nullable": true, - "type": "integer", - "format": "uint32", - "minimum": 1 - } - }, - { - "in": "query", - "name": "page_token", - 
"description": "Token returned by previous call to retrieve the subsequent page", - "schema": { - "nullable": true, - "type": "string" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "sort_by", - "schema": { - "$ref": "#/components/schemas/NameOrIdSortMode" - } - }, - { - "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceNetworkInterfaceResultsPage" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - }, - "x-dropshot-pagination": { - "required": [] - } - } - }, - "/v1/vpcs": { - "get": { - "tags": [ - "vpcs" - ], - "summary": "List VPCs", - "operationId": "vpc_list", - "parameters": [ - { - "in": "query", - "name": "limit", - "description": "Maximum number of items returned by a single call", - "schema": { - "nullable": true, - "type": "integer", - "format": "uint32", - "minimum": 1 - } - }, - { - "in": "query", - "name": "page_token", - "description": "Token returned by previous call to retrieve the subsequent page", - "schema": { - "nullable": true, - "type": "string" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "sort_by", - "schema": { - "$ref": "#/components/schemas/NameOrIdSortMode" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcResultsPage" - } - } - } - }, - "4XX": { - 
"$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - }, - "x-dropshot-pagination": { - "required": [ - "project" - ] - } - }, - "post": { - "tags": [ - "vpcs" - ], - "summary": "Create a VPC", - "operationId": "vpc_create", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcCreate" - } - } - }, - "required": true - }, - "responses": { - "201": { - "description": "successful creation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Vpc" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/v1/vpcs/{vpc}": { - "get": { - "tags": [ - "vpcs" - ], - "summary": "Fetch a VPC", - "operationId": "vpc_view", - "parameters": [ - { - "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Vpc" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "put": { - "tags": [ - "vpcs" - ], - "summary": "Update a VPC", - "operationId": "vpc_update", - "parameters": [ - { - "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - 
"description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcUpdate" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Vpc" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "delete": { - "tags": [ - "vpcs" - ], - "summary": "Delete a VPC", - "operationId": "vpc_delete", - "parameters": [ - { - "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "responses": { - "204": { - "description": "successful deletion" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - } - }, - "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, - "schemas": { - "Address": { - "description": "An address tied to an address lot.", - "type": "object", - "properties": { - "address": { - "description": "The address and prefix length of this address.", - "allOf": [ - { - "$ref": "#/components/schemas/IpNet" - } - ] + } + }, + "components": { + "schemas": { + "Address": { + "description": "An address tied to an address lot.", + "type": "object", + "properties": { + "address": { + "description": "The address and prefix length of this address.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] }, "address_lot": { "description": "The 
address lot this address is drawn from.", @@ -12935,181 +12297,34 @@ } ] }, - "time_created": { - "description": "timestamp when this resource was created", - "type": "string", - "format": "date-time" - }, - "time_modified": { - "description": "timestamp when this resource was last modified", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "description", - "id", - "name", - "time_created", - "time_modified" - ] - }, - "ProjectCreate": { - "description": "Create-time parameters for a `Project`", - "type": "object", - "properties": { - "description": { - "type": "string" - }, - "name": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "description", - "name" - ] - }, - "ProjectResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/Project" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" - } - }, - "required": [ - "items" - ] - }, - "ProjectRole": { - "type": "string", - "enum": [ - "admin", - "collaborator", - "viewer" - ] - }, - "ProjectRolePolicy": { - "description": "Policy for a particular resource\n\nNote that the Policy only describes access granted explicitly for this resource. The policies of parent resources can also cause a user to have access to this resource.", - "type": "object", - "properties": { - "role_assignments": { - "description": "Roles directly assigned on this resource", - "type": "array", - "items": { - "$ref": "#/components/schemas/ProjectRoleRoleAssignment" - } - } - }, - "required": [ - "role_assignments" - ] - }, - "ProjectRoleRoleAssignment": { - "description": "Describes the assignment of a particular role on a particular resource to a particular identity (user, group, etc.)\n\nThe resource is not part of this structure. 
Rather, `RoleAssignment`s are put into a `Policy` and that Policy is applied to a particular resource.", - "type": "object", - "properties": { - "identity_id": { - "type": "string", - "format": "uuid" - }, - "identity_type": { - "$ref": "#/components/schemas/IdentityType" - }, - "role_name": { - "$ref": "#/components/schemas/ProjectRole" - } - }, - "required": [ - "identity_id", - "identity_type", - "role_name" - ] - }, - "ProjectUpdate": { - "description": "Updateable properties of a `Project`", - "type": "object", - "properties": { - "description": { - "nullable": true, - "type": "string" - }, - "name": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/Name" - } - ] - } - } - }, - "Rack": { - "description": "View of an Rack", - "type": "object", - "properties": { - "id": { - "description": "unique, immutable, system-controlled identifier for each resource", - "type": "string", - "format": "uuid" - }, - "time_created": { - "description": "timestamp when this resource was created", - "type": "string", - "format": "date-time" - }, - "time_modified": { - "description": "timestamp when this resource was last modified", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "id", - "time_created", - "time_modified" - ] - }, - "RackResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/Rack" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" } }, "required": [ - "items" + "description", + "id", + "name", + 
"time_created", + "time_modified" ] }, - "Role": { - "description": "View of a Role", + "ProjectCreate": { + "description": "Create-time parameters for a `Project`", "type": "object", "properties": { "description": { "type": "string" }, "name": { - "$ref": "#/components/schemas/RoleName" + "$ref": "#/components/schemas/Name" } }, "required": [ @@ -13117,14 +12332,7 @@ "name" ] }, - "RoleName": { - "title": "A name for a built-in role", - "description": "Role names consist of two string components separated by dot (\".\").", - "type": "string", - "pattern": "[a-z-]+\\.[a-z-]+", - "maxLength": 63 - }, - "RoleResultsPage": { + "ProjectResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -13132,7 +12340,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/Role" + "$ref": "#/components/schemas/Project" } }, "next_page": { @@ -13145,269 +12353,77 @@ "items" ] }, - "Route": { - "description": "A route to a destination network through a gateway address.", - "type": "object", - "properties": { - "dst": { - "description": "The route destination.", - "allOf": [ - { - "$ref": "#/components/schemas/IpNet" - } - ] - }, - "gw": { - "description": "The route gateway.", - "type": "string", - "format": "ip" - }, - "vid": { - "nullable": true, - "description": "VLAN id the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 - } - }, - "required": [ - "dst", - "gw" + "ProjectRole": { + "type": "string", + "enum": [ + "admin", + "collaborator", + "viewer" ] }, - "RouteConfig": { - "description": "Route configuration data associated with a switch port configuration.", + "ProjectRolePolicy": { + "description": "Policy for a particular resource\n\nNote that the Policy only describes access granted explicitly for this resource. 
The policies of parent resources can also cause a user to have access to this resource.", "type": "object", "properties": { - "routes": { - "description": "The set of routes assigned to a switch port.", + "role_assignments": { + "description": "Roles directly assigned on this resource", "type": "array", "items": { - "$ref": "#/components/schemas/Route" + "$ref": "#/components/schemas/ProjectRoleRoleAssignment" } } }, "required": [ - "routes" - ] - }, - "RouteDestination": { - "description": "A `RouteDestination` is used to match traffic with a routing rule, on the destination of that traffic.\n\nWhen traffic is to be sent to a destination that is within a given `RouteDestination`, the corresponding `RouterRoute` applies, and traffic will be forward to the `RouteTarget` for that rule.", - "oneOf": [ - { - "description": "Route applies to traffic destined for a specific IP address", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ip" - ] - }, - "value": { - "type": "string", - "format": "ip" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Route applies to traffic destined for a specific IP subnet", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ip_net" - ] - }, - "value": { - "$ref": "#/components/schemas/IpNet" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Route applies to traffic destined for the given VPC.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "vpc" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Route applies to traffic", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "subnet" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] - } + "role_assignments" ] }, - "RouteTarget": { - "description": "A 
`RouteTarget` describes the possible locations that traffic matching a route destination can be sent.", - "oneOf": [ - { - "description": "Forward traffic to a particular IP address.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ip" - ] - }, - "value": { - "type": "string", - "format": "ip" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Forward traffic to a VPC", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "vpc" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Forward traffic to a VPC Subnet", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "subnet" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] + "ProjectRoleRoleAssignment": { + "description": "Describes the assignment of a particular role on a particular resource to a particular identity (user, group, etc.)\n\nThe resource is not part of this structure. 
Rather, `RoleAssignment`s are put into a `Policy` and that Policy is applied to a particular resource.", + "type": "object", + "properties": { + "identity_id": { + "type": "string", + "format": "uuid" }, - { - "description": "Forward traffic to a specific instance", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "instance" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] + "identity_type": { + "$ref": "#/components/schemas/IdentityType" }, - { - "description": "Forward traffic to an internet gateway", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internet_gateway" - ] - }, - "value": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "type", - "value" - ] + "role_name": { + "$ref": "#/components/schemas/ProjectRole" } + }, + "required": [ + "identity_id", + "identity_type", + "role_name" ] }, - "RouterRoute": { - "description": "A route defines a rule that governs where traffic should be sent based on its destination.", + "ProjectUpdate": { + "description": "Updateable properties of a `Project`", "type": "object", "properties": { "description": { - "description": "human-readable free-form text about a resource", + "nullable": true, "type": "string" }, - "destination": { - "$ref": "#/components/schemas/RouteDestination" - }, - "id": { - "description": "unique, immutable, system-controlled identifier for each resource", - "type": "string", - "format": "uuid" - }, - "kind": { - "description": "Describes the kind of router. Set at creation. 
`read-only`", - "allOf": [ - { - "$ref": "#/components/schemas/RouterRouteKind" - } - ] - }, "name": { - "description": "unique, mutable, user-controlled identifier for each resource", + "nullable": true, "allOf": [ { "$ref": "#/components/schemas/Name" } ] - }, - "target": { - "$ref": "#/components/schemas/RouteTarget" + } + } + }, + "Rack": { + "description": "View of an Rack", + "type": "object", + "properties": { + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" }, "time_created": { "description": "timestamp when this resource was created", @@ -13418,83 +12434,59 @@ "description": "timestamp when this resource was last modified", "type": "string", "format": "date-time" - }, - "vpc_router_id": { - "description": "The ID of the VPC Router to which the route belongs", - "type": "string", - "format": "uuid" } }, "required": [ - "description", - "destination", "id", - "kind", - "name", - "target", "time_created", - "time_modified", - "vpc_router_id" + "time_modified" + ] + }, + "RackResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/Rack" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" ] }, - "RouterRouteCreate": { - "description": "Create-time parameters for a `RouterRoute`", + "Role": { + "description": "View of a Role", "type": "object", "properties": { "description": { "type": "string" }, - "destination": { - "$ref": "#/components/schemas/RouteDestination" - }, "name": { - "$ref": "#/components/schemas/Name" - }, - "target": { - "$ref": "#/components/schemas/RouteTarget" + "$ref": "#/components/schemas/RoleName" } }, "required": [ "description", - "destination", - "name", - 
"target" + "name" ] }, - "RouterRouteKind": { - "description": "The kind of a `RouterRoute`\n\nThe kind determines certain attributes such as if the route is modifiable and describes how or where the route was created.", - "oneOf": [ - { - "description": "Determines the default destination of traffic, such as whether it goes to the internet or not.\n\n`Destination: An Internet Gateway` `Modifiable: true`", - "type": "string", - "enum": [ - "default" - ] - }, - { - "description": "Automatically added for each VPC Subnet in the VPC\n\n`Destination: A VPC Subnet` `Modifiable: false`", - "type": "string", - "enum": [ - "vpc_subnet" - ] - }, - { - "description": "Automatically added when VPC peering is established\n\n`Destination: A different VPC` `Modifiable: false`", - "type": "string", - "enum": [ - "vpc_peering" - ] - }, - { - "description": "Created by a user; see `RouteTarget`\n\n`Destination: User defined` `Modifiable: true`", - "type": "string", - "enum": [ - "custom" - ] - } - ] + "RoleName": { + "title": "A name for a built-in role", + "description": "Role names consist of two string components separated by dot (\".\").", + "type": "string", + "pattern": "[a-z-]+\\.[a-z-]+", + "maxLength": 63 }, - "RouterRouteResultsPage": { + "RoleResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -13502,7 +12494,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/RouterRoute" + "$ref": "#/components/schemas/Role" } }, "next_page": { @@ -13515,32 +12507,50 @@ "items" ] }, - "RouterRouteUpdate": { - "description": "Updateable properties of a `RouterRoute`", + "Route": { + "description": "A route to a destination network through a gateway address.", "type": "object", "properties": { - "description": { - "nullable": true, - "type": "string" - }, - "destination": { - "$ref": "#/components/schemas/RouteDestination" - }, - "name": { - "nullable": true, + "dst": { + 
"description": "The route destination.", "allOf": [ { - "$ref": "#/components/schemas/Name" + "$ref": "#/components/schemas/IpNet" } ] }, - "target": { - "$ref": "#/components/schemas/RouteTarget" + "gw": { + "description": "The route gateway.", + "type": "string", + "format": "ip" + }, + "vid": { + "nullable": true, + "description": "VLAN id the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "dst", + "gw" + ] + }, + "RouteConfig": { + "description": "Route configuration data associated with a switch port configuration.", + "type": "object", + "properties": { + "routes": { + "description": "The set of routes assigned to a switch port.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Route" + } } }, "required": [ - "destination", - "target" + "routes" ] }, "SamlIdentityProvider": { @@ -15706,118 +14716,6 @@ "items" ] }, - "VpcRouter": { - "description": "A VPC router defines a series of rules that indicate where traffic should be sent depending on its destination.", - "type": "object", - "properties": { - "description": { - "description": "human-readable free-form text about a resource", - "type": "string" - }, - "id": { - "description": "unique, immutable, system-controlled identifier for each resource", - "type": "string", - "format": "uuid" - }, - "kind": { - "$ref": "#/components/schemas/VpcRouterKind" - }, - "name": { - "description": "unique, mutable, user-controlled identifier for each resource", - "allOf": [ - { - "$ref": "#/components/schemas/Name" - } - ] - }, - "time_created": { - "description": "timestamp when this resource was created", - "type": "string", - "format": "date-time" - }, - "time_modified": { - "description": "timestamp when this resource was last modified", - "type": "string", - "format": "date-time" - }, - "vpc_id": { - "description": "The VPC to which the router belongs.", - "type": "string", - "format": "uuid" - } - }, - "required": [ - "description", - 
"id", - "kind", - "name", - "time_created", - "time_modified", - "vpc_id" - ] - }, - "VpcRouterCreate": { - "description": "Create-time parameters for a `VpcRouter`", - "type": "object", - "properties": { - "description": { - "type": "string" - }, - "name": { - "$ref": "#/components/schemas/Name" - } - }, - "required": [ - "description", - "name" - ] - }, - "VpcRouterKind": { - "type": "string", - "enum": [ - "system", - "custom" - ] - }, - "VpcRouterResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/VpcRouter" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" - } - }, - "required": [ - "items" - ] - }, - "VpcRouterUpdate": { - "description": "Updateable properties of a `VpcRouter`", - "type": "object", - "properties": { - "description": { - "nullable": true, - "type": "string" - }, - "name": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/Name" - } - ] - } - } - }, "VpcSubnet": { "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionall an IPv6 subnetwork.", "type": "object", @@ -16058,6 +14956,18 @@ } ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } }, "tags": [ diff --git a/openapi/oximeter.json b/openapi/oximeter.json index ebc7957c2e..529d20e921 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -134,18 +134,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "CollectorInfo": { "type": 
"object", @@ -244,6 +232,18 @@ "items" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index febf431793..ed202ddbdb 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -991,18 +991,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "AddSledRequest": { "description": "A request to Add a given sled after rack initialization has occurred", @@ -1124,6 +1112,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" @@ -6445,6 +6468,18 @@ "type": "string", "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/openapi/wicketd.json b/openapi/wicketd.json index e0b37f1ba2..60ad9a42df 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -677,18 +677,6 @@ } }, "components": { - "responses": { - "Error": { - "description": "Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - }, "schemas": { "AbortUpdateOptions": { "type": "object", @@ -861,6 +849,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" @@ -4671,6 +4694,18 @@ "power_reset" ] } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } } } } \ No newline at end of file diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 470d9db312..92c91ca101 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -7,6 +7,8 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +camino.workspace = true +chrono.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -25,6 +27,7 @@ slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true slog-term.workspace = true +strum.workspace = true thiserror.workspace = true tokio.workspace = true toml.workspace = true @@ -33,6 +36,7 @@ omicron-workspace-hack.workspace = true [dev-dependencies] expectorate.workspace = true +hyper.workspace = true omicron-test-utils.workspace = true openapi-lint.workspace = true openapiv3.workspace = true diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs new file mode 100644 index 0000000000..23ff32ed66 --- /dev/null +++ b/oximeter/collector/src/agent.rs @@ -0,0 +1,889 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +//! The oximeter agent handles collection tasks for each producer. + +// Copyright 2023 Oxide Computer Company + +use crate::self_stats; +use crate::DbConfig; +use crate::Error; +use crate::ProducerEndpoint; +use anyhow::anyhow; +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; +use omicron_common::address::CLICKHOUSE_PORT; +use oximeter::types::ProducerResults; +use oximeter::types::ProducerResultsItem; +use oximeter_db::Client; +use oximeter_db::DbWrite; +use slog::debug; +use slog::error; +use slog::info; +use slog::o; +use slog::trace; +use slog::warn; +use slog::Logger; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use std::ops::Bound; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::mpsc; +use tokio::sync::oneshot; +use tokio::sync::Mutex; +use tokio::task::JoinHandle; +use tokio::time::interval; +use uuid::Uuid; + +type CollectionToken = oneshot::Sender<()>; + +// Messages for controlling a collection task +#[derive(Debug)] +enum CollectionMessage { + // Explicit request that the task collect data from its producer + // + // Also sends a oneshot that is signalled once the task scrapes + // data from the Producer, and places it in the Clickhouse server. + Collect(CollectionToken), + // Request that the task update its interval and the socket address on which it collects data + // from its producer. + Update(ProducerEndpoint), + // Request that the task exit + Shutdown, + // Return the current statistics from a single task. 
+ #[cfg(test)] + Statistics { + reply_tx: oneshot::Sender, + }, +} + +async fn perform_collection( + log: &Logger, + self_target: &mut self_stats::CollectionTaskStats, + client: &reqwest::Client, + producer: &ProducerEndpoint, + outbox: &mpsc::Sender<(Option, ProducerResults)>, + token: Option, +) { + debug!(log, "collecting from producer"); + let res = client + .get(format!( + "http://{}{}", + producer.address, + producer.collection_route() + )) + .send() + .await; + match res { + Ok(res) => { + if res.status().is_success() { + match res.json::().await { + Ok(results) => { + debug!( + log, + "collected results from producer"; + "n_results" => results.len() + ); + self_target.collections.datum.increment(); + outbox.send((token, results)).await.unwrap(); + } + Err(e) => { + warn!( + log, + "failed to collect results from producer"; + "error" => ?e, + ); + self_target + .failures_for_reason( + self_stats::FailureReason::Deserialization, + ) + .datum + .increment() + } + } + } else { + warn!( + log, + "failed to receive metric results from producer"; + "status_code" => res.status().as_u16(), + ); + self_target + .failures_for_reason(self_stats::FailureReason::Other( + res.status(), + )) + .datum + .increment() + } + } + Err(e) => { + error!( + log, + "failed to send collection request to producer"; + "error" => ?e + ); + self_target + .failures_for_reason(self_stats::FailureReason::Unreachable) + .datum + .increment() + } + } +} + +// Background task used to collect metrics from one producer on an interval. +// +// This function is started by the `OximeterAgent`, when a producer is registered. The task loops +// endlessly, and collects metrics from the assigned producer on a timeout. The assigned agent can +// also send a `CollectionMessage`, for example to update the collection interval. This is not +// currently used, but will likely be exposed via control plane interfaces in the future. 
+async fn collection_task( + log: Logger, + collector: self_stats::OximeterCollector, + mut producer: ProducerEndpoint, + mut inbox: mpsc::Receiver, + outbox: mpsc::Sender<(Option, ProducerResults)>, +) { + let client = reqwest::Client::new(); + let mut collection_timer = interval(producer.interval); + collection_timer.tick().await; // completes immediately + debug!( + log, + "starting oximeter collection task"; + "interval" => ?producer.interval, + ); + + // Set up the collection of self statistics about this collection task. + let mut stats = self_stats::CollectionTaskStats::new(collector, &producer); + let mut self_collection_timer = interval(self_stats::COLLECTION_INTERVAL); + self_collection_timer.tick().await; + + loop { + tokio::select! { + message = inbox.recv() => { + match message { + None => { + debug!(log, "collection task inbox closed, shutting down"); + return; + } + Some(CollectionMessage::Shutdown) => { + debug!(log, "collection task received shutdown request"); + return; + }, + Some(CollectionMessage::Collect(token)) => { + debug!(log, "collection task received explicit request to collect"); + perform_collection(&log, &mut stats, &client, &producer, &outbox, Some(token)).await; + }, + Some(CollectionMessage::Update(new_info)) => { + producer = new_info; + debug!( + log, + "collection task received request to update its producer information"; + "interval" => ?producer.interval, + "address" => producer.address, + ); + collection_timer = interval(producer.interval); + collection_timer.tick().await; // completes immediately + } + #[cfg(test)] + Some(CollectionMessage::Statistics { reply_tx }) => { + debug!( + log, + "received request for current task statistics" + ); + reply_tx.send(stats.clone()).expect("failed to send statistics"); + } + } + } + _ = self_collection_timer.tick() => { + debug!(log, "reporting oximeter self-collection statistics"); + outbox.send((None, stats.sample())).await.unwrap(); + } + _ = collection_timer.tick() => { + 
perform_collection(&log, &mut stats, &client, &producer, &outbox, None).await; + } + } + } +} + +// Struct representing a task for collecting metric data from a single producer +#[derive(Debug)] +struct CollectionTask { + // Channel used to send messages from the agent to the actual task. The task owns the other + // side. + pub inbox: mpsc::Sender, + // Handle to the actual tokio task running the collection loop. + #[allow(dead_code)] + pub task: JoinHandle<()>, +} + +// A task run by `oximeter` in standalone mode, which simply prints results as +// they're received. +async fn results_printer( + log: Logger, + mut rx: mpsc::Receiver<(Option, ProducerResults)>, +) { + loop { + match rx.recv().await { + Some((_, results)) => { + for res in results.into_iter() { + match res { + ProducerResultsItem::Ok(samples) => { + for sample in samples.into_iter() { + info!( + log, + ""; + "sample" => ?sample, + ); + } + } + ProducerResultsItem::Err(e) => { + error!( + log, + "received error from a producer"; + "err" => ?e, + ); + } + } + } + } + None => { + debug!(log, "result queue closed, exiting"); + return; + } + } + } +} + +// Aggregation point for all results, from all collection tasks. +async fn results_sink( + log: Logger, + client: Client, + batch_size: usize, + batch_interval: Duration, + mut rx: mpsc::Receiver<(Option, ProducerResults)>, +) { + let mut timer = interval(batch_interval); + timer.tick().await; // completes immediately + let mut batch = Vec::with_capacity(batch_size); + loop { + let mut collection_token = None; + let insert = tokio::select! 
{ + _ = timer.tick() => { + if batch.is_empty() { + trace!(log, "batch interval expired, but no samples to insert"); + false + } else { + true + } + } + results = rx.recv() => { + match results { + Some((token, results)) => { + let flattened_results = { + let mut flattened = Vec::with_capacity(results.len()); + for inner_batch in results.into_iter() { + match inner_batch { + ProducerResultsItem::Ok(samples) => flattened.extend(samples.into_iter()), + ProducerResultsItem::Err(e) => { + debug!( + log, + "received error (not samples) from a producer: {}", + e.to_string() + ); + } + } + } + flattened + }; + batch.extend(flattened_results); + + collection_token = token; + if collection_token.is_some() { + true + } else { + batch.len() >= batch_size + } + } + None => { + warn!(log, "result queue closed, exiting"); + return; + } + } + } + }; + + if insert { + debug!(log, "inserting {} samples into database", batch.len()); + match client.insert_samples(&batch).await { + Ok(()) => trace!(log, "successfully inserted samples"), + Err(e) => { + warn!( + log, + "failed to insert some results into metric DB: {}", + e.to_string() + ); + } + } + // TODO-correctness The `insert_samples` call above may fail. The method itself needs + // better handling of partially-inserted results in that case, but we may need to retry + // or otherwise handle an error here as well. + // + // See https://github.com/oxidecomputer/omicron/issues/740 for a + // disucssion. + batch.clear(); + } + + if let Some(token) = collection_token { + let _ = token.send(()); + } + } +} + +/// The internal agent the oximeter server uses to collect metrics from producers. +#[derive(Debug)] +pub struct OximeterAgent { + /// The collector ID for this agent + pub id: Uuid, + log: Logger, + // Oximeter target used by this agent to produce metrics about itself. 
+ collection_target: self_stats::OximeterCollector, + // Handle to the TX-side of a channel for collecting results from the collection tasks + result_sender: mpsc::Sender<(Option, ProducerResults)>, + // The actual tokio tasks running the collection on a timer. + collection_tasks: + Arc>>, +} + +impl OximeterAgent { + /// Construct a new agent with the given ID and logger. + pub async fn with_id( + id: Uuid, + address: SocketAddrV6, + db_config: DbConfig, + resolver: &Resolver, + log: &Logger, + ) -> Result { + let (result_sender, result_receiver) = mpsc::channel(8); + let log = log.new(o!( + "component" => "oximeter-agent", + "collector_id" => id.to_string(), + )); + let insertion_log = log.new(o!("component" => "results-sink")); + + // Construct the ClickHouse client first, propagate an error if we can't reach the + // database. + let db_address = if let Some(address) = db_config.address { + address + } else { + SocketAddr::new( + resolver.lookup_ip(ServiceName::Clickhouse).await?, + CLICKHOUSE_PORT, + ) + }; + + // Determine the version of the database. + // + // There are three cases + // + // - The database exists and is at the expected version. Continue in + // this case. + // + // - The database exists and is at a lower-than-expected version. We + // fail back to the caller here, which will retry indefinitely until the + // DB has been updated. + // + // - The DB doesn't exist at all. This reports a version number of 0. We + // need to create the DB here, at the latest version. This is used in + // fresh installations and tests. + let client = Client::new(db_address, &log); + match client.check_db_is_at_expected_version().await { + Ok(_) => {} + Err(oximeter_db::Error::DatabaseVersionMismatch { + found: 0, + .. 
+ }) => { + debug!(log, "oximeter database does not exist, creating"); + let replicated = client.is_oximeter_cluster().await?; + client + .initialize_db_with_version( + replicated, + oximeter_db::OXIMETER_VERSION, + ) + .await?; + } + Err(e) => return Err(Error::from(e)), + } + + // Set up tracking of statistics about ourselves. + let collection_target = self_stats::OximeterCollector { + collector_id: id, + collector_ip: (*address.ip()).into(), + collector_port: address.port(), + }; + + // Spawn the task for aggregating and inserting all metrics + tokio::spawn(async move { + results_sink( + insertion_log, + client, + db_config.batch_size, + Duration::from_secs(db_config.batch_interval), + result_receiver, + ) + .await + }); + Ok(Self { + id, + log, + collection_target, + result_sender, + collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), + }) + } + + /// Construct a new standalone `oximeter` collector. + /// + /// In this mode, `oximeter` can be used to test the collection of metrics + /// from producers, without requiring all the normal machinery of the + /// control plane. The collector is run as usual, but additionally starts a + /// API server to stand-in for Nexus. The registrations of the producers and + /// collectors occurs through the normal code path, but uses this mock Nexus + /// instead of the real thing. + pub async fn new_standalone( + id: Uuid, + address: SocketAddrV6, + db_config: Option, + log: &Logger, + ) -> Result { + let (result_sender, result_receiver) = mpsc::channel(8); + let log = log.new(o!( + "component" => "oximeter-standalone", + "collector_id" => id.to_string(), + )); + + // If we have configuration for ClickHouse, we'll spawn the results + // sink task as usual. If not, we'll spawn a dummy task that simply + // prints the results as they're received. 
+ let insertion_log = log.new(o!("component" => "results-sink")); + if let Some(db_config) = db_config { + let Some(address) = db_config.address else { + return Err(Error::Standalone(anyhow!( + "Must provide explicit IP address in standalone mode" + ))); + }; + let client = Client::new(address, &log); + let replicated = client.is_oximeter_cluster().await?; + if !replicated { + client.init_single_node_db().await?; + } else { + client.init_replicated_db().await?; + } + + // Spawn the task for aggregating and inserting all metrics + tokio::spawn(async move { + results_sink( + insertion_log, + client, + db_config.batch_size, + Duration::from_secs(db_config.batch_interval), + result_receiver, + ) + .await + }); + } else { + tokio::spawn(results_printer(insertion_log, result_receiver)); + } + + // Set up tracking of statistics about ourselves. + let collection_target = self_stats::OximeterCollector { + collector_id: id, + collector_ip: (*address.ip()).into(), + collector_port: address.port(), + }; + Ok(Self { + id, + log, + collection_target, + result_sender, + collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), + }) + } + + /// Register a new producer with this oximeter instance. + pub async fn register_producer( + &self, + info: ProducerEndpoint, + ) -> Result<(), Error> { + let id = info.id; + match self.collection_tasks.lock().await.entry(id) { + Entry::Vacant(value) => { + debug!( + self.log, + "registered new metric producer"; + "producer_id" => id.to_string(), + "address" => info.address, + ); + + // Build channel to control the task and receive results. 
+ let (tx, rx) = mpsc::channel(4); + let q = self.result_sender.clone(); + let log = self.log.new(o!("component" => "collection-task", "producer_id" => id.to_string())); + let info_clone = info.clone(); + let target = self.collection_target; + let task = tokio::spawn(async move { + collection_task(log, target, info_clone, rx, q).await; + }); + value.insert((info, CollectionTask { inbox: tx, task })); + } + Entry::Occupied(mut value) => { + debug!( + self.log, + "received request to register existing metric \ + producer, updating collection information"; + "producer_id" => id.to_string(), + "interval" => ?info.interval, + "address" => info.address, + ); + value.get_mut().0 = info.clone(); + value + .get() + .1 + .inbox + .send(CollectionMessage::Update(info)) + .await + .unwrap(); + } + } + Ok(()) + } + + /// Forces a collection from all producers. + /// + /// Returns once all those values have been inserted into Clickhouse, + /// or an error occurs trying to perform the collection. + pub async fn force_collection(&self) { + let mut collection_oneshots = vec![]; + let collection_tasks = self.collection_tasks.lock().await; + for (_id, (_endpoint, task)) in collection_tasks.iter() { + let (tx, rx) = oneshot::channel(); + // Scrape from each producer, into oximeter... + task.inbox.send(CollectionMessage::Collect(tx)).await.unwrap(); + // ... and keep track of the token that indicates once the metric + // has made it into Clickhouse. + collection_oneshots.push(rx); + } + drop(collection_tasks); + + // Only return once all producers finish processing the token we + // provided. + // + // NOTE: This can either mean that the collection completed + // successfully, or an error occurred in the collection pathway. + futures::future::join_all(collection_oneshots).await; + } + + /// List existing producers. 
+ pub async fn list_producers( + &self, + start_id: Option, + limit: usize, + ) -> Vec { + let start = if let Some(id) = start_id { + Bound::Excluded(id) + } else { + Bound::Unbounded + }; + self.collection_tasks + .lock() + .await + .range((start, Bound::Unbounded)) + .take(limit) + .map(|(_id, (info, _t))| info.clone()) + .collect() + } + + /// Delete a producer by ID, stopping its collection task. + pub async fn delete_producer(&self, id: Uuid) -> Result<(), Error> { + let (_info, task) = self + .collection_tasks + .lock() + .await + .remove(&id) + .ok_or_else(|| Error::NoSuchProducer(id))?; + debug!( + self.log, + "removed collection task from set"; + "producer_id" => %id, + ); + match task.inbox.send(CollectionMessage::Shutdown).await { + Ok(_) => debug!( + self.log, + "shut down collection task"; + "producer_id" => %id, + ), + Err(e) => error!( + self.log, + "failed to shut down collection task"; + "producer_id" => %id, + "error" => ?e, + ), + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::CollectionMessage; + use super::OximeterAgent; + use super::ProducerEndpoint; + use crate::self_stats::FailureReason; + use hyper::service::make_service_fn; + use hyper::service::service_fn; + use hyper::Body; + use hyper::Request; + use hyper::Response; + use hyper::Server; + use hyper::StatusCode; + use omicron_test_utils::dev::test_setup_log; + use std::convert::Infallible; + use std::net::Ipv6Addr; + use std::net::SocketAddr; + use std::net::SocketAddrV6; + use std::time::Duration; + use tokio::sync::oneshot; + use tokio::time::Instant; + use uuid::Uuid; + + // Test that we count successful collections from a target correctly. + #[tokio::test] + async fn test_self_stat_collection_count() { + let logctx = test_setup_log("test_self_stat_collection_count"); + let log = &logctx.log; + + // Spawn an oximeter collector ... 
+ let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + None, + log, + ) + .await + .unwrap(); + + // And a dummy server that will always report empty statistics. There + // will be no actual data here, but the sample counter will increment. + let addr = + SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0)); + async fn handler( + _: Request, + ) -> Result, Infallible> { + Ok(Response::new(Body::from("[]"))) + } + let make_svc = make_service_fn(|_conn| async { + Ok::<_, Infallible>(service_fn(handler)) + }); + let server = Server::bind(&addr).serve(make_svc); + let address = server.local_addr(); + let _task = tokio::task::spawn(server); + + // Register the dummy producer. + let interval = Duration::from_secs(1); + let endpoint = ProducerEndpoint { + id: Uuid::new_v4(), + address, + base_route: String::from("/"), + interval, + }; + collector + .register_producer(endpoint) + .await + .expect("failed to register dummy producer"); + + // Step time until there has been exactly `N_COLLECTIONS` collections. + tokio::time::pause(); + let now = Instant::now(); + const N_COLLECTIONS: usize = 5; + let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; + while now.elapsed() < wait_for { + tokio::time::advance(interval / 10).await; + } + + // Request the statistics from the task itself. 
+ let (reply_tx, rx) = oneshot::channel(); + collector + .collection_tasks + .lock() + .await + .values() + .next() + .unwrap() + .1 + .inbox + .send(CollectionMessage::Statistics { reply_tx }) + .await + .expect("failed to request statistics from task"); + let stats = rx.await.expect("failed to receive statistics from task"); + assert_eq!(stats.collections.datum.value(), N_COLLECTIONS as u64); + assert!(stats.failed_collections.is_empty()); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_self_stat_unreachable_counter() { + let logctx = test_setup_log("test_self_stat_unreachable_counter"); + let log = &logctx.log; + + // Spawn an oximeter collector ... + let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + None, + log, + ) + .await + .unwrap(); + + // Register a bogus producer, which is equivalent to a producer that is + // unreachable. + let interval = Duration::from_secs(1); + let endpoint = ProducerEndpoint { + id: Uuid::new_v4(), + address: SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + base_route: String::from("/"), + interval, + }; + collector + .register_producer(endpoint) + .await + .expect("failed to register bogus producer"); + + // Step time until there has been exactly `N_COLLECTIONS` collections. + tokio::time::pause(); + let now = Instant::now(); + const N_COLLECTIONS: usize = 5; + let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; + while now.elapsed() < wait_for { + tokio::time::advance(interval / 10).await; + } + + // Request the statistics from the task itself. 
+ let (reply_tx, rx) = oneshot::channel(); + collector + .collection_tasks + .lock() + .await + .values() + .next() + .unwrap() + .1 + .inbox + .send(CollectionMessage::Statistics { reply_tx }) + .await + .expect("failed to request statistics from task"); + let stats = rx.await.expect("failed to receive statistics from task"); + assert_eq!(stats.collections.datum.value(), 0); + assert_eq!( + stats + .failed_collections + .get(&FailureReason::Unreachable) + .unwrap() + .datum + .value(), + N_COLLECTIONS as u64 + ); + assert_eq!(stats.failed_collections.len(), 1); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_self_stat_error_counter() { + let logctx = test_setup_log("test_self_stat_error_counter"); + let log = &logctx.log; + + // Spawn an oximeter collector ... + let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + None, + log, + ) + .await + .unwrap(); + + // And a dummy server that will always fail with a 500. + let addr = + SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0)); + async fn handler( + _: Request, + ) -> Result, Infallible> { + let mut res = Response::new(Body::from("im ded")); + *res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + Ok(res) + } + let make_svc = make_service_fn(|_conn| async { + Ok::<_, Infallible>(service_fn(handler)) + }); + let server = Server::bind(&addr).serve(make_svc); + let address = server.local_addr(); + let _task = tokio::task::spawn(server); + + // Register the rather flaky producer. + let interval = Duration::from_secs(1); + let endpoint = ProducerEndpoint { + id: Uuid::new_v4(), + address, + base_route: String::from("/"), + interval, + }; + collector + .register_producer(endpoint) + .await + .expect("failed to register flaky producer"); + + // Step time until there has been exactly `N_COLLECTIONS` collections. 
+ tokio::time::pause(); + let now = Instant::now(); + const N_COLLECTIONS: usize = 5; + let wait_for = interval * N_COLLECTIONS as u32 + interval / 2; + while now.elapsed() < wait_for { + tokio::time::advance(interval / 10).await; + } + + // Request the statistics from the task itself. + let (reply_tx, rx) = oneshot::channel(); + collector + .collection_tasks + .lock() + .await + .values() + .next() + .unwrap() + .1 + .inbox + .send(CollectionMessage::Statistics { reply_tx }) + .await + .expect("failed to request statistics from task"); + let stats = rx.await.expect("failed to receive statistics from task"); + assert_eq!(stats.collections.datum.value(), 0); + assert_eq!( + stats + .failed_collections + .get(&FailureReason::Other(StatusCode::INTERNAL_SERVER_ERROR)) + .unwrap() + .datum + .value(), + N_COLLECTIONS as u64 + ); + assert_eq!(stats.failed_collections.len(), 1); + logctx.cleanup_successful(); + } +} diff --git a/oximeter/collector/src/bin/clickhouse-schema-updater.rs b/oximeter/collector/src/bin/clickhouse-schema-updater.rs new file mode 100644 index 0000000000..20780c37e0 --- /dev/null +++ b/oximeter/collector/src/bin/clickhouse-schema-updater.rs @@ -0,0 +1,126 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! CLI tool to apply offline updates to ClickHouse schema. 
+ +// Copyright 2023 Oxide Computer Company + +use anyhow::anyhow; +use anyhow::Context; +use camino::Utf8PathBuf; +use clap::Parser; +use clap::Subcommand; +use omicron_common::address::CLICKHOUSE_PORT; +use oximeter_db::model::OXIMETER_VERSION; +use oximeter_db::Client; +use slog::Drain; +use slog::Level; +use slog::LevelFilter; +use slog::Logger; +use std::net::Ipv6Addr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; + +const DEFAULT_HOST: SocketAddr = SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + CLICKHOUSE_PORT, + 0, + 0, +)); + +fn parse_log_level(s: &str) -> anyhow::Result { + s.parse().map_err(|_| anyhow!("Invalid log level")) +} + +/// Tool to apply offline updates to ClickHouse schema. +#[derive(Clone, Debug, Parser)] +struct Args { + /// IP address and port at which to access ClickHouse. + #[arg(long, default_value_t = DEFAULT_HOST, env = "CLICKHOUSE_HOST")] + host: SocketAddr, + + /// Directory from which to read schema files for each version. + #[arg( + short = 's', + long, + default_value_t = Utf8PathBuf::from("/opt/oxide/oximeter/schema") + )] + schema_directory: Utf8PathBuf, + + /// The log level while running the command. + #[arg( + short, + long, + value_parser = parse_log_level, + default_value_t = Level::Warning + )] + log_level: Level, + + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Clone, Debug, Subcommand)] +enum Cmd { + /// List all schema in the directory available for an upgrade + #[clap(visible_alias = "ls")] + List, + /// Apply an upgrade to a specific version + #[clap(visible_aliases = ["up", "apply"])] + Upgrade { + /// The version to which to upgrade. 
+ #[arg(default_value_t = OXIMETER_VERSION)] + version: u64, + }, +} + +fn build_logger(level: Level) -> Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + let drain = LevelFilter::new(drain, level).fuse(); + Logger::root(drain, slog::o!("unit" => "clickhouse_schema_updater")) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = Args::parse(); + let log = build_logger(args.log_level); + let client = Client::new(args.host, &log); + let is_replicated = client.is_oximeter_cluster().await?; + match args.cmd { + Cmd::List => { + let latest = client + .read_latest_version() + .await + .context("Failed to read latest version")?; + let available_versions = Client::read_available_schema_versions( + &log, + is_replicated, + &args.schema_directory, + ) + .await?; + println!("Latest version: {latest}"); + println!("Available versions:"); + for ver in available_versions { + print!(" {ver}"); + if ver == latest { + print!(" (reported by database)"); + } + if ver == OXIMETER_VERSION { + print!(" (expected by oximeter)"); + } + println!(); + } + } + Cmd::Upgrade { version } => { + client + .ensure_schema(is_replicated, version, args.schema_directory) + .await + .context("Failed to upgrade schema")?; + println!("Upgrade to oximeter database version {version} complete"); + } + } + Ok(()) +} diff --git a/oximeter/collector/src/http_entrypoints.rs b/oximeter/collector/src/http_entrypoints.rs new file mode 100644 index 0000000000..493083a40d --- /dev/null +++ b/oximeter/collector/src/http_entrypoints.rs @@ -0,0 +1,133 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Oximeter collector server HTTP API + +// Copyright 2023 Oxide Computer Company + +use crate::OximeterAgent; +use dropshot::endpoint; +use dropshot::ApiDescription; +use dropshot::EmptyScanParams; +use dropshot::HttpError; +use dropshot::HttpResponseDeleted; +use dropshot::HttpResponseOk; +use dropshot::HttpResponseUpdatedNoContent; +use dropshot::PaginationParams; +use dropshot::Query; +use dropshot::RequestContext; +use dropshot::ResultsPage; +use dropshot::TypedBody; +use dropshot::WhichPage; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::sync::Arc; +use uuid::Uuid; + +// Build the HTTP API internal to the control plane +pub fn oximeter_api() -> ApiDescription> { + let mut api = ApiDescription::new(); + api.register(producers_post) + .expect("Could not register producers_post API handler"); + api.register(producers_list) + .expect("Could not register producers_list API handler"); + api.register(producer_delete) + .expect("Could not register producers_delete API handler"); + api.register(collector_info) + .expect("Could not register collector_info API handler"); + api +} + +// Handle a request from Nexus to register a new producer with this collector. +#[endpoint { + method = POST, + path = "/producers", +}] +async fn producers_post( + request_context: RequestContext>, + body: TypedBody, +) -> Result { + let agent = request_context.context(); + let producer_info = body.into_inner(); + agent + .register_producer(producer_info) + .await + .map_err(HttpError::from) + .map(|_| HttpResponseUpdatedNoContent()) +} + +// Parameters for paginating the list of producers. 
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +struct ProducerPage { + id: Uuid, +} + +// List all producers +#[endpoint { + method = GET, + path = "/producers", +}] +async fn producers_list( + request_context: RequestContext>, + query: Query>, +) -> Result>, HttpError> { + let agent = request_context.context(); + let pagination = query.into_inner(); + let limit = request_context.page_limit(&pagination)?.get() as usize; + let start = match &pagination.page { + WhichPage::First(..) => None, + WhichPage::Next(ProducerPage { id }) => Some(*id), + }; + let producers = agent.list_producers(start, limit).await; + ResultsPage::new( + producers, + &EmptyScanParams {}, + |info: &ProducerEndpoint, _| ProducerPage { id: info.id }, + ) + .map(HttpResponseOk) +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +struct ProducerIdPathParams { + producer_id: Uuid, +} + +// Delete a producer by ID. +#[endpoint { + method = DELETE, + path = "/producers/{producer_id}", +}] +async fn producer_delete( + request_context: RequestContext>, + path: dropshot::Path, +) -> Result { + let agent = request_context.context(); + let producer_id = path.into_inner().producer_id; + agent + .delete_producer(producer_id) + .await + .map_err(HttpError::from) + .map(|_| HttpResponseDeleted()) +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct CollectorInfo { + /// The collector's UUID. 
+ pub id: Uuid, +} + +// Return identifying information about this collector +#[endpoint { + method = GET, + path = "/info", +}] +async fn collector_info( + request_context: RequestContext>, +) -> Result, HttpError> { + let agent = request_context.context(); + let info = CollectorInfo { id: agent.id }; + Ok(HttpResponseOk(info)) +} diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index b7a14cec45..f3c793d5c2 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -6,65 +6,41 @@ // Copyright 2023 Oxide Computer Company -use anyhow::anyhow; -use anyhow::Context; -use dropshot::endpoint; -use dropshot::ApiDescription; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; -use dropshot::EmptyScanParams; use dropshot::HttpError; -use dropshot::HttpResponseDeleted; -use dropshot::HttpResponseOk; -use dropshot::HttpResponseUpdatedNoContent; use dropshot::HttpServer; use dropshot::HttpServerStarter; -use dropshot::PaginationParams; -use dropshot::Query; -use dropshot::RequestContext; -use dropshot::ResultsPage; -use dropshot::TypedBody; -use dropshot::WhichPage; use internal_dns::resolver::ResolveError; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; -use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use omicron_common::FileKv; -use oximeter::types::ProducerResults; -use oximeter::types::ProducerResultsItem; -use oximeter_db::model::OXIMETER_VERSION; -use oximeter_db::Client; -use oximeter_db::DbWrite; use serde::Deserialize; use serde::Serialize; use slog::debug; use slog::error; use slog::info; use slog::o; -use slog::trace; use slog::warn; use slog::Drain; use slog::Logger; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; use std::net::SocketAddr; use std::net::SocketAddrV6; -use std::ops::Bound; use std::path::Path; use std::sync::Arc; 
-use std::time::Duration; use thiserror::Error; -use tokio::sync::mpsc; -use tokio::sync::oneshot; -use tokio::sync::Mutex; -use tokio::task::JoinHandle; -use tokio::time::interval; use uuid::Uuid; +mod agent; +mod http_entrypoints; +mod self_stats; mod standalone; + +pub use agent::OximeterAgent; +pub use http_entrypoints::oximeter_api; pub use standalone::standalone_nexus_api; pub use standalone::Server as StandaloneNexus; @@ -102,289 +78,6 @@ impl From for HttpError { } } -/// A simple representation of a producer, used mostly for standalone mode. -/// -/// These are usually specified as a structured string, formatted like: -/// `"@
"`. -#[derive(Copy, Clone, Debug)] -pub struct ProducerInfo { - /// The ID of the producer. - pub id: Uuid, - /// The address on which the producer listens. - pub address: SocketAddr, -} - -impl std::str::FromStr for ProducerInfo { - type Err = anyhow::Error; - fn from_str(s: &str) -> Result { - let (id, addr) = s - .split_once('@') - .context("Producer info should written as @
")?; - let id = id.parse().context("Invalid UUID")?; - let address = addr.parse().context("Invalid address")?; - Ok(Self { id, address }) - } -} - -type CollectionToken = oneshot::Sender<()>; - -// Messages for controlling a collection task -#[derive(Debug)] -enum CollectionMessage { - // Explicit request that the task collect data from its producer - // - // Also sends a oneshot that is signalled once the task scrapes - // data from the Producer, and places it in the Clickhouse server. - Collect(CollectionToken), - // Request that the task update its interval and the socket address on which it collects data - // from its producer. - Update(ProducerEndpoint), - // Request that the task exit - Shutdown, -} - -async fn perform_collection( - log: &Logger, - client: &reqwest::Client, - producer: &ProducerEndpoint, - outbox: &mpsc::Sender<(Option, ProducerResults)>, - token: Option, -) { - debug!(log, "collecting from producer"); - let res = client - .get(format!( - "http://{}{}", - producer.address, - producer.collection_route() - )) - .send() - .await; - match res { - Ok(res) => { - if res.status().is_success() { - match res.json::().await { - Ok(results) => { - debug!( - log, - "collected {} total results", - results.len(); - ); - outbox.send((token, results)).await.unwrap(); - } - Err(e) => { - warn!( - log, - "failed to collect results from producer: {}", - e.to_string(); - ); - } - } - } else { - warn!( - log, - "failed to receive metric results from producer"; - "status_code" => res.status().as_u16(), - ); - } - } - Err(e) => { - warn!( - log, - "failed to send collection request to producer: {}", - e.to_string(); - ); - } - } -} - -// Background task used to collect metrics from one producer on an interval. -// -// This function is started by the `OximeterAgent`, when a producer is registered. The task loops -// endlessly, and collects metrics from the assigned producer on a timeout. 
The assigned agent can -// also send a `CollectionMessage`, for example to update the collection interval. This is not -// currently used, but will likely be exposed via control plane interfaces in the future. -async fn collection_task( - log: Logger, - mut producer: ProducerEndpoint, - mut inbox: mpsc::Receiver, - outbox: mpsc::Sender<(Option, ProducerResults)>, -) { - let client = reqwest::Client::new(); - let mut collection_timer = interval(producer.interval); - collection_timer.tick().await; // completes immediately - debug!( - log, - "starting oximeter collection task"; - "interval" => ?producer.interval, - ); - - loop { - tokio::select! { - message = inbox.recv() => { - match message { - None => { - debug!(log, "collection task inbox closed, shutting down"); - return; - } - Some(CollectionMessage::Shutdown) => { - debug!(log, "collection task received shutdown request"); - return; - }, - Some(CollectionMessage::Collect(token)) => { - debug!(log, "collection task received explicit request to collect"); - perform_collection(&log, &client, &producer, &outbox, Some(token)).await; - }, - Some(CollectionMessage::Update(new_info)) => { - producer = new_info; - debug!( - log, - "collection task received request to update its producer information"; - "interval" => ?producer.interval, - "address" => producer.address, - ); - collection_timer = interval(producer.interval); - collection_timer.tick().await; // completes immediately - } - } - } - _ = collection_timer.tick() => { - perform_collection(&log, &client, &producer, &outbox, None).await; - } - } - } -} - -// Struct representing a task for collecting metric data from a single producer -#[derive(Debug)] -struct CollectionTask { - // Channel used to send messages from the agent to the actual task. The task owns the other - // side. - pub inbox: mpsc::Sender, - // Handle to the actual tokio task running the collection loop. 
- #[allow(dead_code)] - pub task: JoinHandle<()>, -} - -// A task run by `oximeter` in standalone mode, which simply prints results as -// they're received. -async fn results_printer( - log: Logger, - mut rx: mpsc::Receiver<(Option, ProducerResults)>, -) { - loop { - match rx.recv().await { - Some((_, results)) => { - for res in results.into_iter() { - match res { - ProducerResultsItem::Ok(samples) => { - for sample in samples.into_iter() { - info!( - log, - ""; - "sample" => ?sample, - ); - } - } - ProducerResultsItem::Err(e) => { - error!( - log, - "received error from a producer"; - "err" => ?e, - ); - } - } - } - } - None => { - debug!(log, "result queue closed, exiting"); - return; - } - } - } -} - -// Aggregation point for all results, from all collection tasks. -async fn results_sink( - log: Logger, - client: Client, - batch_size: usize, - batch_interval: Duration, - mut rx: mpsc::Receiver<(Option, ProducerResults)>, -) { - let mut timer = interval(batch_interval); - timer.tick().await; // completes immediately - let mut batch = Vec::with_capacity(batch_size); - loop { - let mut collection_token = None; - let insert = tokio::select! 
{ - _ = timer.tick() => { - if batch.is_empty() { - trace!(log, "batch interval expired, but no samples to insert"); - false - } else { - true - } - } - results = rx.recv() => { - match results { - Some((token, results)) => { - let flattened_results = { - let mut flattened = Vec::with_capacity(results.len()); - for inner_batch in results.into_iter() { - match inner_batch { - ProducerResultsItem::Ok(samples) => flattened.extend(samples.into_iter()), - ProducerResultsItem::Err(e) => { - debug!( - log, - "received error (not samples) from a producer: {}", - e.to_string() - ); - } - } - } - flattened - }; - batch.extend(flattened_results); - - collection_token = token; - if collection_token.is_some() { - true - } else { - batch.len() >= batch_size - } - } - None => { - warn!(log, "result queue closed, exiting"); - return; - } - } - } - }; - - if insert { - debug!(log, "inserting {} samples into database", batch.len()); - match client.insert_samples(&batch).await { - Ok(()) => trace!(log, "successfully inserted samples"), - Err(e) => { - warn!( - log, - "failed to insert some results into metric DB: {}", - e.to_string() - ); - } - } - // TODO-correctness The `insert_samples` call above may fail. The method itself needs - // better handling of partially-inserted results in that case, but we may need to retry - // or otherwise handle an error here as well. - batch.clear(); - } - - if let Some(token) = collection_token { - let _ = token.send(()); - } - } -} - /// Configuration for interacting with the metric database. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct DbConfig { @@ -403,7 +96,12 @@ pub struct DbConfig { } impl DbConfig { + /// Default number of samples to wait for before inserting a batch into + /// ClickHouse. pub const DEFAULT_BATCH_SIZE: usize = 1000; + + /// Default number of seconds to wait before inserting a batch into + /// ClickHouse. 
pub const DEFAULT_BATCH_INTERVAL: u64 = 5; // Construct config with an address, using the defaults for other fields @@ -416,244 +114,6 @@ impl DbConfig { } } -/// The internal agent the oximeter server uses to collect metrics from producers. -#[derive(Debug)] -pub struct OximeterAgent { - /// The collector ID for this agent - pub id: Uuid, - log: Logger, - // Handle to the TX-side of a channel for collecting results from the collection tasks - result_sender: mpsc::Sender<(Option, ProducerResults)>, - // The actual tokio tasks running the collection on a timer. - collection_tasks: - Arc>>, -} - -impl OximeterAgent { - /// Construct a new agent with the given ID and logger. - pub async fn with_id( - id: Uuid, - db_config: DbConfig, - resolver: &Resolver, - log: &Logger, - ) -> Result { - let (result_sender, result_receiver) = mpsc::channel(8); - let log = log.new(o!( - "component" => "oximeter-agent", - "collector_id" => id.to_string(), - )); - let insertion_log = log.new(o!("component" => "results-sink")); - - // Construct the ClickHouse client first, propagate an error if we can't reach the - // database. - let db_address = if let Some(address) = db_config.address { - address - } else { - SocketAddr::new( - resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, - ) - }; - let client = Client::new(db_address, &log); - let replicated = client.is_oximeter_cluster().await?; - client.initialize_db_with_version(replicated, OXIMETER_VERSION).await?; - - // Spawn the task for aggregating and inserting all metrics - tokio::spawn(async move { - results_sink( - insertion_log, - client, - db_config.batch_size, - Duration::from_secs(db_config.batch_interval), - result_receiver, - ) - .await - }); - Ok(Self { - id, - log, - result_sender, - collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), - }) - } - - /// Construct a new standalone `oximeter` collector. 
- pub async fn new_standalone( - id: Uuid, - db_config: Option, - log: &Logger, - ) -> Result { - let (result_sender, result_receiver) = mpsc::channel(8); - let log = log.new(o!( - "component" => "oximeter-standalone", - "collector_id" => id.to_string(), - )); - - // If we have configuration for ClickHouse, we'll spawn the results - // sink task as usual. If not, we'll spawn a dummy task that simply - // prints the results as they're received. - let insertion_log = log.new(o!("component" => "results-sink")); - if let Some(db_config) = db_config { - let Some(address) = db_config.address else { - return Err(Error::Standalone(anyhow!( - "Must provide explicit IP address in standalone mode" - ))); - }; - let client = Client::new(address, &log); - let replicated = client.is_oximeter_cluster().await?; - if !replicated { - client.init_single_node_db().await?; - } else { - client.init_replicated_db().await?; - } - - // Spawn the task for aggregating and inserting all metrics - tokio::spawn(async move { - results_sink( - insertion_log, - client, - db_config.batch_size, - Duration::from_secs(db_config.batch_interval), - result_receiver, - ) - .await - }); - } else { - tokio::spawn(results_printer(insertion_log, result_receiver)); - } - - // Construct the ClickHouse client first, propagate an error if we can't reach the - // database. - Ok(Self { - id, - log, - result_sender, - collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), - }) - } - - /// Register a new producer with this oximeter instance. - pub async fn register_producer( - &self, - info: ProducerEndpoint, - ) -> Result<(), Error> { - let id = info.id; - match self.collection_tasks.lock().await.entry(id) { - Entry::Vacant(value) => { - debug!( - self.log, - "registered new metric producer"; - "producer_id" => id.to_string(), - "address" => info.address, - ); - - // Build channel to control the task and receive results. 
- let (tx, rx) = mpsc::channel(4); - let q = self.result_sender.clone(); - let log = self.log.new(o!("component" => "collection-task", "producer_id" => id.to_string())); - let info_clone = info.clone(); - let task = tokio::spawn(async move { - collection_task(log, info_clone, rx, q).await; - }); - value.insert((info, CollectionTask { inbox: tx, task })); - } - Entry::Occupied(mut value) => { - debug!( - self.log, - "received request to register existing metric \ - producer, updating collection information"; - "producer_id" => id.to_string(), - "interval" => ?info.interval, - "address" => info.address, - ); - value.get_mut().0 = info.clone(); - value - .get() - .1 - .inbox - .send(CollectionMessage::Update(info)) - .await - .unwrap(); - } - } - Ok(()) - } - - /// Forces a collection from all producers. - /// - /// Returns once all those values have been inserted into Clickhouse, - /// or an error occurs trying to perform the collection. - pub async fn force_collection(&self) { - let mut collection_oneshots = vec![]; - let collection_tasks = self.collection_tasks.lock().await; - for (_id, (_endpoint, task)) in collection_tasks.iter() { - let (tx, rx) = oneshot::channel(); - // Scrape from each producer, into oximeter... - task.inbox.send(CollectionMessage::Collect(tx)).await.unwrap(); - // ... and keep track of the token that indicates once the metric - // has made it into Clickhouse. - collection_oneshots.push(rx); - } - drop(collection_tasks); - - // Only return once all producers finish processing the token we - // provided. - // - // NOTE: This can either mean that the collection completed - // successfully, or an error occurred in the collection pathway. - futures::future::join_all(collection_oneshots).await; - } - - /// List existing producers. 
- pub async fn list_producers( - &self, - start_id: Option, - limit: usize, - ) -> Vec { - let start = if let Some(id) = start_id { - Bound::Excluded(id) - } else { - Bound::Unbounded - }; - self.collection_tasks - .lock() - .await - .range((start, Bound::Unbounded)) - .take(limit) - .map(|(_id, (info, _t))| info.clone()) - .collect() - } - - /// Delete a producer by ID, stopping its collection task. - pub async fn delete_producer(&self, id: Uuid) -> Result<(), Error> { - let (_info, task) = self - .collection_tasks - .lock() - .await - .remove(&id) - .ok_or_else(|| Error::NoSuchProducer(id))?; - debug!( - self.log, - "removed collection task from set"; - "producer_id" => %id, - ); - match task.inbox.send(CollectionMessage::Shutdown).await { - Ok(_) => debug!( - self.log, - "shut down collection task"; - "producer_id" => %id, - ), - Err(e) => error!( - self.log, - "failed to shut down collection task"; - "producer_id" => %id, - "error" => ?e, - ), - } - Ok(()) - } -} - /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { @@ -712,6 +172,9 @@ impl Oximeter { /// /// This can be used to override / ignore the logging configuration in /// `config`, using `log` instead. + /// + /// Note that this blocks until the ClickHouse database is available **and + /// at the expected version**. 
pub async fn with_logger( config: &Config, args: &OximeterArguments, @@ -736,14 +199,21 @@ impl Oximeter { let make_agent = || async { debug!(log, "creating ClickHouse client"); Ok(Arc::new( - OximeterAgent::with_id(args.id, config.db, &resolver, &log) - .await?, + OximeterAgent::with_id( + args.id, + args.address, + config.db, + &resolver, + &log, + ) + .await?, )) }; let log_client_failure = |error, delay| { warn!( log, - "failed to initialize ClickHouse database, will retry in {:?}", delay; + "failed to create ClickHouse client"; + "retry_after" => ?delay, "error" => ?error, ); }; @@ -825,7 +295,13 @@ impl Oximeter { ) -> Result { let db_config = clickhouse.map(DbConfig::with_address); let agent = Arc::new( - OximeterAgent::new_standalone(args.id, db_config, &log).await?, + OximeterAgent::new_standalone( + args.id, + args.address, + db_config, + &log, + ) + .await?, ); let dropshot_log = log.new(o!("component" => "dropshot")); @@ -908,108 +384,3 @@ impl Oximeter { self.agent.delete_producer(id).await } } - -// Build the HTTP API internal to the control plane -pub fn oximeter_api() -> ApiDescription> { - let mut api = ApiDescription::new(); - api.register(producers_post) - .expect("Could not register producers_post API handler"); - api.register(producers_list) - .expect("Could not register producers_list API handler"); - api.register(producer_delete) - .expect("Could not register producers_delete API handler"); - api.register(collector_info) - .expect("Could not register collector_info API handler"); - api -} - -// Handle a request from Nexus to register a new producer with this collector. 
-#[endpoint { - method = POST, - path = "/producers", -}] -async fn producers_post( - request_context: RequestContext>, - body: TypedBody, -) -> Result { - let agent = request_context.context(); - let producer_info = body.into_inner(); - agent - .register_producer(producer_info) - .await - .map_err(HttpError::from) - .map(|_| HttpResponseUpdatedNoContent()) -} - -// Parameters for paginating the list of producers. -#[derive(Clone, Copy, Debug, Deserialize, schemars::JsonSchema, Serialize)] -struct ProducerPage { - id: Uuid, -} - -// List all producers -#[endpoint { - method = GET, - path = "/producers", -}] -async fn producers_list( - request_context: RequestContext>, - query: Query>, -) -> Result>, HttpError> { - let agent = request_context.context(); - let pagination = query.into_inner(); - let limit = request_context.page_limit(&pagination)?.get() as usize; - let start = match &pagination.page { - WhichPage::First(..) => None, - WhichPage::Next(ProducerPage { id }) => Some(*id), - }; - let producers = agent.list_producers(start, limit).await; - ResultsPage::new( - producers, - &EmptyScanParams {}, - |info: &ProducerEndpoint, _| ProducerPage { id: info.id }, - ) - .map(HttpResponseOk) -} - -#[derive(Clone, Copy, Debug, Deserialize, schemars::JsonSchema, Serialize)] -struct ProducerIdPathParams { - producer_id: Uuid, -} - -// Delete a producer by ID. -#[endpoint { - method = DELETE, - path = "/producers/{producer_id}", -}] -async fn producer_delete( - request_context: RequestContext>, - path: dropshot::Path, -) -> Result { - let agent = request_context.context(); - let producer_id = path.into_inner().producer_id; - agent - .delete_producer(producer_id) - .await - .map_err(HttpError::from) - .map(|_| HttpResponseDeleted()) -} - -#[derive(Clone, Copy, Debug, Deserialize, schemars::JsonSchema, Serialize)] -pub struct CollectorInfo { - /// The collector's UUID. 
- pub id: Uuid, -} - -// Return identifying information about this collector -#[endpoint { - method = GET, - path = "/info", -}] -async fn collector_info( - request_context: RequestContext>, -) -> Result, HttpError> { - let agent = request_context.context(); - let info = CollectorInfo { id: agent.id }; - Ok(HttpResponseOk(info)) -} diff --git a/oximeter/collector/src/self_stats.rs b/oximeter/collector/src/self_stats.rs new file mode 100644 index 0000000000..dd1701203e --- /dev/null +++ b/oximeter/collector/src/self_stats.rs @@ -0,0 +1,171 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Metrics oximeter reports about itself + +// Copyright 2023 Oxide Computer Company + +use crate::ProducerEndpoint; +use oximeter::types::Cumulative; +use oximeter::types::ProducerResultsItem; +use oximeter::Metric; +use oximeter::MetricsError; +use oximeter::Sample; +use oximeter::Target; +use reqwest::StatusCode; +use std::collections::BTreeMap; +use std::net::IpAddr; +use std::time::Duration; +use uuid::Uuid; + +/// The interval on which we report self statistics +pub const COLLECTION_INTERVAL: Duration = Duration::from_secs(60); + +/// A target representing a single oximeter collector. +#[derive(Clone, Copy, Debug, Target)] +pub struct OximeterCollector { + /// The collector's ID. + pub collector_id: Uuid, + /// The collector server's IP address. + pub collector_ip: IpAddr, + /// The collector server's port. + pub collector_port: u16, +} + +/// The number of successful collections from a single producer. +#[derive(Clone, Debug, Metric)] +pub struct Collections { + /// The producer's ID. + pub producer_id: Uuid, + /// The producer's IP address. + pub producer_ip: IpAddr, + /// The producer's port. + pub producer_port: u16, + /// The base route in the producer server used to collect metrics. 
+ /// + /// The full route is `{base_route}/{producer_id}`. + pub base_route: String, + pub datum: Cumulative, +} + +/// Small enum to help understand why oximeter failed to collect from a +/// producer. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[non_exhaustive] +pub enum FailureReason { + /// The producer could not be reached. + Unreachable, + /// Error during deserialization. + Deserialization, + /// Some other reason, which includes the status code. + Other(StatusCode), +} + +impl std::fmt::Display for FailureReason { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Unreachable => write!(f, "unreachable"), + Self::Deserialization => write!(f, "deserialization"), + Self::Other(c) => write!(f, "{}", c.as_u16()), + } + } +} + +/// The number of failed collections from a single producer. +#[derive(Clone, Debug, Metric)] +pub struct FailedCollections { + /// The producer's ID. + pub producer_id: Uuid, + /// The producer's IP address. + pub producer_ip: IpAddr, + /// The producer's port. + pub producer_port: u16, + /// The base route in the producer server used to collect metrics. + /// + /// The full route is `{base_route}/{producer_id}`. + pub base_route: String, + /// The reason we could not collect. + // + // NOTE: This should always be generated through a `FailureReason`. + pub reason: String, + pub datum: Cumulative, +} + +/// Oximeter collection statistics maintained by each collection task. 
+#[derive(Clone, Debug)] +pub struct CollectionTaskStats { + pub collector: OximeterCollector, + pub collections: Collections, + pub failed_collections: BTreeMap, +} + +impl CollectionTaskStats { + pub fn new( + collector: OximeterCollector, + producer: &ProducerEndpoint, + ) -> Self { + Self { + collector, + collections: Collections { + producer_id: producer.id, + producer_ip: producer.address.ip(), + producer_port: producer.address.port(), + base_route: producer.base_route.clone(), + datum: Cumulative::new(0), + }, + failed_collections: BTreeMap::new(), + } + } + + pub fn failures_for_reason( + &mut self, + reason: FailureReason, + ) -> &mut FailedCollections { + self.failed_collections.entry(reason).or_insert_with(|| { + FailedCollections { + producer_id: self.collections.producer_id, + producer_ip: self.collections.producer_ip, + producer_port: self.collections.producer_port, + base_route: self.collections.base_route.clone(), + reason: reason.to_string(), + datum: Cumulative::new(0), + } + }) + } + + pub fn sample(&self) -> Vec { + fn to_item(res: Result) -> ProducerResultsItem { + match res { + Ok(s) => ProducerResultsItem::Ok(vec![s]), + Err(s) => ProducerResultsItem::Err(s), + } + } + let mut samples = Vec::with_capacity(1 + self.failed_collections.len()); + samples.push(to_item(Sample::new(&self.collector, &self.collections))); + samples.extend( + self.failed_collections + .values() + .map(|metric| to_item(Sample::new(&self.collector, metric))), + ); + samples + } +} + +#[cfg(test)] +mod tests { + use super::FailureReason; + use super::StatusCode; + + #[test] + fn test_failure_reason_serialization() { + let data = &[ + (FailureReason::Deserialization, "deserialization"), + (FailureReason::Unreachable, "unreachable"), + (FailureReason::Other(StatusCode::INTERNAL_SERVER_ERROR), "500"), + ]; + for (variant, as_str) in data.iter() { + assert_eq!(variant.to_string(), *as_str); + } + } +} diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 
d37c57ccce..4d53869d0d 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -10,10 +10,12 @@ anyhow.workspace = true async-trait.workspace = true bcs.workspace = true bytes = { workspace = true, features = [ "serde" ] } +camino.workspace = true chrono.workspace = true clap.workspace = true dropshot.workspace = true highway.workspace = true +omicron-common.workspace = true oximeter.workspace = true regex.workspace = true reqwest = { workspace = true, features = [ "json" ] } @@ -35,6 +37,7 @@ itertools.workspace = true omicron-test-utils.workspace = true slog-dtrace.workspace = true strum.workspace = true +tempfile.workspace = true [[bin]] name = "oxdb" diff --git a/oximeter/db/schema/README.md b/oximeter/db/schema/README.md new file mode 100644 index 0000000000..2f1633138d --- /dev/null +++ b/oximeter/db/schema/README.md @@ -0,0 +1,39 @@ +# ClickHouse schema files + +This directory contains the SQL files for different versions of the ClickHouse +timeseries database used by `oximeter`. In general, schema are expected to be +applied while the database is online, but no other clients exist. This is +similar to the current situation for _offline upgrade_ we use when updating the +main control plane database in CockroachDB. + +## Constraints, or why ClickHouse is weird + +While this tool is modeled after the mechanism for applying updates in +CockroachDB, ClickHouse is a significantly different DBMS. There are no +transactions; no unique primary keys; a single DB server can house both +replicated and single-node tables. This means we need to be pretty careful when +updating the schema. Changes must be idempotent, as with the CRDB schema, but at +this point we do not support inserting or modifying data at all. + +Similar to the CRDB offline update tool, we assume no non-update modifications +of the database are running concurrently. However, given ClickHouse's lack of +transactions, we actually require that there are no writes of any kind. 
In +practice, this means `oximeter` **must not** be running when this is called. +Similarly, there must be only a single instance of this program at a time. + +To run this program: + +- Ensure the ClickHouse server is running, and grab its IP address; + ```bash + $ pfexec zlogin oxz_clickhouse_e449eb80-3371-40a6-a316-d6e64b039357 'ipadm show-addr -o addrobj,addr | grep omicron6' + oxControlService20/omicron6 fd00:1122:3344:101::e/64 + ``` +- Log into the `oximeter` zone, `zlogin oxz_oximeter_<UUID>` +- Run this tool, pointing it at the desired schema directory, e.g.: + +```bash +# /opt/oxide/oximeter/bin/clickhouse-schema-updater \ + --host <CLICKHOUSE_ADDRESS> \ + --schema-directory /opt/oxide/oximeter/schema \ + up VERSION +``` diff --git a/oximeter/db/schema/replicated/2/up.sql b/oximeter/db/schema/replicated/2/up.sql new file mode 100644 index 0000000000..eb01b8c1a3 --- /dev/null +++ b/oximeter/db/schema/replicated/2/up.sql @@ -0,0 +1,819 @@ +CREATE DATABASE IF NOT EXISTS oximeter ON CLUSTER oximeter_cluster; + +/* The version table contains metadata about the `oximeter` database */ +CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = ReplicatedMergeTree() +ORDER BY (value, timestamp); + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field table, + * and then uses that to index quickly into the measurements tables. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bool_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) 
+ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 
ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u64_local', '{replica}') +ORDER BY 
(timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 /* must match the Float32 datum of measurements_f32_local */ +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp
DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_string_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_string_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bytes_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bytes_local', xxHash64(splitByChar(':', 
timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativei64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativei64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativeu64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativeu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster +( 
+ timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 
'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + 
timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + 
+CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int16 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt16 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int32 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt32 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, 
timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int64 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt64 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value IPv6 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value String +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UUID +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluster +( + timeseries_name String, + fields Nested( + name String, + type Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6 + ), + source Enum( + 'Target' = 1, + 'Metric' = 2 + ) + ), + datum_type Enum( + 'Bool' = 1, + 'I64' = 2, + 'F64' = 3, + 'String' = 4, + 'Bytes' = 5, + 'CumulativeI64' = 6, + 'CumulativeF64' = 7, + 'HistogramI64' = 8, + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 + ), + created DateTime64(9, 'UTC') +) +ENGINE = ReplicatedMergeTree() +ORDER BY (timeseries_name, fields.name); diff --git a/oximeter/db/schema/replicated/3/up.sql b/oximeter/db/schema/replicated/3/up.sql new file mode 100644 index 0000000000..073d643564 --- /dev/null +++ b/oximeter/db/schema/replicated/3/up.sql @@ -0,0 +1,22 @@ +/* This adds missing field types to the timeseries schema table fields.type + * column, by augmenting the enum to capture new values. Note that the existing + * variants can't be moved or changed, so the new ones are added at the end. The + * client never sees this discriminant, only the string, so it should not + * matter.
+ */ +ALTER TABLE oximeter.timeseries_schema ON CLUSTER oximeter_cluster + MODIFY COLUMN IF EXISTS fields.type + Array(Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + )); diff --git a/oximeter/db/src/db-replicated-init.sql b/oximeter/db/schema/replicated/db-init.sql similarity index 93% rename from oximeter/db/src/db-replicated-init.sql rename to oximeter/db/schema/replicated/db-init.sql index ec11854e44..4429f41364 100644 --- a/oximeter/db/src/db-replicated-init.sql +++ b/oximeter/db/schema/replicated/db-init.sql @@ -1,5 +1,6 @@ CREATE DATABASE IF NOT EXISTS oximeter ON CLUSTER oximeter_cluster; --- + +/* The version table contains metadata about the `oximeter` database */ CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster ( value UInt64, @@ -7,7 +8,17 @@ CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedMergeTree() ORDER BY (value, timestamp); --- + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field tables, + * and then uses that to index quickly into the measurements tables.
+ */ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -18,7 +29,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_ ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -27,7 +38,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluste datum UInt8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bool_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -38,7 +49,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cl ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -47,7 +58,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster datum Int8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -58,7 +69,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cl ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON 
CLUSTER oximeter_cluster ( timeseries_name String, @@ -67,7 +78,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster datum UInt8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -78,7 +89,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -87,7 +98,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster datum Int16 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -98,7 +109,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -107,7 +118,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster datum UInt16 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -118,7 +129,7 @@ CREATE TABLE IF NOT EXISTS 
oximeter.measurements_i32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -127,7 +138,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster datum Int32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -138,7 +149,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -147,7 +158,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster datum UInt32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -158,7 +169,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -167,7 +178,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER 
oximeter_cluster datum Int64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -178,7 +189,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -187,7 +198,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster datum UInt64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -198,7 +209,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -207,7 +218,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster datum Float32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -218,7 +229,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_c ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -227,7 +238,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster datum Float64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -238,7 +249,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximete ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_string_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -247,7 +258,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_clus datum String ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_string_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -258,7 +269,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bytes_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -267,7 +278,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_clust datum Array(UInt8) ) ENGINE = 
Distributed('oximeter_cluster', 'oximeter', 'measurements_bytes_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -279,7 +290,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativei64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -289,7 +300,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximet datum Int64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativei64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -301,7 +312,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativeu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -311,7 +322,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximet datum UInt64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativeu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -323,7 +334,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER ENGINE 
= ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -333,7 +344,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximet datum Float32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -345,7 +356,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -355,7 +366,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximet datum Float64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -368,7 +379,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER ox ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -379,7 +390,7 @@ CREATE 
TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -392,7 +403,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER ox ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -403,7 +414,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -416,7 +427,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -427,7 +438,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster ( 
timeseries_name String, @@ -440,7 +451,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -451,7 +462,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -464,7 +475,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -475,7 +486,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -488,7 +499,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + 
CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -499,7 +510,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -512,7 +523,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -523,7 +534,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -536,7 +547,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -547,7 +558,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', 
xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -560,7 +571,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -571,7 +582,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -584,7 +595,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -595,7 +606,24 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. 
Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. + */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -605,7 +633,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -615,7 +643,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -625,7 +653,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -635,7 +663,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY 
(timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -645,7 +673,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -655,7 +683,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -665,7 +693,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -675,7 +703,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -685,7 +713,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -695,7 +723,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_string 
ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -705,7 +733,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -715,7 +743,10 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. + */ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -726,7 +757,14 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluste 'I64' = 2, 'IpAddr' = 3, 'String' = 4, - 'Uuid' = 6 + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 ), source Enum( 'Target' = 1, diff --git a/oximeter/db/src/db-wipe-replicated.sql b/oximeter/db/schema/replicated/db-wipe.sql similarity index 100% rename from oximeter/db/src/db-wipe-replicated.sql rename to oximeter/db/schema/replicated/db-wipe.sql diff --git a/oximeter/db/src/db-single-node-init.sql b/oximeter/db/schema/single-node/2/up.sql similarity index 88% rename from oximeter/db/src/db-single-node-init.sql rename to oximeter/db/schema/single-node/2/up.sql index 2fb5c36397..4756e2897d 100644 --- a/oximeter/db/src/db-single-node-init.sql +++ b/oximeter/db/schema/single-node/2/up.sql @@ -1,5 +1,6 @@ CREATE DATABASE IF NOT EXISTS oximeter; --- + +/* The version table contains metadata about the `oximeter` database */ CREATE TABLE IF NOT EXISTS oximeter.version ( value UInt64, @@ -7,7 +8,17 @@ CREATE TABLE IF NOT EXISTS oximeter.version ) ENGINE = MergeTree() ORDER BY (value, timestamp); --- + +/* The measurement tables 
contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field tables, + * and then uses that to index quickly into the measurements tables. + */ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ( timeseries_name String, @@ -18,7 +29,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ( timeseries_name String, @@ -29,7 +40,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ( timeseries_name String, @@ -40,7 +51,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ( timeseries_name String, @@ -51,7 +62,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ( timeseries_name String, @@ -62,7 +73,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ( timeseries_name String, @@ -73,7 +84,7 @@ CREATE TABLE IF
NOT EXISTS oximeter.measurements_i32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ( timeseries_name String, @@ -84,7 +95,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ( timeseries_name String, @@ -95,7 +106,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ( timeseries_name String, @@ -106,7 +117,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ( timeseries_name String, @@ -117,7 +128,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ( timeseries_name String, @@ -128,7 +139,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string ( timeseries_name String, @@ -139,7 +150,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ( timeseries_name String, @@ -150,7 +161,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ENGINE = 
MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ( timeseries_name String, @@ -162,7 +173,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ( timeseries_name String, @@ -174,7 +185,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ( timeseries_name String, @@ -186,8 +197,8 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- --- + + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ( timeseries_name String, @@ -199,7 +210,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ( timeseries_name String, @@ -212,7 +223,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ( timeseries_name String, @@ -225,7 +236,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT 
EXISTS oximeter.measurements_histogrami16 ( timeseries_name String, @@ -238,7 +249,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ( timeseries_name String, @@ -251,7 +262,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ( timeseries_name String, @@ -264,7 +275,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ( timeseries_name String, @@ -277,7 +288,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ( timeseries_name String, @@ -290,7 +301,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ( timeseries_name String, @@ -303,7 +314,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ( timeseries_name String, @@ -316,7 +327,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 
ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ( timeseries_name String, @@ -329,7 +340,24 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. 
+ */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, @@ -339,7 +367,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, @@ -349,7 +377,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, @@ -359,7 +387,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, @@ -369,7 +397,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, @@ -379,7 +407,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, @@ -389,7 +417,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, @@ -399,7 +427,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, @@ -409,7 +437,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT 
EXISTS oximeter.fields_u64 ( timeseries_name String, @@ -419,7 +447,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ( timeseries_name String, @@ -429,7 +457,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, @@ -439,7 +467,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, @@ -449,7 +477,10 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. + */ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ( timeseries_name String, diff --git a/oximeter/db/schema/single-node/3/up.sql b/oximeter/db/schema/single-node/3/up.sql new file mode 100644 index 0000000000..073d643564 --- /dev/null +++ b/oximeter/db/schema/single-node/3/up.sql @@ -0,0 +1,22 @@ +/* This adds missing field types to the timeseries schema table fields.type + * column, by augmenting the enum to capture new values. Note that the existing + * variants can't be moved or changed, so the new ones are added at the end. The + * client never sees this discriminant, only the string, so it should not + * matter. 
+ */ +ALTER TABLE oximeter.timeseries_schema + MODIFY COLUMN IF EXISTS fields.type + Array(Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + )); diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql new file mode 100644 index 0000000000..ee5e91c4b7 --- /dev/null +++ b/oximeter/db/schema/single-node/db-init.sql @@ -0,0 +1,540 @@ +CREATE DATABASE IF NOT EXISTS oximeter; + +/* The version table contains metadata about the `oximeter` database */ +CREATE TABLE IF NOT EXISTS oximeter.version +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = MergeTree() +ORDER BY (value, timestamp); + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field tables, + * and then uses that to index quickly into the measurements tables. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + 
+CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, 
start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + 
timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, 
start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.fields_bool +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i8 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u8 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i16 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int16 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u16 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt16 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i32 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int32 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u32 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt32 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i64 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int64 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + 
+CREATE TABLE IF NOT EXISTS oximeter.fields_u64 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt64 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value IPv6 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_string +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value String +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_uuid +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UUID +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. 
+ */ +CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema +( + timeseries_name String, + fields Nested( + name String, + type Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + ), + source Enum( + 'Target' = 1, + 'Metric' = 2 + ) + ), + datum_type Enum( + 'Bool' = 1, + 'I64' = 2, + 'F64' = 3, + 'String' = 4, + 'Bytes' = 5, + 'CumulativeI64' = 6, + 'CumulativeF64' = 7, + 'HistogramI64' = 8, + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 + ), + created DateTime64(9, 'UTC') +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, fields.name); diff --git a/oximeter/db/src/db-wipe-single-node.sql b/oximeter/db/schema/single-node/db-wipe.sql similarity index 100% rename from oximeter/db/src/db-wipe-single-node.sql rename to oximeter/db/schema/single-node/db-wipe.sql diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index 88b95c3764..e1ed06554c 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -23,6 +23,8 @@ use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; use oximeter::types::Sample; +use regex::Regex; +use regex::RegexBuilder; use slog::debug; use slog::error; use slog::info; @@ -35,6 +37,11 @@ use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::SocketAddr; use std::num::NonZeroU32; +use std::ops::Bound; +use std::path::Path; +use std::path::PathBuf; +use std::sync::OnceLock; +use tokio::fs; use tokio::sync::Mutex; use uuid::Uuid; @@ -267,14 +274,319 @@ impl Client { .map_err(|e| Error::Database(e.to_string())) } + /// Read the available schema 
versions in the provided directory. + pub async fn read_available_schema_versions( + log: &Logger, + is_replicated: bool, + schema_dir: impl AsRef, + ) -> Result, Error> { + let dir = schema_dir.as_ref().join(if is_replicated { + "replicated" + } else { + "single-node" + }); + let mut rd = + fs::read_dir(&dir).await.map_err(|err| Error::ReadSchemaDir { + context: format!( + "Failed to read schema directory '{}'", + dir.display() + ), + err, + })?; + let mut versions = BTreeSet::new(); + debug!(log, "reading entries from schema dir"; "dir" => dir.display()); + while let Some(entry) = + rd.next_entry().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to read directory entry"), + err, + })? + { + let name = entry + .file_name() + .into_string() + .map_err(|bad| Error::NonUtf8SchemaDirEntry(bad.to_owned()))?; + let md = + entry.metadata().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to fetch entry metatdata"), + err, + })?; + if !md.is_dir() { + debug!(log, "skipping non-directory"; "name" => &name); + continue; + } + match name.parse() { + Ok(ver) => { + debug!(log, "valid version dir"; "ver" => ver); + assert!(versions.insert(ver), "Versions should be unique"); + } + Err(e) => warn!( + log, + "found directory with non-u64 name, skipping"; + "name" => name, + "error" => ?e, + ), + } + } + Ok(versions) + } + + /// Ensure that the database is upgraded to the desired version of the + /// schema. + /// + /// NOTE: This function is not safe for concurrent usage! 
+ pub async fn ensure_schema( + &self, + replicated: bool, + desired_version: u64, + schema_dir: impl AsRef, + ) -> Result<(), Error> { + let schema_dir = schema_dir.as_ref(); + let latest = self.read_latest_version().await?; + if latest == desired_version { + debug!( + self.log, + "database already at desired version"; + "version" => latest, + ); + return Ok(()); + } + debug!( + self.log, + "starting upgrade to desired version {}", desired_version + ); + let available = Self::read_available_schema_versions( + &self.log, replicated, schema_dir, + ) + .await?; + // We explicitly ignore version 0, which implies the database doesn't + // exist at all. + if latest > 0 && !available.contains(&latest) { + return Err(Error::MissingSchemaVersion(latest)); + } + if !available.contains(&desired_version) { + return Err(Error::MissingSchemaVersion(desired_version)); + } + + // Check we have no gaps in version numbers, starting with the latest + // version and walking through all available ones strictly greater. This + // is to check that the _next_ version is also 1 greater than the + // latest. + let range = (Bound::Excluded(latest), Bound::Included(desired_version)); + if available + .range(latest..) + .zip(available.range(range)) + .any(|(current, next)| next - current != 1) + { + return Err(Error::NonSequentialSchemaVersions); + } + + // Walk through all changes between current version (exclusive) and + // the desired version (inclusive). 
+ let versions_to_apply = available.range(range); + let mut current = latest; + for version in versions_to_apply { + if let Err(e) = self + .apply_one_schema_upgrade(replicated, *version, schema_dir) + .await + { + error!( + self.log, + "failed to apply schema upgrade"; + "current_version" => current, + "next_version" => *version, + "replicated" => replicated, + "schema_dir" => schema_dir.display(), + "error" => ?e, + ); + return Err(e); + } + current = *version; + self.insert_version(current).await?; + } + Ok(()) + } + + fn verify_schema_upgrades( + files: &BTreeMap, + ) -> Result<(), Error> { + let re = schema_validation_regex(); + for (path, sql) in files.values() { + if re.is_match(&sql) { + return Err(Error::SchemaUpdateModifiesData { + path: path.clone(), + statement: sql.clone(), + }); + } + if sql.matches(';').count() > 1 { + return Err(Error::MultipleSqlStatementsInSchemaUpdate { + path: path.clone(), + }); + } + } + Ok(()) + } + + async fn apply_one_schema_upgrade( + &self, + replicated: bool, + next_version: u64, + schema_dir: impl AsRef, + ) -> Result<(), Error> { + let schema_dir = schema_dir.as_ref(); + let upgrade_file_contents = Self::read_schema_upgrade_sql_files( + &self.log, + replicated, + next_version, + schema_dir, + ) + .await?; + + // We need to be pretty careful at this point with any data-modifying + // statements. There should be no INSERT queries, for example, which we + // check here. ClickHouse doesn't support much in the way of data + // modification, which makes this pretty easy. + Self::verify_schema_upgrades(&upgrade_file_contents)?; + + // Apply each file in sequence in the upgrade directory. 
+ for (name, (path, sql)) in upgrade_file_contents.into_iter() { + debug!( + self.log, + "apply schema upgrade file"; + "version" => next_version, + "path" => path.display(), + "filename" => &name, + ); + match self.execute(sql).await { + Ok(_) => debug!( + self.log, + "successfully applied schema upgrade file"; + "version" => next_version, + "path" => path.display(), + "name" => name, + ), + Err(e) => { + return Err(e); + } + } + } + Ok(()) + } + + fn full_upgrade_path( + replicated: bool, + version: u64, + schema_dir: impl AsRef, + ) -> PathBuf { + schema_dir + .as_ref() + .join(if replicated { "replicated" } else { "single-node" }) + .join(version.to_string()) + } + + // Read all SQL files, in order, in the schema directory for the provided + // version. + async fn read_schema_upgrade_sql_files( + log: &Logger, + replicated: bool, + version: u64, + schema_dir: impl AsRef, + ) -> Result, Error> { + let version_schema_dir = + Self::full_upgrade_path(replicated, version, schema_dir.as_ref()); + let mut rd = + fs::read_dir(&version_schema_dir).await.map_err(|err| { + Error::ReadSchemaDir { + context: format!( + "Failed to read schema directory '{}'", + version_schema_dir.display() + ), + err, + } + })?; + + let mut upgrade_files = BTreeMap::new(); + debug!(log, "reading SQL files from schema dir"; "dir" => version_schema_dir.display()); + while let Some(entry) = + rd.next_entry().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to read directory entry"), + err, + })? 
+ { + let path = entry.path(); + let Some(ext) = path.extension() else { + warn!( + log, + "skipping schema dir entry without an extension"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let Some(ext) = ext.to_str() else { + warn!( + log, + "skipping schema dir entry with non-UTF8 extension"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + if ext.eq_ignore_ascii_case("sql") { + let Some(stem) = path.file_stem() else { + warn!( + log, + "skipping schema SQL file with no name"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let Some(name) = stem.to_str() else { + warn!( + log, + "skipping schema SQL file with non-UTF8 name"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let contents = + fs::read_to_string(&path).await.map_err(|err| { + Error::ReadSqlFile { + context: format!( + "Reading SQL file '{}' for upgrade", + path.display(), + ), + err, + } + })?; + upgrade_files + .insert(name.to_string(), (path.to_owned(), contents)); + } else { + warn!( + log, + "skipping non-SQL schema dir entry"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + } + } + Ok(upgrade_files) + } + /// Validates that the schema used by the DB matches the version used by /// the executable using it. /// - /// This function will wipe metrics data if the version stored within + /// This function will **wipe** metrics data if the version stored within /// the DB is less than the schema version of Oximeter. /// If the version in the DB is newer than what is known to Oximeter, an /// error is returned. /// + /// If you would like to non-destructively upgrade the database, then either + /// the included binary `clickhouse-schema-updater` or the method + /// [`Client::ensure_schema()`] should be used instead. + /// /// NOTE: This function is not safe for concurrent usage! 
pub async fn initialize_db_with_version( &self, @@ -304,11 +616,10 @@ impl Client { } else if version > expected_version { // If the on-storage version is greater than the constant embedded // into this binary, we may have downgraded. - return Err(Error::Database( - format!( - "Expected version {expected_version}, saw {version}. Downgrading is not supported.", - ) - )); + return Err(Error::DatabaseVersionMismatch { + expected: crate::model::OXIMETER_VERSION, + found: version, + }); } else { // If the version matches, we don't need to update the DB return Ok(()); @@ -319,7 +630,8 @@ impl Client { Ok(()) } - async fn read_latest_version(&self) -> Result { + /// Read the latest version applied in the database. + pub async fn read_latest_version(&self) -> Result { let sql = format!( "SELECT MAX(value) FROM {db_name}.version;", db_name = crate::DATABASE_NAME, @@ -354,6 +666,20 @@ impl Client { Ok(version) } + /// Return Ok if the DB is at exactly the version compatible with this + /// client. 
+ pub async fn check_db_is_at_expected_version(&self) -> Result<(), Error> { + let ver = self.read_latest_version().await?; + if ver == crate::model::OXIMETER_VERSION { + Ok(()) + } else { + Err(Error::DatabaseVersionMismatch { + expected: crate::model::OXIMETER_VERSION, + found: ver, + }) + } + } + async fn insert_version(&self, version: u64) -> Result<(), Error> { let sql = format!( "INSERT INTO {db_name}.version (*) VALUES ({version}, now());", @@ -365,7 +691,7 @@ impl Client { /// Verifies if instance is part of oximeter_cluster pub async fn is_oximeter_cluster(&self) -> Result { - let sql = String::from("SHOW CLUSTERS FORMAT JSONEachRow;"); + let sql = "SHOW CLUSTERS FORMAT JSONEachRow;"; let res = self.execute_with_body(sql).await?; Ok(res.contains("oximeter_cluster")) } @@ -501,7 +827,11 @@ impl Client { S: AsRef, { let sql = sql.as_ref().to_string(); - trace!(self.log, "executing SQL query: {}", sql); + trace!( + self.log, + "executing SQL query"; + "sql" => &sql, + ); let id = usdt::UniqueId::new(); probes::query__start!(|| (&id, &sql)); let response = handle_db_response( @@ -720,6 +1050,42 @@ impl Client { // many as one per sample. It's not clear how to structure this in a way that's useful. Ok(()) } + + // Run one or more SQL statements. + // + // This is intended to be used for the methods which run SQL from one of the + // SQL files in the crate, e.g., the DB initialization or update files. + async fn run_many_sql_statements( + &self, + sql: impl AsRef, + ) -> Result<(), Error> { + for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { + self.execute(stmt).await?; + } + Ok(()) + } +} + +// A regex used to validate supported schema updates. 
+static SCHEMA_VALIDATION_REGEX: OnceLock = OnceLock::new(); +fn schema_validation_regex() -> &'static Regex { + SCHEMA_VALIDATION_REGEX.get_or_init(|| { + RegexBuilder::new(concat!( + // Cannot insert rows + r#"(INSERT INTO)|"#, + // Cannot delete rows in a table + r#"(ALTER TABLE .* DELETE)|"#, + // Cannot update values in a table + r#"(ALTER TABLE .* UPDATE)|"#, + // Cannot drop column values + r#"(ALTER TABLE .* CLEAR COLUMN)|"#, + // Or issue lightweight deletes + r#"(DELETE FROM)"#, + )) + .case_insensitive(true) + .build() + .expect("Invalid regex") + }) } #[derive(Debug)] @@ -767,40 +1133,38 @@ impl DbWrite for Client { /// Initialize the replicated telemetry database, creating tables as needed. async fn init_replicated_db(&self) -> Result<(), Error> { - // The HTTP client doesn't support multiple statements per query, so we break them out here - // manually. debug!(self.log, "initializing ClickHouse database"); - let sql = include_str!("./db-replicated-init.sql"); - for query in sql.split("\n--\n") { - self.execute(query.to_string()).await?; - } - Ok(()) + self.run_many_sql_statements(include_str!( + "../schema/replicated/db-init.sql" + )) + .await + } + + /// Wipe the ClickHouse database entirely from a replicated set up. + async fn wipe_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!( + "../schema/replicated/db-wipe.sql" + )) + .await } /// Initialize a single node telemetry database, creating tables as needed. async fn init_single_node_db(&self) -> Result<(), Error> { - // The HTTP client doesn't support multiple statements per query, so we break them out here - // manually. 
debug!(self.log, "initializing ClickHouse database"); - let sql = include_str!("./db-single-node-init.sql"); - for query in sql.split("\n--\n") { - self.execute(query.to_string()).await?; - } - Ok(()) + self.run_many_sql_statements(include_str!( + "../schema/single-node/db-init.sql" + )) + .await } /// Wipe the ClickHouse database entirely from a single node set up. async fn wipe_single_node_db(&self) -> Result<(), Error> { debug!(self.log, "wiping ClickHouse database"); - let sql = include_str!("./db-wipe-single-node.sql").to_string(); - self.execute(sql).await - } - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - let sql = include_str!("./db-wipe-replicated.sql").to_string(); - self.execute(sql).await + self.run_many_sql_statements(include_str!( + "../schema/single-node/db-wipe.sql" + )) + .await } } @@ -839,7 +1203,9 @@ mod tests { use oximeter::Metric; use oximeter::Target; use std::net::Ipv6Addr; + use std::path::PathBuf; use std::time::Duration; + use tempfile::TempDir; use tokio::time::sleep; use uuid::Uuid; @@ -1062,18 +1428,20 @@ mod tests { db.cleanup().await.expect("Failed to cleanup ClickHouse server"); } - #[tokio::test] - async fn test_replicated() { - let cur_dir = std::env::current_dir().unwrap(); + async fn create_cluster() -> ClickHouseCluster { + let cur_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let replica_config = cur_dir.as_path().join("src/configs/replica_config.xml"); - let cur_dir = std::env::current_dir().unwrap(); let keeper_config = cur_dir.as_path().join("src/configs/keeper_config.xml"); - - let mut cluster = ClickHouseCluster::new(replica_config, keeper_config) + ClickHouseCluster::new(replica_config, keeper_config) .await - .expect("Failed to initialise ClickHouse Cluster"); + .expect("Failed to initialise ClickHouse Cluster") + } + + #[tokio::test] + async fn test_replicated() { + let mut 
cluster = create_cluster().await; // Tests that the expected error is returned on a wrong address bad_db_connection_test().await.unwrap(); @@ -3164,7 +3532,7 @@ mod tests { ) -> Result<(), Error> { use strum::IntoEnumIterator; usdt::register_probes().unwrap(); - let logctx = test_setup_log("test_update_schema_cache_on_new_sample"); + let logctx = test_setup_log("test_select_all_datum_types"); let log = &logctx.log; let client = Client::new(address, &log); @@ -3345,4 +3713,521 @@ mod tests { ); } } + + async fn create_test_upgrade_schema_directory( + replicated: bool, + versions: &[u64], + ) -> (TempDir, Vec) { + assert!(!versions.is_empty()); + let schema_dir = TempDir::new().expect("failed to create tempdir"); + let mut paths = Vec::with_capacity(versions.len()); + for version in versions.iter() { + let version_dir = Client::full_upgrade_path( + replicated, + *version, + schema_dir.as_ref(), + ); + fs::create_dir_all(&version_dir) + .await + .expect("failed to make version directory"); + paths.push(version_dir); + } + (schema_dir, paths) + } + + #[tokio::test] + async fn test_read_schema_upgrade_sql_files() { + let logctx = test_setup_log("test_read_schema_upgrade_sql_files"); + let log = &logctx.log; + const REPLICATED: bool = false; + const VERSION: u64 = 1; + let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(REPLICATED, &[VERSION]).await; + let version_dir = &version_dirs[0]; + + // Create a few SQL files in there. 
+ const SQL: &str = "SELECT NOW();"; + let filenames: Vec<_> = (0..3).map(|i| format!("up-{i}.sql")).collect(); + for name in filenames.iter() { + let full_path = version_dir.join(name); + fs::write(full_path, SQL).await.expect("Failed to write dummy SQL"); + } + + let upgrade_files = Client::read_schema_upgrade_sql_files( + log, + REPLICATED, + VERSION, + schema_dir.path(), + ) + .await + .expect("Failed to read schema upgrade files"); + for filename in filenames.iter() { + let stem = filename.split_once('.').unwrap().0; + assert_eq!( + upgrade_files.get(stem).unwrap().1, + SQL, + "upgrade SQL file contents are not correct" + ); + } + logctx.cleanup_successful(); + } + + async fn test_apply_one_schema_upgrade_impl( + log: &Logger, + address: SocketAddr, + replicated: bool, + ) { + let test_name = format!( + "test_apply_one_schema_upgrade_{}", + if replicated { "replicated" } else { "single_node" } + ); + let client = Client::new(address, &log); + + // We'll test moving from version 1, which just creates a database and + // table, to version 2, which adds two columns to that table in + // different SQL files. + client.execute(format!("CREATE DATABASE {test_name};")).await.unwrap(); + client + .execute(format!( + "\ + CREATE TABLE {test_name}.tbl (\ + `col0` UInt8 \ + )\ + ENGINE = MergeTree() + ORDER BY `col0`;\ + " + )) + .await + .unwrap(); + + // Write out the upgrading SQL files. + // + // Note that all of these statements are going in the version 2 schema + // directory. 
+ let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(replicated, &[NEXT_VERSION]) + .await; + const NEXT_VERSION: u64 = 2; + let first_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col1` UInt16;"); + let second_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col2` String;"); + let all_sql = [first_sql, second_sql]; + let version_dir = &version_dirs[0]; + for (i, sql) in all_sql.iter().enumerate() { + let path = version_dir.join(format!("up-{i}.sql")); + fs::write(path, sql) + .await + .expect("failed to write out upgrade SQL file"); + } + + // Apply the upgrade itself. + client + .apply_one_schema_upgrade( + replicated, + NEXT_VERSION, + schema_dir.path(), + ) + .await + .expect("Failed to apply one schema upgrade"); + + // Check that it actually worked! + let body = client + .execute_with_body(format!( + "\ + SELECT name, type FROM system.columns \ + WHERE database = '{test_name}' AND table = 'tbl' \ + ORDER BY name \ + FORMAT CSV;\ + " + )) + .await + .unwrap(); + let mut lines = body.lines(); + assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); + assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); + assert_eq!(lines.next().unwrap(), "\"col2\",\"String\""); + assert!(lines.next().is_none()); + } + + #[tokio::test] + async fn test_apply_one_schema_upgrade_replicated() { + const TEST_NAME: &str = "test_apply_one_schema_upgrade_replicated"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut cluster = create_cluster().await; + let address = cluster.replica_1.address; + test_apply_one_schema_upgrade_impl(log, address, true).await; + + // TODO-cleanup: These should be arrays. + // See https://github.com/oxidecomputer/omicron/issues/4460. 
+ cluster + .keeper_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 1"); + cluster + .keeper_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 2"); + cluster + .keeper_3 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 3"); + cluster + .replica_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 1"); + cluster + .replica_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 2"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_apply_one_schema_upgrade_single_node() { + const TEST_NAME: &str = "test_apply_one_schema_upgrade_single_node"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + test_apply_one_schema_upgrade_impl(log, address, false).await; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_with_version_gaps_fails() { + let logctx = + test_setup_log("test_ensure_schema_with_version_gaps_fails"); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + const REPLICATED: bool = false; + client + .initialize_db_with_version( + REPLICATED, + crate::model::OXIMETER_VERSION, + ) + .await + .expect("failed to initialize DB"); + + const BOGUS_VERSION: u64 = u64::MAX; + let (schema_dir, _) = create_test_upgrade_schema_directory( + REPLICATED, + &[crate::model::OXIMETER_VERSION, BOGUS_VERSION], + ) + .await; + + let err = client + .ensure_schema(REPLICATED, BOGUS_VERSION, schema_dir.path()) + .await + .expect_err( + "Should have received an error when ensuring \ + 
non-sequential version numbers", + ); + let Error::NonSequentialSchemaVersions = err else { + panic!( + "Expected an Error::NonSequentialSchemaVersions, found {err:?}" + ); + }; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_with_missing_desired_schema_version_fails() { + let logctx = test_setup_log( + "test_ensure_schema_with_missing_desired_schema_version_fails", + ); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + const REPLICATED: bool = false; + client + .initialize_db_with_version( + REPLICATED, + crate::model::OXIMETER_VERSION, + ) + .await + .expect("failed to initialize DB"); + + let (schema_dir, _) = create_test_upgrade_schema_directory( + REPLICATED, + &[crate::model::OXIMETER_VERSION], + ) + .await; + + const BOGUS_VERSION: u64 = u64::MAX; + let err = client.ensure_schema( + REPLICATED, + BOGUS_VERSION, + schema_dir.path(), + ).await + .expect_err("Should have received an error when ensuring a non-existing version"); + let Error::MissingSchemaVersion(missing) = err else { + panic!("Expected an Error::MissingSchemaVersion, found {err:?}"); + }; + assert_eq!(missing, BOGUS_VERSION); + + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + async fn test_ensure_schema_walks_through_multiple_steps_impl( + log: &Logger, + address: SocketAddr, + replicated: bool, + ) { + let test_name = format!( + "test_ensure_schema_walks_through_multiple_steps_{}", + if replicated { "replicated" } else { "single_node" } + ); + let client = Client::new(address, &log); + + // We need to actually have the oximeter DB here, and the version table, + // since `ensure_schema()` writes out versions to the DB as they're + // applied. 
+ client.initialize_db_with_version(replicated, 1).await.unwrap(); + + // We'll test moving from version 1, which just creates a database and + // table, to version 3, stopping off at version 2. This is similar to + // the `test_apply_one_schema_upgrade` test, but we split the two + // modifications over two versions, rather than as multiple schema + // upgrades in one version bump. + client.execute(format!("CREATE DATABASE {test_name};")).await.unwrap(); + client + .execute(format!( + "\ + CREATE TABLE {test_name}.tbl (\ + `col0` UInt8 \ + )\ + ENGINE = MergeTree() + ORDER BY `col0`;\ + " + )) + .await + .unwrap(); + + // Write out the upgrading SQL files. + // + // Note that each statement goes into a different version. + const VERSIONS: [u64; 3] = [1, 2, 3]; + let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(replicated, &VERSIONS).await; + let first_sql = String::new(); + let second_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col1` UInt16;"); + let third_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col2` String;"); + let all_sql = [first_sql, second_sql, third_sql]; + for (version_dir, sql) in version_dirs.iter().zip(all_sql) { + let path = version_dir.join("up.sql"); + fs::write(path, sql) + .await + .expect("failed to write out upgrade SQL file"); + } + + // Apply the sequence of upgrades. + client + .ensure_schema( + replicated, + *VERSIONS.last().unwrap(), + schema_dir.path(), + ) + .await + .expect("Failed to apply one schema upgrade"); + + // Check that it actually worked! 
+ let body = client + .execute_with_body(format!( + "\ + SELECT name, type FROM system.columns \ + WHERE database = '{test_name}' AND table = 'tbl' \ + ORDER BY name \ + FORMAT CSV;\ + " + )) + .await + .unwrap(); + let mut lines = body.lines(); + assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); + assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); + assert_eq!(lines.next().unwrap(), "\"col2\",\"String\""); + assert!(lines.next().is_none()); + + let latest_version = client.read_latest_version().await.unwrap(); + assert_eq!( + latest_version, + *VERSIONS.last().unwrap(), + "Updated version not written to the database" + ); + } + + #[tokio::test] + async fn test_ensure_schema_walks_through_multiple_steps_single_node() { + const TEST_NAME: &str = + "test_ensure_schema_walks_through_multiple_steps_single_node"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + test_ensure_schema_walks_through_multiple_steps_impl( + log, address, false, + ) + .await; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_walks_through_multiple_steps_replicated() { + const TEST_NAME: &str = + "test_ensure_schema_walks_through_multiple_steps_replicated"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut cluster = create_cluster().await; + let address = cluster.replica_1.address; + test_ensure_schema_walks_through_multiple_steps_impl( + log, address, true, + ) + .await; + cluster + .keeper_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 1"); + cluster + .keeper_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 2"); + cluster + .keeper_3 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 3"); + cluster 
+ .replica_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 1"); + cluster + .replica_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 2"); + logctx.cleanup_successful(); + } + + #[test] + fn test_verify_schema_upgrades() { + let mut map = BTreeMap::new(); + + // Check that we fail if the upgrade tries to insert data. + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from( + "INSERT INTO oximeter.version (*) VALUES (100, now());", + ), + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + + // Sanity check for the normal case. + map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from("ALTER TABLE oximeter.measurements_bool ADD COLUMN foo UInt64;") + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_ok()); + + // Check that we fail if the upgrade tries to delete any data. + map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from("ALTER TABLE oximeter.measurements_bool DELETE WHERE timestamp < NOW();") + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + + // Check that we fail if the upgrade contains multiple SQL statements. + map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from( + "\ + ALTER TABLE oximeter.measurements_bool \ + ADD COLUMN foo UInt8; \ + ALTER TABLE oximeter.measurements_bool \ + ADD COLUMN bar UInt8; \ + ", + ), + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/4369. + // + // This tests that we can successfully query all extant field types from the + // schema table. There may be no such values, but the query itself should + // succeed.
+ #[tokio::test] + async fn test_select_all_field_types() { + use strum::IntoEnumIterator; + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_select_all_field_types"); + let log = &logctx.log; + + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + + // Attempt to select all schema with each field type. + for ty in oximeter::FieldType::iter() { + let sql = format!( + "SELECT COUNT() \ + FROM {}.timeseries_schema \ + WHERE arrayFirstIndex(x -> x = '{:?}', fields.type) > 0;", + crate::DATABASE_NAME, + crate::model::DbFieldType::from(ty), + ); + let res = client.execute_with_body(sql).await.unwrap(); + let count = res.trim().parse::().unwrap(); + assert_eq!(count, 0); + } + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 11ecbeddc8..425c5189ee 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -15,7 +15,9 @@ use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::convert::TryFrom; +use std::io; use std::num::NonZeroU32; +use std::path::PathBuf; use thiserror::Error; mod client; @@ -23,7 +25,9 @@ pub mod model; pub mod query; pub use client::{Client, DbWrite}; -#[derive(Clone, Debug, Error)] +pub use model::OXIMETER_VERSION; + +#[derive(Debug, Error)] pub enum Error { #[error("Oximeter core error: {0}")] Oximeter(#[from] oximeter::MetricsError), @@ -79,6 +83,38 @@ pub enum Error { #[error("Query must resolve to a single timeseries if limit is specified")] InvalidLimitQuery, + + #[error("Database is not at expected version")] + DatabaseVersionMismatch { expected: u64, found: u64 }, + + #[error("Could 
not read schema directory")] + ReadSchemaDir { + context: String, + #[source] + err: io::Error, + }, + + #[error("Could not read SQL file from path")] + ReadSqlFile { + context: String, + #[source] + err: io::Error, + }, + + #[error("Non-UTF8 schema directory entry")] + NonUtf8SchemaDirEntry(std::ffi::OsString), + + #[error("Missing desired schema version: {0}")] + MissingSchemaVersion(u64), + + #[error("Data-modifying operations are not supported in schema updates")] + SchemaUpdateModifiesData { path: PathBuf, statement: String }, + + #[error("Schema update SQL files should contain at most 1 statement")] + MultipleSqlStatementsInSchemaUpdate { path: PathBuf }, + + #[error("Schema update versions must be sequential without gaps")] + NonSequentialSchemaVersions, } /// A timeseries name. diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 41c7ab9d24..715e025a04 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -38,11 +38,12 @@ use uuid::Uuid; /// Describes the version of the Oximeter database. /// -/// See: [crate::Client::initialize_db_with_version] for usage. +/// For usage and details see: /// -/// TODO(#4271): The current implementation of versioning will wipe the metrics -/// database if this number is incremented. -pub const OXIMETER_VERSION: u64 = 2; +/// - [`crate::Client::initialize_db_with_version`] +/// - [`crate::Client::ensure_schema`] +/// - The `clickhouse-schema-updater` binary in this crate +pub const OXIMETER_VERSION: u64 = 3; // Wrapper type to represent a boolean in the database. 
// diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 0cc3299ec4..325974781e 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -42,6 +42,7 @@ use uuid::Uuid; JsonSchema, Serialize, Deserialize, + strum::EnumIter, )] #[serde(rename_all = "snake_case")] pub enum FieldType { diff --git a/oximeter/producer/Cargo.toml b/oximeter/producer/Cargo.toml index ef2f16c8ad..79f6c754f7 100644 --- a/oximeter/producer/Cargo.toml +++ b/oximeter/producer/Cargo.toml @@ -11,7 +11,6 @@ dropshot.workspace = true nexus-client.workspace = true omicron-common.workspace = true oximeter.workspace = true -reqwest = { workspace = true, features = [ "json" ] } schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true slog.workspace = true diff --git a/package-manifest.toml b/package-manifest.toml index a1ffe91d6b..73a1fa2deb 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -107,9 +107,12 @@ setup_hint = """ service_name = "oximeter" only_for_targets.image = "standard" source.type = "local" -source.rust.binary_names = ["oximeter"] +source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true -source.paths = [ { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" } ] +source.paths = [ + { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, + { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, +] output.type = "zone" [package.clickhouse] @@ -381,10 +384,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" +source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "572ac3b19e51b4e476266a62c2b7e06eff81c386cb48247c4b9f9b1e2ee81895" +source.sha256 = "897d0fd6c0b82db42256a63a13c228152e1117434afa2681f649b291e3c6f46d" output.type = "zone" [package.crucible-pantry] @@ -392,10 +395,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" +source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "812269958e18f54d72bc10bb4fb81f26c084cf762da7fd98e63d58c689be9ad1" +source.sha256 = "fe545de7ac4f15454d7827927149c5f0fc68ce9545b4f1ef96aac9ac8039805a" output.type = "zone" # Refer to @@ -406,10 +409,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source.commit = "54398875a2125227d13827d4236dce943c019b1c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "aa1d9dc5c9117c100f9636901e8eec6679d7dfbf869c46b7f2873585f94a1b89" +source.sha256 = "01b8563db6626f90ee3fb6d97e7921b0a680373d843c1bea7ebf46fcea4f7b28" output.type = "zone" [package.mg-ddm-gz] @@ -422,7 +425,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. 
Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" +source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt source.sha256 = "d871406ed926571efebdab248de08d4f1ca6c31d4f9a691ce47b186474165c57" @@ -438,7 +441,7 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" +source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt source.sha256 = "85ec05a8726989b5cb0a567de6b0855f6f84b6f3409ac99ccaf372be5821e45d" @@ -453,10 +456,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "82aa17646265449ee0ede9410208e510fa4a5877" +source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "1badd6adfece0a1b661f7efb9a2ca65e471f45cf9c8ecbd72b228ca174311e31" +source.sha256 = "aa7241cd35976f28f25aaf3ce2ce2af14dae1da9d67585c7de3b724dbcc55e60" output.type = "zone" output.intermediate_only = true diff --git a/package/Cargo.toml b/package/Cargo.toml index 9bf0b37a23..6cc0e343db 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -12,7 +12,6 @@ futures.workspace = true hex.workspace = true illumos-utils.workspace = true indicatif.workspace = true -omicron-common.workspace = true omicron-zone-package.workspace = true petgraph.workspace = true rayon.workspace = true @@ -20,7 +19,6 @@ reqwest = { workspace = true, features = [ "rustls-tls" ] } ring.workspace = true semver.workspace = true serde.workspace = true -serde_derive.workspace = true sled-hardware.workspace = true slog.workspace = true slog-async.workspace = true @@ -29,7 +27,6 @@ smf.workspace = true strum.workspace = true swrite.workspace = true tar.workspace = true -tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true diff --git a/schema/crdb/10.0.0/README.md b/schema/crdb/10.0.0/README.md new file mode 100644 index 0000000000..9a98141e37 --- /dev/null +++ b/schema/crdb/10.0.0/README.md @@ -0,0 +1,12 @@ +# Why? + +This migration is part of a PR that adds a check to the schema tests ensuring that the order of enum members is the same when starting from scratch with `dbinit.sql` as it is when building up from existing deployments by running the migrations. 
The problem: there were already two enums, `dataset_kind` and `service_kind`, where the order did not match, so we have to fix that by putting the enums in the "right" order even on an existing deployment where the order is wrong. To do that, for each of those enums, we: + +1. add `clickhouse_keeper2` member +1. change existing uses of `clickhouse_keeper` to `clickhouse_keeper2` +1. drop `clickhouse_keeper` member +1. add `clickhouse_keeper` back in the right order using `AFTER 'clickhouse'` +1. change uses of `clickhouse_keeper2` back to `clickhouse_keeper` +1. drop `clickhouse_keeper2` + +As there are 6 steps here and two different enums to do them for, there are 12 `up*.sql` files. diff --git a/schema/crdb/10.0.0/up01.sql b/schema/crdb/10.0.0/up01.sql new file mode 100644 index 0000000000..6b92700215 --- /dev/null +++ b/schema/crdb/10.0.0/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up02.sql b/schema/crdb/10.0.0/up02.sql new file mode 100644 index 0000000000..d7c15e1959 --- /dev/null +++ b/schema/crdb/10.0.0/up02.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up03.sql b/schema/crdb/10.0.0/up03.sql new file mode 100644 index 0000000000..52a32f9f8a --- /dev/null +++ b/schema/crdb/10.0.0/up03.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.dataset +SET kind = 'clickhouse_keeper2' +WHERE kind = 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up04.sql b/schema/crdb/10.0.0/up04.sql new file mode 100644 index 0000000000..c9e193f1c3 --- /dev/null +++ b/schema/crdb/10.0.0/up04.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.service +SET kind = 'clickhouse_keeper2' +WHERE kind = 
'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up05.sql b/schema/crdb/10.0.0/up05.sql new file mode 100644 index 0000000000..4e64de9425 --- /dev/null +++ b/schema/crdb/10.0.0/up05.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up06.sql b/schema/crdb/10.0.0/up06.sql new file mode 100644 index 0000000000..4be0ddf616 --- /dev/null +++ b/schema/crdb/10.0.0/up06.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up07.sql b/schema/crdb/10.0.0/up07.sql new file mode 100644 index 0000000000..8971f7cf47 --- /dev/null +++ b/schema/crdb/10.0.0/up07.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up08.sql b/schema/crdb/10.0.0/up08.sql new file mode 100644 index 0000000000..4a53d9b812 --- /dev/null +++ b/schema/crdb/10.0.0/up08.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up09.sql b/schema/crdb/10.0.0/up09.sql new file mode 100644 index 0000000000..60f2bbbb27 --- /dev/null +++ b/schema/crdb/10.0.0/up09.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.dataset +SET kind = 'clickhouse_keeper' +WHERE kind = 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up10.sql b/schema/crdb/10.0.0/up10.sql new file mode 100644 index 0000000000..ad8801709d --- /dev/null +++ b/schema/crdb/10.0.0/up10.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.service +SET kind = 'clickhouse_keeper' +WHERE kind = 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up11.sql 
b/schema/crdb/10.0.0/up11.sql new file mode 100644 index 0000000000..2c50c0064e --- /dev/null +++ b/schema/crdb/10.0.0/up11.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up12.sql b/schema/crdb/10.0.0/up12.sql new file mode 100644 index 0000000000..376c25bfcd --- /dev/null +++ b/schema/crdb/10.0.0/up12.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 0534c79aef..2ce8ae3bdc 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -132,6 +132,51 @@ "format": "uint32", "minimum": 0.0 }, + "connect_retry": { + "description": "The interval in seconds between peer connection retry attempts.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "delay_open": { + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "hold_time": { + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "idle_hold_time": { + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "keepalive": { + "description": "The interval to send keepalive messages at.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 06cc8092ce..61e61709e1 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -9,7 +9,6 @@ license = "MPL-2.0" anyhow.workspace = true async-trait.workspace = true base64.workspace = true -bincode.workspace = true bootstore.workspace = true bootstrap-agent-client.workspace = true bytes.workspace = true @@ -20,7 +19,6 @@ cfg-if.workspace = true chrono.workspace = true clap.workspace = true # Only used by the simulated sled agent. 
-crucible-client-types.workspace = true crucible-agent-client.workspace = true ddm-admin-client.workspace = true derive_more.workspace = true @@ -41,16 +39,15 @@ itertools.workspace = true key-manager.workspace = true libc.workspace = true macaddr.workspace = true +mg-admin-client.workspace = true nexus-client.workspace = true omicron-common.workspace = true once_cell.workspace = true oximeter.workspace = true oximeter-instruments.workspace = true oximeter-producer.workspace = true -percent-encoding.workspace = true -progenitor.workspace = true -propolis-client = { workspace = true, features = [ "generated-migration" ] } -propolis-server.workspace = true # Only used by the simulated sled agent +propolis-client.workspace = true +propolis-mock-server.workspace = true # Only used by the simulated sled agent rand = { workspace = true, features = ["getrandom"] } reqwest = { workspace = true, features = ["rustls-tls", "stream"] } schemars = { workspace = true, features = [ "chrono", "uuid1" ] } @@ -60,6 +57,7 @@ serde_json = {workspace = true, features = ["raw_value"]} sha3.workspace = true sled-agent-client.workspace = true sled-hardware.workspace = true +sled-storage.workspace = true slog.workspace = true slog-async.workspace = true slog-dtrace.workspace = true @@ -69,7 +67,6 @@ tar.workspace = true thiserror.workspace = true tofino.workspace = true tokio = { workspace = true, features = [ "full" ] } -tokio-tungstenite.workspace = true toml.workspace = true usdt.workspace = true uuid.workspace = true @@ -97,7 +94,8 @@ slog-async.workspace = true slog-term.workspace = true tempfile.workspace = true -illumos-utils = { workspace = true, features = ["testing"] } +illumos-utils = { workspace = true, features = ["testing", "tmp_keypath"] } +sled-storage = { workspace = true, features = ["testing"] } # # Disable doc builds by default for our binaries to work around issue diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 6ecb9dac43..2e9ea4c8d9 
100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -128,7 +128,7 @@ pub(crate) fn ensure_backing_fs( let dataset = format!( "{}/{}/{}", boot_zpool_name, - sled_hardware::disk::M2_BACKING_DATASET, + sled_storage::dataset::M2_BACKING_DATASET, bfs.name ); let mountpoint = Mountpoint::Path(Utf8PathBuf::from(bfs.mountpoint)); diff --git a/sled-agent/src/bootstrap/bootstore.rs b/sled-agent/src/bootstrap/bootstore_setup.rs similarity index 55% rename from sled-agent/src/bootstrap/bootstore.rs rename to sled-agent/src/bootstrap/bootstore_setup.rs index 17267bef55..9eb0a87c03 100644 --- a/sled-agent/src/bootstrap/bootstore.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -5,124 +5,78 @@ //! Helpers for configuring and starting the bootstore during bootstrap agent //! startup. +#![allow(clippy::result_large_err)] + use super::config::BOOTSTORE_PORT; use super::server::StartError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::Client as DdmAdminClient; use sled_hardware::underlay::BootstrapInterface; use sled_hardware::Baseboard; +use sled_storage::dataset::CLUSTER_DATASET; +use sled_storage::resources::StorageResources; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::time::Duration; -use tokio::task::JoinHandle; const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; -pub(super) struct BootstoreHandles { - pub(super) node_handle: bootstore::NodeHandle, - - // These two are never used; we keep them to show ownership of the spawned - // tasks. 
- _node_task_handle: JoinHandle<()>, - _peer_update_task_handle: JoinHandle<()>, -} - -impl BootstoreHandles { - pub(super) async fn spawn( - storage_resources: &StorageResources, - ddm_admin_client: DdmAdminClient, - baseboard: Baseboard, - global_zone_bootstrap_ip: Ipv6Addr, - base_log: &Logger, - ) -> Result { - let config = bootstore::Config { - id: baseboard, - addr: SocketAddrV6::new( - global_zone_bootstrap_ip, - BOOTSTORE_PORT, - 0, - 0, - ), - time_per_tick: Duration::from_millis(250), - learn_timeout: Duration::from_secs(5), - rack_init_timeout: Duration::from_secs(300), - rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths( - &storage_resources, - ) - .await?, - network_config_ledger_paths: bootstore_network_config_paths( - &storage_resources, - ) - .await?, - }; - - let (mut node, node_handle) = - bootstore::Node::new(config, base_log).await; - - let join_handle = tokio::spawn(async move { node.run().await }); - - // Spawn a task for polling DDMD and updating bootstore - let peer_update_handle = - tokio::spawn(poll_ddmd_for_bootstore_peer_update( - base_log.new(o!("component" => "bootstore_ddmd_poller")), - node_handle.clone(), - ddm_admin_client, - )); - - Ok(Self { - node_handle, - _node_task_handle: join_handle, - _peer_update_task_handle: peer_update_handle, - }) - } +pub fn new_bootstore_config( + storage_resources: &StorageResources, + baseboard: Baseboard, + global_zone_bootstrap_ip: Ipv6Addr, +) -> Result { + Ok(bootstore::Config { + id: baseboard, + addr: SocketAddrV6::new(global_zone_bootstrap_ip, BOOTSTORE_PORT, 0, 0), + time_per_tick: Duration::from_millis(250), + learn_timeout: Duration::from_secs(5), + rack_init_timeout: Duration::from_secs(300), + rack_secret_request_timeout: Duration::from_secs(5), + fsm_state_ledger_paths: bootstore_fsm_state_paths(&storage_resources)?, + network_config_ledger_paths: bootstore_network_config_paths( + &storage_resources, + )?, + }) } -async fn 
bootstore_fsm_state_paths( +fn bootstore_fsm_state_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn bootstore_network_config_paths( +fn bootstore_network_config_paths( storage: &StorageResources, ) -> Result, StartError> { let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CLUSTER_DATASET) - .await + .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); if paths.is_empty() { - return Err(StartError::MissingM2Paths( - sled_hardware::disk::CLUSTER_DATASET, - )); + return Err(StartError::MissingM2Paths(CLUSTER_DATASET)); } Ok(paths) } -async fn poll_ddmd_for_bootstore_peer_update( +pub async fn poll_ddmd_for_bootstore_peer_update( log: Logger, bootstore_node_handle: bootstore::NodeHandle, - ddmd_client: DdmAdminClient, ) { let mut current_peers: BTreeSet = BTreeSet::new(); // We're talking to a service's admin interface on localhost and @@ -132,7 +86,7 @@ async fn poll_ddmd_for_bootstore_peer_update( // We also use this timeout in the case of spurious ddmd failures // that require a reconnection from the ddmd_client. const RETRY: tokio::time::Duration = tokio::time::Duration::from_secs(5); - + let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); loop { match ddmd_client .derive_bootstrap_addrs_from_prefixes(&[ @@ -154,7 +108,7 @@ async fn poll_ddmd_for_bootstore_peer_update( log, concat!( "Bootstore comms error: {}. 
", - "bootstore::Node task must have paniced", + "bootstore::Node task must have panicked", ), e ); diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index a8aa978f9d..bec309dc27 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -17,7 +17,9 @@ use gateway_client::Client as MgsClient; use internal_dns::resolver::{ResolveError, Resolver as DnsResolver}; use internal_dns::ServiceName; use ipnetwork::{IpNetwork, Ipv6Network}; -use omicron_common::address::{Ipv6Subnet, MGS_PORT}; +use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, Prefix4}; +use mg_admin_client::Client as MgdClient; +use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; use omicron_common::api::internal::shared::{ PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, @@ -37,6 +39,7 @@ use std::time::{Duration, Instant}; use thiserror::Error; static BOUNDARY_SERVICES_ADDR: &str = "fd00:99::1"; +const BGP_SESSION_RESOLUTION: u64 = 100; /// Errors that can occur during early network setup #[derive(Error, Debug)] @@ -55,6 +58,12 @@ pub enum EarlyNetworkSetupError { #[error("Error during DNS lookup: {0}")] DnsResolver(#[from] ResolveError), + + #[error("BGP configuration error: {0}")] + BgpConfigurationError(String), + + #[error("MGD error: {0}")] + MgdError(String), } enum LookupSwitchZoneAddrsResult { @@ -453,6 +462,67 @@ impl<'a> EarlyNetworkSetup<'a> { ddmd_client.advertise_prefix(Ipv6Subnet::new(ipv6_entry.addr)); } + let mgd = MgdClient::new( + &self.log, + SocketAddrV6::new(switch_zone_underlay_ip, MGD_PORT, 0, 0).into(), + ) + .map_err(|e| { + EarlyNetworkSetupError::MgdError(format!( + "initialize mgd client: {e}" + )) + })?; + + // Iterate through ports and apply BGP config. 
+ for port in &our_ports { + let mut bgp_peer_configs = Vec::new(); + for peer in &port.bgp_peers { + let config = rack_network_config + .bgp + .iter() + .find(|x| x.asn == peer.asn) + .ok_or(EarlyNetworkSetupError::BgpConfigurationError( + format!( + "asn {} referenced by peer undefined", + peer.asn + ), + ))?; + + let bpc = BgpPeerConfig { + asn: peer.asn, + name: format!("{}", peer.addr), + host: format!("{}:179", peer.addr), + hold_time: peer.hold_time.unwrap_or(6), + idle_hold_time: peer.idle_hold_time.unwrap_or(3), + delay_open: peer.delay_open.unwrap_or(0), + connect_retry: peer.connect_retry.unwrap_or(3), + keepalive: peer.keepalive.unwrap_or(2), + resolution: BGP_SESSION_RESOLUTION, + originate: config + .originate + .iter() + .map(|x| Prefix4 { length: x.prefix(), value: x.ip() }) + .collect(), + }; + bgp_peer_configs.push(bpc); + } + + if bgp_peer_configs.is_empty() { + continue; + } + + mgd.inner + .bgp_apply(&ApplyRequest { + peer_group: port.port.clone(), + peers: bgp_peer_configs, + }) + .await + .map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "BGP peer configuration failed: {e}", + )) + })?; + } + Ok(our_ports) } diff --git a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index c69bdeb0ce..7c32bf48a5 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -12,7 +12,6 @@ use super::BootstrapError; use super::RssAccessError; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rack_ops::{RackInitId, RackResetId}; -use crate::storage_manager::StorageResources; use crate::updates::ConfigUpdates; use crate::updates::{Component, UpdateManager}; use bootstore::schemes::v0 as bootstore; @@ -25,6 +24,7 @@ use omicron_common::api::external::Error; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; +use sled_storage::manager::StorageHandle; use slog::Logger; use 
std::net::Ipv6Addr; use tokio::sync::mpsc::error::TrySendError; @@ -33,7 +33,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_resources: StorageResources, + pub(crate) storage_manager: StorageHandle, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -50,7 +50,7 @@ impl BootstrapServerContext { self.rss_access.start_initializing( &self.base_log, self.global_zone_bootstrap_ip, - &self.storage_resources, + &self.storage_manager, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/mod.rs b/sled-agent/src/bootstrap/mod.rs index 96e674acf3..590e13c891 100644 --- a/sled-agent/src/bootstrap/mod.rs +++ b/sled-agent/src/bootstrap/mod.rs @@ -4,7 +4,7 @@ //! Bootstrap-related utilities -mod bootstore; +pub(crate) mod bootstore_setup; pub mod client; pub mod config; pub mod early_networking; @@ -14,7 +14,7 @@ pub(crate) mod params; mod pre_server; mod rack_ops; pub(crate) mod rss_handle; -mod secret_retriever; +pub mod secret_retriever; pub mod server; mod sprockets_server; mod views; diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 05493f5aa3..02710ff583 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -11,13 +11,15 @@ #![allow(clippy::result_large_err)] use super::maghemite; -use super::secret_retriever::LrtqOrHardcodedSecretRetriever; use super::server::StartError; use crate::config::Config; use crate::config::SidecarRevision; +use crate::long_running_tasks::{ + spawn_all_longrunning_tasks, LongRunningTaskHandles, +}; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageManager; +use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use 
ddm_admin_client::Client as DdmAdminClient; @@ -30,115 +32,16 @@ use illumos_utils::zfs; use illumos_utils::zfs::Zfs; use illumos_utils::zone; use illumos_utils::zone::Zones; -use key_manager::KeyManager; -use key_manager::StorageKeyRequester; use omicron_common::address::Ipv6Subnet; use omicron_common::FileKv; use sled_hardware::underlay; use sled_hardware::DendriteAsic; -use sled_hardware::HardwareManager; -use sled_hardware::HardwareUpdate; use sled_hardware::SledMode; use slog::Drain; use slog::Logger; use std::net::IpAddr; use std::net::Ipv6Addr; -use tokio::sync::broadcast; -use tokio::task::JoinHandle; - -pub(super) struct BootstrapManagers { - pub(super) hardware: HardwareManager, - pub(super) storage: StorageManager, - pub(super) service: ServiceManager, -} - -impl BootstrapManagers { - pub(super) async fn handle_hardware_update( - &self, - update: Result, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - match update { - Ok(update) => match update { - HardwareUpdate::TofinoLoaded => { - let baseboard = self.hardware.baseboard(); - if let Err(e) = self - .service - .activate_switch( - sled_agent.map(|sa| sa.switch_zone_underlay_info()), - baseboard, - ) - .await - { - warn!(log, "Failed to activate switch: {e}"); - } - } - HardwareUpdate::TofinoUnloaded => { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - HardwareUpdate::TofinoDeviceChange => { - if let Some(sled_agent) = sled_agent { - sled_agent.notify_nexus_about_self(log); - } - } - HardwareUpdate::DiskAdded(disk) => { - self.storage.upsert_disk(disk).await; - } - HardwareUpdate::DiskRemoved(disk) => { - self.storage.delete_disk(disk).await; - } - }, - Err(broadcast::error::RecvError::Lagged(count)) => { - warn!(log, "Hardware monitor missed {count} messages"); - self.check_latest_hardware_snapshot(sled_agent, log).await; - } - Err(broadcast::error::RecvError::Closed) => { - // The `HardwareManager` monitoring task is an 
infinite loop - - // the only way for us to get `Closed` here is if it panicked, - // so we will propagate such a panic. - panic!("Hardware manager monitor task panicked"); - } - } - } - - // Observe the current hardware state manually. - // - // We use this when we're monitoring hardware for the first - // time, and if we miss notifications. - pub(super) async fn check_latest_hardware_snapshot( - &self, - sled_agent: Option<&SledAgent>, - log: &Logger, - ) { - let underlay_network = sled_agent.map(|sled_agent| { - sled_agent.notify_nexus_about_self(log); - sled_agent.switch_zone_underlay_info() - }); - info!( - log, "Checking current full hardware snapshot"; - "underlay_network_info" => ?underlay_network, - ); - if self.hardware.is_scrimlet_driver_loaded() { - let baseboard = self.hardware.baseboard(); - if let Err(e) = - self.service.activate_switch(underlay_network, baseboard).await - { - warn!(log, "Failed to activate switch: {e}"); - } - } else { - if let Err(e) = self.service.deactivate_switch().await { - warn!(log, "Failed to deactivate switch: {e}"); - } - } - - self.storage - .ensure_using_exactly_these_disks(self.hardware.disks()) - .await; - } -} +use tokio::sync::oneshot; pub(super) struct BootstrapAgentStartup { pub(super) config: Config, @@ -146,8 +49,10 @@ pub(super) struct BootstrapAgentStartup { pub(super) ddm_admin_localhost_client: DdmAdminClient, pub(super) base_log: Logger, pub(super) startup_log: Logger, - pub(super) managers: BootstrapManagers, - pub(super) key_manager_handle: JoinHandle<()>, + pub(super) service_manager: ServiceManager, + pub(super) long_running_task_handles: LongRunningTaskHandles, + pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) underlay_available_tx: oneshot::Sender, } impl BootstrapAgentStartup { @@ -201,36 +106,23 @@ impl BootstrapAgentStartup { // This should be a no-op if already enabled. 
BootstrapNetworking::enable_ipv6_forwarding().await?; - // Spawn the `KeyManager` which is needed by the the StorageManager to - // retrieve encryption keys. - let (storage_key_requester, key_manager_handle) = - spawn_key_manager_task(&base_log); - + // Are we a gimlet or scrimlet? let sled_mode = sled_mode_from_config(&config)?; - // Start monitoring hardware. This is blocking so we use - // `spawn_blocking`; similar to above, we move some things in and (on - // success) it gives them back. - let (base_log, log, hardware_manager) = { - tokio::task::spawn_blocking(move || { - info!( - log, "Starting hardware monitor"; - "sled_mode" => ?sled_mode, - ); - let hardware_manager = - HardwareManager::new(&base_log, sled_mode) - .map_err(StartError::StartHardwareManager)?; - Ok::<_, StartError>((base_log, log, hardware_manager)) - }) - .await - .unwrap()? - }; - - // Create a `StorageManager` and (possibly) synthetic disks. - let storage_manager = - StorageManager::new(&base_log, storage_key_requester).await; - upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config) - .await; + // Spawn all important long running tasks that live for the lifetime of + // the process and are used by both the bootstrap agent and sled agent + let ( + long_running_task_handles, + sled_agent_started_tx, + service_manager_ready_tx, + underlay_available_tx, + ) = spawn_all_longrunning_tasks( + &base_log, + sled_mode, + startup_networking.global_zone_bootstrap_ip, + &config, + ) + .await; let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; @@ -243,22 +135,27 @@ impl BootstrapAgentStartup { config.skip_timesync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - storage_manager.resources().clone(), - storage_manager.zone_bundler().clone(), + long_running_task_handles.storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), ); + // Inform the hardware monitor that the service manager is ready + // This is a onetime 
operation, and so we use a oneshot channel + service_manager_ready_tx + .send(service_manager.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); + Ok(Self { config, global_zone_bootstrap_ip, ddm_admin_localhost_client, base_log, startup_log: log, - managers: BootstrapManagers { - hardware: hardware_manager, - storage: storage_manager, - service: service_manager, - }, - key_manager_handle, + service_manager, + long_running_task_handles, + sled_agent_started_tx, + underlay_available_tx, }) } } @@ -359,13 +256,10 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> { // to create and mount encrypted datasets. info!( log, "Ensuring zfs key directory exists"; - "path" => sled_hardware::disk::KEYPATH_ROOT, + "path" => zfs::KEYPATH_ROOT, ); - std::fs::create_dir_all(sled_hardware::disk::KEYPATH_ROOT).map_err(|err| { - StartError::CreateZfsKeyDirectory { - dir: sled_hardware::disk::KEYPATH_ROOT, - err, - } + std::fs::create_dir_all(zfs::KEYPATH_ROOT).map_err(|err| { + StartError::CreateZfsKeyDirectory { dir: zfs::KEYPATH_ROOT, err } }) } @@ -388,23 +282,6 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { .map_err(StartError::EnsureZfsRamdiskDataset) } -async fn upsert_synthetic_zpools_if_needed( - log: &Logger, - storage_manager: &StorageManager, - config: &Config, -) { - if let Some(pools) = &config.zpools { - for pool in pools { - info!( - log, - "Upserting synthetic zpool to Storage Manager: {}", - pool.to_string() - ); - storage_manager.upsert_synthetic_disk(pool.clone()).await; - } - } -} - // Combine the `sled_mode` config with the build-time switch type to determine // the actual sled mode. 
fn sled_mode_from_config(config: &Config) -> Result { @@ -447,19 +324,6 @@ fn sled_mode_from_config(config: &Config) -> Result { Ok(sled_mode) } -fn spawn_key_manager_task( - log: &Logger, -) -> (StorageKeyRequester, JoinHandle<()>) { - let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); - let (mut key_manager, storage_key_requester) = - KeyManager::new(log, secret_retriever); - - let key_manager_handle = - tokio::spawn(async move { key_manager.run().await }); - - (storage_key_requester, key_manager_handle) -} - #[derive(Debug, Clone)] pub(crate) struct BootstrapNetworking { pub(crate) bootstrap_etherstub: dladm::Etherstub, diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index b8721f8332..5cfd0b074a 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -8,11 +8,11 @@ use crate::bootstrap::http_entrypoints::RackOperationStatus; use crate::bootstrap::params::RackInitializeRequest; use crate::bootstrap::rss_handle::RssHandle; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::mem; use std::net::Ipv6Addr; @@ -171,7 +171,7 @@ impl RssAccess { &self, parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, bootstore_node_handle: &bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result { @@ -207,14 +207,14 @@ impl RssAccess { mem::drop(status); let parent_log = parent_log.clone(); - let storage_resources = storage_resources.clone(); + let storage_manager = storage_manager.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { let result = rack_initialize( &parent_log, global_zone_bootstrap_ip, 
- storage_resources, + storage_manager, bootstore_node_handle, request, ) @@ -342,7 +342,7 @@ enum RssStatus { async fn rack_initialize( parent_log: &Logger, global_zone_bootstrap_ip: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, ) -> Result<(), SetupServiceError> { @@ -350,7 +350,7 @@ async fn rack_initialize( parent_log, request, global_zone_bootstrap_ip, - storage_resources, + storage_manager, bootstore_node_handle, ) .await diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index c82873d91d..5d9c01e7f2 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -9,7 +9,6 @@ use super::params::StartSledAgentRequest; use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; -use crate::storage_manager::StorageResources; use ::bootstrap_agent_client::Client as BootstrapAgentClient; use bootstore::schemes::v0 as bootstore; use futures::stream::FuturesUnordered; @@ -17,6 +16,7 @@ use futures::StreamExt; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -46,7 +46,7 @@ impl RssHandle { log: &Logger, config: SetupServiceConfig, our_bootstrap_address: Ipv6Addr, - storage_resources: StorageResources, + storage_manager: StorageHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { let (tx, rx) = rss_channel(our_bootstrap_address); @@ -54,7 +54,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_resources, + storage_manager, tx, bootstore, ); diff --git a/sled-agent/src/bootstrap/server.rs 
b/sled-agent/src/bootstrap/server.rs index e57e8318e4..f4948de83b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -8,12 +8,10 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; use super::params::RackInitializeRequest; use super::params::StartSledAgentRequest; -use super::pre_server::BootstrapManagers; use super::rack_ops::RackInitId; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; -use crate::bootstrap::bootstore::BootstoreHandles; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::http_entrypoints::api as http_api; use crate::bootstrap::http_entrypoints::BootstrapServerContext; @@ -24,16 +22,17 @@ use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; use crate::bootstrap::sprockets_server::SprocketsServer; use crate::config::Config as SledConfig; use crate::config::ConfigError; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; +use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_manager::StorageResources; +use crate::storage_monitor::UnderlayAccess; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use ddm_admin_client::Client as DdmAdminClient; use ddm_admin_client::DdmError; use dropshot::HttpServer; -use futures::Future; use futures::StreamExt; use illumos_utils::dladm; use illumos_utils::zfs; @@ -42,12 +41,12 @@ use illumos_utils::zone::Zones; use omicron_common::ledger; use omicron_common::ledger::Ledger; use sled_hardware::underlay; -use sled_hardware::HardwareUpdate; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::io; use std::net::SocketAddr; use std::net::SocketAddrV6; -use tokio::sync::broadcast; use tokio::sync::mpsc; use tokio::sync::oneshot; use tokio::task::JoinHandle; @@ -175,65 
+174,18 @@ impl Server { ddm_admin_localhost_client, base_log, startup_log, - managers, - key_manager_handle, + service_manager, + long_running_task_handles, + sled_agent_started_tx, + underlay_available_tx, } = BootstrapAgentStartup::run(config).await?; - // From this point on we will listen for hardware notifications and - // potentially start the switch zone and be notified of new disks; we - // are responsible for responding to updates from this point on. - let mut hardware_monitor = managers.hardware.monitor(); - let storage_resources = managers.storage.resources(); - - // Check the latest hardware snapshot; we could have missed events - // between the creation of the hardware manager and our subscription of - // its monitor. - managers.check_latest_hardware_snapshot(None, &startup_log).await; - - // Wait for our boot M.2 to show up. - wait_while_handling_hardware_updates( - wait_for_boot_m2(storage_resources, &startup_log), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "waiting for boot M.2", - ) - .await; - - // Wait for the bootstore to start. - let bootstore_handles = wait_while_handling_hardware_updates( - BootstoreHandles::spawn( - storage_resources, - ddm_admin_localhost_client.clone(), - managers.hardware.baseboard(), - global_zone_bootstrap_ip, - &base_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "initializing bootstore", - ) - .await?; - // Do we have a StartSledAgentRequest stored in the ledger? 
- let maybe_ledger = wait_while_handling_hardware_updates( - async { - let paths = sled_config_paths(storage_resources).await?; - let maybe_ledger = - Ledger::::new(&startup_log, paths) - .await; - Ok::<_, StartError>(maybe_ledger) - }, - &mut hardware_monitor, - &managers, - None, // No underlay network yet - &startup_log, - "loading sled-agent request from ledger", - ) - .await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager) + .await?; + let maybe_ledger = + Ledger::::new(&startup_log, paths).await; // We don't yet _act_ on the `StartSledAgentRequest` if we have one, but // if we have one we init our `RssAccess` noting that we're already @@ -250,9 +202,9 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_resources: storage_resources.clone(), - bootstore_node_handle: bootstore_handles.node_handle.clone(), - baseboard: managers.hardware.baseboard(), + storage_manager: long_running_task_handles.storage_manager.clone(), + bootstore_node_handle: long_running_task_handles.bootstore.clone(), + baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, updates: config.updates.clone(), sled_reset_tx, @@ -284,55 +236,36 @@ impl Server { // Do we have a persistent sled-agent request that we need to restore? 
let state = if let Some(ledger) = maybe_ledger { let start_sled_agent_request = ledger.into_inner(); - let sled_agent_server = wait_while_handling_hardware_updates( - start_sled_agent( - &config, - start_sled_agent_request, - &bootstore_handles.node_handle, - &managers, - &ddm_admin_localhost_client, - &base_log, - &startup_log, - ), - &mut hardware_monitor, - &managers, - None, // No underlay network yet + let sled_agent_server = start_sled_agent( + &config, + start_sled_agent_request, + long_running_task_handles.clone(), + underlay_available_tx, + service_manager.clone(), + &ddm_admin_localhost_client, + &base_log, &startup_log, - "restoring sled-agent (cold boot)", ) .await?; + // Give the HardwareMonitory access to the `SledAgent` let sled_agent = sled_agent_server.sled_agent(); - - // We've created sled-agent; we need to (possibly) reconfigure the - // switch zone, if we're a scrimlet, to give it our underlay network - // information. - let underlay_network_info = sled_agent.switch_zone_underlay_info(); - info!( - startup_log, "Sled Agent started; rescanning hardware"; - "underlay_network_info" => ?underlay_network_info, - ); - managers - .check_latest_hardware_snapshot(Some(&sled_agent), &startup_log) - .await; + sled_agent_started_tx + .send(sled_agent.clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); // For cold boot specifically, we now need to load the services // we're responsible for, while continuing to handle hardware // notifications. This cannot fail: we retry indefinitely until // we're done loading services. 
- wait_while_handling_hardware_updates( - sled_agent.cold_boot_load_services(), - &mut hardware_monitor, - &managers, - Some(&sled_agent), - &startup_log, - "restoring sled-agent services (cold boot)", - ) - .await; - + sled_agent.cold_boot_load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping + SledAgentState::Bootstrapping( + Some(sled_agent_started_tx), + Some(underlay_available_tx), + ) }; // Spawn our inner task that handles any future hardware updates and any @@ -340,15 +273,13 @@ impl Server { // agent state. let inner = Inner { config, - hardware_monitor, state, sled_init_rx, sled_reset_rx, - managers, ddm_admin_localhost_client, - bootstore_handles, + long_running_task_handles, + service_manager, _sprockets_server_handle: sprockets_server_handle, - _key_manager_handle: key_manager_handle, base_log, }; let inner_task = tokio::spawn(inner.run()); @@ -377,20 +308,14 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping, + Bootstrapping( + Option>, + Option>, + ), // ... or the sled agent server is running. 
ServerStarted(SledAgentServer), } -impl SledAgentState { - fn sled_agent(&self) -> Option<&SledAgent> { - match self { - SledAgentState::Bootstrapping => None, - SledAgentState::ServerStarted(server) => Some(server.sled_agent()), - } - } -} - #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] @@ -425,11 +350,13 @@ impl From for StartError { } } +#[allow(clippy::too_many_arguments)] async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, - bootstore: &bootstore::NodeHandle, - managers: &BootstrapManagers, + long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, + service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, log: &Logger, @@ -444,7 +371,10 @@ async fn start_sled_agent( if request.body.use_trust_quorum { info!(log, "KeyManager: using lrtq secret retriever"); let salt = request.hash_rack_id(); - LrtqOrHardcodedSecretRetriever::init_lrtq(salt, bootstore.clone()) + LrtqOrHardcodedSecretRetriever::init_lrtq( + salt, + long_running_task_handles.bootstore.clone(), + ) } else { info!(log, "KeyManager: using hardcoded secret retriever"); LrtqOrHardcodedSecretRetriever::init_hardcoded(); @@ -452,11 +382,19 @@ async fn start_sled_agent( if request.body.use_trust_quorum && request.body.is_lrtq_learner { info!(log, "Initializing sled as learner"); - bootstore.init_learner().await?; + match long_running_task_handles.bootstore.init_learner().await { + Err(bootstore::NodeRequestError::Fsm( + bootstore::ApiError::AlreadyInitialized, + )) => { + // This is a cold boot. Let's ignore this error and continue. 
+ } + Err(e) => return Err(e.into()), + Ok(()) => (), + } } // Inform the storage service that the key manager is available - managers.storage.key_manager_ready().await; + long_running_task_handles.storage_manager.key_manager_ready().await; // Start trying to notify ddmd of our sled prefix so it can // advertise it to other sleds. @@ -476,9 +414,9 @@ async fn start_sled_agent( config, base_log.clone(), request.clone(), - managers.service.clone(), - managers.storage.clone(), - bootstore.clone(), + long_running_task_handles.clone(), + service_manager, + underlay_available_tx, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -487,7 +425,8 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = sled_config_paths(managers.storage.resources()).await?; + let paths = + sled_config_paths(&long_running_task_handles.storage_manager).await?; let mut ledger = Ledger::new_with(&log, paths, request); ledger.commit().await?; @@ -526,28 +465,6 @@ fn start_dropshot_server( Ok(http_server) } -/// Wait for at least the M.2 we booted from to show up. -/// -/// TODO-correctness Subsequent steps may assume all M.2s that will ever be -/// present are present once we return from this function; see -/// . -async fn wait_for_boot_m2(storage_resources: &StorageResources, log: &Logger) { - // Wait for at least the M.2 we booted from to show up. 
- loop { - match storage_resources.boot_disk().await { - Some(disk) => { - info!(log, "Found boot disk M.2: {disk:?}"); - break; - } - None => { - info!(log, "Waiting for boot disk M.2..."); - tokio::time::sleep(core::time::Duration::from_millis(250)) - .await; - } - } - } -} - struct MissingM2Paths(&'static str); impl From for StartError { @@ -563,56 +480,21 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, MissingM2Paths> { - let paths: Vec<_> = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + let resources = storage.get_latest_resources().await; + let paths: Vec<_> = resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); if paths.is_empty() { - return Err(MissingM2Paths(sled_hardware::disk::CONFIG_DATASET)); + return Err(MissingM2Paths(CONFIG_DATASET)); } Ok(paths) } -// Helper function to wait for `fut` while handling any updates about hardware. -async fn wait_while_handling_hardware_updates, T>( - fut: F, - hardware_monitor: &mut broadcast::Receiver, - managers: &BootstrapManagers, - sled_agent: Option<&SledAgent>, - log: &Logger, - log_phase: &str, -) -> T { - tokio::pin!(fut); - loop { - tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = hardware_monitor.recv() => { - info!( - log, - "Handling hardware update message"; - "phase" => log_phase, - "update" => ?hardware_update, - ); - - managers.handle_hardware_update( - hardware_update, - sled_agent, - log, - ).await; - } - - // Cancel-safe: we're using a `&mut Future`; dropping the - // reference does not cancel the underlying future. - result = &mut fut => return result, - } - } -} - /// Runs the OpenAPI generator, emitting the spec to stdout. 
pub fn run_openapi() -> Result<(), String> { http_api() @@ -626,18 +508,16 @@ pub fn run_openapi() -> Result<(), String> { struct Inner { config: SledConfig, - hardware_monitor: broadcast::Receiver, state: SledAgentState, sled_init_rx: mpsc::Receiver<( StartSledAgentRequest, oneshot::Sender>, )>, sled_reset_rx: mpsc::Receiver>>, - managers: BootstrapManagers, ddm_admin_localhost_client: DdmAdminClient, - bootstore_handles: BootstoreHandles, + long_running_task_handles: LongRunningTaskHandles, + service_manager: ServiceManager, _sprockets_server_handle: JoinHandle<()>, - _key_manager_handle: JoinHandle<()>, base_log: Logger, } @@ -645,14 +525,7 @@ impl Inner { async fn run(mut self) { let log = self.base_log.new(o!("component" => "SledAgentMain")); loop { - // TODO-correctness We pause handling hardware update messages while - // we handle sled init/reset requests - is that okay? tokio::select! { - // Cancel-safe per the docs on `broadcast::Receiver::recv()`. - hardware_update = self.hardware_monitor.recv() => { - self.handle_hardware_update(hardware_update, &log).await; - } - // Cancel-safe per the docs on `mpsc::Receiver::recv()`. 
Some((request, response_tx)) = self.sled_init_rx.recv() => { self.handle_start_sled_agent_request( @@ -680,41 +553,36 @@ impl Inner { } } - async fn handle_hardware_update( - &self, - hardware_update: Result, - log: &Logger, - ) { - info!( - log, - "Handling hardware update message"; - "phase" => "bootstore-steady-state", - "update" => ?hardware_update, - ); - - self.managers - .handle_hardware_update( - hardware_update, - self.state.sled_agent(), - &log, - ) - .await; - } - async fn handle_start_sled_agent_request( &mut self, request: StartSledAgentRequest, response_tx: oneshot::Sender>, log: &Logger, ) { - let request_id = request.body.id; - match &self.state { - SledAgentState::Bootstrapping => { + match &mut self.state { + SledAgentState::Bootstrapping( + sled_agent_started_tx, + underlay_available_tx, + ) => { + let request_id = request.body.id; + + // Extract from options to satisfy the borrow checker. + // It is not possible for `start_sled_agent` to be cancelled + // or fail in a safe, restartable manner. Therefore, for now, + // we explicitly unwrap here, and panic on error below. + // + // See https://github.com/oxidecomputer/omicron/issues/4494 + let sled_agent_started_tx = + sled_agent_started_tx.take().unwrap(); + let underlay_available_tx = + underlay_available_tx.take().unwrap(); + let response = match start_sled_agent( &self.config, request, - &self.bootstore_handles.node_handle, - &self.managers, + self.long_running_task_handles.clone(), + underlay_available_tx, + self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, &log, @@ -725,17 +593,19 @@ impl Inner { // We've created sled-agent; we need to (possibly) // reconfigure the switch zone, if we're a scrimlet, to // give it our underlay network information. 
- self.managers - .check_latest_hardware_snapshot( - Some(server.sled_agent()), - log, - ) - .await; - + sled_agent_started_tx + .send(server.sled_agent().clone()) + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); self.state = SledAgentState::ServerStarted(server); Ok(SledAgentResponse { id: request_id }) } - Err(err) => Err(format!("{err:#}")), + Err(err) => { + // This error is unrecoverable, and if returned we'd + // end up in maintenance mode anyway. + error!(log, "Failed to start sled agent: {err:#}"); + panic!("Failed to start sled agent"); + } }; _ = response_tx.send(response); } @@ -779,11 +649,11 @@ impl Inner { async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { let config_dirs = self - .managers - .storage - .resources() - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .long_running_task_handles + .storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter(); for dir in config_dirs { diff --git a/sled-agent/src/common/disk.rs b/sled-agent/src/common/disk.rs index 18160950d3..57868937d0 100644 --- a/sled-agent/src/common/disk.rs +++ b/sled-agent/src/common/disk.rs @@ -9,7 +9,7 @@ use chrono::Utc; use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; -use propolis_client::api::DiskAttachmentState as PropolisDiskState; +use propolis_client::types::DiskAttachmentState as PropolisDiskState; use uuid::Uuid; /// Action to be taken on behalf of state transition. 
diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index 9e285840e0..d7ee8982e0 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -10,8 +10,9 @@ use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, SledInstanceState, VmmRuntimeState, }; -use propolis_client::api::{ +use propolis_client::types::{ InstanceState as PropolisApiState, InstanceStateMonitorResponse, + MigrationState, }; use uuid::Uuid; @@ -36,7 +37,7 @@ impl From for PropolisInstanceState { impl From for ApiInstanceState { fn from(value: PropolisInstanceState) -> Self { - use propolis_client::api::InstanceState as State; + use propolis_client::types::InstanceState as State; match value.0 { // Nexus uses the VMM state as the externally-visible instance state // when an instance has an active VMM. A Propolis that is "creating" @@ -119,7 +120,6 @@ impl ObservedPropolisState { (Some(this_id), Some(propolis_migration)) if this_id == propolis_migration.migration_id => { - use propolis_client::api::MigrationState; match propolis_migration.state { MigrationState::Finish => { ObservedMigrationStatus::Succeeded @@ -510,7 +510,7 @@ mod test { use chrono::Utc; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::InstanceRuntimeState; - use propolis_client::api::InstanceState as Observed; + use propolis_client::types::InstanceState as Observed; use uuid::Uuid; fn make_instance() -> InstanceStates { diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index fad4b2e94b..a596cf83db 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -52,6 +52,9 @@ pub struct Config { pub sidecar_revision: SidecarRevision, /// Optional percentage of DRAM to reserve for guest memory pub vmm_reservoir_percentage: Option, + /// Optional DRAM to reserve for guest memory in MiB (mutually exclusive + /// option with 
vmm_reservoir_percentage). + pub vmm_reservoir_size_mb: Option, /// Optional swap device size in GiB pub swap_device_size_gb: Option, /// Optional VLAN ID to be used for tagging guest VNICs. diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/dump_setup.rs similarity index 93% rename from sled-agent/src/storage/dump_setup.rs rename to sled-agent/src/dump_setup.rs index 9b5edc0a7e..e675e6e12d 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/dump_setup.rs @@ -1,4 +1,3 @@ -use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; use derive_more::{AsRef, Deref, From}; use illumos_utils::dumpadm::DumpAdmError; @@ -6,13 +5,15 @@ use illumos_utils::zone::{AdmError, Zones}; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; +use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; +use sled_storage::disk::Disk; +use sled_storage::pool::Pool; use slog::Logger; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::ffi::OsString; use std::path::{Path, PathBuf}; use std::sync::{Arc, Weak}; use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; -use tokio::sync::MutexGuard; pub struct DumpSetup { worker: Arc>, @@ -70,11 +71,11 @@ trait GetMountpoint: std::ops::Deref { } impl GetMountpoint for DebugZpool { type NewType = DebugDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::DUMP_DATASET; + const MOUNTPOINT: &'static str = DUMP_DATASET; } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; - const MOUNTPOINT: &'static str = sled_hardware::disk::CRASH_DATASET; + const MOUNTPOINT: &'static str = CRASH_DATASET; } struct DumpSetupWorker { @@ -99,50 +100,51 @@ const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300); impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, - disks: &mut MutexGuard<'_, HashMap>, + disks: &BTreeMap, ) { let log = &self.log; let mut 
m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, disk_wrapper) in disks.iter() { - match disk_wrapper { - DiskWrapper::Real { disk, .. } => match disk.variant() { - DiskVariant::M2 => { - match disk.dump_device_devfs_path(false) { - Ok(path) => { - m2_dump_slices.push(DumpSlicePath(path)) - } - Err(err) => { - warn!(log, "Error getting dump device devfs path: {err:?}"); - } + for (_id, (disk, _)) in disks.iter() { + if disk.is_synthetic() { + // We only setup dump devices on real disks + continue; + } + match disk.variant() { + DiskVariant::M2 => { + match disk.dump_device_devfs_path(false) { + Ok(path) => m2_dump_slices.push(DumpSlicePath(path)), + Err(err) => { + warn!( + log, + "Error getting dump device devfs path: {err:?}" + ); } - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - m2_core_datasets.push(CoreZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); - } + } + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + m2_core_datasets.push(CoreZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } } - DiskVariant::U2 => { - let name = disk.zpool_name(); - if let Ok(info) = illumos_utils::zpool::Zpool::get_info( - &name.to_string(), - ) { - if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool(name.clone())); - } else { - warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); - } + } + DiskVariant::U2 => { + let name = disk.zpool_name(); + if let Ok(info) = + illumos_utils::zpool::Zpool::get_info(&name.to_string()) + { + if info.health() == ZpoolHealth::Online { + 
u2_debug_datasets.push(DebugZpool(name.clone())); + } else { + warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } } - }, - DiskWrapper::Synthetic { .. } => {} + } } } diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs new file mode 100644 index 0000000000..698d2d4608 --- /dev/null +++ b/sled-agent/src/hardware_monitor.rs @@ -0,0 +1,257 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for hardware events from the +//! [`sled_hardware::HardwareManager`] and dispatches them to other parts +//! of the bootstrap agent and sled-agent code. + +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; +use sled_hardware::{Baseboard, HardwareManager, HardwareUpdate}; +use sled_storage::disk::RawDisk; +use sled_storage::manager::StorageHandle; +use slog::Logger; +use tokio::sync::broadcast::error::RecvError; +use tokio::sync::{broadcast, oneshot}; + +// A thin wrapper around the the [`ServiceManager`] that caches the state +// whether or not the tofino is loaded if the [`ServiceManager`] doesn't exist +// yet. +enum TofinoManager { + Ready(ServiceManager), + NotReady { tofino_loaded: bool }, +} + +impl TofinoManager { + pub fn new() -> TofinoManager { + TofinoManager::NotReady { tofino_loaded: false } + } + + // Must only be called once on the transition from `NotReady` to `Ready`. + // Panics otherwise. 
+ // + // Returns whether the tofino was loaded or not + pub fn become_ready(&mut self, service_manager: ServiceManager) -> bool { + let tofino_loaded = match self { + Self::Ready(_) => panic!("ServiceManager is already available"), + Self::NotReady { tofino_loaded } => *tofino_loaded, + }; + *self = Self::Ready(service_manager); + tofino_loaded + } + + pub fn is_ready(&self) -> bool { + match self { + TofinoManager::Ready(_) => true, + _ => false, + } + } +} + +// A monitor for hardware events +pub struct HardwareMonitor { + log: Logger, + + baseboard: Baseboard, + + // Receive a onetime notification that the SledAgent has started + sled_agent_started_rx: oneshot::Receiver, + + // Receive a onetime notification that the ServiceManager is ready + service_manager_ready_rx: oneshot::Receiver, + + // Receive messages from the [`HardwareManager`] + hardware_rx: broadcast::Receiver, + + // A reference to the hardware manager + hardware_manager: HardwareManager, + + // A handle to [`sled_hardware::manager::StorageManger`] + storage_manager: StorageHandle, + + // A handle to the sled-agent + // + // This will go away once Nexus updates are polled: + // See: + // * https://github.com/oxidecomputer/omicron/issues/1917 + // * https://rfd.shared.oxide.computer/rfd/0433 + sled_agent: Option, + + // The [`ServiceManager`] is instantiated after we start the [`HardwareMonitor`] + // task. However, it is only used to load and unload the switch zone when thes + // state of the tofino changes. We keep track of the tofino state so that we + // can properly load the tofino when the [`ServiceManager`] becomes available + // available. 
+ tofino_manager: TofinoManager, +} + +impl HardwareMonitor { + pub fn new( + log: &Logger, + hardware_manager: &HardwareManager, + storage_manager: &StorageHandle, + ) -> ( + HardwareMonitor, + oneshot::Sender, + oneshot::Sender, + ) { + let (sled_agent_started_tx, sled_agent_started_rx) = oneshot::channel(); + let (service_manager_ready_tx, service_manager_ready_rx) = + oneshot::channel(); + let baseboard = hardware_manager.baseboard(); + let hardware_rx = hardware_manager.monitor(); + let log = log.new(o!("component" => "HardwareMonitor")); + let tofino_manager = TofinoManager::new(); + ( + HardwareMonitor { + log, + baseboard, + sled_agent_started_rx, + service_manager_ready_rx, + hardware_rx, + hardware_manager: hardware_manager.clone(), + storage_manager: storage_manager.clone(), + sled_agent: None, + tofino_manager, + }, + sled_agent_started_tx, + service_manager_ready_tx, + ) + } + + /// Run the main receive loop of the `HardwareMonitor` + /// + /// This should be spawned into a tokio task + pub async fn run(&mut self) { + // Check the latest hardware snapshot; we could have missed events + // between the creation of the hardware manager and our subscription of + // its monitor. + self.check_latest_hardware_snapshot().await; + + loop { + tokio::select! 
{ + Ok(sled_agent) = &mut self.sled_agent_started_rx, + if self.sled_agent.is_none() => + { + info!(self.log, "Sled Agent Started"); + self.sled_agent = Some(sled_agent); + self.check_latest_hardware_snapshot().await; + } + Ok(service_manager) = &mut self.service_manager_ready_rx, + if !self.tofino_manager.is_ready() => + { + let tofino_loaded = + self.tofino_manager.become_ready(service_manager); + if tofino_loaded { + self.activate_switch().await; + } + } + update = self.hardware_rx.recv() => { + info!( + self.log, + "Received hardware update message"; + "update" => ?update, + ); + self.handle_hardware_update(update).await; + } + } + } + } + + // Handle an update from the [`HardwareMonitor`] + async fn handle_hardware_update( + &mut self, + update: Result, + ) { + match update { + Ok(update) => match update { + HardwareUpdate::TofinoLoaded => self.activate_switch().await, + HardwareUpdate::TofinoUnloaded => { + self.deactivate_switch().await + } + HardwareUpdate::TofinoDeviceChange => { + if let Some(sled_agent) = &mut self.sled_agent { + sled_agent.notify_nexus_about_self(&self.log); + } + } + HardwareUpdate::DiskAdded(disk) => { + self.storage_manager.upsert_disk(disk.into()).await; + } + HardwareUpdate::DiskRemoved(disk) => { + self.storage_manager.delete_disk(disk.into()).await; + } + }, + Err(broadcast::error::RecvError::Lagged(count)) => { + warn!(self.log, "Hardware monitor missed {count} messages"); + self.check_latest_hardware_snapshot().await; + } + Err(broadcast::error::RecvError::Closed) => { + // The `HardwareManager` monitoring task is an infinite loop - + // the only way for us to get `Closed` here is if it panicked, + // so we will propagate such a panic. 
+ panic!("Hardware manager monitor task panicked"); + } + } + } + + async fn activate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager + .activate_switch( + self.sled_agent + .as_ref() + .map(|sa| sa.switch_zone_underlay_info()), + self.baseboard.clone(), + ) + .await + { + warn!(self.log, "Failed to activate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = true; + } + } + } + + async fn deactivate_switch(&mut self) { + match &mut self.tofino_manager { + TofinoManager::Ready(service_manager) => { + if let Err(e) = service_manager.deactivate_switch().await { + warn!(self.log, "Failed to deactivate switch: {e}"); + } + } + TofinoManager::NotReady { tofino_loaded } => { + *tofino_loaded = false; + } + } + } + + // Observe the current hardware state manually. + // + // We use this when we're monitoring hardware for the first + // time, and if we miss notifications. + async fn check_latest_hardware_snapshot(&mut self) { + let underlay_network = self.sled_agent.as_ref().map(|sled_agent| { + sled_agent.notify_nexus_about_self(&self.log); + sled_agent.switch_zone_underlay_info() + }); + info!( + self.log, "Checking current full hardware snapshot"; + "underlay_network_info" => ?underlay_network, + ); + if self.hardware_manager.is_scrimlet_driver_loaded() { + self.activate_switch().await; + } else { + self.deactivate_switch().await; + } + + self.storage_manager + .ensure_using_exactly_these_disks( + self.hardware_manager.disks().into_iter().map(RawDisk::from), + ) + .await; + } +} diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index ab107f9a63..2d0e2c4001 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -364,7 +364,7 @@ async fn zpools_get( rqctx: RequestContext, ) -> Result>, HttpError> { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| 
Error::from(e))?)) + Ok(HttpResponseOk(sa.zpools_get().await)) } #[endpoint { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 5c61993293..a6f022f5f2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -17,7 +17,6 @@ use crate::params::{ InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, }; use crate::profile::*; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -32,7 +31,6 @@ use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use omicron_common::address::NEXUS_INTERNAL_PORT; -use omicron_common::address::PROPOLIS_PORT; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, SledInstanceState, VmmRuntimeState, }; @@ -43,7 +41,8 @@ use omicron_common::backoff; use propolis_client::Client as PropolisClient; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; +use sled_storage::dataset::ZONE_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::{SocketAddr, SocketAddrV6}; @@ -191,13 +190,13 @@ struct InstanceInner { log: Logger, // Properties visible to Propolis - properties: propolis_client::api::InstanceProperties, + properties: propolis_client::types::InstanceProperties, // The ID of the Propolis server (and zone) running this instance propolis_id: Uuid, - // The IP address of the Propolis server running this instance - propolis_ip: IpAddr, + // The socket address of the Propolis server running this instance + propolis_addr: SocketAddr, // NIC-related properties vnic_allocator: VnicAllocator, @@ -214,8 +213,7 @@ struct InstanceInner { dhcp_config: DhcpCfg, // Disk related properties - // TODO: replace `propolis_client::handmade::*` with properly-modeled local types - requested_disks: Vec, + requested_disks: Vec, cloud_init_bytes: 
Option, // Internal State management @@ -226,7 +224,7 @@ struct InstanceInner { nexus_client: NexusClientWithResolver, // Storage resources - storage: StorageResources, + storage: StorageHandle, // Object used to collect zone bundles from this instance when terminated. zone_bundler: ZoneBundler, @@ -380,7 +378,7 @@ impl InstanceInner { /// Sends an instance state PUT request to this instance's Propolis. async fn propolis_state_put( &self, - request: propolis_client::api::InstanceStateRequested, + request: propolis_client::types::InstanceStateRequested, ) -> Result<(), Error> { let res = self .running_state @@ -410,11 +408,11 @@ impl InstanceInner { ) -> Result<(), Error> { let nics = running_zone .opte_ports() - .map(|port| propolis_client::api::NetworkInterfaceRequest { + .map(|port| propolis_client::types::NetworkInterfaceRequest { // TODO-correctness: Remove `.vnic()` call when we use the port // directly. name: port.vnic_name().to_string(), - slot: propolis_client::api::Slot(port.slot()), + slot: propolis_client::types::Slot(port.slot()), }) .collect(); @@ -424,7 +422,7 @@ impl InstanceInner { self.state.instance().migration_id.ok_or_else(|| { Error::Migration(anyhow!("Missing Migration UUID")) })?; - Some(propolis_client::api::InstanceMigrateInitiateRequest { + Some(propolis_client::types::InstanceMigrateInitiateRequest { src_addr: params.src_propolis_addr.to_string(), src_uuid: params.src_propolis_id, migration_id, @@ -433,7 +431,7 @@ impl InstanceInner { None => None, }; - let request = propolis_client::api::InstanceEnsureRequest { + let request = propolis_client::types::InstanceEnsureRequest { properties: self.properties.clone(), nics, disks: self @@ -649,7 +647,7 @@ impl Instance { let instance = InstanceInner { log: log.new(o!("instance_id" => id.to_string())), // NOTE: Mostly lies. 
- properties: propolis_client::api::InstanceProperties { + properties: propolis_client::types::InstanceProperties { id, name: hardware.properties.hostname.clone(), description: "Test description".to_string(), @@ -662,7 +660,7 @@ impl Instance { vcpus: hardware.properties.ncpus.0 as u8, }, propolis_id, - propolis_ip: propolis_addr.ip(), + propolis_addr, vnic_allocator, port_manager, requested_nics: hardware.nics, @@ -790,7 +788,7 @@ impl Instance { &self, state: crate::params::InstanceStateRequested, ) -> Result { - use propolis_client::api::InstanceStateRequested as PropolisRequest; + use propolis_client::types::InstanceStateRequested as PropolisRequest; let mut inner = self.inner.lock().await; let (propolis_state, next_published) = match state { InstanceStateRequested::MigrationTarget(migration_params) => { @@ -900,8 +898,9 @@ impl Instance { let mut rng = rand::rngs::StdRng::from_entropy(); let root = inner .storage - .all_u2_mountpoints(ZONE_DATASET) + .get_latest_resources() .await + .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)? .clone(); @@ -964,9 +963,13 @@ impl Instance { .add_property( "listen_addr", "astring", - &inner.propolis_ip.to_string(), + &inner.propolis_addr.ip().to_string(), + ) + .add_property( + "listen_port", + "astring", + &inner.propolis_addr.port().to_string(), ) - .add_property("listen_port", "astring", &PROPOLIS_PORT.to_string()) .add_property("metric_addr", "astring", &metric_addr.to_string()); let profile = ProfileBuilder::new("omicron").add_service( @@ -989,13 +992,11 @@ impl Instance { .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(inner.log, "Propolis SMF service is online"); - let server_addr = SocketAddr::new(inner.propolis_ip, PROPOLIS_PORT); - // We use a custom client builder here because the default progenitor // one has a timeout of 15s but we want to be able to wait indefinitely. 
let reqwest_client = reqwest::ClientBuilder::new().build().unwrap(); let client = Arc::new(PropolisClient::new_with_client( - &format!("http://{}", server_addr), + &format!("http://{}", &inner.propolis_addr), reqwest_client, )); @@ -1034,7 +1035,9 @@ impl Instance { // known to Propolis. let response = client .instance_state_monitor() - .body(propolis_client::api::InstanceStateMonitorRequest { gen }) + .body(propolis_client::types::InstanceStateMonitorRequest { + gen, + }) .send() .await? .into_inner(); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 2860f0624b..fa40a876f0 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -12,7 +12,6 @@ use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, }; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use illumos_utils::dladm::Etherstub; @@ -23,6 +22,7 @@ use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::VmmRuntimeState; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::SocketAddr; @@ -43,6 +43,9 @@ pub enum Error { #[error("Failed to create reservoir: {0}")] Reservoir(#[from] vmm_reservoir::Error), + #[error("Invalid reservoir configuration: {0}")] + ReservoirConfig(String), + #[error("Cannot find data link: {0}")] Underlay(#[from] sled_hardware::underlay::Error), @@ -50,6 +53,12 @@ pub enum Error { ZoneBundle(#[from] BundleError), } +pub enum ReservoirMode { + None, + Size(u32), + Percentage(u8), +} + struct InstanceManagerInternal { log: Logger, nexus_client: NexusClientWithResolver, @@ -65,7 +74,7 @@ struct InstanceManagerInternal { vnic_allocator: 
VnicAllocator, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, } @@ -73,7 +82,7 @@ pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClientWithResolver, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, - pub storage: StorageResources, + pub storage: StorageHandle, pub zone_bundler: ZoneBundler, } @@ -89,7 +98,7 @@ impl InstanceManager { nexus_client: NexusClientWithResolver, etherstub: Etherstub, port_manager: PortManager, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Result { Ok(InstanceManager { @@ -108,44 +117,69 @@ impl InstanceManager { }) } - /// Sets the VMM reservoir size to the requested (nonzero) percentage of - /// usable physical RAM, rounded down to nearest aligned size required by - /// the control plane. + /// Sets the VMM reservoir to the requested percentage of usable physical + /// RAM or to a size in MiB. Either mode will round down to the nearest + /// aligned size required by the control plane. 
pub fn set_reservoir_size( &self, hardware: &sled_hardware::HardwareManager, - target_percent: u8, + mode: ReservoirMode, ) -> Result<(), Error> { - assert!( - target_percent > 0 && target_percent < 100, - "target_percent {} must be nonzero and < 100", - target_percent - ); + let hardware_physical_ram_bytes = hardware.usable_physical_ram_bytes(); + let req_bytes = match mode { + ReservoirMode::None => return Ok(()), + ReservoirMode::Size(mb) => { + let bytes = ByteCount::from_mebibytes_u32(mb).to_bytes(); + if bytes > hardware_physical_ram_bytes { + return Err(Error::ReservoirConfig(format!( + "cannot specify a reservoir of {bytes} bytes when \ + physical memory is {hardware_physical_ram_bytes} bytes", + ))); + } + bytes + } + ReservoirMode::Percentage(percent) => { + if !matches!(percent, 1..=99) { + return Err(Error::ReservoirConfig(format!( + "VMM reservoir percentage of {} must be between 0 and \ + 100", + percent + ))); + }; + (hardware_physical_ram_bytes as f64 * (percent as f64 / 100.0)) + .floor() as u64 + } + }; - let req_bytes = (hardware.usable_physical_ram_bytes() as f64 - * (target_percent as f64 / 100.0)) - .floor() as u64; let req_bytes_aligned = vmm_reservoir::align_reservoir_size(req_bytes); if req_bytes_aligned == 0 { warn!( self.inner.log, - "Requested reservoir size of {} bytes < minimum aligned size of {} bytes", - req_bytes, vmm_reservoir::RESERVOIR_SZ_ALIGN); + "Requested reservoir size of {} bytes < minimum aligned size \ + of {} bytes", + req_bytes, + vmm_reservoir::RESERVOIR_SZ_ALIGN + ); return Ok(()); } - // The max ByteCount value is i64::MAX, which is ~8 million TiB. As this - // value is a percentage of DRAM, constructing this should always work. + // The max ByteCount value is i64::MAX, which is ~8 million TiB. + // As this value is either a percentage of DRAM or a size in MiB + // represented as a u32, constructing this should always work. 
let reservoir_size = ByteCount::try_from(req_bytes_aligned).unwrap(); + if let ReservoirMode::Percentage(percent) = mode { + info!( + self.inner.log, + "{}% of {} physical ram = {} bytes)", + percent, + hardware_physical_ram_bytes, + req_bytes, + ); + } info!( self.inner.log, - "Setting reservoir size to {} bytes \ - ({}% of {} total = {} bytes requested)", - reservoir_size, - target_percent, - hardware.usable_physical_ram_bytes(), - req_bytes, + "Setting reservoir size to {reservoir_size} bytes" ); vmm_reservoir::ReservoirControl::set(reservoir_size)?; diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index db89b17b5a..924fd4bd92 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -20,9 +20,12 @@ pub mod common; mod backing_fs; pub mod bootstrap; pub mod config; +pub(crate) mod dump_setup; +pub(crate) mod hardware_monitor; mod http_entrypoints; mod instance; mod instance_manager; +mod long_running_tasks; mod metrics; mod nexus; pub mod params; @@ -32,8 +35,7 @@ pub mod server; mod services; mod sled_agent; mod smf_helper; -pub(crate) mod storage; -mod storage_manager; +mod storage_monitor; mod swap_device; mod updates; mod zone_bundle; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs new file mode 100644 index 0000000000..f4a665c098 --- /dev/null +++ b/sled-agent/src/long_running_tasks.rs @@ -0,0 +1,241 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This module is responsible for spawning, starting, and managing long running +//! tasks and task driven subsystems. These tasks run for the remainder of the +//! sled-agent process from the moment they begin. Primarily they include the +//! "managers", like `StorageManager`, `InstanceManager`, etc..., and are used +//! by both the bootstrap agent and the sled-agent. +//! +//! 
We don't bother keeping track of the spawned tasks' handles because we know +//! these tasks are supposed to run forever, and they can shut down if their +//! handles are dropped. + +use crate::bootstrap::bootstore_setup::{ + new_bootstore_config, poll_ddmd_for_bootstore_peer_update, +}; +use crate::bootstrap::secret_retriever::LrtqOrHardcodedSecretRetriever; +use crate::config::Config; +use crate::hardware_monitor::HardwareMonitor; +use crate::services::ServiceManager; +use crate::sled_agent::SledAgent; +use crate::storage_monitor::{StorageMonitor, UnderlayAccess}; +use crate::zone_bundle::{CleanupContext, ZoneBundler}; +use bootstore::schemes::v0 as bootstore; +use key_manager::{KeyManager, StorageKeyRequester}; +use sled_hardware::{HardwareManager, SledMode}; +use sled_storage::disk::SyntheticDisk; +use sled_storage::manager::{StorageHandle, StorageManager}; +use slog::{info, Logger}; +use std::net::Ipv6Addr; +use tokio::sync::oneshot; + +/// A mechanism for interacting with all long running tasks that can be shared +/// between the bootstrap-agent and sled-agent code. +#[derive(Clone)] +pub struct LongRunningTaskHandles { + /// A mechanism for retrieving storage keys. This interacts with the + /// [`KeyManager`] task. In the future, there may be other handles for + /// retrieving different types of keys. Separating the handles limits the + /// access for a given key type to the code that holds the handle. + pub storage_key_requester: StorageKeyRequester, + + /// A mechanism for talking to the [`StorageManager`] which is responsible + /// for establishing zpools on disks and managing their datasets.
+ pub storage_manager: StorageHandle, + + /// A mechanism for interacting with the hardware device tree + pub hardware_manager: HardwareManager, + + // A handle for interacting with the bootstore + pub bootstore: bootstore::NodeHandle, + + // A reference to the object used to manage zone bundles + pub zone_bundler: ZoneBundler, +} + +/// Spawn all long running tasks +pub async fn spawn_all_longrunning_tasks( + log: &Logger, + sled_mode: SledMode, + global_zone_bootstrap_ip: Ipv6Addr, + config: &Config, +) -> ( + LongRunningTaskHandles, + oneshot::Sender, + oneshot::Sender, + oneshot::Sender, +) { + let storage_key_requester = spawn_key_manager(log); + let mut storage_manager = + spawn_storage_manager(log, storage_key_requester.clone()); + + let underlay_available_tx = + spawn_storage_monitor(log, storage_manager.clone()); + + let hardware_manager = spawn_hardware_manager(log, sled_mode).await; + + // Start monitoring for hardware changes + let (sled_agent_started_tx, service_manager_ready_tx) = + spawn_hardware_monitor(log, &hardware_manager, &storage_manager); + + // Add some synthetic disks if necessary. 
+ upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config).await; + + // Wait for the boot disk so that we can work with any ledgers, + // such as those needed by the bootstore and sled-agent + info!(log, "Waiting for boot disk"); + let (disk_id, _) = storage_manager.wait_for_boot_disk().await; + info!(log, "Found boot disk {:?}", disk_id); + + let bootstore = spawn_bootstore_tasks( + log, + &mut storage_manager, + &hardware_manager, + global_zone_bootstrap_ip, + ) + .await; + + let zone_bundler = spawn_zone_bundler_tasks(log, &mut storage_manager); + + ( + LongRunningTaskHandles { + storage_key_requester, + storage_manager, + hardware_manager, + bootstore, + zone_bundler, + }, + sled_agent_started_tx, + service_manager_ready_tx, + underlay_available_tx, + ) +} + +fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { + info!(log, "Starting KeyManager"); + let secret_retriever = LrtqOrHardcodedSecretRetriever::new(); + let (mut key_manager, storage_key_requester) = + KeyManager::new(log, secret_retriever); + tokio::spawn(async move { key_manager.run().await }); + storage_key_requester +} + +fn spawn_storage_manager( + log: &Logger, + key_requester: StorageKeyRequester, +) -> StorageHandle { + info!(log, "Starting StorageManager"); + let (manager, handle) = StorageManager::new(log, key_requester); + tokio::spawn(async move { + manager.run().await; + }); + handle +} + +fn spawn_storage_monitor( + log: &Logger, + storage_handle: StorageHandle, +) -> oneshot::Sender { + info!(log, "Starting StorageMonitor"); + let (storage_monitor, underlay_available_tx) = + StorageMonitor::new(log, storage_handle); + tokio::spawn(async move { + storage_monitor.run().await; + }); + underlay_available_tx +} + +async fn spawn_hardware_manager( + log: &Logger, + sled_mode: SledMode, +) -> HardwareManager { + // The `HardwareManager` does not use the "task/handle" pattern + // and spawns its worker task inside `HardwareManager::new`.
Instead of returning + // a handle to send messages to that task, the "Inner/Mutex" pattern is used + // which shares data between the task, the manager itself, and the users of the manager + // since the manager can be freely cloned and passed around. + // + // There are pros and cons to both methods, but the reason to mention it here is that + // the handle in this case is the `HardwareManager` itself. + info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode); + let log = log.clone(); + tokio::task::spawn_blocking(move || { + HardwareManager::new(&log, sled_mode).unwrap() + }) + .await + .unwrap() +} + +fn spawn_hardware_monitor( + log: &Logger, + hardware_manager: &HardwareManager, + storage_handle: &StorageHandle, +) -> (oneshot::Sender, oneshot::Sender) { + info!(log, "Starting HardwareMonitor"); + let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = + HardwareMonitor::new(log, hardware_manager, storage_handle); + tokio::spawn(async move { + monitor.run().await; + }); + (sled_agent_started_tx, service_manager_ready_tx) +} + +async fn spawn_bootstore_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, + hardware_manager: &HardwareManager, + global_zone_bootstrap_ip: Ipv6Addr, +) -> bootstore::NodeHandle { + let storage_resources = storage_handle.get_latest_resources().await; + let config = new_bootstore_config( + &storage_resources, + hardware_manager.baseboard(), + global_zone_bootstrap_ip, + ) + .unwrap(); + + // Create and spawn the bootstore + info!(log, "Starting Bootstore"); + let (mut node, node_handle) = bootstore::Node::new(config, log).await; + tokio::spawn(async move { node.run().await }); + + // Spawn a task for polling DDMD and updating bootstore with peer addresses + info!(log, "Starting Bootstore DDMD poller"); + let log = log.new(o!("component" => "bootstore_ddmd_poller")); + let node_handle2 = node_handle.clone(); + tokio::spawn(async move { + poll_ddmd_for_bootstore_peer_update(log, node_handle2).await + }); 
+ + node_handle +} + +// `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely +fn spawn_zone_bundler_tasks( + log: &Logger, + storage_handle: &mut StorageHandle, +) -> ZoneBundler { + info!(log, "Starting ZoneBundler related tasks"); + let log = log.new(o!("component" => "ZoneBundler")); + ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) +} + +async fn upsert_synthetic_zpools_if_needed( + log: &Logger, + storage_manager: &StorageHandle, + config: &Config, +) { + if let Some(pools) = &config.zpools { + for pool in pools { + info!( + log, + "Upserting synthetic zpool to Storage Manager: {}", + pool.to_string() + ); + let disk = SyntheticDisk::new(pool.clone()).into(); + storage_manager.upsert_disk(disk).await; + } + } +} diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 2af6fa0023..cc715f4010 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -154,3 +154,51 @@ fn d2n_record( } } } + +// Although it is a bit awkward to define these conversions here, it frees us +// from depending on sled_storage/sled_hardware in the nexus_client crate. 
+ +pub(crate) trait ConvertInto: Sized { + fn convert(self) -> T; +} + +impl ConvertInto + for sled_hardware::DiskVariant +{ + fn convert(self) -> nexus_client::types::PhysicalDiskKind { + use nexus_client::types::PhysicalDiskKind; + + match self { + sled_hardware::DiskVariant::U2 => PhysicalDiskKind::U2, + sled_hardware::DiskVariant::M2 => PhysicalDiskKind::M2, + } + } +} + +impl ConvertInto for sled_hardware::Baseboard { + fn convert(self) -> nexus_client::types::Baseboard { + nexus_client::types::Baseboard { + serial_number: self.identifier().to_string(), + part_number: self.model().to_string(), + revision: self.revision(), + } + } +} + +impl ConvertInto + for sled_storage::dataset::DatasetKind +{ + fn convert(self) -> nexus_client::types::DatasetKind { + use nexus_client::types::DatasetKind; + use sled_storage::dataset::DatasetKind::*; + + match self { + CockroachDb => DatasetKind::Cockroach, + Crucible => DatasetKind::Crucible, + Clickhouse => DatasetKind::Clickhouse, + ClickhouseKeeper => DatasetKind::ClickhouseKeeper, + ExternalDns => DatasetKind::ExternalDns, + InternalDns => DatasetKind::InternalDns, + } + } +} diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index cd84c9acd4..b22bd84975 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -20,6 +20,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::Baseboard; pub use sled_hardware::DendriteAsic; +use sled_storage::dataset::DatasetName; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; @@ -70,8 +71,8 @@ pub struct InstanceHardware { pub external_ips: Vec, pub firewall_rules: Vec, pub dhcp_config: DhcpConfig, - // TODO: replace `propolis_client::handmade::*` with locally-modeled request type - pub disks: Vec, + // TODO: replace `propolis_client::*` with locally-modeled request type + pub disks: Vec, pub cloud_init_bytes: Option, } @@ 
-228,64 +229,6 @@ pub struct Zpool { pub disk_type: DiskType, } -/// The type of a dataset, and an auxiliary information necessary -/// to successfully launch a zone managing the associated data. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetKind { - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ExternalDns, - InternalDns, -} - -impl From for sled_agent_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::CockroachDb, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl From for nexus_client::types::DatasetKind { - fn from(k: DatasetKind) -> Self { - use DatasetKind::*; - match k { - CockroachDb => Self::Cockroach, - Crucible => Self::Crucible, - Clickhouse => Self::Clickhouse, - ClickhouseKeeper => Self::ClickhouseKeeper, - ExternalDns => Self::ExternalDns, - InternalDns => Self::InternalDns, - } - } -} - -impl std::fmt::Display for DatasetKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetKind::*; - let s = match self { - Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", - }; - write!(f, "{}", s) - } -} - /// Describes service-specific parameters. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -594,7 +537,7 @@ impl std::fmt::Display for ZoneType { )] pub struct DatasetRequest { pub id: Uuid, - pub name: crate::storage::dataset::DatasetName, + pub name: DatasetName, pub service_address: SocketAddrV6, } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 8cd815e7fb..980f5b6ebd 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -6,12 +6,10 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::params::{ - DatasetKind, DatasetRequest, ServiceType, ServiceZoneRequest, - ServiceZoneService, ZoneType, + DatasetRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, + ZoneType, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage::dataset::DatasetName; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -36,6 +34,8 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; +use sled_storage::dataset::{DatasetKind, DatasetName, CONFIG_DATASET}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeSet, HashMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -125,11 +125,12 @@ const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan.json"; impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage_manager: &StorageHandle, ) -> Result, PlanError> { - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); @@ -237,7 +238,7 @@ impl Plan { pub 
async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, sleds: &HashMap, ) -> Result { let mut dns_builder = internal_dns::DnsConfigBuilder::new(); @@ -737,9 +738,10 @@ impl Plan { let plan = Self { services, dns_config }; // Once we've constructed a plan, write it down to durable storage. - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SERVICE_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 163b24cd45..07f33893fc 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -9,11 +9,12 @@ use crate::bootstrap::{ config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, }; use crate::rack_setup::config::SetupServiceConfig as Config; -use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -55,11 +56,12 @@ pub struct Plan { impl Plan { pub async fn load( log: &Logger, - storage: &StorageResources, + storage: &StorageHandle, ) -> Result, PlanError> { let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); @@ -78,7 +80,7 @@ impl Plan { pub async fn create( log: &Logger, config: &Config, - storage: &StorageResources, + storage_manager: &StorageHandle, bootstrap_addrs: HashSet, use_trust_quorum: bool, ) -> Result { @@ -123,9 +125,10 
@@ impl Plan { let plan = Self { rack_id, sleds, config: config.clone() }; // Once we've constructed a plan, write it down to durable storage. - let paths: Vec = storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_SLED_PLAN_FILENAME)) .collect(); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 5657c7e69a..7dcbfa7045 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -63,7 +63,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::params::BootstrapAddressDiscovery; use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::nexus::d2n_params; +use crate::nexus::{d2n_params, ConvertInto}; use crate::params::{ AutonomousServiceOnlyError, ServiceType, ServiceZoneRequest, ServiceZoneService, TimeSync, ZoneType, @@ -74,7 +74,6 @@ use crate::rack_setup::plan::service::{ use crate::rack_setup::plan::sled::{ Plan as SledPlan, PlanError as SledPlanError, }; -use crate::storage_manager::StorageResources; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use ddm_admin_client::{Client as DdmAdminClient, DdmError}; @@ -94,6 +93,8 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; use sled_hardware::underlay::BootstrapInterface; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeSet; use std::collections::{HashMap, HashSet}; @@ -187,7 +188,7 @@ impl RackSetupService { pub(crate) fn new( log: Logger, config: Config, - storage_resources: StorageResources, + storage_manager: StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Self { @@ -196,7 +197,7 @@ impl 
RackSetupService { if let Err(e) = svc .run( &config, - &storage_resources, + &storage_manager, local_bootstrap_agent, bootstore, ) @@ -563,7 +564,7 @@ impl ServiceInner { dataset_id: dataset.id, request: NexusTypes::DatasetPutRequest { address: dataset.service_address.to_string(), - kind: dataset.name.dataset().clone().into(), + kind: dataset.name.dataset().clone().convert(), }, }) } @@ -612,6 +613,11 @@ impl ServiceInner { addr: b.addr, asn: b.asn, port: b.port.clone(), + hold_time: b.hold_time, + connect_retry: b.connect_retry, + delay_open: b.delay_open, + idle_hold_time: b.idle_hold_time, + keepalive: b.keepalive, }) .collect(), }) @@ -768,7 +774,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_resources: &StorageResources, + storage_manager: &StorageHandle, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, ) -> Result<(), SetupServiceError> { @@ -779,9 +785,10 @@ impl ServiceInner { config.az_subnet(), )?; - let marker_paths: Vec = storage_resources - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) + let marker_paths: Vec = storage_manager + .get_latest_resources() .await + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); @@ -802,7 +809,7 @@ impl ServiceInner { "RSS configuration looks like it has already been applied", ); - let sled_plan = SledPlan::load(&self.log, storage_resources) + let sled_plan = SledPlan::load(&self.log, storage_manager) .await? .expect("Sled plan should exist if completed marker exists"); if &sled_plan.config != config { @@ -810,7 +817,7 @@ impl ServiceInner { "Configuration changed".to_string(), )); } - let service_plan = ServicePlan::load(&self.log, storage_resources) + let service_plan = ServicePlan::load(&self.log, storage_manager) .await? 
.expect("Service plan should exist if completed marker exists"); @@ -844,7 +851,7 @@ impl ServiceInner { BootstrapAddressDiscovery::OnlyThese { addrs } => addrs.clone(), }; let maybe_sled_plan = - SledPlan::load(&self.log, storage_resources).await?; + SledPlan::load(&self.log, storage_manager).await?; if let Some(plan) = &maybe_sled_plan { let stored_peers: HashSet = plan.sleds.keys().map(|a| *a.ip()).collect(); @@ -876,7 +883,7 @@ impl ServiceInner { SledPlan::create( &self.log, config, - &storage_resources, + &storage_manager, bootstrap_addrs, config.trust_quorum_peers.is_some(), ) @@ -931,14 +938,14 @@ impl ServiceInner { }) .collect(); let service_plan = if let Some(plan) = - ServicePlan::load(&self.log, storage_resources).await? + ServicePlan::load(&self.log, storage_manager).await? { plan } else { ServicePlan::create( &self.log, &config, - &storage_resources, + &storage_manager, &plan.sleds, ) .await? diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 156547627c..903c8dabaa 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -8,14 +8,15 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use crate::bootstrap::params::StartSledAgentRequest; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; -use crate::storage_manager::StorageManager; -use bootstore::schemes::v0 as bootstore; +use crate::storage_monitor::UnderlayAccess; use internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -39,9 +40,9 @@ impl Server { config: &Config, log: Logger, request: StartSledAgentRequest, + long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + 
underlay_available_tx: oneshot::Sender, ) -> Result { info!(log, "setting up sled agent server"); @@ -63,8 +64,8 @@ impl Server { nexus_client, request, services, - storage, - bootstore, + long_running_tasks_handles, + underlay_available_tx, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index d1d8dbfff0..b87c91768b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -5,7 +5,7 @@ //! Sled-local service management. //! //! For controlling zone-based storage services, refer to -//! [crate::storage_manager::StorageManager]. +//! [sled_storage::manager::StorageManager]. //! //! For controlling virtual machine instances, refer to //! [crate::instance_manager::InstanceManager]. @@ -38,7 +38,6 @@ use crate::params::{ use crate::profile::*; use crate::smf_helper::Service; use crate::smf_helper::SmfHelper; -use crate::storage_manager::StorageResources; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; @@ -91,12 +90,13 @@ use omicron_common::nexus_config::{ use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; use rand::SeedableRng; -use sled_hardware::disk::ZONE_DATASET; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::underlay::BOOTSTRAP_PREFIX; use sled_hardware::Baseboard; use sled_hardware::SledMode; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::HashSet; @@ -373,7 +373,7 @@ pub struct ServiceManagerInner { advertised_prefixes: Mutex>>, sled_info: OnceCell, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ledger_directory_override: OnceCell, image_directory_override: OnceCell, @@ -418,10 +418,11 @@ impl ServiceManager { skip_timesync: Option, sidecar_revision: SidecarRevision, 
switch_zone_maghemite_links: Vec, - storage: StorageResources, + storage: StorageHandle, zone_bundler: ZoneBundler, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); + info!(log, "Creating ServiceManager"); Self { inner: Arc::new(ServiceManagerInner { log: log.clone(), @@ -476,10 +477,9 @@ impl ServiceManager { if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(SERVICES_LEDGER_FILENAME)]; } - self.inner - .storage - .all_m2_mountpoints(sled_hardware::disk::CONFIG_DATASET) - .await + let resources = self.inner.storage.get_latest_resources().await; + resources + .all_m2_mountpoints(CONFIG_DATASET) .into_iter() .map(|p| p.join(SERVICES_LEDGER_FILENAME)) .collect() @@ -1096,11 +1096,11 @@ impl ServiceManager { // If the boot disk exists, look for the image in the "install" dataset // there too. - if let Some((_, boot_zpool)) = self.inner.storage.boot_disk().await { - zone_image_paths.push( - boot_zpool - .dataset_mountpoint(sled_hardware::disk::INSTALL_DATASET), - ); + if let Some((_, boot_zpool)) = + self.inner.storage.get_latest_resources().await.boot_disk() + { + zone_image_paths + .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); } let installed_zone = InstalledZone::install( @@ -2252,8 +2252,12 @@ impl ServiceManager { // Create zones that should be running let mut zone_requests = AllZoneRequests::default(); - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); for zone in zones_to_be_added { // Check if we think the zone should already be running let name = zone.zone_name(); @@ -2979,8 +2983,12 @@ impl ServiceManager { let root = if request.zone_type == ZoneType::Switch { Utf8PathBuf::from(ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT) } else { - let all_u2_roots = - self.inner.storage.all_u2_mountpoints(ZONE_DATASET).await; + let all_u2_roots = self + .inner + .storage + 
.get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET); let mut rng = rand::rngs::StdRng::from_entropy(); all_u2_roots .choose(&mut rng) @@ -3038,7 +3046,7 @@ impl ServiceManager { mod test { use super::*; use crate::params::{ServiceZoneService, ZoneType}; - use async_trait::async_trait; + use illumos_utils::zpool::ZpoolName; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -3047,10 +3055,10 @@ mod test { svc, zone::MockZones, }; - use key_manager::{ - SecretRetriever, SecretRetrieverError, SecretState, VersionedIkm, - }; use omicron_common::address::OXIMETER_PORT; + use sled_storage::disk::{RawDisk, SyntheticDisk}; + + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -3078,6 +3086,7 @@ mod test { // Returns the expectations for a new service to be created. fn expect_new_service() -> Vec> { + illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); create_vnic_ctx.expect().return_once( @@ -3120,8 +3129,7 @@ mod test { let wait_ctx = svc::wait_for_service_context(); wait_ctx.expect().return_once(|_, _, _| Ok(())); - // Import the manifest, enable the service - let execute_ctx = illumos_utils::execute_context(); + let execute_ctx = illumos_utils::execute_helper_context(); execute_ctx.expect().times(..).returning(|_| { Ok(std::process::Output { status: std::process::ExitStatus::from_raw(0), @@ -3243,29 +3251,24 @@ mod test { } } - pub struct TestSecretRetriever {} + async fn setup_storage() -> StorageHandle { + let (manager, handle) = FakeStorageManager::new(); - #[async_trait] - impl SecretRetriever for TestSecretRetriever { - async fn get_latest( - &self, - ) -> Result { - let epoch = 0; - let salt = [0u8; 32]; - let secret = [0x1d; 32]; + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + 
manager.run().await; + }); - Ok(VersionedIkm::new(epoch, salt, &secret)) - } + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name).into(); + handle.upsert_disk(internal_disk).await; + let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let external_disk: RawDisk = + SyntheticDisk::new(external_zpool_name).into(); + handle.upsert_disk(external_disk).await; - async fn get( - &self, - epoch: u64, - ) -> Result { - if epoch != 0 { - return Err(SecretRetrieverError::NoSuchEpoch(epoch)); - } - Ok(SecretState::Current(self.get_latest().await?)) - } + handle } #[tokio::test] @@ -3276,10 +3279,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3290,7 +3293,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3324,10 +3327,10 @@ mod test { let log = logctx.log.clone(); let test_config = TestConfig::new().await; - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3338,7 +3341,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources, + storage_handle, zone_bundler, ); test_config.override_paths(&mgr); @@ -3377,10 +3380,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. 
- let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3391,7 +3394,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3424,7 +3427,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3460,10 +3463,10 @@ mod test { // First, spin up a ServiceManager, create a new service, and tear it // down. - let resources = StorageResources::new_for_test(); + let storage_handle = setup_storage().await; let zone_bundler = ZoneBundler::new( log.clone(), - resources.clone(), + storage_handle.clone(), Default::default(), ); let mgr = ServiceManager::new( @@ -3474,7 +3477,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle.clone(), zone_bundler.clone(), ); test_config.override_paths(&mgr); @@ -3512,7 +3515,7 @@ mod test { Some(true), SidecarRevision::Physical("rev-test".to_string()), vec![], - resources.clone(), + storage_handle, zone_bundler.clone(), ); test_config.override_paths(&mgr); diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index 2d2c18be25..0f08289b74 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -19,7 +19,7 @@ use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; -use propolis_client::api::DiskAttachmentState as PropolisDiskState; +use propolis_client::types::DiskAttachmentState as PropolisDiskState; use 
std::net::{Ipv6Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 46686a74e2..64b26a83a4 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -4,11 +4,11 @@ //! HTTP entrypoint functions for simulating the crucible pantry API. -use crucible_client_types::VolumeConstructionRequest; use dropshot::{ endpoint, ApiDescription, HttpError, HttpResponseDeleted, HttpResponseOk, HttpResponseUpdatedNoContent, Path as TypedPath, RequestContext, TypedBody, }; +use propolis_client::types::VolumeConstructionRequest; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::sync::Arc; diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 397a1980a5..15ff83c969 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -19,9 +19,10 @@ use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, SledInstanceState, }; -use propolis_client::api::InstanceMigrateStatusResponse as PropolisMigrateStatus; -use propolis_client::api::InstanceState as PropolisInstanceState; -use propolis_client::api::InstanceStateMonitorResponse; +use propolis_client::types::{ + InstanceMigrateStatusResponse as PropolisMigrateStatus, + InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, +}; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Mutex; @@ -131,11 +132,11 @@ impl SimInstanceInner { }); self.queue_migration_status(PropolisMigrateStatus { migration_id, - state: propolis_client::api::MigrationState::Sync, + state: propolis_client::types::MigrationState::Sync, }); self.queue_migration_status(PropolisMigrateStatus { migration_id, - state: propolis_client::api::MigrationState::Finish, + state: propolis_client::types::MigrationState::Finish, }); 
self.queue_propolis_state(PropolisInstanceState::Running); } diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index e4dac2f4b9..c06ae96f2e 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -35,15 +35,16 @@ use uuid::Uuid; use std::collections::HashMap; use std::str::FromStr; -use crucible_client_types::VolumeConstructionRequest; use dropshot::HttpServer; use illumos_utils::opte::params::{ DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, }; use nexus_client::types::PhysicalDiskKind; use omicron_common::address::PROPOLIS_PORT; -use propolis_client::Client as PropolisClient; -use propolis_server::mock_server::Context as PropolisContext; +use propolis_client::{ + types::VolumeConstructionRequest, Client as PropolisClient, +}; +use propolis_mock_server::Context as PropolisContext; /// Simulates management of the control plane on a sled /// @@ -70,13 +71,14 @@ pub struct SledAgent { } fn extract_targets_from_volume_construction_request( - vec: &mut Vec, vcr: &VolumeConstructionRequest, -) { +) -> Result, std::net::AddrParseError> { // A snapshot is simply a flush with an extra parameter, and flushes are // only sent to sub volumes, not the read only parent. Flushes are only // processed by regions, so extract each region that would be affected by a // flush. 
+ + let mut res = vec![]; match vcr { VolumeConstructionRequest::Volume { id: _, @@ -85,9 +87,9 @@ fn extract_targets_from_volume_construction_request( read_only_parent: _, } => { for sub_volume in sub_volumes.iter() { - extract_targets_from_volume_construction_request( - vec, sub_volume, - ); + res.extend(extract_targets_from_volume_construction_request( + sub_volume, + )?); } } @@ -103,7 +105,7 @@ fn extract_targets_from_volume_construction_request( gen: _, } => { for target in &opts.target { - vec.push(*target); + res.push(SocketAddr::from_str(target)?); } } @@ -111,6 +113,7 @@ fn extract_targets_from_volume_construction_request( // noop } } + Ok(res) } impl SledAgent { @@ -171,23 +174,19 @@ impl SledAgent { volume_construction_request: &VolumeConstructionRequest, ) -> Result<(), Error> { let disk_id = match volume_construction_request { - VolumeConstructionRequest::Volume { - id, - block_size: _, - sub_volumes: _, - read_only_parent: _, - } => id, + VolumeConstructionRequest::Volume { id, .. 
} => id, _ => { panic!("root of volume construction request not a volume!"); } }; - let mut targets = Vec::new(); - extract_targets_from_volume_construction_request( - &mut targets, + let targets = extract_targets_from_volume_construction_request( &volume_construction_request, - ); + ) + .map_err(|e| { + Error::invalid_request(&format!("bad socketaddr: {e:?}")) + })?; let mut region_ids = Vec::new(); @@ -640,11 +639,10 @@ impl SledAgent { ..Default::default() }; let propolis_log = log.new(o!("component" => "propolis-server-mock")); - let private = - Arc::new(PropolisContext::new(Default::default(), propolis_log)); + let private = Arc::new(PropolisContext::new(propolis_log)); info!(log, "Starting mock propolis-server..."); let dropshot_log = log.new(o!("component" => "dropshot")); - let mock_api = propolis_server::mock_server::api(); + let mock_api = propolis_mock_server::api(); let srv = dropshot::HttpServerStarter::new( &dropshot_config, diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 89b1f59c7a..2528a258d7 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -16,13 +16,13 @@ use chrono::prelude::*; use crucible_agent_client::types::{ CreateRegion, Region, RegionId, RunningSnapshot, Snapshot, State, }; -use crucible_client_types::VolumeConstructionRequest; use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; use nexus_client::types::{ ByteCount, PhysicalDiskKind, PhysicalDiskPutRequest, ZpoolPutRequest, }; +use propolis_client::types::VolumeConstructionRequest; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index b8852e2bba..9826a987d4 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -10,9 +10,10 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; -use 
crate::instance_manager::InstanceManager; +use crate::instance_manager::{InstanceManager, ReservoirMode}; +use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; -use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; +use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, @@ -20,7 +21,7 @@ use crate::params::{ VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; -use crate::storage_manager::{self, StorageManager}; +use crate::storage_monitor::UnderlayAccess; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::zone_bundle; use crate::zone_bundle::BundleError; @@ -57,13 +58,13 @@ use omicron_common::backoff::{ retry_policy_internal_service_aggressive, BackoffError, }; use oximeter::types::ProducerRegistry; -use sled_hardware::underlay; -use sled_hardware::HardwareManager; -use sled_hardware::{underlay::BootstrapInterface, Baseboard}; +use sled_hardware::{underlay, Baseboard, HardwareManager}; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; +use tokio::sync::oneshot; use uuid::Uuid; #[cfg(not(test))] @@ -110,7 +111,7 @@ pub enum Error { Instance(#[from] crate::instance_manager::Error), #[error("Error managing storage: {0}")] - Storage(#[from] crate::storage_manager::Error), + Storage(#[from] sled_storage::error::Error), #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), @@ -227,7 +228,7 @@ struct SledAgentInner { start_request: StartSledAgentRequest, // Component of Sled Agent responsible for storage and dataset management. - storage: StorageManager, + storage: StorageHandle, // Component of Sled Agent responsible for managing Propolis instances. 
instances: InstanceManager, @@ -287,8 +288,8 @@ impl SledAgent { nexus_client: NexusClientWithResolver, request: StartSledAgentRequest, services: ServiceManager, - storage: StorageManager, - bootstore: bootstore::NodeHandle, + long_running_task_handles: LongRunningTaskHandles, + underlay_available_tx: oneshot::Sender, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -301,14 +302,14 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); - let boot_disk = storage - .resources() - .boot_disk() + let storage_manager = &long_running_task_handles.storage_manager; + let boot_disk = storage_manager + .get_latest_resources() .await + .boot_disk() .ok_or_else(|| Error::BootDiskNotFound)?; - // Configure a swap device of the configured size before other system - // setup. + // Configure a swap device of the configured size before other system setup. match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); @@ -363,45 +364,55 @@ impl SledAgent { *sled_address.ip(), ); - storage - .setup_underlay_access(storage_manager::UnderlayAccess { + // Inform the `StorageMonitor` that the underlay is available so that + // it can try to contact nexus. + underlay_available_tx + .send(UnderlayAccess { nexus_client: nexus_client.clone(), sled_id: request.body.id, }) - .await?; - - // TODO-correctness The bootstrap agent _also_ has a `HardwareManager`. - // We only use it for reading properties, but it's not `Clone`able - // because it's holding an inner task handle. Could we add a way to get - // a read-only handle to it, and have bootstrap agent give us that - // instead of creating a new full one ourselves? 
- let hardware = HardwareManager::new(&parent_log, services.sled_mode()) - .map_err(|e| Error::Hardware(e))?; + .map_err(|_| ()) + .expect("Failed to send to StorageMonitor"); let instances = InstanceManager::new( parent_log.clone(), nexus_client.clone(), etherstub.clone(), port_manager.clone(), - storage.resources().clone(), - storage.zone_bundler().clone(), + storage_manager.clone(), + long_running_task_handles.zone_bundler.clone(), )?; - match config.vmm_reservoir_percentage { - Some(sz) if sz > 0 && sz < 100 => { - instances.set_reservoir_size(&hardware, sz).map_err(|e| { - error!(log, "Failed to set VMM reservoir size: {e}"); - e - })?; - } - Some(0) => { - warn!(log, "Not using VMM reservoir (size 0 bytes requested)"); - } - None => { - warn!(log, "Not using VMM reservoir"); + // Configure the VMM reservoir as either a percentage of DRAM or as an + // exact size in MiB. + let reservoir_mode = match ( + config.vmm_reservoir_percentage, + config.vmm_reservoir_size_mb, + ) { + (None, None) => ReservoirMode::None, + (Some(p), None) => ReservoirMode::Percentage(p), + (None, Some(mb)) => ReservoirMode::Size(mb), + (Some(_), Some(_)) => panic!( + "only one of vmm_reservoir_percentage and \ + vmm_reservoir_size_mb is allowed" + ), + }; + + match reservoir_mode { + ReservoirMode::None => warn!(log, "Not using VMM reservoir"), + ReservoirMode::Size(0) | ReservoirMode::Percentage(0) => { + warn!(log, "Not using VMM reservoir (size 0 bytes requested)") } - Some(sz) => { - panic!("invalid requested VMM reservoir percentage: {}", sz); + _ => { + instances + .set_reservoir_size( + &long_running_task_handles.hardware_manager, + reservoir_mode, + ) + .map_err(|e| { + error!(log, "Failed to setup VMM reservoir: {e}"); + e + })?; } } @@ -419,7 +430,8 @@ impl SledAgent { // until we have this, as we need to know which switches have uplinks to // correctly set up services. 
let get_network_config = || async { - let serialized_config = bootstore + let serialized_config = long_running_task_handles + .bootstore .get_network_config() .await .map_err(|err| BackoffError::transient(err.to_string()))? @@ -465,7 +477,7 @@ impl SledAgent { let mut metrics_manager = MetricsManager::new( request.body.id, request.body.rack_id, - hardware.baseboard(), + long_running_task_handles.hardware_manager.baseboard(), log.new(o!("component" => "MetricsManager")), )?; @@ -502,15 +514,14 @@ impl SledAgent { endpoint, )); - let zone_bundler = storage.zone_bundler().clone(); let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, subnet: request.body.subnet, start_request: request, - storage, + storage: long_running_task_handles.storage_manager.clone(), instances, - hardware, + hardware: long_running_task_handles.hardware_manager.clone(), updates, port_manager, services, @@ -524,8 +535,8 @@ impl SledAgent { // request queue? nexus_request_queue: NexusRequestQueue::new(), rack_network_config, - zone_bundler, - bootstore: bootstore.clone(), + zone_bundler: long_running_task_handles.zone_bundler.clone(), + bootstore: long_running_task_handles.bootstore.clone(), metrics_manager, }), log: log.clone(), @@ -546,6 +557,7 @@ impl SledAgent { /// Blocks until all services have started, retrying indefinitely on /// failure. 
pub(crate) async fn cold_boot_load_services(&self) { + info!(self.log, "Loading cold boot services"); retry_notify( retry_policy_internal_service_aggressive(), || async { @@ -595,9 +607,7 @@ impl SledAgent { let nexus_client = self.inner.nexus_client.clone(); let sled_address = self.inner.sled_address(); let is_scrimlet = self.inner.hardware.is_scrimlet(); - let baseboard = nexus_client::types::Baseboard::from( - self.inner.hardware.baseboard(), - ); + let baseboard = self.inner.hardware.baseboard().convert(); let usable_hardware_threads = self.inner.hardware.online_processor_count(); let usable_physical_ram = @@ -652,12 +662,15 @@ impl SledAgent { if call_count == 0 { info!( log, - "failed to notify nexus about sled agent"; "error" => err, + "failed to notify nexus about sled agent"; + "error" => %err, ); } else if total_duration > std::time::Duration::from_secs(30) { warn!( log, - "failed to notify nexus about sled agent"; "error" => err, "total duration" => ?total_duration, + "failed to notify nexus about sled agent"; + "error" => %err, + "total duration" => ?total_duration, ); } }; @@ -826,9 +839,18 @@ impl SledAgent { } /// Gets the sled's current list of all zpools. 
- pub async fn zpools_get(&self) -> Result, Error> { - let zpools = self.inner.storage.get_zpools().await?; - Ok(zpools) + pub async fn zpools_get(&self) -> Vec { + self.inner + .storage + .get_latest_resources() + .await + .get_all_zpools() + .into_iter() + .map(|(name, variant)| Zpool { + id: name.id(), + disk_type: variant.into(), + }) + .collect() } /// Returns whether or not the sled believes itself to be a scrimlet @@ -1068,7 +1090,9 @@ pub async fn add_sled_to_initialized_rack( // Get all known bootstrap addresses via DDM let ddm_admin_client = DdmAdminClient::localhost(&log)?; let addrs = ddm_admin_client - .derive_bootstrap_addrs_from_prefixes(&[BootstrapInterface::GlobalZone]) + .derive_bootstrap_addrs_from_prefixes(&[ + underlay::BootstrapInterface::GlobalZone, + ]) .await?; // Create a set of futures to concurrently map the baseboard to bootstrap ip diff --git a/sled-agent/src/storage/dataset.rs b/sled-agent/src/storage/dataset.rs deleted file mode 100644 index 4efc0f320a..0000000000 --- a/sled-agent/src/storage/dataset.rs +++ /dev/null @@ -1,63 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use crate::params::DatasetKind; -use illumos_utils::zpool::ZpoolName; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::str::FromStr; - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. 
- kind: DatasetKind, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetKind { - &self.kind - } - - pub fn full(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - -impl From for sled_agent_client::types::DatasetName { - fn from(n: DatasetName) -> Self { - Self { - pool_name: sled_agent_client::types::ZpoolName::from_str( - &n.pool().to_string(), - ) - .unwrap(), - kind: n.dataset().clone().into(), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use uuid::Uuid; - - #[test] - fn serialize_dataset_name() { - let pool = ZpoolName::new_internal(Uuid::new_v4()); - let kind = DatasetKind::Crucible; - let name = DatasetName::new(pool, kind); - toml::to_string(&name).unwrap(); - } -} diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs deleted file mode 100644 index 74bd59a151..0000000000 --- a/sled-agent/src/storage/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of local storage - -pub(crate) mod dataset; -pub(crate) mod dump_setup; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs deleted file mode 100644 index c31a4dc0bc..0000000000 --- a/sled-agent/src/storage_manager.rs +++ /dev/null @@ -1,1432 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Management of sled-local storage. 
- -use crate::nexus::NexusClientWithResolver; -use crate::storage::dataset::DatasetName; -use crate::storage::dump_setup::DumpSetup; -use crate::zone_bundle::ZoneBundler; -use camino::Utf8PathBuf; -use derive_more::From; -use futures::stream::FuturesOrdered; -use futures::FutureExt; -use futures::StreamExt; -use illumos_utils::zpool::{ZpoolKind, ZpoolName}; -use illumos_utils::{zfs::Mountpoint, zpool::ZpoolInfo}; -use key_manager::StorageKeyRequester; -use nexus_client::types::PhysicalDiskDeleteRequest; -use nexus_client::types::PhysicalDiskKind; -use nexus_client::types::PhysicalDiskPutRequest; -use nexus_client::types::ZpoolPutRequest; -use omicron_common::api::external::{ByteCount, ByteCountRangeError}; -use omicron_common::backoff; -use omicron_common::disk::DiskIdentity; -use sled_hardware::{Disk, DiskVariant, UnparsedDisk}; -use slog::Logger; -use std::collections::hash_map; -use std::collections::HashMap; -use std::collections::HashSet; -use std::convert::TryFrom; -use std::pin::Pin; -use std::sync::Arc; -use std::sync::OnceLock; -use std::time::Duration; -use tokio::sync::{mpsc, oneshot, Mutex}; -use tokio::task::JoinHandle; -use tokio::time::{interval, MissedTickBehavior}; -use uuid::Uuid; - -use illumos_utils::dumpadm::DumpHdrError; -#[cfg(test)] -use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool}; -#[cfg(not(test))] -use illumos_utils::{zfs::Zfs, zpool::Zpool}; - -// A key manager can only become ready once. This occurs during RSS or cold -// boot when the bootstore has detected it has a key share. -static KEY_MANAGER_READY: OnceLock<()> = OnceLock::new(); - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error(transparent)] - DiskError(#[from] sled_hardware::DiskError), - - // TODO: We could add the context of "why are we doint this op", maybe? 
- #[error(transparent)] - ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), - - #[error(transparent)] - ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - - #[error(transparent)] - ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), - - #[error(transparent)] - ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), - - #[error(transparent)] - GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), - - #[error(transparent)] - Fstyp(#[from] illumos_utils::fstyp::Error), - - #[error(transparent)] - ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), - - #[error(transparent)] - ZoneBoot(#[from] illumos_utils::running_zone::BootError), - - #[error(transparent)] - ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), - - #[error(transparent)] - ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - - #[error("No U.2 Zpools found")] - NoU2Zpool, - - #[error("Failed to parse UUID from {path}: {err}")] - ParseUuid { - path: Utf8PathBuf, - #[source] - err: uuid::Error, - }, - - #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] - UuidMismatch { name: Box, old: Uuid, new: Uuid }, - - #[error("Error parsing pool {name}'s size: {err}")] - BadPoolSize { - name: String, - #[source] - err: ByteCountRangeError, - }, - - #[error("Failed to parse the dataset {name}'s UUID: {err}")] - ParseDatasetUuid { - name: String, - #[source] - err: uuid::Error, - }, - - #[error("Zpool Not Found: {0}")] - ZpoolNotFound(String), - - #[error("Failed to serialize toml (intended for {path:?}): {err}")] - Serialize { - path: Utf8PathBuf, - #[source] - err: toml::ser::Error, - }, - - #[error("Failed to deserialize toml from {path:?}: {err}")] - Deserialize { - path: Utf8PathBuf, - #[source] - err: toml::de::Error, - }, - - #[error("Failed to perform I/O: {message}: {err}")] - Io { - message: String, - #[source] - err: std::io::Error, - }, - - #[error("Underlay not yet 
initialized")] - UnderlayNotInitialized, - - #[error("Encountered error checking dump device flags: {0}")] - DumpHdr(#[from] DumpHdrError), -} - -/// A ZFS storage pool. -struct Pool { - name: ZpoolName, - info: ZpoolInfo, - parent: DiskIdentity, -} - -impl Pool { - /// Queries for an existing Zpool by name. - /// - /// Returns Ok if the pool exists. - fn new(name: ZpoolName, parent: DiskIdentity) -> Result { - let info = Zpool::get_info(&name.to_string())?; - Ok(Pool { name, info, parent }) - } - - fn parent(&self) -> &DiskIdentity { - &self.parent - } -} - -// The type of a future which is used to send a notification to Nexus. -type NotifyFut = - Pin> + Send>>; - -#[derive(Debug)] -struct NewFilesystemRequest { - dataset_id: Uuid, - dataset_name: DatasetName, - responder: oneshot::Sender>, -} - -struct UnderlayRequest { - underlay: UnderlayAccess, - responder: oneshot::Sender>, -} - -#[derive(PartialEq, Eq, Clone)] -pub(crate) enum DiskWrapper { - Real { disk: Disk, devfs_path: Utf8PathBuf }, - Synthetic { zpool_name: ZpoolName }, -} - -impl From for DiskWrapper { - fn from(disk: Disk) -> Self { - let devfs_path = disk.devfs_path().clone(); - Self::Real { disk, devfs_path } - } -} - -impl DiskWrapper { - fn identity(&self) -> DiskIdentity { - match self { - DiskWrapper::Real { disk, .. } => disk.identity().clone(), - DiskWrapper::Synthetic { zpool_name } => { - let id = zpool_name.id(); - DiskIdentity { - vendor: "synthetic-vendor".to_string(), - serial: format!("synthetic-serial-{id}"), - model: "synthetic-model".to_string(), - } - } - } - } - - fn variant(&self) -> DiskVariant { - match self { - DiskWrapper::Real { disk, .. } => disk.variant(), - DiskWrapper::Synthetic { zpool_name } => match zpool_name.kind() { - ZpoolKind::External => DiskVariant::U2, - ZpoolKind::Internal => DiskVariant::M2, - }, - } - } - - fn zpool_name(&self) -> &ZpoolName { - match self { - DiskWrapper::Real { disk, .. 
} => disk.zpool_name(), - DiskWrapper::Synthetic { zpool_name } => zpool_name, - } - } -} - -#[derive(Clone)] -pub struct StorageResources { - // All disks, real and synthetic, being managed by this sled - disks: Arc>>, - - // A map of "Uuid" to "pool". - pools: Arc>>, -} - -// The directory within the debug dataset in which bundles are created. -const BUNDLE_DIRECTORY: &str = "bundle"; - -// The directory for zone bundles. -const ZONE_BUNDLE_DIRECTORY: &str = "zone"; - -impl StorageResources { - /// Creates a fabricated view of storage resources. - /// - /// Use this only when you want to reference the disks, but not actually - /// access them. Creates one internal and one external disk. - #[cfg(test)] - pub fn new_for_test() -> Self { - let new_disk_identity = || DiskIdentity { - vendor: "vendor".to_string(), - serial: Uuid::new_v4().to_string(), - model: "model".to_string(), - }; - - Self { - disks: Arc::new(Mutex::new(HashMap::from([ - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_internal(Uuid::new_v4()), - }, - ), - ( - new_disk_identity(), - DiskWrapper::Synthetic { - zpool_name: ZpoolName::new_external(Uuid::new_v4()), - }, - ), - ]))), - pools: Arc::new(Mutex::new(HashMap::new())), - } - } - - /// Returns the identity of the boot disk. - /// - /// If this returns `None`, we have not processed the boot disk yet. - pub async fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - let disks = self.disks.lock().await; - disks.iter().find_map(|(id, disk)| { - match disk { - // This is the "real" use-case: if we have real disks, query - // their properties to identify if they truly are the boot disk. - DiskWrapper::Real { disk, .. } => { - if disk.is_boot_disk() { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - // This is the "less real" use-case: if we have synthetic disks, - // just label the first M.2-looking one as a "boot disk". - DiskWrapper::Synthetic { .. 
} => { - if matches!(disk.variant(), DiskVariant::M2) { - return Some((id.clone(), disk.zpool_name().clone())); - } - } - }; - None - }) - } - - // TODO: Could be generic over DiskVariant - - /// Returns all M.2 zpools - pub async fn all_m2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::M2).await - } - - /// Returns all U.2 zpools - pub async fn all_u2_zpools(&self) -> Vec { - self.all_zpools(DiskVariant::U2).await - } - - /// Returns all mountpoints within all M.2s for a particular dataset. - pub async fn all_m2_mountpoints(&self, dataset: &str) -> Vec { - let m2_zpools = self.all_m2_zpools().await; - m2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all mountpoints within all U.2s for a particular dataset. - pub async fn all_u2_mountpoints(&self, dataset: &str) -> Vec { - let u2_zpools = self.all_u2_zpools().await; - u2_zpools - .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) - .collect() - } - - /// Returns all zpools of a particular variant - pub async fn all_zpools(&self, variant: DiskVariant) -> Vec { - let disks = self.disks.lock().await; - disks - .values() - .filter_map(|disk| { - if disk.variant() == variant { - return Some(disk.zpool_name().clone()); - } - None - }) - .collect() - } - - /// Return the directories for storing zone service bundles. - pub async fn all_zone_bundle_directories(&self) -> Vec { - self.all_m2_mountpoints(sled_hardware::disk::M2_DEBUG_DATASET) - .await - .into_iter() - .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) - .collect() - } -} - -/// Describes the access to the underlay used by the StorageManager. -pub struct UnderlayAccess { - pub nexus_client: NexusClientWithResolver, - pub sled_id: Uuid, -} - -// A worker that starts zones for pools as they are received. 
-struct StorageWorker { - log: Logger, - nexus_notifications: FuturesOrdered, - rx: mpsc::Receiver, - underlay: Arc>>, - - // A mechanism for requesting disk encryption keys from the - // [`key_manager::KeyManager`] - key_requester: StorageKeyRequester, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: Arc, -} - -#[derive(Clone, Debug)] -enum NotifyDiskRequest { - Add { identity: DiskIdentity, variant: DiskVariant }, - Remove(DiskIdentity), -} - -#[derive(From, Clone, Debug, PartialEq, Eq, Hash)] -enum QueuedDiskCreate { - Real(UnparsedDisk), - Synthetic(ZpoolName), -} - -impl QueuedDiskCreate { - fn is_synthetic(&self) -> bool { - if let QueuedDiskCreate::Synthetic(_) = self { - true - } else { - false - } - } -} - -impl StorageWorker { - // Ensures the named dataset exists as a filesystem with a UUID, optionally - // creating it if `do_format` is true. - // - // Returns the UUID attached to the ZFS filesystem. - fn ensure_dataset( - &mut self, - dataset_id: Uuid, - dataset_name: &DatasetName, - ) -> Result<(), Error> { - let zoned = true; - let fs_name = &dataset_name.full(); - let do_format = true; - let encryption_details = None; - let size_details = None; - Zfs::ensure_filesystem( - &dataset_name.full(), - Mountpoint::Path(Utf8PathBuf::from("/data")), - zoned, - do_format, - encryption_details, - size_details, - None, - )?; - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { - if let Ok(id) = id_str.parse::() { - if id != dataset_id { - return Err(Error::UuidMismatch { - name: Box::new(dataset_name.clone()), - old: id, - new: dataset_id, - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value(&fs_name, "uuid", &dataset_id.to_string())?; - Ok(()) - } - - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `pool_id` to this sled. 
- async fn add_zpool_notify(&mut self, pool: &Pool, size: ByteCount) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. - if self.underlay.lock().await.is_none() { - return; - } - - let pool_id = pool.name.id(); - let DiskIdentity { vendor, serial, model } = pool.parent.clone(); - let underlay = self.underlay.clone(); - - let notify_nexus = move || { - let zpool_request = ZpoolPutRequest { - size: size.into(), - disk_vendor: vendor.clone(), - disk_serial: serial.clone(), - disk_model: model.clone(), - }; - let underlay = underlay.clone(); - - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - nexus_client - .zpool_put(&sled_id, &pool_id, &zpool_request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - Ok(()) - } - }; - let log = self.log.clone(); - let name = pool.name.clone(); - let disk = pool.parent().clone(); - let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - 
backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn ensure_using_exactly_these_disks( - &mut self, - resources: &StorageResources, - unparsed_disks: Vec, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - // We clear all existing queued drives that are not synthetic and add - // new ones in the loop below - if let Some(queued) = queued_u2_drives { - info!( - self.log, - "Ensure exact disks: clearing non-synthetic queued disks." - ); - queued.retain(|d| d.is_synthetic()); - } - - let mut new_disks = HashMap::new(); - - // We may encounter errors while parsing any of the disks; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent access to the other disks if - // only one failure occurs. - let mut err: Option = None; - - // Ensure all disks conform to the expected partition layout. - for disk in unparsed_disks.into_iter() { - if disk.variant() == DiskVariant::U2 { - if let Some(queued) = queued_u2_drives { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - continue; - } - } - match self.add_new_disk(disk, queued_u2_drives).await.map_err( - |err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - }, - ) { - Ok(disk) => { - new_disks.insert(disk.identity().clone(), disk); - } - Err(e) => { - warn!(self.log, "Cannot parse disk: {e}"); - err = Some(e.into()); - } - }; - } - - let mut disks = resources.disks.lock().await; - - // Remove disks that don't appear in the "new_disks" set. - // - // This also accounts for zpools and notifies Nexus. - let disks_to_be_removed = disks - .iter_mut() - .filter(|(key, old_disk)| { - // If this disk appears in the "new" and "old" set, it should - // only be removed if it has changed. 
- // - // This treats a disk changing in an unexpected way as a - // "removal and re-insertion". - match old_disk { - DiskWrapper::Real { disk, .. } => { - if let Some(new_disk) = new_disks.get(*key) { - // Changed Disk -> Disk should be removed. - new_disk != disk - } else { - // Real disk, not in the new set -> Disk should be removed. - true - } - } - // Synthetic disk -> Disk should NOT be removed. - DiskWrapper::Synthetic { .. } => false, - } - }) - .map(|(_key, disk)| disk.clone()) - .collect::>(); - - for disk in disks_to_be_removed { - if let Err(e) = self - .delete_disk_locked(&resources, &mut disks, &disk.identity()) - .await - { - warn!(self.log, "Failed to delete disk: {e}"); - err = Some(e); - } - } - - // Add new disks to `resources.disks`. - // - // This also accounts for zpools and notifies Nexus. - for (key, new_disk) in new_disks { - if let Some(old_disk) = disks.get(&key) { - // In this case, the disk should be unchanged. - // - // This assertion should be upheld by the filter above, which - // should remove disks that changed. - assert!(old_disk == &new_disk.into()); - } else { - let disk = DiskWrapper::Real { - disk: new_disk.clone(), - devfs_path: new_disk.devfs_path().clone(), - }; - if let Err(e) = - self.upsert_disk_locked(&resources, &mut disks, disk).await - { - warn!(self.log, "Failed to upsert disk: {e}"); - err = Some(e); - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Attempt to create a new disk via `sled_hardware::Disk::new()`. If the - // disk addition fails because the the key manager cannot load a secret, - // this indicates a transient error, and so we queue the disk so we can - // try again. 
- async fn add_new_disk( - &mut self, - unparsed_disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result { - match sled_hardware::Disk::new( - &self.log, - unparsed_disk.clone(), - Some(&self.key_requester), - ) - .await - { - Ok(disk) => Ok(disk), - Err(sled_hardware::DiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing disk {:?}", unparsed_disk - ); - if let Some(queued) = queued_u2_drives { - queued.insert(unparsed_disk.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([unparsed_disk.into()])); - } - Err(sled_hardware::DiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {err} - not queueing disk {:?}", - unparsed_disk - ); - Err(err) - } - } - } - - // Attempt to create a new synthetic disk via - // `sled_hardware::Disk::ensure_zpool_ready()`. If the disk addition fails - // because the the key manager cannot load a secret, this indicates a - // transient error, and so we queue the disk so we can try again. 
- async fn add_new_synthetic_disk( - &mut self, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), sled_hardware::DiskError> { - let synthetic_id = DiskIdentity { - vendor: "fake_vendor".to_string(), - serial: "fake_serial".to_string(), - model: zpool_name.id().to_string(), - }; - match sled_hardware::Disk::ensure_zpool_ready( - &self.log, - &zpool_name, - &synthetic_id, - Some(&self.key_requester), - ) - .await - { - Ok(()) => Ok(()), - Err(sled_hardware::DiskError::KeyManager(err)) => { - warn!( - self.log, - "Transient error: {err} - queuing synthetic disk: {:?}", - zpool_name - ); - if let Some(queued) = queued_u2_drives { - queued.insert(zpool_name.into()); - } else { - *queued_u2_drives = - Some(HashSet::from([zpool_name.into()])); - } - Err(sled_hardware::DiskError::KeyManager(err)) - } - Err(err) => { - error!( - self.log, - "Persistent error: {} - not queueing synthetic disk {:?}", - err, - zpool_name - ); - Err(err) - } - } - } - - async fn upsert_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if disk.variant() == DiskVariant::U2 { - info!(self.log, "Queuing disk for upsert: {disk:?}"); - queued.insert(disk.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting disk: {disk:?}"); - - // Ensure the disk conforms to an expected partition layout. 
- let disk = - self.add_new_disk(disk, queued_u2_drives).await.map_err(|err| { - warn!(self.log, "Could not ensure partitions: {err}"); - err - })?; - - let mut disks = resources.disks.lock().await; - let disk = DiskWrapper::Real { - disk: disk.clone(), - devfs_path: disk.devfs_path().clone(), - }; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_synthetic_disk( - &mut self, - resources: &StorageResources, - zpool_name: ZpoolName, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - // Queue U.2 drives if necessary - if let Some(queued) = queued_u2_drives { - if zpool_name.kind() == ZpoolKind::External { - info!( - self.log, - "Queuing synthetic disk for upsert: {zpool_name:?}" - ); - queued.insert(zpool_name.into()); - return Ok(()); - } - } - - info!(self.log, "Upserting synthetic disk for: {zpool_name:?}"); - - self.add_new_synthetic_disk(zpool_name.clone(), queued_u2_drives) - .await?; - let disk = DiskWrapper::Synthetic { zpool_name }; - let mut disks = resources.disks.lock().await; - self.upsert_disk_locked(resources, &mut disks, disk).await - } - - async fn upsert_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - disk: DiskWrapper, - ) -> Result<(), Error> { - disks.insert(disk.identity(), disk.clone()); - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - self.upsert_zpool(&resources, disk.identity(), disk.zpool_name()) - .await?; - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - async fn delete_disk( - &mut self, - resources: &StorageResources, - disk: UnparsedDisk, - ) -> Result<(), Error> { - info!(self.log, "Deleting disk: {disk:?}"); - // TODO: Don't we need to do some accounting, e.g. for all the information - // that's no longer accessible? Or is that up to Nexus to figure out at - // a later point-in-time? 
- // - // If we're storing zone images on the M.2s for internal services, how - // do we reconcile them? - let mut disks = resources.disks.lock().await; - self.delete_disk_locked(resources, &mut disks, disk.identity()).await - } - - async fn delete_disk_locked( - &mut self, - resources: &StorageResources, - disks: &mut tokio::sync::MutexGuard< - '_, - HashMap, - >, - key: &DiskIdentity, - ) -> Result<(), Error> { - if let Some(parsed_disk) = disks.remove(key) { - resources.pools.lock().await.remove(&parsed_disk.zpool_name().id()); - self.physical_disk_notify(NotifyDiskRequest::Remove(key.clone())) - .await; - } - - self.dump_setup.update_dumpdev_setup(disks).await; - - Ok(()) - } - - /// When the underlay becomes available, we need to notify nexus about any - /// discovered disks and pools, since we don't attempt to notify until there - /// is an underlay available. - async fn notify_nexus_about_existing_resources( - &mut self, - resources: &StorageResources, - ) -> Result<(), Error> { - let disks = resources.disks.lock().await; - for disk in disks.values() { - self.physical_disk_notify(NotifyDiskRequest::Add { - identity: disk.identity(), - variant: disk.variant(), - }) - .await; - } - - // We may encounter errors while processing any of the pools; keep track of - // any errors that occur and return any of them if something goes wrong. - // - // That being said, we should not prevent notification to nexus of the - // other pools if only one failure occurs. 
- let mut err: Option = None; - - let pools = resources.pools.lock().await; - for pool in pools.values() { - match ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool.name.to_string(), err } - }) { - Ok(size) => self.add_zpool_notify(pool, size).await, - Err(e) => { - warn!(self.log, "Failed to notify nexus about pool: {e}"); - err = Some(e) - } - } - } - - if let Some(err) = err { - Err(err) - } else { - Ok(()) - } - } - - // Adds a "notification to nexus" to `self.nexus_notifications`, informing it - // about the addition/removal of a physical disk to this sled. - async fn physical_disk_notify(&mut self, disk: NotifyDiskRequest) { - // The underlay network is setup once at sled-agent startup. Before - // there is an underlay we want to avoid sending notifications to nexus for - // two reasons: - // 1. They can't possibly succeed - // 2. They increase the backoff time exponentially, so that once - // sled-agent does start it may take much longer to notify nexus - // than it would if we avoid this. This goes especially so for rack - // setup, when bootstrap agent is waiting an aribtrary time for RSS - // initialization. 
- if self.underlay.lock().await.is_none() { - return; - } - let underlay = self.underlay.clone(); - let disk2 = disk.clone(); - let notify_nexus = move || { - let disk = disk.clone(); - let underlay = underlay.clone(); - async move { - let underlay_guard = underlay.lock().await; - let Some(underlay) = underlay_guard.as_ref() else { - return Err(backoff::BackoffError::transient( - Error::UnderlayNotInitialized.to_string(), - )); - }; - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - drop(underlay_guard); - - match &disk { - NotifyDiskRequest::Add { identity, variant } => { - let request = PhysicalDiskPutRequest { - model: identity.model.clone(), - serial: identity.serial.clone(), - vendor: identity.vendor.clone(), - variant: match variant { - DiskVariant::U2 => PhysicalDiskKind::U2, - DiskVariant::M2 => PhysicalDiskKind::M2, - }, - sled_id, - }; - nexus_client - .physical_disk_put(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - NotifyDiskRequest::Remove(disk_identity) => { - let request = PhysicalDiskDeleteRequest { - model: disk_identity.model.clone(), - serial: disk_identity.serial.clone(), - vendor: disk_identity.vendor.clone(), - sled_id, - }; - nexus_client - .physical_disk_delete(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - } - Ok(()) - } - }; - let log = self.log.clone(); - // This notification is often invoked before Nexus has started - // running, so avoid flagging any errors as concerning until some - // time has passed. 
- let log_post_failure = move |_, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about {disk2:?}"); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about {disk2:?}"; - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - async fn upsert_zpool( - &mut self, - resources: &StorageResources, - parent: DiskIdentity, - pool_name: &ZpoolName, - ) -> Result<(), Error> { - let mut pools = resources.pools.lock().await; - let zpool = Pool::new(pool_name.clone(), parent)?; - - let pool = match pools.entry(pool_name.id()) { - hash_map::Entry::Occupied(mut entry) => { - // The pool already exists. - entry.get_mut().info = zpool.info; - return Ok(()); - } - hash_map::Entry::Vacant(entry) => entry.insert(zpool), - }; - info!(&self.log, "Storage manager processing zpool: {:#?}", pool.info); - - let size = ByteCount::try_from(pool.info.size()).map_err(|err| { - Error::BadPoolSize { name: pool_name.to_string(), err } - })?; - // Notify Nexus of the zpool. - self.add_zpool_notify(&pool, size).await; - Ok(()) - } - - // Attempts to add a dataset within a zpool, according to `request`. 
- async fn add_dataset( - &mut self, - resources: &StorageResources, - request: &NewFilesystemRequest, - ) -> Result { - info!(self.log, "add_dataset: {:?}", request); - let mut pools = resources.pools.lock().await; - let pool = pools - .get_mut(&request.dataset_name.pool().id()) - .ok_or_else(|| { - Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", - request.dataset_name.pool(), - )) - })?; - let dataset_name = DatasetName::new( - pool.name.clone(), - request.dataset_name.dataset().clone(), - ); - self.ensure_dataset(request.dataset_id, &dataset_name)?; - Ok(dataset_name) - } - - // Small wrapper around `Self::do_work_internal` that ensures we always - // emit info to the log when we exit. - async fn do_work( - &mut self, - resources: StorageResources, - ) -> Result<(), Error> { - // We queue U.2 sleds until the StorageKeyRequester is ready to use. - let mut queued_u2_drives = Some(HashSet::new()); - loop { - match self.do_work_internal(&resources, &mut queued_u2_drives).await - { - Ok(()) => { - info!(self.log, "StorageWorker exited successfully"); - return Ok(()); - } - Err(e) => { - warn!( - self.log, - "StorageWorker encountered unexpected error: {}", e - ); - // ... for now, keep trying. - } - } - } - } - - async fn do_work_internal( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) -> Result<(), Error> { - const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(5); - let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); - loop { - tokio::select! 
{ - _ = self.nexus_notifications.next(), - if !self.nexus_notifications.is_empty() => {}, - Some(request) = self.rx.recv() => { - // We want to queue failed requests related to the key manager - match self.handle_storage_worker_request( - resources, queued_u2_drives, request) - .await { - Err(Error::DiskError(_)) => { - // We already handle and log disk errors, no need to - // return here. - } - Err(e) => return Err(e), - Ok(()) => {} - } - } - _ = interval.tick(), if queued_u2_drives.is_some() && - KEY_MANAGER_READY.get().is_some()=> - { - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - } - } - - async fn handle_storage_worker_request( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - request: StorageWorkerRequest, - ) -> Result<(), Error> { - use StorageWorkerRequest::*; - match request { - AddDisk(disk) => { - self.upsert_disk(&resources, disk, queued_u2_drives).await?; - } - AddSyntheticDisk(zpool_name) => { - self.upsert_synthetic_disk( - &resources, - zpool_name, - queued_u2_drives, - ) - .await?; - } - RemoveDisk(disk) => { - self.delete_disk(&resources, disk).await?; - } - NewFilesystem(request) => { - let result = self.add_dataset(&resources, &request).await; - let _ = request.responder.send(result); - } - DisksChanged(disks) => { - self.ensure_using_exactly_these_disks( - &resources, - disks, - queued_u2_drives, - ) - .await?; - } - SetupUnderlayAccess(UnderlayRequest { underlay, responder }) => { - // If this is the first time establishing an - // underlay we should notify nexus of all existing - // disks and zpools. 
- // - // Instead of individual notifications, we should - // send a bulk notification as described in https:// - // github.com/oxidecomputer/omicron/issues/1917 - if self.underlay.lock().await.replace(underlay).is_none() { - self.notify_nexus_about_existing_resources(&resources) - .await?; - } - let _ = responder.send(Ok(())); - } - KeyManagerReady => { - let _ = KEY_MANAGER_READY.set(()); - self.upsert_queued_disks(resources, queued_u2_drives).await; - } - } - Ok(()) - } - - async fn upsert_queued_disks( - &mut self, - resources: &StorageResources, - queued_u2_drives: &mut Option>, - ) { - let queued = queued_u2_drives.take(); - if let Some(queued) = queued { - for disk in queued { - if let Some(saved) = queued_u2_drives { - // We already hit a transient error and recreated our queue. - // Add any remaining queued disks back on the queue so we - // can try again later. - saved.insert(disk); - } else { - match self.upsert_queued_disk(disk, resources).await { - Ok(()) => {} - Err((_, None)) => { - // We already logged this as a persistent error in - // `add_new_disk` or `add_new_synthetic_disk` - } - Err((_, Some(disk))) => { - // We already logged this as a transient error in - // `add_new_disk` or `add_new_synthetic_disk` - *queued_u2_drives = Some(HashSet::from([disk])); - } - } - } - } - } - if queued_u2_drives.is_none() { - info!(self.log, "upserted all queued disks"); - } else { - warn!( - self.log, - "failed to upsert all queued disks - will try again" - ); - } - } - - // Attempt to upsert a queued disk. Return the disk and error if the upsert - // fails due to a transient error. Examples of transient errors are key - // manager errors which indicate that there are not enough sleds available - // to unlock the rack. 
- async fn upsert_queued_disk( - &mut self, - disk: QueuedDiskCreate, - resources: &StorageResources, - ) -> Result<(), (Error, Option)> { - let mut temp: Option> = None; - let res = match disk { - QueuedDiskCreate::Real(disk) => { - self.upsert_disk(&resources, disk, &mut temp).await - } - QueuedDiskCreate::Synthetic(zpool_name) => { - self.upsert_synthetic_disk(&resources, zpool_name, &mut temp) - .await - } - }; - if let Some(mut disks) = temp.take() { - assert!(res.is_err()); - assert_eq!(disks.len(), 1); - return Err(( - res.unwrap_err(), - disks.drain().next().unwrap().into(), - )); - } - // Any error at this point is not transient. - // We don't requeue the disk. - res.map_err(|e| (e, None)) - } -} - -enum StorageWorkerRequest { - AddDisk(UnparsedDisk), - AddSyntheticDisk(ZpoolName), - RemoveDisk(UnparsedDisk), - DisksChanged(Vec), - NewFilesystem(NewFilesystemRequest), - SetupUnderlayAccess(UnderlayRequest), - KeyManagerReady, -} - -struct StorageManagerInner { - log: Logger, - - resources: StorageResources, - - tx: mpsc::Sender, - - // A handle to a worker which updates "pools". - task: JoinHandle>, -} - -/// A sled-local view of all attached storage. -#[derive(Clone)] -pub struct StorageManager { - inner: Arc, - zone_bundler: ZoneBundler, -} - -impl StorageManager { - /// Creates a new [`StorageManager`] which should manage local storage. 
- pub async fn new(log: &Logger, key_requester: StorageKeyRequester) -> Self { - let log = log.new(o!("component" => "StorageManager")); - let resources = StorageResources { - disks: Arc::new(Mutex::new(HashMap::new())), - pools: Arc::new(Mutex::new(HashMap::new())), - }; - let (tx, rx) = mpsc::channel(30); - - let zb_log = log.new(o!("component" => "ZoneBundler")); - let zone_bundler = - ZoneBundler::new(zb_log, resources.clone(), Default::default()); - - StorageManager { - inner: Arc::new(StorageManagerInner { - log: log.clone(), - resources: resources.clone(), - tx, - task: tokio::task::spawn(async move { - let dump_setup = Arc::new(DumpSetup::new(&log)); - let mut worker = StorageWorker { - log, - nexus_notifications: FuturesOrdered::new(), - rx, - underlay: Arc::new(Mutex::new(None)), - key_requester, - dump_setup, - }; - - worker.do_work(resources).await - }), - }), - zone_bundler, - } - } - - /// Return a reference to the object used to manage zone bundles. - /// - /// This can be cloned by other code wishing to create and manage their own - /// zone bundles. - pub fn zone_bundler(&self) -> &ZoneBundler { - &self.zone_bundler - } - - /// Ensures that the storage manager tracks exactly the provided disks. - /// - /// This acts similar to a batch [Self::upsert_disk] for all new disks, and - /// [Self::delete_disk] for all removed disks. - /// - /// If errors occur, an arbitrary "one" of them will be returned, but a - /// best-effort attempt to add all disks will still be attempted. - // Receiver implemented by [StorageWorker::ensure_using_exactly_these_disks] - pub async fn ensure_using_exactly_these_disks(&self, unparsed_disks: I) - where - I: IntoIterator, - { - self.inner - .tx - .send(StorageWorkerRequest::DisksChanged( - unparsed_disks.into_iter().collect::>(), - )) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send DisksChanged request"); - } - - /// Adds a disk and associated zpool to the storage manager. 
- // Receiver implemented by [StorageWorker::upsert_disk]. - pub async fn upsert_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Upserting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::AddDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddDisk request"); - } - - /// Removes a disk, if it's tracked by the storage manager, as well - /// as any associated zpools. - // Receiver implemented by [StorageWorker::delete_disk]. - pub async fn delete_disk(&self, disk: UnparsedDisk) { - info!(self.inner.log, "Deleting disk: {disk:?}"); - self.inner - .tx - .send(StorageWorkerRequest::RemoveDisk(disk)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send RemoveDisk request"); - } - - /// Adds a synthetic zpool to the storage manager. - // Receiver implemented by [StorageWorker::upsert_synthetic_disk]. - pub async fn upsert_synthetic_disk(&self, name: ZpoolName) { - self.inner - .tx - .send(StorageWorkerRequest::AddSyntheticDisk(name)) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send AddSyntheticDisk request"); - } - - /// Adds underlay access to the storage manager. 
- pub async fn setup_underlay_access( - &self, - underlay: UnderlayAccess, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(StorageWorkerRequest::SetupUnderlayAccess(UnderlayRequest { - underlay, - responder: tx, - })) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send SetupUnderlayAccess request"); - rx.await.expect("Failed to await underlay setup") - } - - pub async fn get_zpools(&self) -> Result, Error> { - let disks = self.inner.resources.disks.lock().await; - let pools = self.inner.resources.pools.lock().await; - - let mut zpools = Vec::with_capacity(pools.len()); - - for (id, pool) in pools.iter() { - let disk_identity = &pool.parent; - let disk_type = if let Some(disk) = disks.get(&disk_identity) { - disk.variant().into() - } else { - // If the zpool claims to be attached to a disk that we - // don't know about, that's an error. - return Err(Error::ZpoolNotFound( - format!("zpool: {id} claims to be from unknown disk: {disk_identity:#?}") - )); - }; - zpools.push(crate::params::Zpool { id: *id, disk_type }); - } - - Ok(zpools) - } - - pub async fn upsert_filesystem( - &self, - dataset_id: Uuid, - dataset_name: DatasetName, - ) -> Result { - let (tx, rx) = oneshot::channel(); - let request = - NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; - - self.inner - .tx - .send(StorageWorkerRequest::NewFilesystem(request)) - .await - .map_err(|e| e.to_string()) - .expect("Storage worker bug (not alive)"); - let dataset_name = rx.await.expect( - "Storage worker bug (dropped responder without responding)", - )?; - - Ok(dataset_name) - } - - /// Inform the storage worker that the KeyManager is capable of retrieving - /// secrets now and that any queued disks can be upserted. 
- pub async fn key_manager_ready(&self) { - info!(self.inner.log, "KeyManger ready"); - self.inner - .tx - .send(StorageWorkerRequest::KeyManagerReady) - .await - .map_err(|e| e.to_string()) - .expect("Failed to send KeyManagerReady request"); - } - - pub fn resources(&self) -> &StorageResources { - &self.inner.resources - } -} - -impl Drop for StorageManagerInner { - fn drop(&mut self) { - // NOTE: Ideally, with async drop, we'd await completion of the worker - // somehow. - // - // Without that option, we instead opt to simply cancel the worker - // task to ensure it does not remain alive beyond the StorageManager - // itself. - self.task.abort(); - } -} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs new file mode 100644 index 0000000000..0c9b287396 --- /dev/null +++ b/sled-agent/src/storage_monitor.rs @@ -0,0 +1,373 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A task that listens for storage events from [`sled_storage::manager::StorageManager`] +//! and dispatches them to other parts of the bootstrap agent and sled agent +//! code.
+ +use crate::dump_setup::DumpSetup; +use crate::nexus::{ConvertInto, NexusClientWithResolver}; +use derive_more::From; +use futures::stream::FuturesOrdered; +use futures::FutureExt; +use futures::StreamExt; +use nexus_client::types::PhysicalDiskDeleteRequest; +use nexus_client::types::PhysicalDiskPutRequest; +use nexus_client::types::ZpoolPutRequest; +use omicron_common::api::external::ByteCount; +use omicron_common::backoff; +use omicron_common::disk::DiskIdentity; +use sled_storage::manager::StorageHandle; +use sled_storage::pool::Pool; +use sled_storage::resources::StorageResources; +use slog::Logger; +use std::fmt::Debug; +use std::pin::Pin; +use tokio::sync::oneshot; +use uuid::Uuid; + +#[derive(From, Clone, Debug)] +enum NexusDiskRequest { + Put(PhysicalDiskPutRequest), + Delete(PhysicalDiskDeleteRequest), +} + +/// Describes the access to the underlay used by the StorageManager. +#[derive(Clone)] +pub struct UnderlayAccess { + pub nexus_client: NexusClientWithResolver, + pub sled_id: Uuid, +} + +impl Debug for UnderlayAccess { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UnderlayAccess") + .field("sled_id", &self.sled_id) + .finish() + } +} + +pub struct StorageMonitor { + log: Logger, + storage_manager: StorageHandle, + + // Receive a onetime notification that the underlay is available + underlay_available_rx: oneshot::Receiver, + + // A cached copy of the `StorageResources` from the last update + storage_resources: StorageResources, + + // Ability to access the underlay network + underlay: Option, + + // A queue for sending nexus notifications in order + nexus_notifications: FuturesOrdered, + + // Invokes dumpadm(8) and savecore(8) when new disks are encountered + dump_setup: DumpSetup, +} + +impl StorageMonitor { + pub fn new( + log: &Logger, + storage_manager: StorageHandle, + ) -> (StorageMonitor, oneshot::Sender) { + let (underlay_available_tx, underlay_available_rx) = oneshot::channel(); + let 
storage_resources = StorageResources::default(); + let dump_setup = DumpSetup::new(&log); + let log = log.new(o!("component" => "StorageMonitor")); + ( + StorageMonitor { + log, + storage_manager, + underlay_available_rx, + storage_resources, + underlay: None, + nexus_notifications: FuturesOrdered::new(), + dump_setup, + }, + underlay_available_tx, + ) + } + + /// Run the main receive loop of the `StorageMonitor` + /// + /// This should be spawned into a tokio task + pub async fn run(mut self) { + loop { + tokio::select! { + res = self.nexus_notifications.next(), + if !self.nexus_notifications.is_empty() => + { + match res { + Some(Ok(s)) => { + info!(self.log, "Nexus notification complete: {s}"); + } + e => error!(self.log, "Nexus notification error: {e:?}") + } + } + resources = self.storage_manager.wait_for_changes() => { + info!( + self.log, + "Received storage manager update"; + "resources" => ?resources + ); + self.handle_resource_update(resources).await; + } + Ok(underlay) = &mut self.underlay_available_rx, + if self.underlay.is_none() => + { + let sled_id = underlay.sled_id; + info!( + self.log, + "Underlay Available"; "sled_id" => %sled_id + ); + self.underlay = Some(underlay); + self.notify_nexus_about_existing_resources(sled_id).await; + } + } + } + } + + /// When the underlay becomes available, we need to notify nexus about any + /// discovered disks and pools, since we don't attempt to notify until there + /// is an underlay available. 
+ async fn notify_nexus_about_existing_resources(&mut self, sled_id: Uuid) { + let current = StorageResources::default(); + let updated = &self.storage_resources; + let nexus_updates = + compute_resource_diffs(&self.log, &sled_id, &current, updated); + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + } + + async fn handle_resource_update( + &mut self, + updated_resources: StorageResources, + ) { + // If the underlay isn't available, we only record the changes. Nexus + // isn't yet reachable to notify. + if self.underlay.is_some() { + let nexus_updates = compute_resource_diffs( + &self.log, + &self.underlay.as_ref().unwrap().sled_id, + &self.storage_resources, + &updated_resources, + ); + + for put in nexus_updates.disk_puts { + self.physical_disk_notify(put.into()).await; + } + for del in nexus_updates.disk_deletes { + self.physical_disk_notify(del.into()).await; + } + for (pool, put) in nexus_updates.zpool_puts { + self.add_zpool_notify(pool, put).await; + } + } + self.dump_setup.update_dumpdev_setup(updated_resources.disks()).await; + + // Save the updated `StorageResources` + self.storage_resources = updated_resources; + } + + // Adds a "notification to nexus" to `self.nexus_notifications`, informing it + // about the addition/removal of a physical disk to this sled.
+ async fn physical_disk_notify(&mut self, disk: NexusDiskRequest) { + let underlay = self.underlay.as_ref().unwrap().clone(); + let disk2 = disk.clone(); + let notify_nexus = move || { + let underlay = underlay.clone(); + let disk = disk.clone(); + async move { + let nexus_client = underlay.nexus_client.client().clone(); + + match &disk { + NexusDiskRequest::Put(request) => { + nexus_client + .physical_disk_put(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + NexusDiskRequest::Delete(request) => { + nexus_client + .physical_disk_delete(&request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + } + } + let msg = format!("{:?}", disk); + Ok(msg) + } + }; + + let log = self.log.clone(); + // This notification is often invoked before Nexus has started + // running, so avoid flagging any errors as concerning until some + // time has passed. + let log_post_failure = move |err, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err + ); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about {disk2:?}"; + "err" => ?err, + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } + + // Adds a "notification to nexus" to `nexus_notifications`, + // informing it about the addition of `pool_id` to this sled. 
+ async fn add_zpool_notify( + &mut self, + pool: Pool, + zpool_request: ZpoolPutRequest, + ) { + let pool_id = pool.name.id(); + let underlay = self.underlay.as_ref().unwrap().clone(); + + let notify_nexus = move || { + let underlay = underlay.clone(); + let zpool_request = zpool_request.clone(); + async move { + let sled_id = underlay.sled_id; + let nexus_client = underlay.nexus_client.client().clone(); + nexus_client + .zpool_put(&sled_id, &pool_id, &zpool_request) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?; + let msg = format!("{:?}", zpool_request); + Ok(msg) + } + }; + + let log = self.log.clone(); + let name = pool.name.clone(); + let disk = pool.parent.clone(); + let log_post_failure = move |err, call_count, total_duration| { + if call_count == 0 { + info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err); + } else if total_duration > std::time::Duration::from_secs(30) { + warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; + "err" => ?err, + "total duration" => ?total_duration); + } + }; + self.nexus_notifications.push_back( + backoff::retry_notify_ext( + backoff::retry_policy_internal_service_aggressive(), + notify_nexus, + log_post_failure, + ) + .boxed(), + ); + } +} + +// The type of a future which is used to send a notification to Nexus. 
+type NotifyFut = + Pin> + Send>>; + +struct NexusUpdates { + disk_puts: Vec, + disk_deletes: Vec, + zpool_puts: Vec<(Pool, ZpoolPutRequest)>, +} + +fn compute_resource_diffs( + log: &Logger, + sled_id: &Uuid, + current: &StorageResources, + updated: &StorageResources, +) -> NexusUpdates { + let mut disk_puts = vec![]; + let mut disk_deletes = vec![]; + let mut zpool_puts = vec![]; + + let mut put_pool = |disk_id: &DiskIdentity, updated_pool: &Pool| { + match ByteCount::try_from(updated_pool.info.size()) { + Ok(size) => zpool_puts.push(( + updated_pool.clone(), + ZpoolPutRequest { + size: size.into(), + disk_model: disk_id.model.clone(), + disk_serial: disk_id.serial.clone(), + disk_vendor: disk_id.vendor.clone(), + }, + )), + Err(err) => { + error!( + log, + "Error parsing pool size"; + "name" => updated_pool.name.to_string(), + "err" => ?err); + } + } + }; + + // Diff the existing resources with the update to see what has changed + // This loop finds disks and pools that were modified or deleted + for (disk_id, (disk, pool)) in current.disks().iter() { + match updated.disks().get(disk_id) { + Some((updated_disk, updated_pool)) => { + if disk != updated_disk { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().convert(), + }); + } + if pool != updated_pool { + put_pool(disk_id, updated_pool); + } + } + None => disk_deletes.push(PhysicalDiskDeleteRequest { + model: disk_id.model.clone(), + serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + sled_id: *sled_id, + }), + } + } + + // Diff the existing resources with the update to see what has changed + // This loop finds new disks and pools + for (disk_id, (updated_disk, updated_pool)) in updated.disks().iter() { + if !current.disks().contains_key(disk_id) { + disk_puts.push(PhysicalDiskPutRequest { + sled_id: *sled_id, + model: disk_id.model.clone(), + 
serial: disk_id.serial.clone(), + vendor: disk_id.vendor.clone(), + variant: updated_disk.variant().convert(), + }); + put_pool(disk_id, updated_pool); + } + } + + NexusUpdates { disk_puts, disk_deletes, zpool_puts } +} diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 91cb850df4..70b9da7708 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -6,7 +6,6 @@ //! Tools for collecting and inspecting service bundles for zones. -use crate::storage_manager::StorageResources; use anyhow::anyhow; use anyhow::Context; use camino::FromPathBufError; @@ -33,6 +32,8 @@ use illumos_utils::zone::AdmError; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_storage::dataset::U2_DEBUG_DATASET; +use sled_storage::manager::StorageHandle; use slog::Logger; use std::cmp::Ord; use std::cmp::Ordering; @@ -221,20 +222,12 @@ pub struct ZoneBundler { inner: Arc>, // Channel for notifying the cleanup task that it should reevaluate. notify_cleanup: Arc, - // Tokio task handle running the period cleanup operation. - cleanup_task: Arc>, -} - -impl Drop for ZoneBundler { - fn drop(&mut self) { - self.cleanup_task.abort(); - } } // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. struct Inner { - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -262,7 +255,8 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. 
async fn bundle_directories(&self) -> Vec { - let expected = self.resources.all_zone_bundle_directories().await; + let resources = self.storage_handle.get_latest_resources().await; + let expected = resources.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -322,11 +316,11 @@ impl ZoneBundler { /// Create a new zone bundler. /// /// This creates an object that manages zone bundles on the system. It can - /// be used to create bundles from running zones, and runs a period task to - /// clean them up to free up space. + /// be used to create bundles from running zones, and runs a periodic task + /// to clean them up to free up space. pub fn new( log: Logger, - resources: StorageResources, + storage_handle: StorageHandle, cleanup_context: CleanupContext, ) -> Self { // This is compiled out in tests because there's no way to set our @@ -336,17 +330,19 @@ impl ZoneBundler { .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - resources, + storage_handle, cleanup_context, last_cleanup_at: Instant::now(), })); let cleanup_log = log.new(slog::o!("component" => "auto-cleanup-task")); let notify_clone = notify_cleanup.clone(); let inner_clone = inner.clone(); - let cleanup_task = Arc::new(tokio::task::spawn( - Self::periodic_cleanup(cleanup_log, inner_clone, notify_clone), + tokio::task::spawn(Self::periodic_cleanup( + cleanup_log, + inner_clone, + notify_clone, )); - Self { log, inner, notify_cleanup, cleanup_task } + Self { log, inner, notify_cleanup } } /// Trigger an immediate cleanup of low-priority zone bundles. 
@@ -431,10 +427,9 @@ impl ZoneBundler { ) -> Result { let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let extra_log_dirs = inner - .resources - .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) - .await + let resources = inner.storage_handle.get_latest_resources().await; + let extra_log_dirs = resources + .all_u2_mountpoints(U2_DEBUG_DATASET) .into_iter() .collect(); let context = ZoneBundleContext { cause, storage_dirs, extra_log_dirs }; @@ -2165,7 +2160,6 @@ mod illumos_tests { use super::CleanupPeriod; use super::PriorityOrder; use super::StorageLimit; - use super::StorageResources; use super::Utf8Path; use super::Utf8PathBuf; use super::Uuid; @@ -2178,6 +2172,10 @@ mod illumos_tests { use anyhow::Context; use chrono::TimeZone; use chrono::Utc; + use illumos_utils::zpool::ZpoolName; + use sled_storage::disk::RawDisk; + use sled_storage::disk::SyntheticDisk; + use sled_storage::manager::{FakeStorageManager, StorageHandle}; use slog::Drain; use slog::Logger; use tokio::process::Command; @@ -2219,22 +2217,43 @@ mod illumos_tests { // system, that creates the directories implied by the `StorageResources` // expected disk structure. struct ResourceWrapper { - resources: StorageResources, + storage_handle: StorageHandle, dirs: Vec, } + async fn setup_storage() -> StorageHandle { + let (manager, handle) = FakeStorageManager::new(); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // These must be internal zpools + for _ in 0..2 { + let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let internal_disk: RawDisk = + SyntheticDisk::new(internal_zpool_name.clone()).into(); + handle.upsert_disk(internal_disk).await; + } + handle + } + impl ResourceWrapper { // Create new storage resources, and mount fake datasets at the required // locations. 
async fn new() -> Self { - let resources = StorageResources::new_for_test(); - let dirs = resources.all_zone_bundle_directories().await; + // Spawn the storage related tasks required for testing and insert + // synthetic disks. + let storage_handle = setup_storage().await; + let resources = storage_handle.get_latest_resources().await; + let dirs = resources.all_zone_bundle_directories(); for d in dirs.iter() { let id = d.components().nth(3).unwrap().as_str().parse().unwrap(); create_test_dataset(&id, d).await.unwrap(); } - Self { resources, dirs } + Self { storage_handle, dirs } } } @@ -2261,8 +2280,11 @@ mod illumos_tests { let log = test_logger(); let context = CleanupContext::default(); let resource_wrapper = ResourceWrapper::new().await; - let bundler = - ZoneBundler::new(log, resource_wrapper.resources.clone(), context); + let bundler = ZoneBundler::new( + log, + resource_wrapper.storage_handle.clone(), + context, + ); Ok(CleanupTestContext { resource_wrapper, context, bundler }) } diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 14ae15996b..36ba633067 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -11,10 +11,8 @@ camino.workspace = true cfg-if.workspace = true futures.workspace = true illumos-utils.workspace = true -key-manager.workspace = true libc.workspace = true macaddr.workspace = true -nexus-client.workspace = true omicron-common.workspace = true rand.workspace = true schemars.workspace = true diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index e3078cbeea..44658658be 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -4,34 +4,14 @@ use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::fstyp::Fstyp; -use illumos_utils::zfs; -use illumos_utils::zfs::DestroyDatasetErrorVariant; -use illumos_utils::zfs::EncryptionDetails; -use illumos_utils::zfs::Keypath; -use illumos_utils::zfs::Mountpoint; -use illumos_utils::zfs::SizeDetails; -use illumos_utils::zfs::Zfs; use 
illumos_utils::zpool::Zpool; use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; -use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; -use rand::distributions::{Alphanumeric, DistString}; use slog::Logger; use slog::{info, warn}; -use std::sync::OnceLock; -use tokio::fs::{remove_file, File}; -use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; use uuid::Uuid; -/// This path is intentionally on a `tmpfs` to prevent copy-on-write behavior -/// and to ensure it goes away on power off. -/// -/// We want minimize the time the key files are in memory, and so we rederive -/// the keys and recreate the files on demand when creating and mounting -/// encrypted filesystems. We then zero them and unlink them. -pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; - cfg_if::cfg_if! { if #[cfg(target_os = "illumos")] { use crate::illumos::*; @@ -41,7 +21,7 @@ cfg_if::cfg_if! { } #[derive(Debug, thiserror::Error)] -pub enum DiskError { +pub enum PooledDiskError { #[error("Cannot open {path} due to {error}")] IoError { path: Utf8PathBuf, error: std::io::Error }, #[error("Failed to open partition at {path} due to {error}")] @@ -51,10 +31,6 @@ pub enum DiskError { #[error("Requested partition {partition:?} not found on device {path}")] NotFound { path: Utf8PathBuf, partition: Partition }, #[error(transparent)] - DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), - #[error(transparent)] - EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), - #[error(transparent)] ZpoolCreate(#[from] illumos_utils::zpool::CreateError), #[error("Cannot import zpool: {0}")] ZpoolImport(illumos_utils::zpool::Error), @@ -62,18 +38,6 @@ pub enum DiskError { CannotFormatMissingDevPath { path: Utf8PathBuf }, #[error("Formatting M.2 devices is not yet implemented")] CannotFormatM2NotImplemented, - #[error("KeyManager error: {0}")] - KeyManager(#[from] key_manager::Error), - #[error("Missing StorageKeyRequester when creating U.2 
disk")] - MissingStorageKeyRequester, - #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] - CannotParseEpochProperty(String), - #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] - CannotSetAgentProperty { - dataset: String, - #[source] - err: Box, - }, } /// A partition (or 'slice') of a disk. @@ -126,17 +90,17 @@ impl DiskPaths { } // Finds the first 'variant' partition, and returns the path to it. - fn partition_device_path( + pub fn partition_device_path( &self, partitions: &[Partition], expected_partition: Partition, raw: bool, - ) -> Result { + ) -> Result { for (index, partition) in partitions.iter().enumerate() { if &expected_partition == partition { let path = self.partition_path(index, raw).ok_or_else(|| { - DiskError::NotFound { + PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, } @@ -144,7 +108,7 @@ impl DiskPaths { return Ok(path); } } - Err(DiskError::NotFound { + Err(PooledDiskError::NotFound { path: self.devfs_path.clone(), partition: expected_partition, }) @@ -154,9 +118,9 @@ impl DiskPaths { /// A disk which has been observed by monitoring hardware. /// /// No guarantees are made about the partitions which exist within this disk. -/// This exists as a distinct entity from [Disk] because it may be desirable to -/// monitor for hardware in one context, and conform disks to partition layouts -/// in a different context. +/// This exists as a distinct entity from `Disk` in `sled-storage` because it +/// may be desirable to monitor for hardware in one context, and conform disks +/// to partition layouts in a different context. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct UnparsedDisk { paths: DiskPaths, @@ -202,127 +166,34 @@ impl UnparsedDisk { } } -/// A physical disk conforming to the expected partition layout. 
+/// A physical disk that is partitioned to contain exactly one zpool +/// +/// A PooledDisk relies on hardware specific information to be constructed +/// and is the highest level disk structure in the `sled-hardware` package. +/// The `sled-storage` package contains `Disk`s whose zpool and datasets can be +/// manipulated. This separation exists to remove the hardware dependent logic +/// from the ZFS related logic which can also operate on file backed zpools. +/// Doing things this way allows us to not put higher level concepts like +/// storage keys into this hardware related package. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Disk { - paths: DiskPaths, - slot: i64, - variant: DiskVariant, - identity: DiskIdentity, - is_boot_disk: bool, - partitions: Vec, - +pub struct PooledDisk { + pub paths: DiskPaths, + pub slot: i64, + pub variant: DiskVariant, + pub identity: DiskIdentity, + pub is_boot_disk: bool, + pub partitions: Vec, // This embeds the assumtion that there is exactly one parsed zpool per // disk. - zpool_name: ZpoolName, -} - -// Helper type for describing expected datasets and their optional quota. 
-#[derive(Clone, Copy, Debug)] -struct ExpectedDataset { - // Name for the dataset - name: &'static str, - // Optional quota, in _bytes_ - quota: Option, - // Identifies if the dataset should be deleted on boot - wipe: bool, - // Optional compression mode - compression: Option<&'static str>, + pub zpool_name: ZpoolName, } -impl ExpectedDataset { - const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } - } - - const fn quota(mut self, quota: usize) -> Self { - self.quota = Some(quota); - self - } - - const fn wipe(mut self) -> Self { - self.wipe = true; - self - } - - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); - self - } -} - -pub const INSTALL_DATASET: &'static str = "install"; -pub const CRASH_DATASET: &'static str = "crash"; -pub const CLUSTER_DATASET: &'static str = "cluster"; -pub const CONFIG_DATASET: &'static str = "config"; -pub const M2_DEBUG_DATASET: &'static str = "debug"; -pub const M2_BACKING_DATASET: &'static str = "backing"; -// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be -// tuned as needed. -pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); -// ditto. -pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); -// passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; - -// U.2 datasets live under the encrypted dataset and inherit encryption -pub const ZONE_DATASET: &'static str = "crypt/zone"; -pub const DUMP_DATASET: &'static str = "crypt/debug"; -pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; - -// This is the root dataset for all U.2 drives. Encryption is inherited. 
-pub const CRYPT_DATASET: &'static str = "crypt"; - -const U2_EXPECTED_DATASET_COUNT: usize = 2; -static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ - // Stores filesystems for zones - ExpectedDataset::new(ZONE_DATASET).wipe(), - // For storing full kernel RAM dumps - ExpectedDataset::new(DUMP_DATASET) - .quota(DUMP_DATASET_QUOTA) - .compression(DUMP_DATASET_COMPRESSION), -]; - -const M2_EXPECTED_DATASET_COUNT: usize = 6; -static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ - // Stores software images. - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(INSTALL_DATASET), - // Stores crash dumps. - ExpectedDataset::new(CRASH_DATASET), - // Backing store for OS data that should be persisted across reboots. - // Its children are selectively overlay mounted onto parts of the ramdisk - // root. - ExpectedDataset::new(M2_BACKING_DATASET), - // Stores cluster configuration information. - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(CLUSTER_DATASET), - // Stores configuration data, including: - // - What services should be launched on this sled - // - Information about how to initialize the Sled Agent - // - (For scrimlets) RSS setup information - // - // Should be duplicated to both M.2s. - ExpectedDataset::new(CONFIG_DATASET), - // Store debugging data, such as service bundles. - ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), -]; - -impl Disk { - /// Create a new Disk - /// - /// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a - /// `StorageKeyRequester` must be passed so that disk encryption can - /// be used. The `StorageManager` for the sled-agent always has a - /// `StorageKeyRequester` available, and so the only place we should pass - /// `None` is for the M.2s touched by the Installinator. 
- pub async fn new( +impl PooledDisk { + /// Create a new PooledDisk + pub fn new( log: &Logger, unparsed_disk: UnparsedDisk, - key_requester: Option<&StorageKeyRequester>, - ) -> Result { + ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; // Ensure the GPT has the right format. This does not necessarily @@ -340,13 +211,8 @@ impl Disk { )?; let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; - Self::ensure_zpool_ready( - log, - &zpool_name, - &unparsed_disk.identity, - key_requester, - ) - .await?; + Self::ensure_zpool_imported(log, &zpool_name)?; + Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; Ok(Self { paths: unparsed_disk.paths, @@ -359,29 +225,11 @@ impl Disk { }) } - pub async fn ensure_zpool_ready( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - Self::ensure_zpool_imported(log, &zpool_name)?; - Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; - Self::ensure_zpool_has_datasets( - log, - &zpool_name, - disk_identity, - key_requester, - ) - .await?; - Ok(()) - } - fn ensure_zpool_exists( log: &Logger, variant: DiskVariant, zpool_path: &Utf8Path, - ) -> Result { + ) -> Result { let zpool_name = match Fstyp::get_zpool(&zpool_path) { Ok(zpool_name) => zpool_name, Err(_) => { @@ -406,13 +254,13 @@ impl Disk { DiskVariant::M2 => ZpoolName::new_internal(Uuid::new_v4()), DiskVariant::U2 => ZpoolName::new_external(Uuid::new_v4()), }; - Zpool::create(zpool_name.clone(), &zpool_path)?; + Zpool::create(&zpool_name, &zpool_path)?; zpool_name } }; - Zpool::import(zpool_name.clone()).map_err(|e| { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(zpool_name) @@ -421,10 +269,10 @@ impl Disk { fn ensure_zpool_imported( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), 
DiskError> { - Zpool::import(zpool_name.clone()).map_err(|e| { + ) -> Result<(), PooledDiskError> { + Zpool::import(&zpool_name).map_err(|e| { warn!(log, "Failed to import zpool {zpool_name}: {e}"); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } @@ -432,7 +280,7 @@ impl Disk { fn ensure_zpool_failmode_is_continue( log: &Logger, zpool_name: &ZpoolName, - ) -> Result<(), DiskError> { + ) -> Result<(), PooledDiskError> { // Ensure failmode is set to `continue`. See // https://github.com/oxidecomputer/omicron/issues/2766 for details. The // short version is, each pool is only backed by one vdev. There is no @@ -445,214 +293,10 @@ impl Disk { log, "Failed to set failmode=continue on zpool {zpool_name}: {e}" ); - DiskError::ZpoolImport(e) + PooledDiskError::ZpoolImport(e) })?; Ok(()) } - - // Ensure that the zpool contains all the datasets we would like it to - // contain. - async fn ensure_zpool_has_datasets( - log: &Logger, - zpool_name: &ZpoolName, - disk_identity: &DiskIdentity, - key_requester: Option<&StorageKeyRequester>, - ) -> Result<(), DiskError> { - let (root, datasets) = match zpool_name.kind().into() { - DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), - DiskVariant::U2 => { - (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()) - } - }; - - let zoned = false; - let do_format = true; - - // Ensure the root encrypted filesystem exists - // Datasets below this in the hierarchy will inherit encryption - if let Some(dataset) = root { - let Some(key_requester) = key_requester else { - return Err(DiskError::MissingStorageKeyRequester); - }; - let mountpoint = zpool_name.dataset_mountpoint(dataset); - let keypath: Keypath = disk_identity.into(); - - let epoch = - if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") { - if let Ok(epoch) = epoch_str.parse::() { - epoch - } else { - return Err(DiskError::CannotParseEpochProperty( - dataset.to_string(), - )); - } - } else { - // We got an error trying to call 
`Zfs::get_oxide_value` - // which indicates that the dataset doesn't exist or there - // was a problem running the command. - // - // Note that `Zfs::get_oxide_value` will succeed even if - // the epoch is missing. `epoch_str` will show up as a dash - // (`-`) and will not parse into a `u64`. So we don't have - // to worry about that case here as it is handled above. - // - // If the error indicated that the command failed for some - // other reason, but the dataset actually existed, we will - // try to create the dataset below and that will fail. So - // there is no harm in just loading the latest secret here. - key_requester.load_latest_secret().await? - }; - - let key = - key_requester.get_key(epoch, disk_identity.clone()).await?; - - let mut keyfile = - KeyFile::create(keypath.clone(), key.expose_secret(), log) - .await - .map_err(|error| DiskError::IoError { - path: keypath.0.clone(), - error, - })?; - - let encryption_details = EncryptionDetails { keypath, epoch }; - - info!( - log, - "Ensuring encrypted filesystem: {} for epoch {}", - dataset, - epoch - ); - let result = Zfs::ensure_filesystem( - &format!("{}/{}", zpool_name, dataset), - Mountpoint::Path(mountpoint), - zoned, - do_format, - Some(encryption_details), - None, - None, - ); - - keyfile.zero_and_unlink().await.map_err(|error| { - DiskError::IoError { path: keyfile.path().0.clone(), error } - })?; - - result?; - }; - - for dataset in datasets.into_iter() { - let mountpoint = zpool_name.dataset_mountpoint(dataset.name); - let name = &format!("{}/{}", zpool_name, dataset.name); - - // Use a value that's alive for the duration of this sled agent - // to answer the question: should we wipe this disk, or have - // we seen it before? - // - // If this value comes from a prior iteration of the sled agent, - // we opt to remove the corresponding dataset. 
- static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); - let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { - Alphanumeric.sample_string(&mut rand::thread_rng(), 20) - }); - - if dataset.wipe { - match Zfs::get_oxide_value(name, "agent") { - Ok(v) if &v == agent_local_value => { - info!( - log, - "Skipping automatic wipe for dataset: {}", name - ); - } - Ok(_) | Err(_) => { - info!( - log, - "Automatically destroying dataset: {}", name - ); - Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it - // might not have been formatted yet. - if let DestroyDatasetErrorVariant::NotFound = - err.err - { - Ok(()) - } else { - Err(err) - } - })?; - } - } - } - - let encryption_details = None; - let size_details = Some(SizeDetails { - quota: dataset.quota, - compression: dataset.compression, - }); - Zfs::ensure_filesystem( - name, - Mountpoint::Path(mountpoint), - zoned, - do_format, - encryption_details, - size_details, - None, - )?; - - if dataset.wipe { - Zfs::set_oxide_value(name, "agent", agent_local_value) - .map_err(|err| DiskError::CannotSetAgentProperty { - dataset: name.clone(), - err: Box::new(err), - })?; - } - } - Ok(()) - } - - pub fn is_boot_disk(&self) -> bool { - self.is_boot_disk - } - - pub fn identity(&self) -> &DiskIdentity { - &self.identity - } - - pub fn variant(&self) -> DiskVariant { - self.variant - } - - pub fn devfs_path(&self) -> &Utf8PathBuf { - &self.paths.devfs_path - } - - pub fn zpool_name(&self) -> &ZpoolName { - &self.zpool_name - } - - pub fn boot_image_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::BootImage, - raw, - ) - } - - pub fn dump_device_devfs_path( - &self, - raw: bool, - ) -> Result { - self.paths.partition_device_path( - &self.partitions, - Partition::DumpDevice, - raw, - ) - } - - pub fn slot(&self) -> i64 { - self.slot - } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -671,56 +315,6 @@ impl 
From for DiskVariant { } } -/// A file that wraps a zfs encryption key. -/// -/// We put this in a RAM backed filesystem and zero and delete it when we are -/// done with it. Unfortunately we cannot do this inside `Drop` because there is no -/// equivalent async drop. -pub struct KeyFile { - path: Keypath, - file: File, - log: Logger, -} - -impl KeyFile { - pub async fn create( - path: Keypath, - key: &[u8; 32], - log: &Logger, - ) -> std::io::Result { - // TODO: fix this to not truncate - // We want to overwrite any existing contents. - // If we truncate we may leave dirty pages around - // containing secrets. - let mut file = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path.0) - .await?; - file.write_all(key).await?; - info!(log, "Created keyfile {}", path); - Ok(KeyFile { path, file, log: log.clone() }) - } - - /// These keyfiles live on a tmpfs and we zero the file so the data doesn't - /// linger on the page in memory. - /// - /// It'd be nice to `impl Drop for `KeyFile` and then call `zero` - /// from within the drop handler, but async `Drop` isn't supported. - pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { - let zeroes = [0u8; 32]; - let _ = self.file.seek(SeekFrom::Start(0)).await?; - self.file.write_all(&zeroes).await?; - info!(self.log, "Zeroed and unlinked keyfile {}", self.path); - remove_file(&self.path().0).await?; - Ok(()) - } - - pub fn path(&self) -> &Keypath { - &self.path - } -} - #[cfg(test)] mod test { use super::*; @@ -832,7 +426,7 @@ mod test { paths .partition_device_path(&[], Partition::ZfsPool, false) .expect_err("Should not have found partition"), - DiskError::NotFound { .. }, + PooledDiskError::NotFound { .. 
}, )); } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index c0145b75e8..19111c6cda 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -19,7 +19,6 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::sync::Mutex; use tokio::sync::broadcast; -use tokio::task::JoinHandle; use uuid::Uuid; mod gpt; @@ -589,11 +588,11 @@ async fn hardware_tracking_task( /// /// This structure provides interfaces for both querying and for receiving new /// events. +#[derive(Clone)] pub struct HardwareManager { log: Logger, inner: Arc>, tx: broadcast::Sender, - _worker: JoinHandle<()>, } impl HardwareManager { @@ -663,11 +662,11 @@ impl HardwareManager { let log2 = log.clone(); let inner2 = inner.clone(); let tx2 = tx.clone(); - let _worker = tokio::task::spawn(async move { + tokio::task::spawn(async move { hardware_tracking_task(log2, inner2, tx2).await }); - Ok(Self { log, inner, tx, _worker }) + Ok(Self { log, inner, tx }) } pub fn baseboard(&self) -> Baseboard { diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 950074bd3a..4b7e69057d 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -5,7 +5,7 @@ //! illumos-specific mechanisms for parsing disk info. 
use crate::illumos::gpt; -use crate::{DiskError, DiskPaths, DiskVariant, Partition}; +use crate::{DiskPaths, DiskVariant, Partition, PooledDiskError}; use camino::Utf8Path; use illumos_utils::zpool::ZpoolName; use slog::info; @@ -41,9 +41,9 @@ fn parse_partition_types( path: &Utf8Path, partitions: &Vec, expected_partitions: &[Partition; N], -) -> Result, DiskError> { +) -> Result, PooledDiskError> { if partitions.len() != N { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "Expected {} partitions, only saw {}", @@ -54,7 +54,7 @@ fn parse_partition_types( } for i in 0..N { if partitions[i].index() != i { - return Err(DiskError::BadPartitionLayout { + return Err(PooledDiskError::BadPartitionLayout { path: path.to_path_buf(), why: format!( "The {i}-th partition has index {}", @@ -80,7 +80,7 @@ pub fn ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { internal_ensure_partition_layout::(log, paths, variant) } @@ -90,7 +90,7 @@ fn internal_ensure_partition_layout( log: &Logger, paths: &DiskPaths, variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. This lets us peek at the GPT before // making too many assumptions about it. @@ -114,14 +114,16 @@ fn internal_ensure_partition_layout( let dev_path = if let Some(dev_path) = &paths.dev_path { dev_path } else { - return Err(DiskError::CannotFormatMissingDevPath { path }); + return Err(PooledDiskError::CannotFormatMissingDevPath { + path, + }); }; match variant { DiskVariant::U2 => { info!(log, "Formatting zpool on disk {}", paths.devfs_path); // If a zpool does not already exist, create one. 
let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - Zpool::create(zpool_name, dev_path)?; + Zpool::create(&zpool_name, dev_path)?; return Ok(vec![Partition::ZfsPool]); } DiskVariant::M2 => { @@ -129,12 +131,12 @@ fn internal_ensure_partition_layout( // the expected partitions? Or would it be wiser to infer // that this indicates an unexpected error conditions that // needs mitigation? - return Err(DiskError::CannotFormatM2NotImplemented); + return Err(PooledDiskError::CannotFormatM2NotImplemented); } } } Err(err) => { - return Err(DiskError::Gpt { + return Err(PooledDiskError::Gpt { path, error: anyhow::Error::new(err), }); @@ -197,7 +199,7 @@ mod test { DiskVariant::U2, ); match result { - Err(DiskError::CannotFormatMissingDevPath { .. }) => {} + Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} _ => panic!("Should have failed with a missing dev path error"), } @@ -373,7 +375,7 @@ mod test { DiskVariant::M2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. } )); logctx.cleanup_successful(); @@ -398,7 +400,7 @@ mod test { DiskVariant::U2, ) .expect_err("Should have failed parsing empty GPT"), - DiskError::BadPartitionLayout { .. } + PooledDiskError::BadPartitionLayout { .. 
} )); logctx.cleanup_successful(); diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 654dfd59d9..2e3fd4a576 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -163,13 +163,3 @@ impl std::fmt::Display for Baseboard { } } } - -impl From for nexus_client::types::Baseboard { - fn from(b: Baseboard) -> nexus_client::types::Baseboard { - nexus_client::types::Baseboard { - serial_number: b.identifier().to_string(), - part_number: b.model().to_string(), - revision: b.revision(), - } - } -} diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 6e36330df0..d8372dd8aa 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -2,7 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::disk::{DiskError, DiskPaths, DiskVariant, Partition, UnparsedDisk}; +use crate::disk::{ + DiskPaths, DiskVariant, Partition, PooledDiskError, UnparsedDisk, +}; use crate::{Baseboard, SledMode}; use slog::Logger; use std::collections::HashSet; @@ -16,6 +18,7 @@ use tokio::sync::broadcast; /// /// If you're actually trying to run the Sled Agent on non-illumos platforms, /// use the simulated sled agent, which does not attempt to abstract hardware. 
+#[derive(Clone)] pub struct HardwareManager {} impl HardwareManager { @@ -56,7 +59,7 @@ pub fn ensure_partition_layout( _log: &Logger, _paths: &DiskPaths, _variant: DiskVariant, -) -> Result, DiskError> { +) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml new file mode 100644 index 0000000000..cb3a790631 --- /dev/null +++ b/sled-storage/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "sled-storage" +version = "0.1.0" +edition = "2021" + +[dependencies] +async-trait.workspace = true +camino.workspace = true +cfg-if.workspace = true +derive_more.workspace = true +glob.workspace = true +illumos-utils.workspace = true +key-manager.workspace = true +omicron-common.workspace = true +rand.workspace = true +schemars = { workspace = true, features = [ "chrono", "uuid1" ] } +serde.workspace = true +serde_json.workspace = true +sled-hardware.workspace = true +slog.workspace = true +thiserror.workspace = true +tokio.workspace = true +uuid.workspace = true +omicron-workspace-hack.workspace = true + +[dev-dependencies] +illumos-utils = { workspace = true, features = ["tmp_keypath", "testing"] } +omicron-test-utils.workspace = true +camino-tempfile.workspace = true + +[features] +# Quotas and the like can be shrunk via this feature +testing = [] diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs new file mode 100644 index 0000000000..a2878af7f6 --- /dev/null +++ b/sled-storage/src/dataset.rs @@ -0,0 +1,379 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
ZFS dataset related functionality + +use crate::keyfile::KeyFile; +use camino::Utf8PathBuf; +use cfg_if::cfg_if; +use illumos_utils::zfs::{ + self, DestroyDatasetErrorVariant, EncryptionDetails, Keypath, Mountpoint, + SizeDetails, Zfs, +}; +use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use rand::distributions::{Alphanumeric, DistString}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware::DiskVariant; +use slog::{info, Logger}; +use std::sync::OnceLock; + +pub const INSTALL_DATASET: &'static str = "install"; +pub const CRASH_DATASET: &'static str = "crash"; +pub const CLUSTER_DATASET: &'static str = "cluster"; +pub const CONFIG_DATASET: &'static str = "config"; +pub const M2_DEBUG_DATASET: &'static str = "debug"; +pub const M2_BACKING_DATASET: &'static str = "backing"; + +cfg_if! { + if #[cfg(any(test, feature = "testing"))] { + // Tuned for zone_bundle tests + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 10); + } else { + // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be + // tuned as needed. + pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); + } +} +// TODO-correctness: This value of 100GiB is a pretty wild guess, and should be +// tuned as needed. +pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); +// passed to zfs create -o compression= +pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; + +// U.2 datasets live under the encrypted dataset and inherit encryption +pub const ZONE_DATASET: &'static str = "crypt/zone"; +pub const DUMP_DATASET: &'static str = "crypt/debug"; +pub const U2_DEBUG_DATASET: &'static str = "crypt/debug"; + +// This is the root dataset for all U.2 drives. Encryption is inherited. 
+pub const CRYPT_DATASET: &'static str = "crypt"; + +const U2_EXPECTED_DATASET_COUNT: usize = 2; +static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ + // Stores filesystems for zones + ExpectedDataset::new(ZONE_DATASET).wipe(), + // For storing full kernel RAM dumps + ExpectedDataset::new(DUMP_DATASET) + .quota(DUMP_DATASET_QUOTA) + .compression(DUMP_DATASET_COMPRESSION), +]; + +const M2_EXPECTED_DATASET_COUNT: usize = 6; +static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ + // Stores software images. + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(INSTALL_DATASET), + // Stores crash dumps. + ExpectedDataset::new(CRASH_DATASET), + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), + // Stores cluster configuration information. + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(CLUSTER_DATASET), + // Stores configuration data, including: + // - What services should be launched on this sled + // - Information about how to initialize the Sled Agent + // - (For scrimlets) RSS setup information + // + // Should be duplicated to both M.2s. + ExpectedDataset::new(CONFIG_DATASET), + // Store debugging data, such as service bundles. + ExpectedDataset::new(M2_DEBUG_DATASET).quota(DEBUG_DATASET_QUOTA), +]; + +// Helper type for describing expected datasets and their optional quota. 
+#[derive(Clone, Copy, Debug)] +struct ExpectedDataset { + // Name for the dataset + name: &'static str, + // Optional quota, in _bytes_ + quota: Option, + // Identifies if the dataset should be deleted on boot + wipe: bool, + // Optional compression mode + compression: Option<&'static str>, +} + +impl ExpectedDataset { + const fn new(name: &'static str) -> Self { + ExpectedDataset { name, quota: None, wipe: false, compression: None } + } + + const fn quota(mut self, quota: usize) -> Self { + self.quota = Some(quota); + self + } + + const fn wipe(mut self) -> Self { + self.wipe = true; + self + } + + const fn compression(mut self, compression: &'static str) -> Self { + self.compression = Some(compression); + self + } +} + +/// The type of a dataset, and an auxiliary information necessary +/// to successfully launch a zone managing the associated data. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum DatasetKind { + CockroachDb, + Crucible, + Clickhouse, + ClickhouseKeeper, + ExternalDns, + InternalDns, +} + +impl std::fmt::Display for DatasetKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use DatasetKind::*; + let s = match self { + Crucible => "crucible", + CockroachDb { .. } => "cockroachdb", + Clickhouse => "clickhouse", + ClickhouseKeeper => "clickhouse_keeper", + ExternalDns { .. } => "external_dns", + InternalDns { .. } => "internal_dns", + }; + write!(f, "{}", s) + } +} + +#[derive( + Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, +)] +pub struct DatasetName { + // A unique identifier for the Zpool on which the dataset is stored. + pool_name: ZpoolName, + // A name for the dataset within the Zpool. 
+ kind: DatasetKind, +} + +impl DatasetName { + pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { + Self { pool_name, kind } + } + + pub fn pool(&self) -> &ZpoolName { + &self.pool_name + } + + pub fn dataset(&self) -> &DatasetKind { + &self.kind + } + + pub fn full(&self) -> String { + format!("{}/{}", self.pool_name, self.kind) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DatasetError { + #[error("Cannot open {path} due to {error}")] + IoError { path: Utf8PathBuf, error: std::io::Error }, + #[error(transparent)] + DestroyFilesystem(#[from] illumos_utils::zfs::DestroyDatasetError), + #[error(transparent)] + EnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + #[error("KeyManager error: {0}")] + KeyManager(#[from] key_manager::Error), + #[error("Missing StorageKeyRequester when creating U.2 disk")] + MissingStorageKeyRequester, + #[error("Encrypted filesystem '{0}' missing 'oxide:epoch' property")] + CannotParseEpochProperty(String), + #[error("Encrypted dataset '{dataset}' cannot set 'oxide:agent' property: {err}")] + CannotSetAgentProperty { + dataset: String, + #[source] + err: Box, + }, +} + +/// Ensure that the zpool contains all the datasets we would like it to +/// contain. +/// +/// WARNING: In all cases where a U.2 is a possible `DiskVariant`, a +/// `StorageKeyRequester` must be passed so that disk encryption can +/// be used. The `StorageManager` for the sled-agent always has a +/// `StorageKeyRequester` available, and so the only place we should pass +/// `None` is for the M.2s touched by the Installinator. 
+pub(crate) async fn ensure_zpool_has_datasets( + log: &Logger, + zpool_name: &ZpoolName, + disk_identity: &DiskIdentity, + key_requester: Option<&StorageKeyRequester>, +) -> Result<(), DatasetError> { + let (root, datasets) = match zpool_name.kind().into() { + DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), + DiskVariant::U2 => (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()), + }; + + let zoned = false; + let do_format = true; + + // Ensure the root encrypted filesystem exists + // Datasets below this in the hierarchy will inherit encryption + if let Some(dataset) = root { + let Some(key_requester) = key_requester else { + return Err(DatasetError::MissingStorageKeyRequester); + }; + let mountpoint = zpool_name.dataset_mountpoint(dataset); + let keypath: Keypath = disk_identity.into(); + + let epoch = if let Ok(epoch_str) = + Zfs::get_oxide_value(dataset, "epoch") + { + if let Ok(epoch) = epoch_str.parse::() { + epoch + } else { + return Err(DatasetError::CannotParseEpochProperty( + dataset.to_string(), + )); + } + } else { + // We got an error trying to call `Zfs::get_oxide_value` + // which indicates that the dataset doesn't exist or there + // was a problem running the command. + // + // Note that `Zfs::get_oxide_value` will succeed even if + // the epoch is missing. `epoch_str` will show up as a dash + // (`-`) and will not parse into a `u64`. So we don't have + // to worry about that case here as it is handled above. + // + // If the error indicated that the command failed for some + // other reason, but the dataset actually existed, we will + // try to create the dataset below and that will fail. So + // there is no harm in just loading the latest secret here. 
+ info!(log, "Loading latest secret"; "disk_id"=>#?disk_identity); + let epoch = key_requester.load_latest_secret().await?; + info!(log, "Loaded latest secret"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + epoch + }; + + info!(log, "Retrieving key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + let key = key_requester.get_key(epoch, disk_identity.clone()).await?; + info!(log, "Got key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + + let mut keyfile = + KeyFile::create(keypath.clone(), key.expose_secret(), log) + .await + .map_err(|error| DatasetError::IoError { + path: keypath.0.clone(), + error, + })?; + + let encryption_details = EncryptionDetails { keypath, epoch }; + + info!( + log, + "Ensuring encrypted filesystem: {} for epoch {}", dataset, epoch + ); + let result = Zfs::ensure_filesystem( + &format!("{}/{}", zpool_name, dataset), + Mountpoint::Path(mountpoint), + zoned, + do_format, + Some(encryption_details), + None, + None, + ); + + keyfile.zero_and_unlink().await.map_err(|error| { + DatasetError::IoError { path: keyfile.path().0.clone(), error } + })?; + + result?; + }; + + for dataset in datasets.into_iter() { + let mountpoint = zpool_name.dataset_mountpoint(dataset.name); + let name = &format!("{}/{}", zpool_name, dataset.name); + + // Use a value that's alive for the duration of this sled agent + // to answer the question: should we wipe this disk, or have + // we seen it before? + // + // If this value comes from a prior iteration of the sled agent, + // we opt to remove the corresponding dataset. 
+ static AGENT_LOCAL_VALUE: OnceLock = OnceLock::new(); + let agent_local_value = AGENT_LOCAL_VALUE.get_or_init(|| { + Alphanumeric.sample_string(&mut rand::thread_rng(), 20) + }); + + if dataset.wipe { + match Zfs::get_oxide_value(name, "agent") { + Ok(v) if &v == agent_local_value => { + info!(log, "Skipping automatic wipe for dataset: {}", name); + } + Ok(_) | Err(_) => { + info!(log, "Automatically destroying dataset: {}", name); + Zfs::destroy_dataset(name).or_else(|err| { + // If we can't find the dataset, that's fine -- it might + // not have been formatted yet. + if matches!( + err.err, + DestroyDatasetErrorVariant::NotFound + ) { + Ok(()) + } else { + Err(err) + } + })?; + } + } + } + + let encryption_details = None; + let size_details = Some(SizeDetails { + quota: dataset.quota, + compression: dataset.compression, + }); + Zfs::ensure_filesystem( + name, + Mountpoint::Path(mountpoint), + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + + if dataset.wipe { + Zfs::set_oxide_value(name, "agent", agent_local_value).map_err( + |err| DatasetError::CannotSetAgentProperty { + dataset: name.clone(), + err: Box::new(err), + }, + )?; + } + } + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use uuid::Uuid; + + #[test] + fn serialize_dataset_name() { + let pool = ZpoolName::new_internal(Uuid::new_v4()); + let kind = DatasetKind::Crucible; + let name = DatasetName::new(pool, kind); + serde_json::to_string(&name).unwrap(); + } +} diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs new file mode 100644 index 0000000000..f5209def77 --- /dev/null +++ b/sled-storage/src/disk.rs @@ -0,0 +1,243 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Disk related types + +use camino::{Utf8Path, Utf8PathBuf}; +use derive_more::From; +use illumos_utils::zpool::{Zpool, ZpoolKind, ZpoolName}; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use sled_hardware::{ + DiskVariant, Partition, PooledDisk, PooledDiskError, UnparsedDisk, +}; +use slog::Logger; +use std::fs::File; + +use crate::dataset; + +#[derive(Debug, thiserror::Error)] +pub enum DiskError { + #[error(transparent)] + Dataset(#[from] crate::dataset::DatasetError), + #[error(transparent)] + PooledDisk(#[from] sled_hardware::PooledDiskError), +} + +// A synthetic disk that acts as one "found" by the hardware and that is backed +// by a zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SyntheticDisk { + pub identity: DiskIdentity, + pub zpool_name: ZpoolName, +} + +impl SyntheticDisk { + // Create a zpool and import it for the synthetic disk + // Zpools will be set to the min size of 64 MiB + pub fn create_zpool( + dir: &Utf8Path, + zpool_name: &ZpoolName, + ) -> SyntheticDisk { + // 64 MiB (min size of zpool) + const DISK_SIZE: u64 = 64 * 1024 * 1024; + let path = dir.join(zpool_name.to_string()); + let file = File::create(&path).unwrap(); + file.set_len(DISK_SIZE).unwrap(); + drop(file); + Zpool::create(zpool_name, &path).unwrap(); + Zpool::import(zpool_name).unwrap(); + Zpool::set_failmode_continue(zpool_name).unwrap(); + Self::new(zpool_name.clone()) + } + + pub fn new(zpool_name: ZpoolName) -> SyntheticDisk { + let id = zpool_name.id(); + let identity = DiskIdentity { + vendor: "synthetic-vendor".to_string(), + serial: format!("synthetic-serial-{id}"), + model: "synthetic-model".to_string(), + }; + SyntheticDisk { identity, zpool_name } + } +} + +// An [`UnparsedDisk`] disk learned about from the hardware or a wrapped zpool +#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum RawDisk { + Real(UnparsedDisk), + Synthetic(SyntheticDisk), +} + +impl RawDisk { + pub fn is_boot_disk(&self) -> bool { + 
match self { + Self::Real(disk) => disk.is_boot_disk(), + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. + disk.zpool_name.kind() == ZpoolKind::Internal + } + } + } + + pub fn identity(&self) -> &DiskIdentity { + match self { + Self::Real(disk) => &disk.identity(), + Self::Synthetic(disk) => &disk.identity, + } + } + + pub fn variant(&self) -> DiskVariant { + match self { + Self::Real(disk) => disk.variant(), + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } + } + + #[cfg(test)] + pub fn zpool_name(&self) -> &ZpoolName { + match self { + Self::Real(_) => unreachable!(), + Self::Synthetic(disk) => &disk.zpool_name, + } + } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() + } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + match self { + Self::Real(disk) => disk.devfs_path(), + Self::Synthetic(_) => unreachable!(), + } + } +} + +/// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed +/// by a single zpool and that has provisioned datasets. This disk is ready for +/// usage by higher level software. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +pub enum Disk { + Real(PooledDisk), + Synthetic(SyntheticDisk), +} + +impl Disk { + pub async fn new( + log: &Logger, + raw_disk: RawDisk, + key_requester: Option<&StorageKeyRequester>, + ) -> Result { + let disk = match raw_disk { + RawDisk::Real(disk) => PooledDisk::new(log, disk)?.into(), + RawDisk::Synthetic(disk) => Disk::Synthetic(disk), + }; + dataset::ensure_zpool_has_datasets( + log, + disk.zpool_name(), + disk.identity(), + key_requester, + ) + .await?; + Ok(disk) + } + + pub fn is_synthetic(&self) -> bool { + match self { + Self::Real(_) => false, + Self::Synthetic(_) => true, + } + } + + pub fn is_real(&self) -> bool { + !self.is_synthetic() + } + + pub fn is_boot_disk(&self) -> bool { + match self { + Self::Real(disk) => disk.is_boot_disk, + Self::Synthetic(disk) => { + // Just label any M.2 the boot disk. + disk.zpool_name.kind() == ZpoolKind::Internal + } + } + } + + pub fn identity(&self) -> &DiskIdentity { + match self { + Self::Real(disk) => &disk.identity, + Self::Synthetic(disk) => &disk.identity, + } + } + + pub fn variant(&self) -> DiskVariant { + match self { + Self::Real(disk) => disk.variant, + Self::Synthetic(disk) => match disk.zpool_name.kind() { + ZpoolKind::External => DiskVariant::U2, + ZpoolKind::Internal => DiskVariant::M2, + }, + } + } + + pub fn devfs_path(&self) -> &Utf8PathBuf { + match self { + Self::Real(disk) => &disk.paths.devfs_path, + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn zpool_name(&self) -> &ZpoolName { + match self { + Self::Real(disk) => &disk.zpool_name, + Self::Synthetic(disk) => &disk.zpool_name, + } + } + + pub fn boot_image_devfs_path( + &self, + raw: bool, + ) -> Result { + match self { + Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::BootImage, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn dump_device_devfs_path( + &self, + raw: bool, + ) -> Result { + match self { + 
Self::Real(disk) => disk.paths.partition_device_path( + &disk.partitions, + Partition::DumpDevice, + raw, + ), + Self::Synthetic(_) => unreachable!(), + } + } + + pub fn slot(&self) -> i64 { + match self { + Self::Real(disk) => disk.slot, + Self::Synthetic(_) => unreachable!(), + } + } +} diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs new file mode 100644 index 0000000000..b9f97ee428 --- /dev/null +++ b/sled-storage/src/error.rs @@ -0,0 +1,81 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Storage related errors + +use crate::dataset::{DatasetError, DatasetName}; +use crate::disk::DiskError; +use camino::Utf8PathBuf; +use omicron_common::api::external::ByteCountRangeError; +use uuid::Uuid; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + DiskError(#[from] DiskError), + + #[error(transparent)] + DatasetError(#[from] DatasetError), + + // TODO: We could add the context of "why are we doing this op", maybe? 
+ #[error(transparent)] + ZfsListDataset(#[from] illumos_utils::zfs::ListDatasetsError), + + #[error(transparent)] + ZfsEnsureFilesystem(#[from] illumos_utils::zfs::EnsureFilesystemError), + + #[error(transparent)] + ZfsSetValue(#[from] illumos_utils::zfs::SetValueError), + + #[error(transparent)] + ZfsGetValue(#[from] illumos_utils::zfs::GetValueError), + + #[error(transparent)] + GetZpoolInfo(#[from] illumos_utils::zpool::GetInfoError), + + #[error(transparent)] + Fstyp(#[from] illumos_utils::fstyp::Error), + + #[error(transparent)] + ZoneCommand(#[from] illumos_utils::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] illumos_utils::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] illumos_utils::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), + + #[error("No U.2 Zpools found")] + NoU2Zpool, + + #[error("Failed to parse UUID from {path}: {err}")] + ParseUuid { + path: Utf8PathBuf, + #[source] + err: uuid::Error, + }, + + #[error("Dataset {name:?} exists with a different uuid (has {old}, requested {new})")] + UuidMismatch { name: Box, old: Uuid, new: Uuid }, + + #[error("Error parsing pool {name}'s size: {err}")] + BadPoolSize { + name: String, + #[source] + err: ByteCountRangeError, + }, + + #[error("Failed to parse the dataset {name}'s UUID: {err}")] + ParseDatasetUuid { + name: String, + #[source] + err: uuid::Error, + }, + + #[error("Zpool Not Found: {0}")] + ZpoolNotFound(String), +} diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs new file mode 100644 index 0000000000..48e5d9a528 --- /dev/null +++ b/sled-storage/src/keyfile.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Key file support for ZFS dataset encryption + +use illumos_utils::zfs::Keypath; +use slog::{error, info, Logger}; +use tokio::fs::{remove_file, File}; +use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom}; + +/// A file that wraps a zfs encryption key. +/// +/// We put this in a RAM backed filesystem and zero and delete it when we are +/// done with it. Unfortunately we cannot do this inside `Drop` because there is no +/// equivalent async drop. +pub struct KeyFile { + path: Keypath, + file: File, + log: Logger, + zero_and_unlink_called: bool, +} + +impl KeyFile { + pub async fn create( + path: Keypath, + key: &[u8; 32], + log: &Logger, + ) -> std::io::Result { + // We want to overwrite any existing contents. + let mut file = tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&path.0) + .await?; + file.write_all(key).await?; + info!(log, "Created keyfile {}", path); + Ok(KeyFile { + path, + file, + log: log.clone(), + zero_and_unlink_called: false, + }) + } + + /// These keyfiles live on a tmpfs and we zero the file so the data doesn't + /// linger on the page in memory. + /// + /// It'd be nice to `impl Drop for KeyFile` and then call `zero` + /// from within the drop handler, but async `Drop` isn't supported. 
+ pub async fn zero_and_unlink(&mut self) -> std::io::Result<()> { + self.zero_and_unlink_called = true; + let zeroes = [0u8; 32]; + let _ = self.file.seek(SeekFrom::Start(0)).await?; + self.file.write_all(&zeroes).await?; + info!(self.log, "Zeroed and unlinked keyfile {}", self.path); + remove_file(&self.path().0).await?; + Ok(()) + } + + pub fn path(&self) -> &Keypath { + &self.path + } +} + +impl Drop for KeyFile { + fn drop(&mut self) { + if !self.zero_and_unlink_called { + error!( + self.log, + "Failed to call zero_and_unlink for keyfile"; + "path" => %self.path + ); + } + } +} diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs new file mode 100644 index 0000000000..d4b64c55a5 --- /dev/null +++ b/sled-storage/src/lib.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Local storage abstraction for use by sled-agent +//! +//! This abstraction operates at the ZFS level and relies on zpool setup on +//! hardware partitions from the `sled-hardware` crate. It utilizes the +//! `illumos-utils` crate to actually perform ZFS related OS calls. + +pub mod dataset; +pub mod disk; +pub mod error; +pub(crate) mod keyfile; +pub mod manager; +pub mod pool; +pub mod resources; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs new file mode 100644 index 0000000000..50b1c44148 --- /dev/null +++ b/sled-storage/src/manager.rs @@ -0,0 +1,1034 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
The storage manager task + +use std::collections::HashSet; + +use crate::dataset::{DatasetError, DatasetName}; +use crate::disk::{Disk, DiskError, RawDisk}; +use crate::error::Error; +use crate::resources::{AddDiskResult, StorageResources}; +use camino::Utf8PathBuf; +use illumos_utils::zfs::{Mountpoint, Zfs}; +use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use omicron_common::disk::DiskIdentity; +use sled_hardware::DiskVariant; +use slog::{error, info, o, warn, Logger}; +use tokio::sync::{mpsc, oneshot, watch}; +use tokio::time::{interval, Duration, MissedTickBehavior}; +use uuid::Uuid; + +// The size of the mpsc bounded channel used to communicate +// between the `StorageHandle` and `StorageManager`. +// +// How did we choose this bound, and why? +// +// Picking a bound can be tricky, but in general, you want the channel to act +// unbounded, such that sends never fail. This makes the channels reliable, +// such that we never drop messages inside the process, and the caller doesn't +// have to choose what to do when overloaded. This simplifies things drastically +// for developers. However, you also don't want to make the channel actually +// unbounded, because that can lead to run-away memory growth and pathological +// behaviors, such that requests get slower over time until the system crashes. +// +// Our team's chosen solution, and used elsewhere in the codebase, is to +// choose a large enough bound such that we should never hit it in practice +// unless we are truly overloaded. If we hit the bound it means that beyond that +// requests will start to build up and we will eventually topple over. So when +// we hit this bound, we just go ahead and panic. +// +// Picking a channel bound is hard to do empirically, but practically, if +// requests are mostly mutating task local state, a bound of 1024 or even 8192 +// should be plenty. 
Tasks that must perform longer running ops can spawn helper +// tasks as necessary or include their own handles for replies rather than +// synchronously waiting. Memory for the queue can be kept small with boxing of +// large messages. +// +// Here we start relatively small so that we can evaluate our choice over time. +const QUEUE_SIZE: usize = 256; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StorageManagerState { + WaitingForKeyManager, + QueueingDisks, + Normal, +} + +#[derive(Debug)] +struct NewFilesystemRequest { + dataset_id: Uuid, + dataset_name: DatasetName, + responder: oneshot::Sender>, +} + +#[derive(Debug)] +enum StorageRequest { + AddDisk(RawDisk), + RemoveDisk(RawDisk), + DisksChanged(HashSet), + NewFilesystem(NewFilesystemRequest), + KeyManagerReady, + /// This will always grab the latest state after any new updates, as it + /// serializes through the `StorageManager` task after all prior requests. + /// This serialization is particularly useful for tests. + GetLatestResources(oneshot::Sender), + + /// Get the internal task state of the manager + GetManagerState(oneshot::Sender), +} + +/// Data managed internally to the StorageManagerTask that can be useful +/// to clients for debugging purposes, and that isn't exposed in other ways. +#[derive(Debug, Clone)] +pub struct StorageManagerData { + pub state: StorageManagerState, + pub queued_u2_drives: HashSet, +} + +/// A mechanism for interacting with the [`StorageManager`] +#[derive(Clone)] +pub struct StorageHandle { + tx: mpsc::Sender, + resource_updates: watch::Receiver, +} + +impl StorageHandle { + /// Adds a disk and associated zpool to the storage manager. + pub async fn upsert_disk(&self, disk: RawDisk) { + self.tx.send(StorageRequest::AddDisk(disk)).await.unwrap(); + } + + /// Removes a disk, if it's tracked by the storage manager, as well + /// as any associated zpools. 
+ pub async fn delete_disk(&self, disk: RawDisk) { + self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); + } + + /// Ensures that the storage manager tracks exactly the provided disks. + /// + /// This acts similar to a batch [Self::upsert_disk] for all new disks, and + /// [Self::delete_disk] for all removed disks. + /// + /// If errors occur, an arbitrary "one" of them will be returned, but a + /// best-effort attempt to add all disks will still be attempted. + pub async fn ensure_using_exactly_these_disks(&self, raw_disks: I) + where + I: IntoIterator, + { + self.tx + .send(StorageRequest::DisksChanged(raw_disks.into_iter().collect())) + .await + .unwrap(); + } + + /// Notify the [`StorageManager`] that the [`key_manager::KeyManager`] + /// has determined what [`key_manager::SecretRetriever`] to use and + /// it is now possible to retrieve secrets and construct keys. Note + /// that in cases of using the trust quorum, it is possible that the + /// [`key_manager::SecretRetriever`] is ready, but enough key shares cannot + /// be retrieved from other sleds. In this case, we still will be unable + /// to add the disks successfully. In the common case this is a transient + /// error. In other cases it may be fatal. However, that is outside the + /// scope of the cares of this module. + pub async fn key_manager_ready(&self) { + self.tx.send(StorageRequest::KeyManagerReady).await.unwrap(); + } + + /// Wait for a boot disk to be initialized + pub async fn wait_for_boot_disk(&mut self) -> (DiskIdentity, ZpoolName) { + loop { + let resources = self.resource_updates.borrow_and_update(); + if let Some((disk_id, zpool_name)) = resources.boot_disk() { + return (disk_id, zpool_name); + } + drop(resources); + // We panic if the sender is dropped, as this means + // the StorageManager has gone away, which it should not do. 
+ self.resource_updates.changed().await.unwrap(); + } + } + + /// Wait for any storage resource changes + pub async fn wait_for_changes(&mut self) -> StorageResources { + self.resource_updates.changed().await.unwrap(); + self.resource_updates.borrow_and_update().clone() + } + + /// Retrieve the latest value of `StorageResources` from the + /// `StorageManager` task. + pub async fn get_latest_resources(&self) -> StorageResources { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); + rx.await.unwrap() + } + + /// Return internal data useful for debugging and testing + pub async fn get_manager_state(&self) -> StorageManagerData { + let (tx, rx) = oneshot::channel(); + self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); + rx.await.unwrap() + } + + pub async fn upsert_filesystem( + &self, + dataset_id: Uuid, + dataset_name: DatasetName, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + let request = + NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; + self.tx.send(StorageRequest::NewFilesystem(request)).await.unwrap(); + rx.await.unwrap() + } +} + +// Some sled-agent tests cannot currently use the real StorageManager +// and want to fake the entire behavior, but still have access to the +// `StorageResources`. We allow this via use of the `FakeStorageManager` +// that will respond to real storage requests from a real `StorageHandle`. 
+#[cfg(feature = "testing")] +pub struct FakeStorageManager { + rx: mpsc::Receiver, + resources: StorageResources, + resource_updates: watch::Sender, +} + +#[cfg(feature = "testing")] +impl FakeStorageManager { + pub fn new() -> (Self, StorageHandle) { + let (tx, rx) = mpsc::channel(QUEUE_SIZE); + let resources = StorageResources::default(); + let (update_tx, update_rx) = watch::channel(resources.clone()); + ( + Self { rx, resources, resource_updates: update_tx }, + StorageHandle { tx, resource_updates: update_rx }, + ) + } + + /// Run the main receive loop of the `FakeStorageManager` + /// + /// This should be spawned into a tokio task + pub async fn run(mut self) { + loop { + match self.rx.recv().await { + Some(StorageRequest::AddDisk(raw_disk)) => { + if self.add_disk(raw_disk).disk_inserted() { + self.resource_updates + .send_replace(self.resources.clone()); + } + } + Some(StorageRequest::GetLatestResources(tx)) => { + let _ = tx.send(self.resources.clone()); + } + Some(_) => { + unreachable!(); + } + None => break, + } + } + } + + // Add a disk to `StorageResources` if it is new and return true if so + fn add_disk(&mut self, raw_disk: RawDisk) -> AddDiskResult { + let disk = match raw_disk { + RawDisk::Real(_) => { + panic!( + "Only synthetic disks can be used with `FakeStorageManager`" + ); + } + RawDisk::Synthetic(synthetic_disk) => { + Disk::Synthetic(synthetic_disk) + } + }; + self.resources.insert_fake_disk(disk) + } +} + +/// The storage manager responsible for the state of the storage +/// on a sled. The storage manager runs in its own task and is interacted +/// with via the [`StorageHandle`]. 
pub struct StorageManager {
    log: Logger,
    state: StorageManagerState,
    // Used to find the capacity of the channel for tracking purposes
    tx: mpsc::Sender<StorageRequest>,
    rx: mpsc::Receiver<StorageRequest>,
    resources: StorageResources,
    // U.2 disks waiting to be added once keys can be retrieved
    queued_u2_drives: HashSet<RawDisk>,
    key_requester: StorageKeyRequester,
    resource_updates: watch::Sender<StorageResources>,
    // Channel capacity at the time of the most recent capacity log message
    last_logged_capacity: usize,
}

impl StorageManager {
    /// Create a `StorageManager` and the `StorageHandle` used to talk to it.
    ///
    /// The manager starts in `StorageManagerState::WaitingForKeyManager`.
    pub fn new(
        log: &Logger,
        key_requester: StorageKeyRequester,
    ) -> (StorageManager, StorageHandle) {
        let (tx, rx) = mpsc::channel(QUEUE_SIZE);
        let resources = StorageResources::default();
        let (update_tx, update_rx) = watch::channel(resources.clone());
        (
            StorageManager {
                log: log.new(o!("component" => "StorageManager")),
                state: StorageManagerState::WaitingForKeyManager,
                tx: tx.clone(),
                rx,
                resources,
                queued_u2_drives: HashSet::new(),
                key_requester,
                resource_updates: update_tx,
                last_logged_capacity: QUEUE_SIZE,
            },
            StorageHandle { tx, resource_updates: update_rx },
        )
    }

    /// Run the main receive loop of the `StorageManager`
    ///
    /// This should be spawned into a tokio task
    pub async fn run(mut self) {
        const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(10);

        // The interval must outlive the loop: the first tick of a freshly
        // created tokio interval completes immediately, so re-creating it on
        // every iteration would retry queued disks back-to-back instead of
        // once per timeout.
        let mut retry_interval = interval(QUEUED_DISK_RETRY_TIMEOUT);
        retry_interval.set_missed_tick_behavior(MissedTickBehavior::Delay);

        loop {
            self.log_capacity_changes();
            let was_queueing =
                self.state == StorageManagerState::QueueingDisks;

            // We select on `recv` rather than on `step`: if the retry branch
            // won while a `step` future was part-way through handling a
            // request, that request would be dropped. The request is instead
            // handled in the branch body, where it cannot be cancelled.
            tokio::select! {
                req = self.rx.recv() => {
                    // The sending side never disappears because we hold a copy
                    let req = req.unwrap();
                    if let Err(e) = self.handle_request(req).await {
                        warn!(self.log, "{e}");
                    }
                    // We just started queueing: wait a full timeout before
                    // the first retry.
                    if !was_queueing
                        && self.state == StorageManagerState::QueueingDisks
                    {
                        retry_interval.reset();
                    }
                }
                _ = retry_interval.tick(),
                    if self.state == StorageManagerState::QueueingDisks =>
                {
                    if self.add_queued_disks().await {
                        let _ = self.resource_updates.send_replace(self.resources.clone());
                    }
                }
            }
        }
    }

    /// Process the next event
    ///
    /// This is useful for testing/debugging
    pub async fn step(&mut self) -> Result<(), Error> {
        self.log_capacity_changes();
        // The sending side never disappears because we hold a copy
        let req = self.rx.recv().await.unwrap();
        self.handle_request(req).await
    }

    // Log the request channel's remaining capacity every time it changes by
    // at least `CAPACITY_LOG_THRESHOLD` entries in either direction since the
    // last logged value.
    fn log_capacity_changes(&mut self) {
        const CAPACITY_LOG_THRESHOLD: usize = 10;
        let current = self.tx.capacity();
        if self.last_logged_capacity.saturating_sub(current)
            >= CAPACITY_LOG_THRESHOLD
        {
            info!(
                self.log,
                "Channel capacity decreased";
                "previous" => ?self.last_logged_capacity,
                "current" => ?current
            );
            self.last_logged_capacity = current;
        } else if current.saturating_sub(self.last_logged_capacity)
            >= CAPACITY_LOG_THRESHOLD
        {
            info!(
                self.log,
                "Channel capacity increased";
                "previous" => ?self.last_logged_capacity,
                "current" => ?current
            );
            self.last_logged_capacity = current;
        }
    }

    // Handle a single request, publishing the updated `StorageResources` to
    // watchers if the request changed them.
    async fn handle_request(
        &mut self,
        req: StorageRequest,
    ) -> Result<(), Error> {
        info!(self.log, "Received {:?}", req);
        let should_send_updates = match req {
            StorageRequest::AddDisk(raw_disk) => {
                self.add_disk(raw_disk).await?.disk_inserted()
            }
            StorageRequest::RemoveDisk(raw_disk) => self.remove_disk(raw_disk),
            StorageRequest::DisksChanged(raw_disks) => {
                self.ensure_using_exactly_these_disks(raw_disks).await
            }
            StorageRequest::NewFilesystem(request) => {
                let result = self.add_dataset(&request).await;
                if result.is_err() {
                    warn!(self.log, "{result:?}");
                }
                // The requester may have gone away; nothing more to do then
                let _ = request.responder.send(result);
                false
            }
            StorageRequest::KeyManagerReady => {
                self.state = StorageManagerState::Normal;
                self.add_queued_disks().await
            }
            StorageRequest::GetLatestResources(tx) => {
                let _ = tx.send(self.resources.clone());
                false
            }
            StorageRequest::GetManagerState(tx) => {
                let _ = tx.send(StorageManagerData {
                    state: self.state,
                    queued_u2_drives: self.queued_u2_drives.clone(),
                });
                false
            }
        };

        if should_send_updates {
            let _ = self.resource_updates.send_replace(self.resources.clone());
        }

        Ok(())
    }

    // Loop through all queued disks inserting them into [`StorageResources`]
    // unless we hit a transient error. If we hit a transient error, we return
    // and wait for the next retry window to re-call this method.
If we hit a + // permanent error we log it, but we continue inserting queued disks. + // + // Return true if updates should be sent to watchers, false otherwise + async fn add_queued_disks(&mut self) -> bool { + info!( + self.log, + "Attempting to add queued disks"; + "num_disks" => %self.queued_u2_drives.len() + ); + self.state = StorageManagerState::Normal; + + let mut send_updates = false; + + // Disks that should be requeued. + let queued = self.queued_u2_drives.clone(); + let mut to_dequeue = HashSet::new(); + for disk in queued.iter() { + if self.state == StorageManagerState::QueueingDisks { + // We hit a transient error in a prior iteration. + break; + } else { + match self.add_u2_disk(disk.clone()).await { + Err(_) => { + // This is an unrecoverable error, so we don't queue the + // disk again. + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskInserted) => { + send_updates = true; + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskAlreadyInserted) => { + to_dequeue.insert(disk); + } + Ok(AddDiskResult::DiskQueued) => (), + } + } + } + // Dequeue any inserted disks + self.queued_u2_drives.retain(|k| !to_dequeue.contains(k)); + send_updates + } + + // Add a disk to `StorageResources` if it is new, + // updated, or its pool has been updated as determined by + // [`$crate::resources::StorageResources::insert_disk`] and we decide not to + // queue the disk for later addition. 
+ async fn add_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + match raw_disk.variant() { + DiskVariant::U2 => self.add_u2_disk(raw_disk).await, + DiskVariant::M2 => self.add_m2_disk(raw_disk).await, + } + } + + // Add a U.2 disk to [`StorageResources`] or queue it to be added later + async fn add_u2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + if self.state != StorageManagerState::Normal { + self.queued_u2_drives.insert(raw_disk); + return Ok(AddDiskResult::DiskQueued); + } + + match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await + { + Ok(disk) => self.resources.insert_disk(disk), + Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { + warn!( + self.log, + "Transient error: {err}: queuing disk"; + "disk_id" => ?raw_disk.identity() + ); + self.queued_u2_drives.insert(raw_disk); + self.state = StorageManagerState::QueueingDisks; + Ok(AddDiskResult::DiskQueued) + } + Err(err) => { + error!( + self.log, + "Persistent error: {err}: not queueing disk"; + "disk_id" => ?raw_disk.identity() + ); + Err(err.into()) + } + } + } + + // Add a U.2 disk to [`StorageResources`] if new and return `Ok(true)` if so + // + // + // We never queue M.2 drives, as they don't rely on [`KeyManager`] based + // encryption + async fn add_m2_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result { + let disk = + Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) + .await?; + self.resources.insert_disk(disk) + } + + // Delete a real disk and return `true` if the disk was actually removed + fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { + // If the disk is a U.2, we want to first delete it from any queued disks + let _ = self.queued_u2_drives.remove(&raw_disk); + self.resources.remove_disk(raw_disk.identity()) + } + + // Find all disks to remove that are not in raw_disks and remove them. Then + // take the remaining disks and try to add them all. 
`StorageResources` will + // inform us if anything changed, and if so we return true, otherwise we + // return false. + async fn ensure_using_exactly_these_disks( + &mut self, + raw_disks: HashSet, + ) -> bool { + let mut should_update = false; + + // Clear out any queued U.2 disks that are real. + // We keep synthetic disks, as they are only added once. + self.queued_u2_drives.retain(|d| d.is_synthetic()); + + let all_ids: HashSet<_> = + raw_disks.iter().map(|d| d.identity()).collect(); + + // Find all existing disks not in the current set + let to_remove: Vec = self + .resources + .disks() + .keys() + .filter_map(|id| { + if !all_ids.contains(id) { + Some(id.clone()) + } else { + None + } + }) + .collect(); + + for id in to_remove { + if self.resources.remove_disk(&id) { + should_update = true; + } + } + + for raw_disk in raw_disks { + let disk_id = raw_disk.identity().clone(); + match self.add_disk(raw_disk).await { + Ok(AddDiskResult::DiskInserted) => should_update = true, + Ok(_) => (), + Err(err) => { + warn!( + self.log, + "Failed to add disk to storage resources: {err}"; + "disk_id" => ?disk_id + ); + } + } + } + + should_update + } + + // Attempts to add a dataset within a zpool, according to `request`. + async fn add_dataset( + &mut self, + request: &NewFilesystemRequest, + ) -> Result<(), Error> { + info!(self.log, "add_dataset: {:?}", request); + if !self + .resources + .disks() + .values() + .any(|(_, pool)| &pool.name == request.dataset_name.pool()) + { + return Err(Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.dataset_name.pool(), + ))); + } + + let zoned = true; + let fs_name = &request.dataset_name.full(); + let do_format = true; + let encryption_details = None; + let size_details = None; + Zfs::ensure_filesystem( + fs_name, + Mountpoint::Path(Utf8PathBuf::from("/data")), + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + // Ensure the dataset has a usable UUID. 
+ if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id) = id_str.parse::() { + if id != request.dataset_id { + return Err(Error::UuidMismatch { + name: Box::new(request.dataset_name.clone()), + old: id, + new: request.dataset_id, + }); + } + return Ok(()); + } + } + Zfs::set_oxide_value( + &fs_name, + "uuid", + &request.dataset_id.to_string(), + )?; + + Ok(()) + } +} + +/// All tests only use synthetic disks, but are expected to be run on illumos +/// systems. +#[cfg(all(test, target_os = "illumos"))] +mod tests { + use crate::dataset::DatasetKind; + use crate::disk::SyntheticDisk; + + use super::*; + use async_trait::async_trait; + use camino_tempfile::tempdir; + use illumos_utils::zpool::Zpool; + use key_manager::{ + KeyManager, SecretRetriever, SecretRetrieverError, SecretState, + VersionedIkm, + }; + use omicron_test_utils::dev::test_setup_log; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }; + use uuid::Uuid; + + /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for + /// epoch 0 + #[derive(Debug, Default)] + struct HardcodedSecretRetriever { + inject_error: Arc, + } + + #[async_trait] + impl SecretRetriever for HardcodedSecretRetriever { + async fn get_latest( + &self, + ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(VersionedIkm::new(epoch, salt, &secret)) + } + + /// We don't plan to do any key rotation before trust quorum is ready + async fn get( + &self, + epoch: u64, + ) -> Result { + if self.inject_error.load(Ordering::SeqCst) { + return Err(SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + if epoch != 0 { + return Err(SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(SecretState::Current(self.get_latest().await?)) + } + } + + #[tokio::test] + async fn 
add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log( + "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", + ); + let (mut _key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let raw_disk: RawDisk = SyntheticDisk::new(zpool_name).into(); + assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); + manager.add_u2_disk(raw_disk.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk.clone()])); + + // Check other non-normal stages and ensure disk gets queued + manager.queued_u2_drives.clear(); + manager.state = StorageManagerState::QueueingDisks; + manager.add_u2_disk(raw_disk.clone()).await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk])); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn ensure_u2_gets_added_to_resources() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Set the stage to pretend we've progressed enough to have a key_manager available. 
+ manager.state = StorageManagerState::Normal; + manager.add_u2_disk(disk).await.unwrap(); + assert_eq!(manager.resources.all_u2_zpools().len(), 1); + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn wait_for_bootdisk() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("wait_for_bootdisk"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Create a synthetic internal disk + let zpool_name = ZpoolName::new_internal(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + + handle.upsert_disk(disk).await; + handle.wait_for_boot_disk().await; + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn queued_disks_get_added_as_resources() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("queued_disks_get_added_as_resources"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (manager, handle) = StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Queue up a disks, as we haven't told the `StorageManager` that + // the `KeyManager` is ready yet. 
+ let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk).await; + let resources = handle.get_latest_resources().await; + assert!(resources.all_u2_zpools().is_empty()); + + // Now inform the storage manager that the key manager is ready + // The queued disk should be successfully added + handle.key_manager_ready().await; + let resources = handle.get_latest_resources().await; + assert_eq!(resources.all_u2_zpools().len(), 1); + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + /// For this test, we are going to step through the msg recv loop directly + /// without running the `StorageManager` in a tokio task. + /// This allows us to control timing precisely. + #[tokio::test] + async fn queued_disks_get_requeued_on_secret_retriever_error() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log( + "queued_disks_get_requeued_on_secret_retriever_error", + ); + let inject_error = Arc::new(AtomicBool::new(false)); + let (mut key_manager, key_requester) = KeyManager::new( + &logctx.log, + HardcodedSecretRetriever { inject_error: inject_error.clone() }, + ); + let (mut manager, handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Queue up a disks, as we haven't told the `StorageManager` that + // the `KeyManager` is ready yet. + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk = SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk).await; + manager.step().await.unwrap(); + + // We can't wait for a reply through the handle as the storage manager task + // isn't actually running. We just check the resources directly. 
+ assert!(manager.resources.all_u2_zpools().is_empty()); + + // Let's inject an error to the `SecretRetriever` to simulate a trust + // quorum timeout + inject_error.store(true, Ordering::SeqCst); + + // Now inform the storage manager that the key manager is ready + // The queued disk should not be added due to the error + handle.key_manager_ready().await; + manager.step().await.unwrap(); + assert!(manager.resources.all_u2_zpools().is_empty()); + + // Manually simulating a timer tick to add queued disks should also + // still hit the error + manager.add_queued_disks().await; + assert!(manager.resources.all_u2_zpools().is_empty()); + + // Clearing the injected error will cause the disk to get added + inject_error.store(false, Ordering::SeqCst); + manager.add_queued_disks().await; + assert_eq!(1, manager.resources.all_u2_zpools().len()); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn delete_disk_triggers_notification() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("delete_disk_triggers_notification"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Inform the storage manager that the key manager is ready, so disks + // don't get queued + handle.key_manager_ready().await; + + // Create and add a disk + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk: RawDisk = + SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk.clone()).await; + + // Wait for the add disk notification + let 
resources = handle.wait_for_changes().await; + assert_eq!(resources.all_u2_zpools().len(), 1); + + // Delete the disk and wait for a notification + handle.delete_disk(disk).await; + let resources = handle.wait_for_changes().await; + assert!(resources.all_u2_zpools().is_empty()); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn ensure_using_exactly_these_disks() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_using_exactly_these_disks"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (manager, mut handle) = + StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + // Create a bunch of file backed external disks with zpools + let dir = tempdir().unwrap(); + let zpools: Vec = + (0..10).map(|_| ZpoolName::new_external(Uuid::new_v4())).collect(); + let disks: Vec = zpools + .iter() + .map(|zpool_name| { + SyntheticDisk::create_zpool(dir.path(), zpool_name).into() + }) + .collect(); + + // Add the first 3 disks, and ensure they get queued, as we haven't + // marked our key manager ready yet + handle + .ensure_using_exactly_these_disks(disks.iter().take(3).cloned()) + .await; + let state = handle.get_manager_state().await; + assert_eq!(state.queued_u2_drives.len(), 3); + assert_eq!(state.state, StorageManagerState::WaitingForKeyManager); + assert!(handle.get_latest_resources().await.all_u2_zpools().is_empty()); + + // Mark the key manager ready and wait for the storage update + handle.key_manager_ready().await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().take(3).map(|d| d.identity()).collect(); + let 
actual: HashSet<_> = resources.disks().keys().collect(); + assert_eq!(expected, actual); + + // Add first three disks after the initial one. The returned resources + // should not contain the first disk. + handle + .ensure_using_exactly_these_disks( + disks.iter().skip(1).take(3).cloned(), + ) + .await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().skip(1).take(3).map(|d| d.identity()).collect(); + let actual: HashSet<_> = resources.disks().keys().collect(); + assert_eq!(expected, actual); + + // Ensure the same set of disks and make sure no change occurs + // Note that we directly request the resources this time so we aren't + // waiting forever for a change notification. + handle + .ensure_using_exactly_these_disks( + disks.iter().skip(1).take(3).cloned(), + ) + .await; + let resources2 = handle.get_latest_resources().await; + assert_eq!(resources, resources2); + + // Add a disjoint set of disks and see that only they come through + handle + .ensure_using_exactly_these_disks( + disks.iter().skip(4).take(5).cloned(), + ) + .await; + let resources = handle.wait_for_changes().await; + let expected: HashSet<_> = + disks.iter().skip(4).take(5).map(|d| d.identity()).collect(); + let actual: HashSet<_> = resources.disks().keys().collect(); + assert_eq!(expected, actual); + + // Finally, change the zpool backing of the 5th disk to be that of the 10th + // and ensure that disk changes. Note that we don't change the identity + // of the 5th disk. 
+ let mut modified_disk = disks[4].clone(); + if let RawDisk::Synthetic(disk) = &mut modified_disk { + disk.zpool_name = disks[9].zpool_name().clone(); + } else { + panic!(); + } + let mut expected: HashSet<_> = + disks.iter().skip(5).take(4).cloned().collect(); + expected.insert(modified_disk); + + handle + .ensure_using_exactly_these_disks(expected.clone().into_iter()) + .await; + let resources = handle.wait_for_changes().await; + + // Ensure the one modified disk changed as we expected + assert_eq!(5, resources.disks().len()); + for raw_disk in expected { + let (disk, pool) = + resources.disks().get(raw_disk.identity()).unwrap(); + assert_eq!(disk.zpool_name(), raw_disk.zpool_name()); + assert_eq!(&pool.name, disk.zpool_name()); + assert_eq!(raw_disk.identity(), &pool.parent); + } + + // Cleanup + for zpool in zpools { + Zpool::destroy(&zpool).unwrap(); + } + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn upsert_filesystem() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("upsert_filesystem"); + let (mut key_manager, key_requester) = + KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); + let (manager, handle) = StorageManager::new(&logctx.log, key_requester); + + // Spawn the key_manager so that it will respond to requests for encryption keys + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + tokio::spawn(async move { + manager.run().await; + }); + + handle.key_manager_ready().await; + + // Create and add a disk + let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let dir = tempdir().unwrap(); + let disk: RawDisk = + SyntheticDisk::create_zpool(dir.path(), &zpool_name).into(); + handle.upsert_disk(disk.clone()).await; + + // Create a filesystem + let dataset_id = Uuid::new_v4(); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + handle.upsert_filesystem(dataset_id, 
dataset_name).await.unwrap(); + + Zpool::destroy(&zpool_name).unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/sled-storage/src/pool.rs b/sled-storage/src/pool.rs new file mode 100644 index 0000000000..cc71aeb19d --- /dev/null +++ b/sled-storage/src/pool.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! ZFS storage pool + +use crate::error::Error; +use illumos_utils::zpool::{Zpool, ZpoolInfo, ZpoolName}; +use omicron_common::disk::DiskIdentity; + +/// A ZFS storage pool wrapper that tracks information returned from +/// `zpool` commands +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Pool { + pub name: ZpoolName, + pub info: ZpoolInfo, + pub parent: DiskIdentity, +} + +impl Pool { + /// Queries for an existing Zpool by name. + /// + /// Returns Ok if the pool exists. + pub fn new(name: ZpoolName, parent: DiskIdentity) -> Result { + let info = Zpool::get_info(&name.to_string())?; + Ok(Pool { name, info, parent }) + } + + /// Return a Pool consisting of fake info + #[cfg(feature = "testing")] + pub fn new_with_fake_info(name: ZpoolName, parent: DiskIdentity) -> Pool { + let info = ZpoolInfo::new_hardcoded(name.to_string()); + Pool { name, info, parent } + } +} diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs new file mode 100644 index 0000000000..c1f460dc92 --- /dev/null +++ b/sled-storage/src/resources.rs @@ -0,0 +1,206 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Discovered and usable disks and zpools + +use crate::dataset::M2_DEBUG_DATASET; +use crate::disk::Disk; +use crate::error::Error; +use crate::pool::Pool; +use camino::Utf8PathBuf; +use cfg_if::cfg_if; +use illumos_utils::zpool::ZpoolName; +use omicron_common::disk::DiskIdentity; +use sled_hardware::DiskVariant; +use std::collections::BTreeMap; +use std::sync::Arc; + +// The directory within the debug dataset in which bundles are created. +const BUNDLE_DIRECTORY: &str = "bundle"; + +// The directory for zone bundles. +const ZONE_BUNDLE_DIRECTORY: &str = "zone"; + +pub enum AddDiskResult { + DiskInserted, + DiskAlreadyInserted, + DiskQueued, +} + +impl AddDiskResult { + pub fn disk_inserted(&self) -> bool { + match self { + AddDiskResult::DiskInserted => true, + _ => false, + } + } +} + +/// Storage related resources: disks and zpools +/// +/// This state is internal to the [`crate::manager::StorageManager`] task. Clones +/// of this state can be retrieved by requests to the `StorageManager` task +/// from the [`crate::manager::StorageHandle`]. This state is not `Sync`, and +/// as such does not require any mutexes. However, we do expect to share it +/// relatively frequently, and we want copies of it to be as cheaply made +/// as possible. So any large state is stored inside `Arc`s. On the other +/// hand, we expect infrequent updates to this state, and as such, we use +/// [`std::sync::Arc::make_mut`] to implement clone on write functionality +/// inside the `StorageManager` task if there are any outstanding copies. +/// Therefore, we only pay the cost to update infrequently, and no locks are +/// required by callers when operating on cloned data. The only contention here +/// is for the reference counters of the internal Arcs when `StorageResources` +/// gets cloned or dropped. 
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct StorageResources {
+    // All disks, real and synthetic, being managed by this sled, keyed by
+    // disk identity. Each entry pairs the disk with its zpool. The map sits
+    // behind an `Arc` and is only mutated through `Arc::make_mut`.
+    disks: Arc<BTreeMap<DiskIdentity, (Disk, Pool)>>,
+}
+
+impl StorageResources {
+    /// Return a reference to the current snapshot of disks
+    pub fn disks(&self) -> &BTreeMap<DiskIdentity, (Disk, Pool)> {
+        &self.disks
+    }
+
+    /// Insert a disk and its zpool
+    ///
+    /// If the disk passed in is new or modified, or its pool size or pool
+    /// name changed, then insert the changed values and return `DiskInserted`.
+    /// Otherwise, do not insert anything and return `DiskAlreadyInserted`.
+    /// For instance, if only the pool health changes, because it is not one
+    /// of the checked values, we will not insert the update and will return
+    /// `DiskAlreadyInserted`.
+    ///
+    /// Returns an error, without modifying the stored disks, if [`Pool::new`]
+    /// fails for the disk's zpool.
+    pub(crate) fn insert_disk(
+        &mut self,
+        disk: Disk,
+    ) -> Result<AddDiskResult, Error> {
+        let disk_id = disk.identity().clone();
+        let zpool_name = disk.zpool_name().clone();
+        let zpool = Pool::new(zpool_name, disk_id.clone())?;
+        if let Some((stored_disk, stored_pool)) = self.disks.get(&disk_id) {
+            if stored_disk == &disk
+                && stored_pool.info.size() == zpool.info.size()
+                && stored_pool.name == zpool.name
+            {
+                return Ok(AddDiskResult::DiskAlreadyInserted);
+            }
+        }
+        // The disk is new, or either the disk or its zpool changed
+        Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool));
+        Ok(AddDiskResult::DiskInserted)
+    }
+
+    /// Insert a disk while creating a fake pool
+    ///
+    /// This is a workaround for current mock based testing strategies
+    /// in the sled-agent.
+    ///
+    /// Unlike `insert_disk`, an entry that already exists for the disk's
+    /// identity is never replaced: `DiskAlreadyInserted` is returned and the
+    /// stored disk and pool are left untouched.
+    #[cfg(feature = "testing")]
+    pub fn insert_fake_disk(&mut self, disk: Disk) -> AddDiskResult {
+        let disk_id = disk.identity().clone();
+        if self.disks.contains_key(&disk_id) {
+            return AddDiskResult::DiskAlreadyInserted;
+        }
+        // The disk is not tracked yet: pair it with a fake pool and store it
+        let zpool_name = disk.zpool_name().clone();
+        let zpool = Pool::new_with_fake_info(zpool_name, disk_id.clone());
+        Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool));
+        AddDiskResult::DiskInserted
+    }
+
+    /// Delete a disk and its zpool
+    ///
+    /// Return true, if data was changed, false otherwise
+    ///
+    /// Note: We never allow removal of synthetic disks in production as they
+    /// are only added once.
+    pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool {
+        let Some((disk, _)) = self.disks.get(id) else {
+            return false;
+        };
+
+        cfg_if! {
+            if #[cfg(test)] {
+                // For testing purposes, we allow synthetic disks to be deleted.
+                // Silence an unused variable warning.
+                _ = disk;
+            } else {
+                // In production, we disallow removal of synthetic disks as they
+                // are only added once.
+                if disk.is_synthetic() {
+                    return false;
+                }
+            }
+        }
+
+        // Safe to unwrap as we just checked the key existed above
+        Arc::make_mut(&mut self.disks).remove(id).unwrap();
+        true
+    }
+
+    /// Returns the identity of the boot disk.
+    ///
+    /// If this returns `None`, we have not processed the boot disk yet.
+    pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> {
+        // The first disk, in key order, that reports itself as the boot disk
+        self.disks.iter().find_map(|(id, (disk, _))| {
+            disk.is_boot_disk()
+                .then(|| (id.clone(), disk.zpool_name().clone()))
+        })
+    }
+
+    /// Returns all M.2 zpools
+    pub fn all_m2_zpools(&self) -> Vec<ZpoolName> {
+        self.all_zpools(DiskVariant::M2)
+    }
+
+    /// Returns all U.2 zpools
+    pub fn all_u2_zpools(&self) -> Vec<ZpoolName> {
+        self.all_zpools(DiskVariant::U2)
+    }
+
+    /// Returns all mountpoints within all M.2s for a particular dataset.
+    pub fn all_m2_mountpoints(&self, dataset: &str) -> Vec<Utf8PathBuf> {
+        self.all_m2_zpools()
+            .iter()
+            .map(|zpool| zpool.dataset_mountpoint(dataset))
+            .collect()
+    }
+
+    /// Returns all mountpoints within all U.2s for a particular dataset.
+    pub fn all_u2_mountpoints(&self, dataset: &str) -> Vec<Utf8PathBuf> {
+        self.all_u2_zpools()
+            .iter()
+            .map(|zpool| zpool.dataset_mountpoint(dataset))
+            .collect()
+    }
+
+    /// Returns the zpool name and disk variant of every managed disk
+    pub fn get_all_zpools(&self) -> Vec<(ZpoolName, DiskVariant)> {
+        self.disks
+            .values()
+            .map(|(disk, _)| (disk.zpool_name().clone(), disk.variant()))
+            .collect()
+    }
+
+    // Returns all zpools of a particular variant
+    fn all_zpools(&self, variant: DiskVariant) -> Vec<ZpoolName> {
+        self.disks
+            .values()
+            .filter_map(|(disk, _)| {
+                (disk.variant() == variant)
+                    .then(|| disk.zpool_name().clone())
+            })
+            .collect()
+    }
+
+    /// Return the directories for storing zone service bundles.
+    ///
+    /// One directory is returned per M.2 zpool, located under that pool's
+    /// `M2_DEBUG_DATASET` mountpoint.
+    pub fn all_zone_bundle_directories(&self) -> Vec<Utf8PathBuf> {
+        self.all_m2_mountpoints(M2_DEBUG_DATASET)
+            .into_iter()
+            .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY))
+            .collect()
+    }
+}
diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml
index 684c0f8589..176f4002a5 100644
--- a/smf/sled-agent/non-gimlet/config.toml
+++ b/smf/sled-agent/non-gimlet/config.toml
@@ -45,6 +45,11 @@ zpools = [
 # guest memory is pulled from.
 vmm_reservoir_percentage = 50
 
+# Optionally you can specify the size of the VMM reservoir in MiB.
+# Note vmm_reservoir_percentage and vmm_reservoir_size_mb cannot be specified
+# at the same time.
+#vmm_reservoir_size_mb = 2048
+
 # Swap device size for the system. The device is a sparsely allocated zvol on
 # the internal zpool of the M.2 that we booted from.
# diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 7b1f70c79e..7f210134a2 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -11,7 +11,6 @@ camino.workspace = true camino-tempfile.workspace = true dropshot.workspace = true filetime = { workspace = true, optional = true } -futures.workspace = true headers.workspace = true hex.workspace = true http.workspace = true diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index c35b871684..011de576ca 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -23,6 +23,9 @@ use crate::dev::poll; // Timeout used when starting up ClickHouse subprocess. const CLICKHOUSE_TIMEOUT: Duration = Duration::from_secs(30); +// Timeout used when starting a ClickHouse keeper subprocess. +const CLICKHOUSE_KEEPER_TIMEOUT: Duration = Duration::from_secs(30); + /// A `ClickHouseInstance` is used to start and manage a ClickHouse single node server process. #[derive(Debug)] pub struct ClickHouseInstance { @@ -527,7 +530,8 @@ async fn find_clickhouse_port_in_log( pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { let p = poll::wait_for_condition( || async { - let result = discover_ready(&log_path, CLICKHOUSE_TIMEOUT).await; + let result = + discover_ready(&log_path, CLICKHOUSE_KEEPER_TIMEOUT).await; match result { Ok(ready) => Ok(ready), Err(e) => { @@ -547,7 +551,7 @@ pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { } }, &Duration::from_millis(500), - &CLICKHOUSE_TIMEOUT, + &CLICKHOUSE_KEEPER_TIMEOUT, ) .await .context("waiting to discover if ClickHouse is ready for connections")?; diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index cb5ea40210..3efb030063 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" 
-SOFTNPU_COMMIT="c1c42398c82b0220c8b5fa3bfba9c7a3bcaa0943" +SOFTNPU_COMMIT="dec63e67156fe6e958991bbfa090629868115ab5" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index 1db40208f7..884d356222 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -63,8 +63,9 @@ function ensure_softnpu_zone { --omicron-zone \ --ports sc0_0,tfportrear0_0 \ --ports sc0_1,tfportqsfp0_0 \ - --sidecar-lite-branch main - } + --sidecar-lite-commit f0585a29fb0285f7a1220c1118856b0e5c1f75c5 \ + --softnpu-commit dec63e67156fe6e958991bbfa090629868115ab5 + } "$SOURCE_DIR"/scrimlet/softnpu-init.sh success "softnpu zone exists" } diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 3f6b566cda..40db886f69 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="82aa17646265449ee0ede9410208e510fa4a5877" +COMMIT="aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 3de723fca6..ad88fef13e 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="82aa17646265449ee0ede9410208e510fa4a5877" +COMMIT="aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" SHA2="b3f55fe24e54530fdf96c22a033f9edc0bad9c0a5e3344763a23e52b251d5113" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index a4dd82aaca..7c1644b031 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="1badd6adfece0a1b661f7efb9a2ca65e471f45cf9c8ecbd72b228ca174311e31" -MGD_LINUX_SHA256="92463e3266f5a702af28504349526189aa0ebb23adb166ec2603182acf6cdb8c" +CIDL_SHA256="aa7241cd35976f28f25aaf3ce2ce2af14dae1da9d67585c7de3b724dbcc55e60" 
+MGD_LINUX_SHA256="a39387c361ff2c2d0701d66c00b10e43c72fb5ddd1a5900b59ecccb832c80731" \ No newline at end of file diff --git a/tools/update_crucible.sh b/tools/update_crucible.sh index af834091ca..020a33927e 100755 --- a/tools/update_crucible.sh +++ b/tools/update_crucible.sh @@ -21,7 +21,6 @@ PACKAGES=( CRATES=( "crucible-agent-client" - "crucible-client-types" "crucible-pantry-client" "crucible-smf" ) diff --git a/wicket-dbg/Cargo.toml b/wicket-dbg/Cargo.toml index d546c41e44..a00bcb9c1b 100644 --- a/wicket-dbg/Cargo.toml +++ b/wicket-dbg/Cargo.toml @@ -11,7 +11,6 @@ camino.workspace = true ciborium.workspace = true clap.workspace = true crossterm.workspace = true -ratatui.workspace = true serde.workspace = true slog.workspace = true slog-async.workspace = true diff --git a/wicket/Cargo.toml b/wicket/Cargo.toml index 11f476d98c..efb8e51dff 100644 --- a/wicket/Cargo.toml +++ b/wicket/Cargo.toml @@ -13,9 +13,7 @@ camino.workspace = true ciborium.workspace = true clap.workspace = true crossterm.workspace = true -debug-ignore.workspace = true futures.workspace = true -hex = { workspace = true, features = ["serde"] } humantime.workspace = true indexmap.workspace = true indicatif.workspace = true @@ -26,7 +24,6 @@ owo-colors.workspace = true ratatui.workspace = true reqwest.workspace = true rpassword.workspace = true -semver.workspace = true serde.workspace = true serde_json.workspace = true shell-words.workspace = true @@ -42,7 +39,6 @@ toml.workspace = true toml_edit.workspace = true tui-tree-widget = "0.13.0" unicode-width.workspace = true -uuid.workspace = true zeroize.workspace = true omicron-passwords.workspace = true diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 6c1295f1f0..9b1a25a50e 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -274,6 +274,36 @@ fn populate_network_table( "port", Value::String(Formatted::new(p.port.to_string())), ); + if let 
Some(x) = p.hold_time { + peer.insert( + "hold_time", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.connect_retry { + peer.insert( + "connect_retry", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.delay_open { + peer.insert( + "delay_open", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.idle_hold_time { + peer.insert( + "idle_hold_time", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.keepalive { + peer.insert( + "keepalive", + Value::Integer(Formatted::new(x as i64)), + ); + } peers.push(Value::InlineTable(peer)); } uplink @@ -389,6 +419,11 @@ mod tests { asn: p.asn, port: p.port.clone(), addr: p.addr, + hold_time: p.hold_time, + connect_retry: p.connect_retry, + delay_open: p.delay_open, + idle_hold_time: p.idle_hold_time, + keepalive: p.keepalive, }) .collect(), port: config.port.clone(), @@ -486,6 +521,11 @@ mod tests { asn: 47, addr: "10.2.3.4".parse().unwrap(), port: "port0".into(), + hold_time: Some(6), + connect_retry: Some(3), + delay_open: Some(0), + idle_hold_time: Some(3), + keepalive: Some(2), }], uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, diff --git a/wicketd/src/artifacts/extracted_artifacts.rs b/wicketd/src/artifacts/extracted_artifacts.rs index 352d8ad3d5..b796201936 100644 --- a/wicketd/src/artifacts/extracted_artifacts.rs +++ b/wicketd/src/artifacts/extracted_artifacts.rs @@ -169,7 +169,7 @@ impl ExtractedArtifacts { /// /// As the returned file is written to, the data will be hashed; once /// writing is complete, call [`ExtractedArtifacts::store_tempfile()`] to - /// persist the temporary file into an [`ExtractedArtifactDataHandle()`]. + /// persist the temporary file into an [`ExtractedArtifactDataHandle`]. 
pub(super) fn new_tempfile( &self, ) -> Result { diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index a96acc56a0..0aaea427f3 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -521,6 +521,11 @@ fn validate_rack_network_config( addr: p.addr, asn: p.asn, port: p.port.clone(), + hold_time: p.hold_time, + connect_retry: p.connect_retry, + delay_open: p.delay_open, + idle_hold_time: p.idle_hold_time, + keepalive: p.keepalive, }) .collect(), switch: match config.switch { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1de7f68f2f..c95226b960 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -19,7 +19,6 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["serde"] } -bitvec = { version = "1.0.1" } bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } @@ -48,8 +47,7 @@ futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } -hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } -hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } +hashbrown = { version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hyper = { version = "0.14.27", features = ["full"] } indexmap = { version = "2.1.0", features = ["serde"] } @@ -66,21 +64,22 @@ 
num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } -openapiv3 = { version = "1.0.3", default-features = false, features = ["skip_serializing_defaults"] } +openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } -rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } -rand_chacha = { version = "0.3.1" } +proc-macro2 = { version = "1.0.69" } +rand = { version = "0.8.5" } +rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } -regex-syntax-c38e5c1d305a1b54 = { package = "regex-syntax", version = "0.8.2" } -reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } +regex-syntax = { version = "0.8.2" } +reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } signature = { 
version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } @@ -113,12 +112,10 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["serde"] } -bitvec = { version = "1.0.1" } bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } -cc = { version = "1.0.83", default-features = false, features = ["parallel"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } @@ -143,8 +140,7 @@ futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } -hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } -hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } +hashbrown = { version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hyper = { version = "0.14.27", features = ["full"] } indexmap = { version = "2.1.0", features = ["serde"] } @@ -161,22 +157,22 @@ num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = 
"0.2.16", features = ["i128", "libm"] } -openapiv3 = { version = "1.0.3", default-features = false, features = ["skip_serializing_defaults"] } +openapiv3 = { version = "2.0.0-rc.1", default-features = false, features = ["skip_serializing_defaults"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } -rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } -rand_chacha = { version = "0.3.1" } +proc-macro2 = { version = "1.0.69" } +rand = { version = "0.8.5" } +rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } -regex-syntax-3b31131e45eafb45 = { package = "regex-syntax", version = "0.6.29" } -regex-syntax-c38e5c1d305a1b54 = { package = "regex-syntax", version = "0.8.2" } -reqwest = { version = "0.11.20", features = ["blocking", "json", "rustls-tls", "stream"] } +regex-syntax = { version = "0.8.2" } +reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.16.20", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } -serde = { version = "1.0.188", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.192", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.108", features = ["raw_value"] } sha2 = { version = "0.10.8", features = ["oid"] } signature = { version = "2.1.0", default-features = false, features = ["digest", "rand_core", "std"] } @@ -198,7 +194,6 @@ tracing = { version = 
"0.1.37", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } -unicode-xid = { version = "0.2.4" } usdt = { version = "0.3.5" } uuid = { version = "1.5.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] }