diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md new file mode 100644 index 0000000000..eb1ac2c6e9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md @@ -0,0 +1,65 @@ +--- +name: Test flake from buildomat +about: Report a test failure from a CI run either on "main" or on a PR where you don't + think the PR changes caused the failure +title: 'test failed in CI: NAME_OF_TEST' +labels: Test Flake +assignees: '' + +--- + + + +This test failed on a CI run on **"main" (or pull request XXX)**: + + Link here to the GitHub page showing the test failure. + If it's from a PR, this might look like: + https://github.com/oxidecomputer/omicron/pull/4588/checks?check_run_id=19198066410 + It could also be a link to a failure on "main", which would look like: + https://github.com/oxidecomputer/omicron/runs/20589829185 + This is useful because it shows which commit failed and all the surrounding context. + +Log showing the specific test failure: + + + Link here to the specific line of output from the buildomat log showing the failure: + https://buildomat.eng.oxide.computer/wg/0/details/01HGH32FQYKZJNX9J62HNABKPA/31C5jyox8tyHUIuDDevKkXlDZCyNw143z4nOq8wLl3xtjKzT/01HGH32V3P0HH6B56S46AJAT63#S4455 + This is useful because it shows all the details about the test failure. + +Excerpt from the log showing the failure: + +``` +Paste here an excerpt from the log. +This is redundant with the log above but helps people searching for the error message +or test name. It also works if the link above becomes unavailable. +Here's an example: + +------ + +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 
0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. +thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... +``` diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md new file mode 100644 index 0000000000..e963c83926 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md @@ -0,0 +1,42 @@ +--- +name: Test flake from local failure +about: Report a test failure that happened locally (not CI) that you believe is not + related to local changes +title: 'test failure: TEST_NAME' +labels: Test Flake +assignees: '' + +--- + +On branch **BRANCH** commit **COMMIT**, I saw this test failure: + +``` +Include the trimmed, relevant output from `cargo nextest`. Here's an example: + +------- +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 
0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. +thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... +``` + +**NOTE: Consider attaching any log files produced by the test.** diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 21125cf034..63752880d6 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@ada21a86dcbd8480ccdd77e11e167f51a002fb3e # v2 + uses: taiki-e/install-action@37129d5de13e9122cce55a7a5e7e49981cef514c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date diff --git a/Cargo.lock b/Cargo.lock index f88c27a948..0486f5dc9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,7 +59,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom 0.2.14", + "getrandom", "once_cell", "version_check", "zerocopy 0.7.34", @@ -166,7 +166,7 @@ dependencies = [ "omicron-workspace-hack", 
"proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -234,13 +234,14 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr 1.9.1", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -273,7 +274,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -295,7 +296,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -306,7 +307,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -359,7 +360,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -375,10 +376,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom 0.2.14", + "getrandom", "instant", "pin-project-lite", - "rand 0.8.5", + "rand", "tokio", ] @@ -517,7 +518,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.72", + "syn 2.0.74", "which", ] @@ -550,7 +551,7 @@ checksum = "1657dce144574f921af10a92876a96f0ca05dd830900598d21d91c8e4cf78f74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -682,7 +683,7 @@ dependencies = [ "omicron-workspace-hack", "pq-sys", "proptest", - "rand 0.8.5", + "rand", "secrecy", "serde", "serde_with", @@ -814,9 +815,9 @@ dependencies = [ [[package]] name 
= "camino" -version = "1.1.7" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] @@ -891,9 +892,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "castaway" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" dependencies = [ "rustversion", ] @@ -912,6 +913,11 @@ name = "cc" version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] [[package]] name = "cert-dev" @@ -1014,7 +1020,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1068,9 +1074,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.13" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -1078,9 +1084,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstream", "anstyle", @@ -1098,7 +1104,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 
2.0.72", + "syn 2.0.74", ] [[package]] @@ -1107,6 +1113,18 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "clickhouse-admin-api" +version = "0.1.0" +dependencies = [ + "dropshot", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", +] + [[package]] name = "clickward" version = "0.1.0" @@ -1196,13 +1214,14 @@ dependencies = [ [[package]] name = "compact_str" -version = "0.7.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" dependencies = [ "castaway", "cfg-if", "itoa", + "rustversion", "ryu", "static_assertions", ] @@ -1461,9 +1480,8 @@ checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ "bitflags 2.6.0", "crossterm_winapi", - "futures-core", "libc", - "mio", + "mio 0.8.11", "parking_lot 0.12.2", "serde", "signal-hook", @@ -1471,6 +1489,23 @@ dependencies = [ "winapi", ] +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.6.0", + "crossterm_winapi", + "futures-core", + "mio 1.0.2", + "parking_lot 0.12.2", + "rustix", + "signal-hook", + "signal-hook-mio", + "winapi", +] + [[package]] name = "crossterm_winapi" version = "0.9.1" @@ -1496,6 +1531,36 @@ dependencies = [ "serde_json", ] +[[package]] +name = "crucible-common" +version = "0.0.1" +source = "git+https://github.com/oxidecomputer/crucible?rev=e58ca3693cb9ce0438947beba10e97ee38a0966b#e58ca3693cb9ce0438947beba10e97ee38a0966b" +dependencies = [ + "anyhow", + "atty", + "crucible-workspace-hack", + "dropshot", + "nix 0.28.0", + 
"rusqlite", + "rustls-pemfile 1.0.4", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + "slog-bunyan", + "slog-dtrace", + "slog-term", + "tempfile", + "thiserror", + "tokio", + "tokio-rustls 0.24.1", + "toml 0.8.19", + "twox-hash", + "uuid", + "vergen", +] + [[package]] name = "crucible-pantry-client" version = "0.0.1" @@ -1544,7 +1609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" dependencies = [ "generic-array", - "rand_core 0.6.4", + "rand_core", "subtle", "zeroize", ] @@ -1556,7 +1621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", - "rand_core 0.6.4", + "rand_core", "typenum", ] @@ -1611,7 +1676,7 @@ dependencies = [ "curve25519-dalek-derive", "digest", "fiat-crypto", - "rand_core 0.6.4", + "rand_core", "rustc_version 0.4.0", "subtle", "zeroize", @@ -1625,7 +1690,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1649,7 +1714,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1660,7 +1725,7 @@ checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" dependencies = [ "darling_core", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1715,13 +1780,13 @@ dependencies = [ "quote", "serde", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "oxnet", 
"percent-encoding", @@ -1759,7 +1824,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1792,7 +1857,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1813,7 +1878,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1834,7 +1899,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1844,7 +1909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1857,7 +1922,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version 0.4.0", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1869,7 +1934,7 @@ dependencies = [ "dhcproto-macros", "hex", "ipnet", - "rand 0.8.5", + "rand", "thiserror", "trust-dns-proto", "url", @@ -1922,7 +1987,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -1931,7 +1996,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25" dependencies = [ - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2041,6 +2106,10 @@ dependencies = [ "dns-service-client", "dropshot", "expectorate", + "hickory-client", + "hickory-proto", + "hickory-resolver", + "hickory-server", "http 0.2.12", "omicron-test-utils", "omicron-workspace-hack", @@ -2060,10 +2129,6 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.19", - "trust-dns-client", - "trust-dns-proto", - "trust-dns-resolver", - "trust-dns-server", "uuid", ] @@ -2133,7 +2198,7 @@ dependencies = [ "progenitor", 
"progenitor-client", "quote", - "rand 0.8.5", + "rand", "regress", "reqwest", "rustfmt-wrapper", @@ -2148,7 +2213,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "async-stream", "async-trait", @@ -2163,7 +2228,7 @@ dependencies = [ "hostname 0.4.0", "http 0.2.12", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "multer", "openapiv3", "paste", @@ -2194,14 +2259,14 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", "serde", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2215,7 +2280,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2267,7 +2332,7 @@ checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "subtle", @@ -2295,7 +2360,7 @@ dependencies = [ "hkdf", "pem-rfc7468", "pkcs8", - "rand_core 0.6.4", + "rand_core", "sec1", "subtle", "zeroize", @@ -2343,6 +2408,7 @@ dependencies = [ "clap", "colored", "dhcproto", + "hickory-resolver", "http 0.2.12", "humantime", "hyper 0.14.30", @@ -2353,7 +2419,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oxide-client", - "rand 0.8.5", + "rand", "reqwest", "russh", "russh-keys", @@ -2363,7 +2429,6 @@ dependencies = [ "socket2 0.5.7", "tokio", "toml 0.8.19", - "trust-dns-resolver", "uuid", ] @@ -2385,6 +2450,18 @@ dependencies = [ 
"syn 1.0.109", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -2466,6 +2543,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fancy-regex" version = "0.13.0" @@ -2474,7 +2563,7 @@ checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ "bit-set", "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -2512,7 +2601,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ded41244b729663b1e574f1b4fb731469f69f79c17667b5d776b16cda0479449" dependencies = [ - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -2524,14 +2613,14 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", - "windows-sys 0.52.0", + "libredox", + "windows-sys 0.59.0", ] 
[[package]] @@ -2622,7 +2711,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2734,7 +2823,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -2828,7 +2917,7 @@ dependencies = [ "gateway-messages", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -2934,7 +3023,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc3655aa6818d65bc620d6911f05aa7b6aeb596291e1e9f79e52df85583d1e30" dependencies = [ "rustix", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -2946,17 +3035,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - [[package]] name = "getrandom" version = "0.2.14" @@ -2966,7 +3044,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] @@ -2986,6 +3064,19 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "git2" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724" +dependencies = [ + "bitflags 2.6.0", + "libc", + "libgit2-sys", + "log", + "url", +] + [[package]] name = "glob" version = "0.3.1" @@ -3002,7 +3093,7 @@ dependencies = [ "bstr 1.9.1", "log", "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -3023,7 
+3114,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ "ff", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -3040,7 +3131,7 @@ dependencies = [ "debug-ignore", "fixedbitset", "guppy-workspace-hack", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "nested", "once_cell", @@ -3072,7 +3163,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -3132,6 +3223,15 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "headers" version = "0.3.9" @@ -3230,6 +3330,90 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "hickory-client" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab9683b08d8f8957a857b0236455d80e1886eaa8c6178af556aa7871fb61b55" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + "futures-util", + "hickory-proto", + "once_cell", + "radix_trie", + "rand", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-proto" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07698b8420e2f0d6447a436ba999ec85d8fbf2a398bbd737b82cac4a2e96e512" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner 0.6.0", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand", + "thiserror", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" 
+version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28757f23aa75c98f254cf0405e6d8c25b831b32921b050a66692427679b1f243" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.2", + "rand", + "resolv-conf", + "smallvec 1.13.2", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-server" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be0e43c556b9b3fdb6c7c71a9a32153a2275d02419e3de809e520bfcfe40c37" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner 0.6.0", + "futures-util", + "hickory-proto", + "serde", + "thiserror", + "time", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "highway" version = "1.2.0" @@ -3432,7 +3616,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -3504,7 +3688,7 @@ dependencies = [ "hyper 0.14.30", "mime_guess", "percent-encoding", - "rand 0.8.5", + "rand", "tokio", "url", "winapi", @@ -3593,6 +3777,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -3686,9 +3880,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -3725,6 +3919,16 @@ dependencies = [ "generic-array", ] +[[package]] +name = "instability" +version = 
"0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b23a0c8dfe501baac4adf6ebbfa6eddf8f0c07f56b058cc1288017e32397846c" +dependencies = [ + "quote", + "syn 2.0.74", +] + [[package]] name = "installinator" version = "0.1.0" @@ -3734,6 +3938,7 @@ dependencies = [ "buf-list", "bytes", "camino", + "camino-tempfile", "cancel-safe-futures", "clap", "display-error-chain", @@ -3763,7 +3968,6 @@ dependencies = [ "slog-envlogger", "slog-term", "smf", - "tempfile", "test-strategy", "thiserror", "tokio", @@ -3794,6 +3998,7 @@ name = "installinator-client" version = "0.1.0" dependencies = [ "installinator-common", + "omicron-common", "omicron-workspace-hack", "progenitor", "regress", @@ -3814,6 +4019,7 @@ dependencies = [ "camino", "illumos-utils", "libc", + "omicron-common", "omicron-workspace-hack", "proptest", "schemars", @@ -3847,6 +4053,7 @@ dependencies = [ "dropshot", "expectorate", "futures", + "hickory-resolver", "hyper 0.14.30", "omicron-common", "omicron-test-utils", @@ -3861,7 +4068,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -3872,12 +4078,12 @@ dependencies = [ "anyhow", "clap", "dropshot", + "hickory-resolver", "internal-dns", "omicron-common", "omicron-workspace-hack", "slog", "tokio", - "trust-dns-resolver", ] [[package]] @@ -3994,6 +4200,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -4034,7 +4249,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" dependencies = [ "quote", - 
"syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -4092,9 +4307,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "libdlpi-sys" @@ -4134,7 +4349,7 @@ dependencies = [ "portpicker", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af)", "propolis-server-config", - "rand 0.8.5", + "rand", "regex", "reqwest", "ron 0.7.1", @@ -4153,6 +4368,18 @@ dependencies = [ "zone 0.1.8", ] +[[package]] +name = "libgit2-sys" +version = "0.17.0+1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10472326a8a6477c3c20a64547b0059e4b0d086869eee31e6d7da728a8eb7224" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + [[package]] name = "libloading" version = "0.8.3" @@ -4160,7 +4387,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -4233,6 +4460,17 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.6.0", "libc", + "redox_syscall 0.5.1", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "pkg-config", + "vcpkg", ] [[package]] @@ -4267,6 +4505,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libz-sys" +version = "1.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linear-map" version = "1.2.0" @@ -4447,7 +4697,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "anyhow", "chrono", @@ -4514,10 +4764,23 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.48.0", ] +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi 0.3.9", + "libc", + "log", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "mockall" version = "0.13.0" @@ -4541,7 +4804,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -4576,7 +4839,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -4764,7 +5027,7 @@ dependencies = [ "oxnet", "parse-display", "pq-sys", - "rand 0.8.5", + "rand", "ref-cast", "schemars", "semver 1.0.23", @@ -4833,7 +5096,7 @@ dependencies = [ "pq-sys", "predicates", "pretty_assertions", - "rand 0.8.5", + "rand", "rcgen", "ref-cast", "regex", @@ -4867,7 +5130,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "oxnet", - "rand 0.8.5", + "rand", "serde_json", ] @@ -4922,7 +5185,7 @@ dependencies = [ "omicron-workspace-hack", 
"proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -5018,7 +5281,7 @@ dependencies = [ "debug-ignore", "expectorate", "gateway-client", - "indexmap 2.3.0", + "indexmap 2.4.0", "internal-dns", "ipnet", "maplit", @@ -5032,7 +5295,7 @@ dependencies = [ "omicron-workspace-hack", "oxnet", "proptest", - "rand 0.8.5", + "rand", "sled-agent-client", "slog", "static_assertions", @@ -5098,7 +5361,10 @@ dependencies = [ "omicron-workspace-hack", "schemars", "serde", + "serde_json", "sled-hardware-types", + "strum", + "thiserror", "uuid", ] @@ -5133,6 +5399,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "headers", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "illumos-utils", @@ -5159,7 +5426,6 @@ dependencies = [ "slog", "tokio", "tokio-util", - "trust-dns-resolver", "uuid", ] @@ -5169,7 +5435,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -5178,17 +5444,22 @@ version = "0.1.0" dependencies = [ "anyhow", "api_identity", + "async-trait", "base64 0.22.1", "chrono", "clap", + "cookie 0.18.1", "derive-where", "derive_more", "dns-service-client", + "dropshot", "futures", "gateway-client", + "http 0.2.12", "humantime", "ipnetwork", "newtype-uuid", + "newtype_derive", "nexus-sled-agent-shared", "omicron-common", "omicron-passwords", @@ -5196,6 +5467,7 @@ dependencies = [ "omicron-workspace-hack", "openssl", "oxnet", + "oxql-types", "parse-display", "proptest", "schemars", @@ -5296,7 +5568,7 @@ checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" dependencies = [ "num-integer", "num-traits", - "rand 0.8.5", + "rand", ] [[package]] @@ -5311,7 +5583,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand 0.8.5", + "rand", "serde", "smallvec 1.13.2", "zeroize", @@ -5340,7 +5612,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 
2.0.74", ] [[package]] @@ -5408,7 +5680,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "syn 1.0.109", @@ -5496,6 +5768,41 @@ dependencies = [ "thiserror", ] +[[package]] +name = "omicron-clickhouse-admin" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-api", + "dropshot", + "expectorate", + "http 0.2.12", + "illumos-utils", + "nexus-test-utils", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "openapi-lint", + "openapiv3", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "subprocess", + "thiserror", + "tokio", + "tokio-postgres", + "toml 0.8.19", + "url", +] + [[package]] name = "omicron-cockroach-admin" version = "0.1.0" @@ -5566,7 +5873,7 @@ dependencies = [ "progenitor", "progenitor-client", "proptest", - "rand 0.8.5", + "rand", "regress", "reqwest", "schemars", @@ -5710,6 +6017,7 @@ dependencies = [ "cockroach-admin-client", "criterion", "crucible-agent-client", + "crucible-common", "crucible-pantry-client", "diesel", "display-error-chain", @@ -5725,6 +6033,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", + "hickory-resolver", "http 0.2.12", "httptest", "hubtools", @@ -5775,6 +6084,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "oxnet", + "oxql-types", "parse-display", "paste", "pem", @@ -5783,7 +6093,7 @@ dependencies = [ "pretty_assertions", "progenitor-client", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "rcgen", "ref-cast", "regex", @@ -5800,6 +6110,7 @@ dependencies = [ "serde_with", "similar-asserts", "sled-agent-client", + "sled-agent-types", "slog", 
"slog-async", "slog-dtrace", @@ -5816,7 +6127,6 @@ dependencies = [ "tokio-postgres", "tokio-util", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", @@ -5833,7 +6143,7 @@ dependencies = [ "camino-tempfile", "chrono", "clap", - "crossterm", + "crossterm 0.28.1", "crucible-agent-client", "csv", "diesel", @@ -5844,10 +6154,12 @@ dependencies = [ "gateway-client", "gateway-messages", "gateway-test-utils", + "http 0.2.12", "humantime", "indicatif", "internal-dns", "ipnetwork", + "itertools 0.13.0", "multimap", "nexus-client", "nexus-config", @@ -5928,7 +6240,7 @@ dependencies = [ "clap", "criterion", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "rust-argon2", "schemars", "serde", @@ -6029,8 +6341,6 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "once_cell", - "openapi-lint", - "openapiv3", "opte-ioctl", "oximeter", "oximeter-instruments", @@ -6039,7 +6349,7 @@ dependencies = [ "pretty_assertions", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", "propolis-mock-server", - "rand 0.8.5", + "rand", "rcgen", "reqwest", "schemars", @@ -6048,6 +6358,7 @@ dependencies = [ "serde_human_bytes", "serde_json", "sha3", + "sled-agent-api", "sled-agent-client", "sled-agent-types", "sled-hardware", @@ -6132,14 +6443,15 @@ dependencies = [ "anyhow", "base16ct", "base64 0.22.1", + "base64ct", "bit-set", "bit-vec", "bitflags 1.3.2", "bitflags 2.6.0", - "bstr 0.2.17", "bstr 1.9.1", "byteorder", "bytes", + "cc", "chrono", "cipher", "clap", @@ -6148,7 +6460,6 @@ dependencies = [ "const-oid", "crossbeam-epoch", "crossbeam-utils", - "crossterm", "crypto-common", "der", "digest", @@ -6167,13 +6478,14 @@ dependencies = [ "futures-util", "gateway-messages", "generic-array", - "getrandom 0.2.14", + "getrandom", "group", "hashbrown 0.14.5", "hex", + "hickory-proto", "hmac", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "inout", "itertools 0.10.5", "itertools 
0.12.1", @@ -6184,9 +6496,10 @@ dependencies = [ "log", "managed", "memchr", - "mio", + "mio 0.8.11", + "nix 0.28.0", "nom", - "num-bigint", + "num-bigint-dig", "num-integer", "num-iter", "num-traits", @@ -6195,29 +6508,35 @@ dependencies = [ "peg-runtime", "pem-rfc7468", "petgraph", + "pkcs8", "postgres-types", "predicates", "proc-macro2", + "quote", "regex", "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", "reqwest", "ring 0.17.8", + "rsa", "rustix", "schemars", "scopeguard", "semver 1.0.23", "serde", "serde_json", + "sha1", "sha2", + "signal-hook-mio", "similar", "slog", "smallvec 1.13.2", + "socket2 0.5.7", "spin 0.9.8", "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.72", + "syn 2.0.74", "time", "time-macros", "tokio", @@ -6229,14 +6548,12 @@ dependencies = [ "toml_edit 0.19.15", "toml_edit 0.22.20", "tracing", - "trust-dns-proto", "unicode-bidi", "unicode-normalization", "unicode-xid", "usdt", "usdt-impl", "uuid", - "yasna", "zerocopy 0.7.34", "zeroize", ] @@ -6297,7 +6614,7 @@ version = "0.4.0" source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", - "indexmap 2.3.0", + "indexmap 2.4.0", "lazy_static", "openapiv3", "regex", @@ -6312,6 +6629,7 @@ dependencies = [ "bootstrap-agent-api", "camino", "clap", + "clickhouse-admin-api", "cockroach-admin-api", "dns-server-api", "dropshot", @@ -6322,22 +6640,33 @@ dependencies = [ "nexus-internal-api", "omicron-workspace-hack", "openapi-lint", + "openapi-manager-types", "openapiv3", "owo-colors", "oximeter-api", "serde_json", "similar", + "sled-agent-api", "supports-color", "wicketd-api", ] +[[package]] +name = "openapi-manager-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "omicron-workspace-hack", +] + [[package]] name = "openapiv3" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_json", ] @@ -6365,7 +6694,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -6474,18 +6803,18 @@ dependencies = [ "base64 0.22.1", "chrono", "futures", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "regress", "reqwest", "serde", "serde_json", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -6512,11 +6841,12 @@ dependencies = [ "chrono", "clap", "omicron-workspace-hack", - "oximeter-impl", "oximeter-macro-impl", + "oximeter-schema", "oximeter-timeseries-macro", + "oximeter-types", "prettyplease", - "syn 2.0.72", + "syn 2.0.74", "toml 0.8.19", "uuid", ] @@ -6573,7 +6903,7 @@ dependencies = [ "oximeter-api", "oximeter-client", "oximeter-db", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -6604,18 +6934,20 @@ dependencies = [ "chrono", "clap", "clickward", - "crossterm", + "crossterm 0.28.1", "dropshot", "expectorate", "futures", "highway", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "num", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "oximeter-test-utils", + "oxql-types", "peg", "reedline", "regex", @@ -6639,39 +6971,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "oximeter-impl" -version = "0.1.0" -dependencies = [ - "approx", - "bytes", - "chrono", - "criterion", - "float-ord", - "heck 0.5.0", - "num", - "omicron-common", - "omicron-workspace-hack", - "oximeter-macro-impl", - "prettyplease", - "proc-macro2", - "quote", - "rand 0.8.5", - "rand_distr", - "regex", - "rstest", - "schemars", - "serde", - "serde_json", - "slog-error-chain", - "strum", - "syn 2.0.72", - "thiserror", - "toml 0.8.19", - "trybuild", - "uuid", -] - [[package]] name = 
"oximeter-instruments" version = "0.1.0" @@ -6686,7 +6985,7 @@ dependencies = [ "libc", "omicron-workspace-hack", "oximeter", - "rand 0.8.5", + "rand", "schemars", "serde", "slog", @@ -6704,7 +7003,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -6732,15 +7031,73 @@ dependencies = [ "uuid", ] +[[package]] +name = "oximeter-schema" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "heck 0.5.0", + "omicron-workspace-hack", + "oximeter-types", + "prettyplease", + "proc-macro2", + "quote", + "schemars", + "serde", + "slog-error-chain", + "syn 2.0.74", + "toml 0.8.19", +] + +[[package]] +name = "oximeter-test-utils" +version = "0.1.0" +dependencies = [ + "chrono", + "omicron-workspace-hack", + "oximeter-macro-impl", + "oximeter-types", + "uuid", +] + [[package]] name = "oximeter-timeseries-macro" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "oximeter-impl", + "oximeter-schema", + "oximeter-types", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", +] + +[[package]] +name = "oximeter-types" +version = "0.1.0" +dependencies = [ + "approx", + "bytes", + "chrono", + "criterion", + "float-ord", + "num", + "omicron-common", + "omicron-workspace-hack", + "oximeter-macro-impl", + "rand", + "rand_distr", + "regex", + "rstest", + "schemars", + "serde", + "serde_json", + "strum", + "thiserror", + "trybuild", + "uuid", ] [[package]] @@ -6767,6 +7124,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "oxql-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "highway", + "num", + "omicron-workspace-hack", + "oximeter-types", + "schemars", + "serde", +] + [[package]] name = "p256" version = "0.13.2" @@ -6779,6 +7150,18 @@ dependencies = [ "sha2", ] +[[package]] +name = "p384" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70786f51bcc69f6a4c0360e063a4cac5419ef7c5cd5b3c99ad70f3be5ba79209" 
+dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2", +] + [[package]] name = "p521" version = "0.13.3" @@ -6789,7 +7172,7 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "rand_core 0.6.4", + "rand_core", "sha2", ] @@ -6871,32 +7254,32 @@ dependencies = [ "libc", "redox_syscall 0.5.1", "smallvec 1.13.2", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] name = "parse-display" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "914a1c2265c98e2446911282c6ac86d8524f495792c38c5bd884f80499c7538a" +checksum = "287d8d3ebdce117b8539f59411e4ed9ec226e0a4153c7f55495c6070d68e6f72" dependencies = [ "parse-display-derive", "regex", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] name = "parse-display-derive" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ae7800a4c974efd12df917266338e79a7a74415173caf7e70aa0a0707345281" +checksum = "7fc048687be30d79502dea2f623d052f3a074012c6eac41726b7ab17213616b1" dependencies = [ "proc-macro2", "quote", "regex", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", "structmeta 0.3.0", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -6924,7 +7307,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -6935,7 +7318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -6979,6 +7362,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ "digest", + "hmac", ] [[package]] @@ -7064,7 +7448,7 @@ dependencies 
= [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -7085,7 +7469,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", ] @@ -7134,7 +7518,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -7160,6 +7544,21 @@ dependencies = [ "spki", ] +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2 0.12.2", + "scrypt", + "sha2", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -7167,6 +7566,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", + "pkcs5", + "rand_core", "spki", ] @@ -7266,7 +7667,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" dependencies = [ - "rand 0.8.5", + "rand", ] [[package]] @@ -7289,11 +7690,11 @@ dependencies = [ "base64 0.22.1", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "hmac", "md-5", "memchr", - "rand 0.8.5", + "rand", "sha2", "stringprep", ] @@ -7306,7 +7707,7 @@ checksum = "02048d9e032fb3cc3413bbf7b83a15d84a5d419778e2628751896d856498eee9" dependencies = [ "bytes", "chrono", - "fallible-iterator", + "fallible-iterator 0.2.0", "postgres-protocol", "serde", "serde_json", @@ -7398,7 +7799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" 
dependencies = [ "proc-macro2", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -7420,6 +7821,15 @@ dependencies = [ "toml_edit 0.19.15", ] +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -7486,7 +7896,7 @@ dependencies = [ "getopts", "heck 0.5.0", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "openapiv3", "proc-macro2", "quote", @@ -7494,7 +7904,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.72", + "syn 2.0.74", "thiserror", "typify", "unicode-ident", @@ -7514,7 +7924,7 @@ dependencies = [ "serde_json", "serde_tokenstream", "serde_yaml", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -7556,7 +7966,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7577,7 +7987,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7603,7 +8013,7 @@ dependencies = [ "hyper 0.14.30", "progenitor", "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7660,10 +8070,10 @@ dependencies = [ "bitflags 2.6.0", "lazy_static", "num-traits", - "rand 0.8.5", - "rand_chacha 0.3.1", + "rand", + "rand_chacha", "rand_xorshift", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", "rusty-fork", "tempfile", "unarray", @@ -7737,19 +8147,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 
0.5.1", - "rand_hc", -] - [[package]] name = "rand" version = "0.8.5" @@ -7757,18 +8154,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", + "rand_chacha", + "rand_core", ] [[package]] @@ -7778,16 +8165,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", + "rand_core", ] [[package]] @@ -7796,7 +8174,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -7806,16 +8184,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" dependencies = [ "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", + "rand", ] [[package]] @@ -7824,7 +8193,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4a9febe641d2842ffc76ee962668a17578767c4e01735e4802b21ed9a24b2e4e" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -7833,23 +8202,23 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] name = "ratatui" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16546c5b5962abf8ce6e2881e722b4e0ae3b6f1a08a26ae3573c55853ca68d3" +checksum = "5ba6a365afbe5615999275bea2446b970b10a41102500e27ce7678d50d978303" dependencies = [ "bitflags 2.6.0", "cassowary", "compact_str", - "crossterm", + "crossterm 0.28.1", + "instability", "itertools 0.13.0", "lru", "paste", - "stability", "strum", "strum_macros 0.26.4", "unicode-segmentation", @@ -7902,7 +8271,7 @@ dependencies = [ "dropshot", "expectorate", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "nexus-client", "nexus-db-queries", "nexus-reconfigurator-execution", @@ -7964,19 +8333,19 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ - "getrandom 0.2.14", + "getrandom", "libredox", "thiserror", ] [[package]] name = "reedline" -version = "0.31.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ebc241ed0ccea0bbbd775a55a76f0dd9971ef084589dea938751a03ffedc14" +checksum = "2f8c676a3f3814a23c6a0fc9dff6b6c35b2e04df8134aae6f3929cc34de21a53" dependencies = [ "chrono", - "crossterm", + "crossterm 0.27.0", "fd-lock", "itertools 0.12.1", "nu-ansi-term", @@ -8006,7 +8375,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8018,7 +8387,7 @@ dependencies = [ "aho-corasick", 
"memchr", "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -8035,7 +8404,7 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -8046,9 +8415,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "regress" @@ -8172,7 +8541,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.14", + "getrandom", "libc", "spin 0.9.8", "untrusted 0.9.0", @@ -8226,7 +8595,7 @@ dependencies = [ "num-traits", "pkcs1", "pkcs8", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "signature", @@ -8237,9 +8606,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" +checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" dependencies = [ "futures", "futures-timer", @@ -8249,18 +8618,19 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" +checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" dependencies = [ "cfg-if", "glob", + "proc-macro-crate 3.1.0", "proc-macro2", "quote", "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.72", + "syn 2.0.74", "unicode-ident", ] @@ -8274,21 +8644,37 @@ 
dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.6.0", + "fallible-iterator 0.3.0", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec 1.13.2", +] + [[package]] name = "russh" -version = "0.43.0" +version = "0.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c9534703dc13be1eefc5708618f4c346da8e4f04f260218613f351ed5e94259" +checksum = "6500eedfaf8cd81597899d896908a4b9cd5cb566db875e843c04ccf92add2c16" dependencies = [ "aes", "aes-gcm", "async-trait", "bitflags 2.6.0", "byteorder", + "cbc", "chacha20", "ctr", "curve25519-dalek", "digest", + "elliptic-curve", "flate2", "futures", "generic-array", @@ -8297,16 +8683,21 @@ dependencies = [ "log", "num-bigint", "once_cell", + "p256", + "p384", + "p521", "poly1305", - "rand 0.8.5", + "rand", + "rand_core", "russh-cryptovec", "russh-keys", "sha1", "sha2", + "ssh-encoding", + "ssh-key", "subtle", "thiserror", "tokio", - "tokio-util", ] [[package]] @@ -8321,41 +8712,53 @@ dependencies = [ [[package]] name = "russh-keys" -version = "0.43.0" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4a5afa2fab6fd49d0c470a3b75c3c70a4f363c38db32df5ae3b44a3abf5ab9" +checksum = "fb8c0bfe024d4edd242f65a2ac6c8bf38a892930050b9eb90909d8fc2c413c8d" dependencies = [ "aes", "async-trait", "bcrypt-pbkdf", - "bit-vec", "block-padding", "byteorder", "cbc", "ctr", "data-encoding", + "der", + "digest", "dirs", + "ecdsa", "ed25519-dalek", + "elliptic-curve", "futures", "hmac", "inout", "log", "md5", - "num-bigint", "num-integer", "p256", + "p384", "p521", "pbkdf2 0.11.0", - "rand 0.7.3", - "rand_core 0.6.4", + "pkcs1", + "pkcs5", + "pkcs8", + "rand", + "rand_core", + "rsa", "russh-cryptovec", + "sec1", "serde", "sha1", 
"sha2", + "spki", + "ssh-encoding", + "ssh-key", "thiserror", "tokio", "tokio-stream", - "yasna", + "typenum", + "zeroize", ] [[package]] @@ -8574,6 +8977,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "samael" version = "0.0.15" @@ -8594,7 +9006,7 @@ dependencies = [ "openssl-sys", "pkg-config", "quick-xml", - "rand 0.8.5", + "rand", "serde", "thiserror", "url", @@ -8652,7 +9064,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8678,7 +9090,18 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", +] + +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2 0.12.2", + "salsa20", + "sha2", ] [[package]] @@ -8754,9 +9177,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.205" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] @@ -8792,13 +9215,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = 
"24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8809,7 +9232,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8823,9 +9246,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", "memchr", @@ -8860,7 +9283,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8874,14 +9297,14 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8898,15 +9321,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73139bc5ec2d45e6c5fd85be5a46949c1c39a4c18e56915f5eb4c12f975e377" +checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", "serde_json", @@ -8916,14 +9339,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.8.3" +version = "3.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b80d3d6b56b64335c0180e5ffde23b3c5e08c14c585b51a15bd0e95393f46703" +checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -8932,7 +9355,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "itoa", "ryu", "serde", @@ -8995,12 +9418,13 @@ dependencies = [ [[package]] name = "signal-hook-mio" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" +checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" dependencies = [ "libc", - "mio", + "mio 0.8.11", + "mio 1.0.2", "signal-hook", ] @@ -9032,7 +9456,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -9046,11 +9470,11 @@ dependencies = [ [[package]] name = "similar" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" dependencies = [ - "bstr 0.2.17", + "bstr 1.9.1", "unicode-segmentation", ] @@ -9095,6 +9519,23 @@ dependencies = [ "parking_lot 0.11.2", ] +[[package]] +name = "sled-agent-api" +version = "0.1.0" +dependencies = [ + "camino", + "dropshot", + "nexus-sled-agent-shared", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", + "sled-agent-types", + "sled-hardware-types", + 
"uuid", +] + [[package]] name = "sled-agent-client" version = "0.1.0" @@ -9122,21 +9563,25 @@ name = "sled-agent-types" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "bootstore", "camino", "camino-tempfile", + "chrono", "nexus-sled-agent-shared", "omicron-common", "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", "oxnet", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", "rcgen", "schemars", "serde", + "serde_human_bytes", "serde_json", + "sha3", "sled-hardware-types", - "sled-storage", "slog", "thiserror", "toml 0.8.19", @@ -9162,7 +9607,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "sled-hardware-types", @@ -9206,7 +9651,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "serde_json", @@ -9292,7 +9737,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9419,7 +9864,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9517,17 +9962,58 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] -name = "stability" +name = "ssh-cipher" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ff9eaf853dec4c8802325d8b6d3dffa86cc707fd7a1a4cdbf416e13b061787a" +checksum = "caac132742f0d33c3af65bfcde7f6aa8f62f0e991d80db99149eb9d44708784f" dependencies = [ - "quote", - "syn 2.0.72", + "aes", + "aes-gcm", + "cbc", + "chacha20", + "cipher", + "ctr", + "poly1305", + "ssh-encoding", + "subtle", +] + +[[package]] +name = "ssh-encoding" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9242b9ef4108a78e8cd1a2c98e193ef372437f8c22be363075233321dd4a15" +dependencies = [ + "base64ct", + "pem-rfc7468", + "sha2", +] + +[[package]] +name = "ssh-key" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca9b366a80cf18bb6406f4cf4d10aebfb46140a8c0c33f666a144c5c76ecbafc" +dependencies = [ + "bcrypt-pbkdf", + "ed25519-dalek", + "num-bigint-dig", + "p256", + "p384", + "p521", + "rand_core", + "rsa", + "sec1", + "sha2", + "signature", + "ssh-cipher", + "ssh-encoding", + "subtle", + "zeroize", ] [[package]] @@ -9613,7 +10099,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.2.0", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9625,7 +10111,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.3.0", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9636,7 +10122,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9647,7 +10133,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9682,7 +10168,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9695,7 +10181,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -9742,9 +10228,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -9918,7 +10404,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta 0.2.0", - "syn 2.0.72", 
+ "syn 2.0.74", ] [[package]] @@ -9949,7 +10435,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -10086,7 +10572,7 @@ checksum = "8d9ef545650e79f30233c0003bcc2504d7efac6dad25fca40744de773fe2049c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -10139,7 +10625,7 @@ dependencies = [ "backtrace", "bytes", "libc", - "mio", + "mio 0.8.11", "num_cpus", "parking_lot 0.12.2", "pin-project-lite", @@ -10157,7 +10643,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -10179,7 +10665,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "futures-channel", "futures-util", "log", @@ -10189,7 +10675,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.8.5", + "rand", "socket2 0.5.7", "tokio", "tokio-util", @@ -10265,15 +10751,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.8" @@ -10313,20 +10790,31 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_spanned", "toml_datetime", "winnow 0.5.40", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.4.0", + "toml_datetime", + "winnow 0.5.40", +] + 
[[package]] name = "toml_edit" version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_spanned", "toml_datetime", @@ -10434,7 +10922,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -10446,26 +10934,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "trust-dns-client" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c408c32e6a9dbb38037cece35740f2cf23c875d8ca134d33631cec83f74d3fe" -dependencies = [ - "cfg-if", - "data-encoding", - "futures-channel", - "futures-util", - "lazy_static", - "radix_trie", - "rand 0.8.5", - "thiserror", - "time", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = "trust-dns-proto" version = "0.22.0" @@ -10475,64 +10943,21 @@ dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.5.1", "futures-channel", "futures-io", "futures-util", "idna 0.2.3", "ipnet", "lazy_static", - "rand 0.8.5", + "rand", "smallvec 1.13.2", "thiserror", "tinyvec", - "tokio", "tracing", "url", ] -[[package]] -name = "trust-dns-resolver" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aff21aa4dcefb0a1afbfac26deb0adc93888c7d295fb63ab273ef276ba2b7cfe" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lazy_static", - "lru-cache", - "parking_lot 0.12.2", - "resolv-conf", - "smallvec 1.13.2", - "thiserror", - "tokio", - "tracing", - "trust-dns-proto", -] - -[[package]] -name = "trust-dns-server" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99022f9befa6daec2a860be68ac28b1f0d9d7ccf441d8c5a695e35a58d88840d" -dependencies = [ - "async-trait", - 
"bytes", - "cfg-if", - "enum-as-inner", - "futures-executor", - "futures-util", - "serde", - "thiserror", - "time", - "tokio", - "toml 0.5.11", - "tracing", - "trust-dns-client", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -10601,7 +11026,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "parse-size", - "rand 0.8.5", + "rand", "ring 0.17.8", "serde", "serde_json", @@ -10618,9 +11043,9 @@ dependencies = [ [[package]] name = "tui-tree-widget" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ac69db35529be6a75f9d27516ff33df299e2e8e961a1986d52185cef0427352" +checksum = "df0b54061d997162f225bed5d2147574af0648480214759a000e33f6cea0017a" dependencies = [ "ratatui", "unicode-width", @@ -10638,7 +11063,7 @@ dependencies = [ "http 0.2.12", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", @@ -10657,13 +11082,24 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", "utf-8", ] +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "rand", + "static_assertions", +] + [[package]] name = "typed-path" version = "0.7.1" @@ -10676,8 +11112,8 @@ version = "0.1.0" dependencies = [ "newtype-uuid", "omicron-workspace-hack", - "rand 0.8.5", - "rand_core 0.6.4", + "rand", + "rand_core", "rand_seeder", "uuid", ] @@ -10711,7 +11147,7 @@ dependencies = [ "semver 1.0.23", "serde", "serde_json", - "syn 2.0.72", + "syn 2.0.74", "thiserror", "unicode-ident", ] @@ -10728,7 +11164,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", "typify-impl", ] @@ -10861,7 +11297,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "sha2", 
"slog", "thiserror", @@ -10887,7 +11323,7 @@ dependencies = [ "derive-where", "either", "futures", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "libsw", "linear-map", @@ -10945,7 +11381,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", "usdt-impl", ] @@ -10963,7 +11399,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.72", + "syn 2.0.74", "thiserror", "thread-id", "version_check", @@ -10979,7 +11415,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream", - "syn 2.0.72", + "syn 2.0.74", "usdt-impl", ] @@ -11001,7 +11437,7 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ - "getrandom 0.2.14", + "getrandom", "serde", ] @@ -11021,6 +11457,22 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "cargo_metadata", + "cfg-if", + "git2", + "regex", + "rustc_version 0.4.0", + "rustversion", + "time", +] + [[package]] name = "version_check" version = "0.9.5" @@ -11053,9 +11505,9 @@ dependencies = [ "curve25519-dalek", "elliptic-curve", "hex", - "rand 0.8.5", - "rand_chacha 0.3.1", - "rand_core 0.6.4", + "rand", + "rand_chacha", + "rand_core", "serde", "subtle", "thiserror-no-std", @@ -11119,12 +11571,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -11158,7 
+11604,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-shared", ] @@ -11192,7 +11638,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -11265,11 +11711,11 @@ dependencies = [ "camino", "ciborium", "clap", - "crossterm", + "crossterm 0.28.1", "expectorate", "futures", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "itertools 0.13.0", "maplit", @@ -11339,7 +11785,7 @@ dependencies = [ "camino", "ciborium", "clap", - "crossterm", + "crossterm 0.28.1", "omicron-workspace-hack", "reedline", "serde", @@ -11378,6 +11824,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", + "hickory-resolver", "http 0.2.12", "hubtools", "hyper 0.14.30", @@ -11400,7 +11847,7 @@ dependencies = [ "openapi-lint", "openapiv3", "oxnet", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -11417,7 +11864,6 @@ dependencies = [ "tokio-util", "toml 0.8.19", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", @@ -11513,7 +11959,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -11522,7 +11968,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -11540,7 +11986,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] 
+name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -11560,18 +12015,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -11582,9 +12037,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -11594,9 +12049,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ 
-11606,15 +12061,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -11624,9 +12079,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -11636,9 +12091,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -11648,9 +12103,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = 
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -11660,9 +12115,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -11774,8 +12229,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ - "bit-vec", - "num-bigint", "time", ] @@ -11807,7 +12260,7 @@ checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -11818,7 +12271,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] @@ -11838,7 +12291,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.74", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 536941a72d..cfb097ef3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,8 @@ members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -29,6 +31,7 @@ members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -75,16 +78,20 @@ members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", 
"oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", "sled-agent", + "sled-agent/api", "sled-agent/bootstrap-agent-api", "sled-agent/types", "sled-hardware", @@ -111,6 +118,8 @@ default-members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -137,6 +146,7 @@ default-members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -186,16 +196,20 @@ default-members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", "sled-agent", + "sled-agent/api", "sled-agent/bootstrap-agent-api", "sled-agent/types", "sled-hardware", @@ -267,7 +281,7 @@ anstyle = "1.0.8" api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" -assert_cmd = "2.0.15" +assert_cmd = "2.0.16" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } async-trait = "0.1.81" atomicwrites = "0.4.3" @@ -292,6 +306,7 @@ cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickhouse-admin-api = { path = "clickhouse-admin/api" } clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" } 
cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } @@ -301,10 +316,11 @@ const_format = "0.2.32" cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" -crossterm = { version = "0.27.0", features = ["event-stream"] } +crossterm = { version = "0.28.1", features = ["event-stream"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } +crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.9" @@ -326,7 +342,7 @@ dyn-clone = "1.0.17" either = "1.13.0" expectorate = "1.1.0" fatfs = "0.3.6" -filetime = "0.2.23" +filetime = "0.2.24" flate2 = "1.0.31" float-ord = "0.3.2" flume = "0.11.0" @@ -354,6 +370,10 @@ headers = "0.3.9" heck = "0.5" hex = "0.4.3" hex-literal = "0.4.1" +hickory-client = "0.24.1" +hickory-proto = "0.24.1" +hickory-resolver = "0.24.1" +hickory-server = "0.24.1" highway = "1.2.0" hkdf = "0.12.4" http = "0.2.12" @@ -365,7 +385,7 @@ hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indent_write = "2.2.0" -indexmap = "2.3.0" +indexmap = "2.4.0" indicatif = { version = "0.17.8", features = ["rayon"] } installinator = { path = "installinator" } installinator-api = { path = "installinator-api" } @@ -380,7 +400,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } kstat-rs = "0.2.4" -libc = "0.2.155" +libc = "0.2.156" libfalcon = { git = 
"https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" } linear-map = "1.2.0" @@ -388,8 +408,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } @@ -414,6 +434,7 @@ nexus-test-utils = { path = "nexus/test-utils" } nexus-types = { path = "nexus/types" } num-integer = "0.1.46" num = { version = "0.4.3", default-features = false, features = [ "libm" ] } +omicron-clickhouse-admin = { path = "clickhouse-admin" } omicron-certificates = { path = "certificates" } omicron-cockroach-admin = { path = "cockroach-admin" } omicron-common = { path = "common" } @@ -434,6 +455,7 @@ oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +openapi-manager-types = { path = "dev-tools/openapi-manager/types" } openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" @@ -446,13 +468,16 @@ oximeter-api = { path = "oximeter/api" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/", default-features = false } oximeter-collector = { path = "oximeter/collector" } -oximeter-impl = { path = "oximeter/impl" } oximeter-instruments = { path = "oximeter/instruments" } oximeter-macro-impl = { path = "oximeter/oximeter-macro-impl" } oximeter-producer = { path = "oximeter/producer" } +oximeter-schema = { path = "oximeter/schema" } +oximeter-test-utils = { path = "oximeter/test-utils" } oximeter-timeseries-macro = { path = "oximeter/timeseries-macro" } +oximeter-types = { path = "oximeter/types" } +oxql-types = { path = "oximeter/oxql-types" } p256 = "0.13" -parse-display = "0.9.1" +parse-display = "0.10.0" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } parse-size = "1.0.0" paste = "1.0.15" @@ -477,17 +502,17 @@ rand = "0.8.5" rand_core = "0.6.4" rand_distr = "0.4.3" rand_seeder = "0.3.0" -ratatui = "0.27.0" +ratatui = "0.28.0" rayon = "1.10" rcgen = "0.12.1" -reedline = "0.31.0" +reedline = "0.33.0" ref-cast = "1.0" regex = "1.10.6" regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" -rstest = "0.19.0" +rstest = "0.22.0" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.3" @@ -498,18 +523,18 @@ secrecy = "0.8.0" semver = { version = "1.0.23", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } serde_human_bytes = { git = "https://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.122" +serde_json = "1.0.125" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.8.3" +serde_with = "3.9.0" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } sigpipe = "0.1.3" 
-similar = { version = "2.5.0", features = ["bytes"] } +similar = { version = "2.6.0", features = ["bytes"] } similar-asserts = "1.5.0" # Don't change sled's version on accident; sled's on-disk format is not yet # stable and requires manual migrations. In the limit this won't matter because @@ -517,6 +542,7 @@ similar-asserts = "1.5.0" # are still doing mupdate a change to the on-disk format will break existing DNS # server zones. sled = "=0.34.7" +sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } sled-hardware = { path = "sled-hardware" } @@ -562,14 +588,10 @@ tokio-util = { version = "0.7.11", features = ["io", "io-util"] } toml = "0.8.19" toml_edit = "0.22.20" tough = { version = "0.17.1", features = [ "http" ] } -trust-dns-client = "0.22" -trust-dns-proto = "0.22" -trust-dns-resolver = "0.22" -trust-dns-server = "0.22" trybuild = "1.0.99" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } -tui-tree-widget = "0.21.0" +tui-tree-widget = "0.22.0" typed-rng = { path = "typed-rng" } unicode-width = "0.1.13" update-common = { path = "update-common" } @@ -724,8 +746,6 @@ opt-level = 3 opt-level = 3 [profile.dev.package.rand_core] opt-level = 3 -[profile.dev.package.rand_hc] -opt-level = 3 [profile.dev.package.rand_xorshift] opt-level = 3 [profile.dev.package.rsa] diff --git a/README.adoc b/README.adoc index 6b24821c6e..d48a5c9736 100644 --- a/README.adoc +++ b/README.adoc @@ -14,7 +14,7 @@ Omicron is open-source. But we're pretty focused on our own goals for the forese https://docs.oxide.computer/api[Docs are automatically generated for the public (externally-facing) API] based on the OpenAPI spec that itself is automatically generated from the server implementation. 
You can generate your own docs for either the public API or any of the internal APIs by feeding the corresponding OpenAPI specs (in link:./openapi[]) into an OpenAPI doc generator. -There are some internal design docs in the link:./docs[] directory. +There are some internal design docs in the link:./docs[] directory. You might start with link:./docs/control-plane-architecture.adoc[]. For more design documentation and internal Rust API docs, see the https://rust.docs.corp.oxide.computer/omicron/[generated Rust documentation]. You can generate this yourself with: @@ -223,7 +223,7 @@ Note that Omicron contains a nominally circular dependency: We effectively "break" this circular dependency by virtue of the OpenAPI documents being checked in. -==== Updating Managed Services +==== Updating or Creating New Managed Services See the documentation in link:./dev-tools/openapi-manager[`dev-tools/openapi-manager`] for more information. diff --git a/TODO.adoc b/TODO.adoc deleted file mode 100644 index 40c38e14b3..0000000000 --- a/TODO.adoc +++ /dev/null @@ -1,113 +0,0 @@ -:showtitle: -:icons: font - -= TODO - -API endpoints: - -* RFD 24: regions, AZs, etc -* (lots more) - -Work queue (see also: existing GitHub issues): - -* use CARGO_BIN_EXE for paths to binaries -https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates -* dropshot: allow consumers to provide error codes for dropshot errors -* general maintenance and cleanup -** replace &Arc with &T, and some instances of Arc as well -** all identifiers could be newtypes, with a prefix for the type (like AWS - "i-123" for instances) -** rethinking ApiError a bit -- should it use thiserror, or at least impl - std::error::Error? 
-** scope out switching to sync (see RFD 79) -** proper config + logging for sled agent -* settle on an approach for modification of resources and implement it once -* implement behavior of server restarting (e.g., sled agent starting up) -** This would help validate some of the architectural choices. Current thinking - is that this will notify OXCP of the restart, and OXCP will find instances - that are supposed to be on that server and run instance_ensure(). It will - also want to do that for the disks associated with those instances. - IMPORTANT: this process should also _remove_ any resources that are currently - on that system, so the notification to OXCP about a restart may need to - include the list of resources that the SA knows about and their current - states. -* implement audit log -* implement alerts -* implement external user authentication -* implement external user authorization mechanism -* implement throttling and load shedding described in RFD 6 -* implement hardening in RFD 10 -* implement ETag / If-Match / If-None-Match -* implement limits for all types of resources -* implement scheme for API versioning -** how to identify the requested version -- header or URI? -** translators for older versions? -** integration of supported API versions into build artifact? -** Should all the uses of serde_json disallow unrecognized fields? Should any? -* debugging/monitoring: Prometheus? -* debugging/monitoring: OpenTracing? OpenTelemetry? -* debugging/monitoring: Dynamic tracing? -* debugging/monitoring: Core files? -* Automated testing -** General API testing: there's a lot of boilerplate in hand-generated tests - for each kind of resource. 
Would it be reasonable / possible to have a sort - of omnibus test that's given the OpenAPI spec (or something like it), - creates a hierarchy with at least one of every possible resource, and does - things like: For each possible resource -*** attempt to (create, get, put, delete) one with an invalid name -*** attempt to (GET, DELETE, PUT) one that does not exist -*** attempt to create one with invalid JSON -*** attempt to create one with a duplicate name of the one we know about -*** exercise list operation with marker and limit (may need to create many of them) -*** for each required input property: -**** attempt to create a resource without that property -*** for each input property: attempt to create a resource with invalid values - for that property -*** list instances of that resource and expect to find the one we know about -*** GET the one instance we know about -*** DELETE the one instance we know about -*** GET the one instance we know about again and expect it to fail -*** list instances again and expect to find nothing -* We will need archivers for deleted records -- especially saga logs - -External dependencies / open questions: - -* Should we create a more first-class notion of objects in the API? -** This would be a good way to enforce built-in limits. -** This would be a good way to enforce uniformity of pagination. -** If each resource provides a way to construct ETags, we could provide - automatic implementation of If-Match, etc. -** With the right interface, we could provide automatic implementations of PUT - or PATCH with JSON Merge Patch and JSON Patch given any one of these. -* would like to require that servers have unique, immutable uuids -* TLS: -** How will we do TLS termination? -** How will we manage server certificates? -** How will we manage client certificates? -* what does bootstrapping / key management look like? -* what does internal authorization look like? 
- -Other activities: - -* Performance testing -* Stress testing -* Fault testing / under load -* Fuzz testing -* Security review - -Nice-to-haves: - -* API consistency checks: e.g., camel case every where - -Things we're going to want to build once: - -* metric export -* structured event reporting (e.g., audit log, alert log, fault log) -* opentracing-type reporting -* client-side circuit breakers -* service discovery -* client connection pooling -* server-side throttling -* command-line utilities - -Check out linkerd (for inspiration -- it looks K8s-specific) diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml new file mode 100644 index 0000000000..033836dfe0 --- /dev/null +++ b/clickhouse-admin/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "omicron-clickhouse-admin" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +clickhouse-admin-api.workspace = true +dropshot.workspace = true +http.workspace = true +illumos-utils.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +serde.workspace = true +thiserror.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true +toml.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +expectorate.workspace = true +nexus-test-utils.workspace = true +omicron-test-utils.workspace = true +openapi-lint.workspace = true +openapiv3.workspace = true +serde_json.workspace = true +subprocess.workspace = true +url.workspace = true + +[lints] +workspace = true diff --git a/clickhouse-admin/api/Cargo.toml b/clickhouse-admin/api/Cargo.toml new file mode 100644 index 0000000000..ceec09f6c8 --- /dev/null +++ b/clickhouse-admin/api/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = 
"clickhouse-admin-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +dropshot.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/clickhouse-admin/api/src/lib.rs b/clickhouse-admin/api/src/lib.rs new file mode 100644 index 0000000000..9a011d4387 --- /dev/null +++ b/clickhouse-admin/api/src/lib.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use dropshot::{HttpError, HttpResponseOk, RequestContext}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddrV6; + +#[dropshot::api_description] +pub trait ClickhouseAdminApi { + type Context; + + /// Retrieve the address the ClickHouse server or keeper node is listening on + #[endpoint { + method = GET, + path = "/node/address", + }] + async fn clickhouse_address( + rqctx: RequestContext, + ) -> Result, HttpError>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct ClickhouseAddress { + pub clickhouse_address: SocketAddrV6, +} diff --git a/clickhouse-admin/src/bin/clickhouse-admin.rs b/clickhouse-admin/src/bin/clickhouse-admin.rs new file mode 100644 index 0000000000..6f28a82804 --- /dev/null +++ b/clickhouse-admin/src/bin/clickhouse-admin.rs @@ -0,0 +1,68 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Executable program to run the Omicron ClickHouse admin interface + +use anyhow::anyhow; +use camino::Utf8PathBuf; +use clap::Parser; +use omicron_clickhouse_admin::{Clickward, Config}; +use omicron_common::cmd::fatal; +use omicron_common::cmd::CmdError; +use std::net::{SocketAddr, SocketAddrV6}; + +#[derive(Debug, Parser)] +#[clap( + name = "clickhouse-admin", + about = "Omicron ClickHouse cluster admin server" +)] +enum Args { + /// Start the ClickHouse admin server + Run { + // TODO: This address is solely for testing now. We should remove it + // once we have more endpoints up and running. + /// Socket address for a running clickhouse server or keeper instance + #[clap(long, short = 'a', action)] + clickhouse_address: SocketAddrV6, + + /// Address on which this server should run + #[clap(long, short = 'H', action)] + http_address: SocketAddrV6, + + /// Path to the server configuration file + #[clap(long, short, action)] + config: Utf8PathBuf, + }, +} + +#[tokio::main] +async fn main() { + if let Err(err) = main_impl().await { + fatal(err); + } +} + +async fn main_impl() -> Result<(), CmdError> { + let args = Args::parse(); + + match args { + Args::Run { clickhouse_address, http_address, config } => { + let mut config = Config::from_file(&config) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + config.dropshot.bind_address = SocketAddr::V6(http_address); + + let clickward = Clickward::new(clickhouse_address); + + let server = + omicron_clickhouse_admin::start_server(clickward, config) + .await + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + server.await.map_err(|err| { + CmdError::Failure(anyhow!( + "server failed after starting: {err}" + )) + }) + } + } +} diff --git a/clickhouse-admin/src/clickward.rs b/clickhouse-admin/src/clickward.rs new file mode 100644 index 0000000000..114201e44b --- /dev/null +++ b/clickhouse-admin/src/clickward.rs @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use clickhouse_admin_api::ClickhouseAddress; +use dropshot::HttpError; +use slog_error_chain::{InlineErrorChain, SlogInlineError}; +use std::io; +use std::net::SocketAddrV6; + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum ClickwardError { + #[error("clickward failure")] + Failure { + #[source] + err: io::Error, + }, +} + +impl From for HttpError { + fn from(err: ClickwardError) -> Self { + match err { + ClickwardError::Failure { .. } => { + let message = InlineErrorChain::new(&err).to_string(); + HttpError { + status_code: http::StatusCode::INTERNAL_SERVER_ERROR, + error_code: Some(String::from("Internal")), + external_message: message.clone(), + internal_message: message, + } + } + } + } +} + +#[derive(Debug)] +pub struct Clickward { + clickhouse_address: SocketAddrV6, +} + +impl Clickward { + pub fn new(clickhouse_address: SocketAddrV6) -> Self { + Self { clickhouse_address } + } + + pub fn clickhouse_address( + &self, + ) -> Result { + Ok(ClickhouseAddress { clickhouse_address: self.clickhouse_address }) + } +} diff --git a/clickhouse-admin/src/config.rs b/clickhouse-admin/src/config.rs new file mode 100644 index 0000000000..77a624835c --- /dev/null +++ b/clickhouse-admin/src/config.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use camino::Utf8Path; +use camino::Utf8PathBuf; +use dropshot::ConfigDropshot; +use dropshot::ConfigLogging; +use serde::Deserialize; +use serde::Serialize; +use slog_error_chain::SlogInlineError; +use std::io; + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct Config { + pub dropshot: ConfigDropshot, + pub log: ConfigLogging, +} +impl Config { + /// Load a `Config` from the given TOML file + pub fn from_file(path: &Utf8Path) -> Result { + let contents = std::fs::read_to_string(path) + .map_err(|err| LoadError::Read { path: path.to_owned(), err })?; + toml::de::from_str(&contents) + .map_err(|err| LoadError::Parse { path: path.to_owned(), err }) + } +} + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum LoadError { + #[error("failed to read {path}")] + Read { + path: Utf8PathBuf, + #[source] + err: io::Error, + }, + #[error("failed to parse {path} as TOML")] + Parse { + path: Utf8PathBuf, + #[source] + err: toml::de::Error, + }, +} diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs new file mode 100644 index 0000000000..cab875fe1d --- /dev/null +++ b/clickhouse-admin/src/context.rs @@ -0,0 +1,21 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::Clickward; +use slog::Logger; + +pub struct ServerContext { + clickward: Clickward, + _log: Logger, +} + +impl ServerContext { + pub fn new(clickward: Clickward, _log: Logger) -> Self { + Self { clickward, _log } + } + + pub fn clickward(&self) -> &Clickward { + &self.clickward + } +} diff --git a/clickhouse-admin/src/http_entrypoints.rs b/clickhouse-admin/src/http_entrypoints.rs new file mode 100644 index 0000000000..05988a73b0 --- /dev/null +++ b/clickhouse-admin/src/http_entrypoints.rs @@ -0,0 +1,31 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::context::ServerContext; +use clickhouse_admin_api::*; +use dropshot::HttpError; +use dropshot::HttpResponseOk; +use dropshot::RequestContext; +use std::sync::Arc; + +type ClickhouseApiDescription = dropshot::ApiDescription>; + +pub fn api() -> ClickhouseApiDescription { + clickhouse_admin_api_mod::api_description::() + .expect("registered entrypoints") +} + +enum ClickhouseAdminImpl {} + +impl ClickhouseAdminApi for ClickhouseAdminImpl { + type Context = Arc; + + async fn clickhouse_address( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let output = ctx.clickward().clickhouse_address()?; + Ok(HttpResponseOk(output)) + } +} diff --git a/clickhouse-admin/src/lib.rs b/clickhouse-admin/src/lib.rs new file mode 100644 index 0000000000..a48588c544 --- /dev/null +++ b/clickhouse-admin/src/lib.rs @@ -0,0 +1,70 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use context::ServerContext; +use omicron_common::FileKv; +use slog::{debug, error, Drain}; +use slog_dtrace::ProbeRegistration; +use slog_error_chain::SlogInlineError; +use std::error::Error; +use std::io; +use std::sync::Arc; + +mod clickward; +mod config; +mod context; +mod http_entrypoints; + +pub use clickward::Clickward; +pub use config::Config; + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum StartError { + #[error("failed to initialize logger")] + InitializeLogger(#[source] io::Error), + #[error("failed to register dtrace probes: {0}")] + RegisterDtraceProbes(String), + #[error("failed to initialize HTTP server")] + InitializeHttpServer(#[source] Box), +} + +pub type Server = dropshot::HttpServer>; + +/// Start the dropshot server +pub async fn start_server( + clickward: Clickward, + server_config: Config, +) -> Result { + let (drain, registration) = slog_dtrace::with_drain( + server_config + .log + .to_logger("clickhouse-admin") + .map_err(StartError::InitializeLogger)?, + ); + let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); + match registration { + ProbeRegistration::Success => { + debug!(log, "registered DTrace probes"); + } + ProbeRegistration::Failed(err) => { + let err = StartError::RegisterDtraceProbes(err); + error!(log, "failed to register DTrace probes"; &err); + return Err(err); + } + } + + let context = ServerContext::new( + clickward, + log.new(slog::o!("component" => "ServerContext")), + ); + let http_server_starter = dropshot::HttpServerStarter::new( + &server_config.dropshot, + http_entrypoints::api(), + Arc::new(context), + &log.new(slog::o!("component" => "dropshot")), + ) + .map_err(StartError::InitializeHttpServer)?; + + Ok(http_server_starter.start()) +} diff --git a/clients/installinator-client/Cargo.toml b/clients/installinator-client/Cargo.toml index ca2de0476a..ba869d79bd 100644 --- a/clients/installinator-client/Cargo.toml +++ b/clients/installinator-client/Cargo.toml @@ -9,6 +9,7 @@ workspace = true 
[dependencies] installinator-common.workspace = true +omicron-common.workspace = true progenitor.workspace = true regress.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } diff --git a/clients/installinator-client/src/lib.rs b/clients/installinator-client/src/lib.rs index a39ff3ff80..3b7abc333b 100644 --- a/clients/installinator-client/src/lib.rs +++ b/clients/installinator-client/src/lib.rs @@ -21,7 +21,7 @@ progenitor::generate_api!( replace = { Duration = std::time::Duration, EventReportForInstallinatorSpec = installinator_common::EventReport, - M2Slot = installinator_common::M2Slot, + M2Slot = omicron_common::disk::M2Slot, ProgressEventForGenericSpec = installinator_common::ProgressEvent, ProgressEventForInstallinatorSpec = installinator_common::ProgressEvent, StepEventForGenericSpec = installinator_common::StepEvent, diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index b7722144fe..62366c45e1 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -42,6 +42,7 @@ progenitor::generate_api!( OmicronPhysicalDisksConfig = nexus_types::disk::OmicronPhysicalDisksConfig, RecoverySiloConfig = nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig, TypedUuidForCollectionKind = omicron_uuid_kinds::CollectionUuid, + TypedUuidForDemoSagaKind = omicron_uuid_kinds::DemoSagaUuid, TypedUuidForDownstairsKind = omicron_uuid_kinds::TypedUuid, TypedUuidForPropolisKind = omicron_uuid_kinds::TypedUuid, TypedUuidForSledKind = omicron_uuid_kinds::TypedUuid, diff --git a/clients/oxide-client/Cargo.toml b/clients/oxide-client/Cargo.toml index f2adcacb1b..183640946f 100644 --- a/clients/oxide-client/Cargo.toml +++ b/clients/oxide-client/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true base64.workspace = true chrono.workspace = true futures.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true progenitor.workspace = true @@ -22,6 +23,5 @@ 
serde.workspace = true serde_json.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "net" ] } -trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/oxide-client/src/lib.rs b/clients/oxide-client/src/lib.rs index 07a190c38e..249ea18146 100644 --- a/clients/oxide-client/src/lib.rs +++ b/clients/oxide-client/src/lib.rs @@ -7,13 +7,13 @@ use anyhow::anyhow; use anyhow::Context; use futures::FutureExt; +use hickory_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::TokioAsyncResolver; use std::net::SocketAddr; use std::sync::Arc; use thiserror::Error; -use trust_dns_resolver::config::{ - NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::TokioAsyncResolver; progenitor::generate_api!( spec = "../../openapi/nexus.json", @@ -46,14 +46,15 @@ impl CustomDnsResolver { socket_addr: dns_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = Arc::new( - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("failed to create resolver")?, - ); + let resolver = + Arc::new(TokioAsyncResolver::tokio(resolver_config, resolver_opts)); Ok(CustomDnsResolver { dns_addr, resolver }) } diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index d80e56d16c..b725ac3df5 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -29,6 +29,7 @@ progenitor::generate_api!( BfdPeerConfig = { derives = [Eq, Hash] }, BgpConfig = { derives = [Eq, Hash] }, BgpPeerConfig = { derives = [Eq, Hash] }, + LldpPortConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, OmicronPhysicalDiskConfig = { derives 
= [Eq, Hash, PartialOrd, Ord] }, PortConfigV2 = { derives = [Eq, Hash] }, RouteConfig = { derives = [Eq, Hash] }, diff --git a/common/src/address.rs b/common/src/address.rs index 5ed5689289..c23f5c41ed 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -8,6 +8,7 @@ //! and Nexus, who need to agree upon addressing schemes. use crate::api::external::{self, Error}; +use crate::policy::{DNS_REDUNDANCY, MAX_DNS_REDUNDANCY}; use ipnetwork::Ipv6Network; use once_cell::sync::Lazy; use oxnet::{Ipv4Net, Ipv6Net}; @@ -25,31 +26,6 @@ pub const MAX_PORT: u16 = u16::MAX; /// minimum possible value for a tcp or udp port pub const MIN_PORT: u16 = u16::MIN; -/// The amount of redundancy for boundary NTP servers. -pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; - -/// The amount of redundancy for Nexus services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new Nexus zones) -pub const NEXUS_REDUNDANCY: usize = 3; - -/// The amount of redundancy for CockroachDb services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new crdb zones) -pub const COCKROACHDB_REDUNDANCY: usize = 5; - -/// The amount of redundancy for internal DNS servers. -/// -/// Must be less than or equal to MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 3; - -/// The maximum amount of redundancy for DNS servers. -/// -/// This determines the number of addresses which are reserved for DNS servers. 
-pub const MAX_DNS_REDUNDANCY: usize = 5; - pub const DNS_PORT: u16 = 53; pub const DNS_HTTP_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; @@ -59,6 +35,7 @@ pub const COCKROACH_ADMIN_PORT: u16 = 32222; pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181; +pub const CLICKHOUSE_ADMIN_PORT: u16 = 8888; pub const OXIMETER_PORT: u16 = 12223; pub const DENDRITE_PORT: u16 = 12224; pub const LLDP_PORT: u16 = 12230; diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index c7421aa5ee..07e4fd0b83 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -23,6 +23,7 @@ pub use dropshot::PaginationOrder; pub use error::*; use futures::stream::BoxStream; use oxnet::IpNet; +use oxnet::Ipv4Net; use parse_display::Display; use parse_display::FromStr; use rand::thread_rng; @@ -2228,7 +2229,7 @@ pub struct SwitchPortSettingsView { pub links: Vec, /// Link-layer discovery protocol (LLDP) settings. - pub link_lldp: Vec, + pub link_lldp: Vec, /// Layer 3 interface settings. pub interfaces: Vec, @@ -2370,7 +2371,7 @@ pub struct SwitchPortLinkConfig { /// The link-layer discovery protocol service configuration id for this /// link. - pub lldp_service_config_id: Uuid, + pub lldp_link_config_id: Uuid, /// The name of this link. pub link_name: String, @@ -2390,34 +2391,30 @@ pub struct SwitchPortLinkConfig { /// A link layer discovery protocol (LLDP) service configuration. #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpServiceConfig { +pub struct LldpLinkConfig { /// The id of this LLDP service instance. pub id: Uuid, - /// The link-layer discovery protocol configuration for this service. - pub lldp_config_id: Option, - /// Whether or not the LLDP service is enabled. pub enabled: bool, -} -/// A link layer discovery protocol (LLDP) base configuration. 
-#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpConfig { - #[serde(flatten)] - pub identity: IdentityMetadata, + /// The LLDP link name TLV. + pub link_name: Option, + + /// The LLDP link description TLV. + pub link_description: Option, /// The LLDP chassis identifier TLV. - pub chassis_id: String, + pub chassis_id: Option, - /// THE LLDP system name TLV. - pub system_name: String, + /// The LLDP system name TLV. + pub system_name: Option, - /// THE LLDP system description TLV. - pub system_description: String, + /// The LLDP system description TLV. + pub system_description: Option, - /// THE LLDP management IP TLV. - pub management_ip: oxnet::IpNet, + /// The LLDP management IP TLV. + pub management_ip: Option, } /// Describes the kind of an switch interface. @@ -2492,6 +2489,9 @@ pub struct SwitchPortRouteConfig { /// The VLAN identifier for the route. Use this if the gateway is reachable /// over an 802.1Q tagged L2 segment. pub vlan_id: Option, + + /// Local preference indicating priority within and across protocols. + pub local_pref: Option, } /* @@ -2705,6 +2705,15 @@ pub struct BgpPeerStatus { pub switch: SwitchLocation, } +/// The set of routes currently exported via BGP, indexed by peer address. +#[derive( + Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq, Default, +)] +pub struct BgpExported { + /// Exported routes indexed by peer address. + pub exports: HashMap>, +} + /// Opaque object representing BGP message history for a given BGP peer. The /// contents of this object are not yet stable. 
#[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index cd11bfe92a..a81e21e05b 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -20,6 +20,8 @@ use std::{ use strum::EnumCount; use uuid::Uuid; +use super::nexus::HostIdentifier; + /// The type of network interface #[derive( Clone, @@ -304,6 +306,9 @@ pub struct RouteConfig { /// The VLAN id associated with this route. #[serde(default)] pub vlan_id: Option, + /// The local preference associated with this route. + #[serde(default)] + pub local_pref: Option, } #[derive( @@ -375,6 +380,84 @@ impl FromStr for UplinkAddressConfig { } } +#[derive( + Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +/// To what extent should this port participate in LLDP +pub enum LldpAdminStatus { + #[default] + Enabled, + Disabled, + RxOnly, + TxOnly, +} + +impl fmt::Display for LldpAdminStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LldpAdminStatus::Enabled => write!(f, "enabled"), + LldpAdminStatus::Disabled => write!(f, "disabled"), + LldpAdminStatus::RxOnly => write!(f, "rx_only"), + LldpAdminStatus::TxOnly => write!(f, "tx_only"), + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct ParseLldpAdminStatusError(String); + +impl std::fmt::Display for ParseLldpAdminStatusError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LLDP admin status error: {}", self.0) + } +} + +impl FromStr for LldpAdminStatus { + type Err = ParseLldpAdminStatusError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "enabled" => Ok(Self::Enabled), + "disabled" => Ok(Self::Disabled), + "rxonly" | "rx_only" => Ok(Self::RxOnly), + "txonly" | "tx_only" => Ok(Self::TxOnly), + _ => Err(ParseLldpAdminStatusError(format!( + "not a valid admin status: {s}" + ))), + } 
+ } +} + +/// Per-port LLDP configuration settings. Only the "status" setting is +/// mandatory. All other fields have natural defaults or may be inherited from +/// the switch. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] +pub struct LldpPortConfig { + /// To what extent should this port participate in LLDP + pub status: LldpAdminStatus, + /// Chassis ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be + /// inherited from the switch-level settings. + pub chassis_id: Option, + /// Port ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be set to + /// the port name. e.g., qsfp0/0. + pub port_id: Option, + /// Port description to advertise. If this is not set, no + /// description will be advertised. + pub port_description: Option, + /// System name to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_name: Option, + /// System description to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_description: Option, + /// Management IP addresses to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub management_addrs: Option>, +} + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] pub struct PortConfigV2 { /// The set of routes associated with this port. @@ -394,6 +477,8 @@ pub struct PortConfigV2 { /// Whether or not to set autonegotiation #[serde(default)] pub autoneg: bool, + /// LLDP configuration for this port + pub lldp: Option, } /// A set of switch uplinks. @@ -410,11 +495,13 @@ pub struct HostPortConfig { /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport /// (must be in infra_ip pool). May also include an optional VLAN ID. 
pub addrs: Vec, + + pub lldp: Option, } impl From for HostPortConfig { fn from(x: PortConfigV2) -> Self { - Self { port: x.port, addrs: x.addresses } + Self { port: x.port, addrs: x.addresses, lldp: x.lldp.clone() } } } @@ -636,6 +723,53 @@ pub struct ResolvedVpcRoute { pub target: RouterTarget, } +/// VPC firewall rule after object name resolution has been performed by Nexus +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct ResolvedVpcFirewallRule { + pub status: external::VpcFirewallRuleStatus, + pub direction: external::VpcFirewallRuleDirection, + pub targets: Vec, + pub filter_hosts: Option>, + pub filter_ports: Option>, + pub filter_protocols: Option>, + pub action: external::VpcFirewallRuleAction, + pub priority: external::VpcFirewallRulePriority, +} + +/// A mapping from a virtual NIC to a physical host +#[derive( + Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct VirtualNetworkInterfaceHost { + pub virtual_ip: IpAddr, + pub virtual_mac: external::MacAddr, + pub physical_host_ip: Ipv6Addr, + pub vni: external::Vni, +} + +/// DHCP configuration for a port +/// +/// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we +/// use `InstanceRuntimeState::hostname` for this value. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DhcpConfig { + /// DNS servers to send to the instance + /// + /// (DHCPv4 option 6; DHCPv6 option 23) + pub dns_servers: Vec, + + /// DNS zone this instance's hostname belongs to (e.g. the `project.example` + /// part of `instance1.project.example`) + /// + /// (DHCPv4 option 15; used in DHCPv6 option 39) + pub host_domain: Option, + + /// DNS search domains + /// + /// (DHCPv4 option 119; DHCPv6 option 24) + pub search_domains: Vec, +} + /// The target for a given router entry. 
#[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, @@ -709,14 +843,20 @@ pub enum DatasetKind { // Durable datasets for zones Cockroach, Crucible, + /// Used for single-node clickhouse deployments Clickhouse, + /// Used for replicated clickhouse deployments ClickhouseKeeper, + /// Used for replicated clickhouse deployments + ClickhouseServer, ExternalDns, InternalDns, // Zone filesystems ZoneRoot, - Zone { name: String }, + Zone { + name: String, + }, // Other datasets Debug, @@ -779,7 +919,7 @@ impl DatasetKind { use DatasetKind::*; match self { Cockroach | Crucible | Clickhouse | ClickhouseKeeper - | ExternalDns | InternalDns => true, + | ClickhouseServer | ExternalDns | InternalDns => true, ZoneRoot | Zone { .. } | Debug => false, } } @@ -809,6 +949,7 @@ impl fmt::Display for DatasetKind { Cockroach => "cockroachdb", Clickhouse => "clickhouse", ClickhouseKeeper => "clickhouse_keeper", + ClickhouseServer => "clickhouse_server", ExternalDns => "external_dns", InternalDns => "internal_dns", ZoneRoot => "zone", diff --git a/common/src/disk.rs b/common/src/disk.rs index 619d643547..2902e7540c 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -4,11 +4,13 @@ //! Disk related types shared among crates +use anyhow::bail; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::fmt; use uuid::Uuid; use crate::{ @@ -199,6 +201,34 @@ impl Ledgerable for DatasetsConfig { fn generation_bump(&mut self) {} } +/// Identifies how a single dataset management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DatasetManagementStatus { + pub dataset_name: DatasetName, + pub err: Option, +} + +/// The result from attempting to manage datasets. 
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] +pub struct DatasetsManagementResult { + pub status: Vec, +} + +impl DatasetsManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; + } + } + false + } +} + /// Uniquely identifies a disk. #[derive( Debug, @@ -244,3 +274,115 @@ impl From for DiskVariant { } } } + +/// Identifies how a single disk management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DiskManagementStatus { + pub identity: DiskIdentity, + pub err: Option, +} + +/// The result from attempting to manage underlying disks. +/// +/// This is more complex than a simple "Error" type because it's possible +/// for some disks to be initialized correctly, while others can fail. +/// +/// This structure provides a mechanism for callers to learn about partial +/// failures, and handle them appropriately on a per-disk basis. 
+#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] +pub struct DisksManagementResult { + pub status: Vec, +} + +impl DisksManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; + } + } + false + } + + pub fn has_retryable_error(&self) -> bool { + for status in &self.status { + if let Some(err) = &status.err { + if err.retryable() { + return true; + } + } + } + false + } +} + +#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum DiskManagementError { + #[error("Disk requested by control plane, but not found on device")] + NotFound, + + #[error("Expected zpool UUID of {expected}, but saw {observed}")] + ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, + + #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] + KeyManager(String), + + #[error("Other error starting disk management: {0}")] + Other(String), +} + +impl DiskManagementError { + fn retryable(&self) -> bool { + match self { + DiskManagementError::KeyManager(_) => true, + _ => false, + } + } +} + +/// Describes an M.2 slot, often in the context of writing a system image to +/// it. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Deserialize, + Serialize, + JsonSchema, +)] +pub enum M2Slot { + A, + B, +} + +impl fmt::Display for M2Slot { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::A => f.write_str("A"), + Self::B => f.write_str("B"), + } + } +} + +impl TryFrom for M2Slot { + type Error = anyhow::Error; + + fn try_from(value: i64) -> Result { + match value { + // Gimlet should have 2 M.2 drives: drive A is assigned slot 17, and + // drive B is assigned slot 18. 
+ 17 => Ok(Self::A), + 18 => Ok(Self::B), + _ => bail!("unexpected M.2 slot {value}"), + } + } +} diff --git a/common/src/lib.rs b/common/src/lib.rs index e4f53cbfab..6da32c56ba 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -26,6 +26,7 @@ pub mod backoff; pub mod cmd; pub mod disk; pub mod ledger; +pub mod policy; pub mod progenitor_operation_retry; pub mod update; pub mod vlan; diff --git a/common/src/policy.rs b/common/src/policy.rs new file mode 100644 index 0000000000..677dbfe2b9 --- /dev/null +++ b/common/src/policy.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fleet policy related functionality used by both Reconfigurator and RSS. + +/// The amount of redundancy for boundary NTP servers. +pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; + +/// The amount of redundancy for Nexus services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new Nexus zones) +pub const NEXUS_REDUNDANCY: usize = 3; + +/// The amount of redundancy for CockroachDb services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new crdb zones) +pub const COCKROACHDB_REDUNDANCY: usize = 5; + +/// The amount of redundancy for internal DNS servers. +/// +/// Must be less than or equal to MAX_DNS_REDUNDANCY. +pub const DNS_REDUNDANCY: usize = 3; + +/// The maximum amount of redundancy for DNS servers. +/// +/// This determines the number of addresses which are reserved for DNS servers. 
+pub const MAX_DNS_REDUNDANCY: usize = 5; + +/// The amount of redundancy for clickhouse servers +/// +/// Clickhouse servers contain lazily replicated data +pub const CLICKHOUSE_SERVER_REDUNDANCY: usize = 3; + +/// The amount of redundancy for clickhouse keepers +/// +/// Keepers maintain strongly consistent metadata about data replication +pub const CLICKHOUSE_KEEPER_REDUNDANCY: usize = 5; diff --git a/dev-tools/downloader/src/lib.rs b/dev-tools/downloader/src/lib.rs index d5b436244c..c3d6e165ff 100644 --- a/dev-tools/downloader/src/lib.rs +++ b/dev-tools/downloader/src/lib.rs @@ -586,7 +586,10 @@ impl<'a> Downloader<'a> { let version = version.trim(); let (url_base, suffix) = match os { - Os::Illumos => ("https://illumos.org/downloads", "tar.gz"), + Os::Illumos => ( + "https://oxide-cockroachdb-build.s3.us-west-2.amazonaws.com", + "tar.gz", + ), Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"), }; let build = match os { diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 0990fdb11c..4cc484b9a9 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -28,6 +28,7 @@ gateway-messages.workspace = true gateway-test-utils.workspace = true humantime.workspace = true internal-dns.workspace = true +itertools.workspace = true nexus-client.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true @@ -61,13 +62,14 @@ multimap.workspace = true indicatif.workspace = true [dev-dependencies] +camino-tempfile.workspace = true expectorate.workspace = true +http.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true subprocess.workspace = true -camino-tempfile.workspace = true # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. 
diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 92aa6cf125..f925fa1e4d 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -72,6 +72,8 @@ use nexus_db_model::RegionReplacementState; use nexus_db_model::RegionReplacementStep; use nexus_db_model::RegionReplacementStepType; use nexus_db_model::RegionSnapshot; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_model::RegionSnapshotReplacementState; use nexus_db_model::Sled; use nexus_db_model::Snapshot; use nexus_db_model::SnapshotState; @@ -302,6 +304,9 @@ enum DbCommands { /// Query for information about region replacements, optionally manually /// triggering one. RegionReplacement(RegionReplacementArgs), + /// Query for information about region snapshot replacements, optionally + /// manually triggering one. + RegionSnapshotReplacement(RegionSnapshotReplacementArgs), /// Print information about sleds Sleds(SledsArgs), /// Print information about customer instances @@ -659,6 +664,53 @@ struct SnapshotInfoArgs { uuid: Uuid, } +#[derive(Debug, Args)] +struct RegionSnapshotReplacementArgs { + #[command(subcommand)] + command: RegionSnapshotReplacementCommands, +} + +#[derive(Debug, Subcommand)] +enum RegionSnapshotReplacementCommands { + /// List region snapshot replacement requests + List(RegionSnapshotReplacementListArgs), + /// Show current region snapshot replacements and their status + Status, + /// Show detailed information for a region snapshot replacement + Info(RegionSnapshotReplacementInfoArgs), + /// Manually request a region snapshot replacement + Request(RegionSnapshotReplacementRequestArgs), +} + +#[derive(Debug, Args)] +struct RegionSnapshotReplacementListArgs { + /// Only show region snapshot replacement requests in this state + #[clap(long)] + state: Option, + + /// Only show region snapshot replacement requests after a certain date + #[clap(long)] + after: Option>, +} + +#[derive(Debug, Args)] +struct 
RegionSnapshotReplacementInfoArgs { + /// The UUID of the region snapshot replacement request + replacement_id: Uuid, +} + +#[derive(Debug, Args)] +struct RegionSnapshotReplacementRequestArgs { + /// The dataset id for a given region snapshot + dataset_id: Uuid, + + /// The region id for a given region snapshot + region_id: Uuid, + + /// The snapshot id for a given region snapshot + snapshot_id: Uuid, +} + #[derive(Debug, Args)] struct ValidateArgs { #[command(subcommand)] @@ -859,6 +911,51 @@ impl DbArgs { DbCommands::Snapshots(SnapshotArgs { command: SnapshotCommands::List, }) => cmd_db_snapshot_list(&datastore, &self.fetch_opts).await, + DbCommands::RegionSnapshotReplacement( + RegionSnapshotReplacementArgs { + command: RegionSnapshotReplacementCommands::List(args), + }, + ) => { + cmd_db_region_snapshot_replacement_list( + &datastore, + &self.fetch_opts, + args, + ) + .await + } + DbCommands::RegionSnapshotReplacement( + RegionSnapshotReplacementArgs { + command: RegionSnapshotReplacementCommands::Status, + }, + ) => { + cmd_db_region_snapshot_replacement_status( + &opctx, + &datastore, + &self.fetch_opts, + ) + .await + } + DbCommands::RegionSnapshotReplacement( + RegionSnapshotReplacementArgs { + command: RegionSnapshotReplacementCommands::Info(args), + }, + ) => { + cmd_db_region_snapshot_replacement_info( + &opctx, &datastore, args, + ) + .await + } + DbCommands::RegionSnapshotReplacement( + RegionSnapshotReplacementArgs { + command: RegionSnapshotReplacementCommands::Request(args), + }, + ) => { + let token = omdb.check_allow_destructive()?; + cmd_db_region_snapshot_replacement_request( + &opctx, &datastore, args, token, + ) + .await + } DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateVolumeReferences, }) => cmd_db_validate_volume_references(&datastore).await, @@ -1009,6 +1106,7 @@ async fn lookup_service_info( | BlueprintZoneType::InternalNtp(_) => ServiceKind::Ntp, BlueprintZoneType::Clickhouse(_) => ServiceKind::Clickhouse, 
BlueprintZoneType::ClickhouseKeeper(_) => ServiceKind::ClickhouseKeeper, + BlueprintZoneType::ClickhouseServer(_) => ServiceKind::ClickhouseServer, BlueprintZoneType::CockroachDb(_) => ServiceKind::Cockroach, BlueprintZoneType::Crucible(_) => ServiceKind::Crucible, BlueprintZoneType::CruciblePantry(_) => ServiceKind::CruciblePantry, @@ -3390,6 +3488,235 @@ async fn cmd_db_network_list_vnics( Ok(()) } +// REGION SNAPSHOT REPLACEMENTS + +/// List all region snapshot replacement requests +async fn cmd_db_region_snapshot_replacement_list( + datastore: &DataStore, + fetch_opts: &DbFetchOptions, + args: &RegionSnapshotReplacementListArgs, +) -> Result<(), anyhow::Error> { + let ctx = || "listing region snapshot replacement requests".to_string(); + let limit = fetch_opts.fetch_limit; + + let requests: Vec = { + let conn = datastore.pool_connection_for_tests().await?; + + use db::schema::region_snapshot_replacement::dsl; + + match (args.state, args.after) { + (Some(state), Some(after)) => { + dsl::region_snapshot_replacement + .filter(dsl::replacement_state.eq(state)) + .filter(dsl::request_time.gt(after)) + .limit(i64::from(u32::from(limit))) + .select(RegionSnapshotReplacement::as_select()) + .get_results_async(&*conn) + .await? + } + + (Some(state), None) => { + dsl::region_snapshot_replacement + .filter(dsl::replacement_state.eq(state)) + .limit(i64::from(u32::from(limit))) + .select(RegionSnapshotReplacement::as_select()) + .get_results_async(&*conn) + .await? + } + + (None, Some(after)) => { + dsl::region_snapshot_replacement + .filter(dsl::request_time.gt(after)) + .limit(i64::from(u32::from(limit))) + .select(RegionSnapshotReplacement::as_select()) + .get_results_async(&*conn) + .await? + } + + (None, None) => { + dsl::region_snapshot_replacement + .limit(i64::from(u32::from(limit))) + .select(RegionSnapshotReplacement::as_select()) + .get_results_async(&*conn) + .await? 
+ } + } + }; + + check_limit(&requests, limit, ctx); + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct Row { + pub id: Uuid, + pub request_time: DateTime, + pub replacement_state: String, + } + + let mut rows = Vec::with_capacity(requests.len()); + + for request in requests { + rows.push(Row { + id: request.id, + request_time: request.request_time, + replacement_state: format!("{:?}", request.replacement_state), + }); + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .with(tabled::settings::Panel::header( + "Region snapshot replacement requests", + )) + .to_string(); + + println!("{}", table); + + Ok(()) +} + +/// Display all non-complete region snapshot replacements +async fn cmd_db_region_snapshot_replacement_status( + opctx: &OpContext, + datastore: &DataStore, + fetch_opts: &DbFetchOptions, +) -> Result<(), anyhow::Error> { + let ctx = || "listing region snapshot replacement requests".to_string(); + let limit = fetch_opts.fetch_limit; + + let requests: Vec = { + let conn = datastore.pool_connection_for_tests().await?; + + use db::schema::region_snapshot_replacement::dsl; + + dsl::region_snapshot_replacement + .filter( + dsl::replacement_state + .ne(RegionSnapshotReplacementState::Complete), + ) + .limit(i64::from(u32::from(limit))) + .select(RegionSnapshotReplacement::as_select()) + .get_results_async(&*conn) + .await? 
+ }; + + check_limit(&requests, limit, ctx); + + for request in requests { + let steps_left = datastore + .in_progress_region_snapshot_replacement_steps(opctx, request.id) + .await?; + + println!("{}:", request.id); + println!(); + + println!(" started: {}", request.request_time); + println!( + " state: {:?}", + request.replacement_state + ); + println!( + " region snapshot: {} {} {}", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + println!(" new region id: {:?}", request.new_region_id); + println!(" in-progress steps left: {:?}", steps_left); + println!(); + } + + Ok(()) +} + +/// Show details for a single region snapshot replacement +async fn cmd_db_region_snapshot_replacement_info( + opctx: &OpContext, + datastore: &DataStore, + args: &RegionSnapshotReplacementInfoArgs, +) -> Result<(), anyhow::Error> { + let request = datastore + .get_region_snapshot_replacement_request_by_id( + opctx, + args.replacement_id, + ) + .await?; + + // Show details + let steps_left = datastore + .in_progress_region_snapshot_replacement_steps(opctx, request.id) + .await?; + + println!("{}:", request.id); + println!(); + + println!(" started: {}", request.request_time); + println!(" state: {:?}", request.replacement_state); + println!( + " region snapshot: {} {} {}", + request.old_dataset_id, request.old_region_id, request.old_snapshot_id, + ); + println!(" new region id: {:?}", request.new_region_id); + println!(" in-progress steps left: {:?}", steps_left); + println!(); + + Ok(()) +} + +/// Manually request a region snapshot replacement +async fn cmd_db_region_snapshot_replacement_request( + opctx: &OpContext, + datastore: &DataStore, + args: &RegionSnapshotReplacementRequestArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + let Some(region_snapshot) = datastore + .region_snapshot_get(args.dataset_id, args.region_id, args.snapshot_id) + .await? 
+ else { + bail!("region snapshot not found!"); + }; + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + let request_id = request.id; + + // If this function indirectly uses + // `insert_region_snapshot_replacement_request`, there could be an authz + // related `ObjectNotFound` due to the opctx being for the privileged test + // user. Lookup the snapshot here, and directly use + // `insert_snapshot_replacement_request_with_volume_id` instead. + + let db_snapshots = { + use db::schema::snapshot::dsl; + let conn = datastore.pool_connection_for_tests().await?; + dsl::snapshot + .filter(dsl::id.eq(args.snapshot_id)) + .limit(1) + .select(Snapshot::as_select()) + .load_async(&*conn) + .await + .context("loading requested snapshot")? + }; + + assert_eq!(db_snapshots.len(), 1); + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + opctx, + request, + db_snapshots[0].volume_id, + ) + .await?; + + println!("region snapshot replacement {request_id} created"); + + Ok(()) +} + +// VALIDATION + /// Validate the `volume_references` column of the region snapshots table async fn cmd_db_validate_volume_references( datastore: &DataStore, diff --git a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs index cd7628a840..b3f34d5791 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs @@ -1091,7 +1091,7 @@ fn draw_status(f: &mut Frame, parent: Rect, status: &[(&str, &str)]) { } fn draw(f: &mut Frame, dashboard: &mut Dashboard) { - let size = f.size(); + let size = f.area(); let screen = Layout::default() .direction(Direction::Vertical) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index ec3e519cbc..ede2743404 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -19,12 +19,14 @@ use clap::Subcommand; use clap::ValueEnum; use futures::future::try_join; 
use futures::TryStreamExt; +use itertools::Itertools; use nexus_client::types::ActivationReason; use nexus_client::types::BackgroundTask; use nexus_client::types::BackgroundTasksActivateRequest; use nexus_client::types::CurrentStatus; use nexus_client::types::LastResult; use nexus_client::types::PhysicalDiskPath; +use nexus_client::types::SagaState; use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; @@ -32,8 +34,11 @@ use nexus_saga_recovery::LastPass; use nexus_types::deployment::Blueprint; use nexus_types::internal_api::background::LookupRegionPortStatus; use nexus_types::internal_api::background::RegionReplacementDriverStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::DemoSagaUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -43,6 +48,7 @@ use reedline::Reedline; use serde::Deserialize; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::str::FromStr; use tabled::Tabled; use uuid::Uuid; @@ -71,6 +77,8 @@ enum NexusCommands { BackgroundTasks(BackgroundTasksArgs), /// interact with blueprints Blueprints(BlueprintsArgs), + /// view sagas, create and complete demo sagas + Sagas(SagasArgs), /// interact with sleds Sleds(SledsArgs), } @@ -88,11 +96,21 @@ enum BackgroundTasksCommands { /// Print a summary of the status of all background tasks List, /// Print human-readable summary of the status of each background task - Show, + Show(BackgroundTasksShowArgs), /// Activate one or more background tasks Activate(BackgroundTasksActivateArgs), } +#[derive(Debug, Args)] +struct BackgroundTasksShowArgs { + /// Names of background 
tasks to show (default: all) + /// + /// You can use any background task name here or one of the special strings + /// "all", "dns_external", or "dns_internal". + #[clap(value_name = "TASK_NAME")] + tasks: Vec, +} + #[derive(Debug, Args)] struct BackgroundTasksActivateArgs { /// Name of the background tasks to activate @@ -244,6 +262,36 @@ struct BlueprintImportArgs { input: Utf8PathBuf, } +#[derive(Debug, Args)] +struct SagasArgs { + #[command(subcommand)] + command: SagasCommands, +} + +#[derive(Debug, Subcommand)] +enum SagasCommands { + /// List sagas run by this Nexus + /// + /// Note that this is reporting in-memory state about sagas run by *this* + /// Nexus instance. You'll get different answers if you ask different Nexus + /// instances. + List, + + /// Create a "demo" saga + /// + /// This saga will wait until it's explicitly completed using the + /// "demo-complete" subcommand. + DemoCreate, + + /// Complete a demo saga started with "demo-create". + DemoComplete(DemoSagaIdArgs), +} + +#[derive(Debug, Args)] +struct DemoSagaIdArgs { + demo_saga_id: DemoSagaUuid, +} + #[derive(Debug, Args)] struct SledsArgs { #[command(subcommand)] @@ -326,8 +374,8 @@ impl NexusArgs { command: BackgroundTasksCommands::List, }) => cmd_nexus_background_tasks_list(&client).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { - command: BackgroundTasksCommands::Show, - }) => cmd_nexus_background_tasks_show(&client).await, + command: BackgroundTasksCommands::Show(args), + }) => cmd_nexus_background_tasks_show(&client, args).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { command: BackgroundTasksCommands::Activate(args), }) => { @@ -402,6 +450,34 @@ impl NexusArgs { cmd_nexus_blueprints_import(&client, token, args).await } + NexusCommands::Sagas(SagasArgs { command }) => { + if self.nexus_internal_url.is_none() { + eprintln!( + "{}", + textwrap::wrap( + "WARNING: A Nexus instance was selected from DNS \ + because a specific one was not specified. 
But \ + the `omdb nexus sagas` commands usually only make \ + sense when targeting a specific Nexus instance.", + 80 + ) + .join("\n") + ); + } + match command { + SagasCommands::List => cmd_nexus_sagas_list(&client).await, + SagasCommands::DemoCreate => { + let token = omdb.check_allow_destructive()?; + cmd_nexus_sagas_demo_create(&client, token).await + } + SagasCommands::DemoComplete(args) => { + let token = omdb.check_allow_destructive()?; + cmd_nexus_sagas_demo_complete(&client, args, token) + .await + } + } + } + NexusCommands::Sleds(SledsArgs { command: SledsCommands::ListUninitialized, }) => cmd_nexus_sleds_list_uninitialized(&client).await, @@ -460,7 +536,9 @@ async fn cmd_nexus_background_tasks_list( ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; - let tasks = response.into_inner(); + // Convert the HashMap to a BTreeMap because we want the keys in sorted + // order. + let tasks = response.into_inner().into_iter().collect::>(); let table_rows = tasks.values().map(BackgroundTaskStatusRow::from); let table = tabled::Table::new(table_rows) .with(tabled::settings::Style::empty()) @@ -473,6 +551,7 @@ async fn cmd_nexus_background_tasks_list( /// Runs `omdb nexus background-tasks show` async fn cmd_nexus_background_tasks_show( client: &nexus_client::Client, + args: &BackgroundTasksShowArgs, ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; @@ -481,8 +560,50 @@ async fn cmd_nexus_background_tasks_show( let mut tasks = response.into_inner().into_iter().collect::>(); - // We want to pick the order that we print some tasks intentionally. Then - // we want to print anything else that we find. + // Now, pick out the tasks that the user selected. 
+ // + // The set of user tasks may include: + // + // - nothing at all, in which case we include all tasks + // - individual task names + // - certain groups that we recognize, like "dns_external" for all the tasks + // related to external DNS propagation. "all" means "all tasks". + let selected_set: BTreeSet<_> = + args.tasks.iter().map(AsRef::as_ref).collect(); + let selected_all = selected_set.is_empty() || selected_set.contains("all"); + if !selected_all { + for s in &selected_set { + if !tasks.contains_key(*s) + && *s != "all" + && *s != "dns_external" + && *s != "dns_internal" + { + bail!( + "unknown task name: {:?} (known task names: all, \ + dns_external, dns_internal, {})", + s, + tasks.keys().join(", ") + ); + } + } + + tasks.retain(|k, _| { + selected_set.contains(k.as_str()) + || selected_set.contains("all") + || (selected_set.contains("dns_external") + && k.starts_with("dns_") + && k.ends_with("_external")) + || (selected_set.contains("dns_internal") + && k.starts_with("dns_") + && k.ends_with("_internal")) + }); + } + + // Some tasks should be grouped and printed together in a certain order, + // even though their names aren't alphabetical. Notably, the DNS tasks + // logically go from config -> servers -> propagation, so we want to print + // them in that order. So we pick these out first and then print anything + // else that we find in alphabetical order. 
for name in [ "dns_config_internal", "dns_servers_internal", @@ -496,7 +617,7 @@ async fn cmd_nexus_background_tasks_show( ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); - } else { + } else if selected_all { eprintln!("warning: expected to find background task {:?}", name); } } @@ -1332,6 +1453,63 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { } } }; + } else if name == "region_snapshot_replacement_start" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total requests created ok: {}", + status.requests_created_ok.len(), + ); + for line in &status.requests_created_ok { + println!(" > {line}"); + } + + println!( + " total start saga invoked ok: {}", + status.start_invoked_ok.len(), + ); + for line in &status.start_invoked_ok { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "region_snapshot_replacement_garbage_collection" { + match serde_json::from_value::< + RegionSnapshotReplacementGarbageCollectStatus, + >(details.clone()) + { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total garbage collections requested: {}", + status.garbage_collect_requested.len(), + ); + for line in &status.garbage_collect_requested { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } } else { println!( "warning: unknown background task: {:?} \ @@ -1626,6 +1804,91 @@ async fn cmd_nexus_blueprints_import( Ok(()) } +/// Runs `omdb nexus sagas list` +async fn cmd_nexus_sagas_list( + client: &nexus_client::Client, +) -> Result<(), anyhow::Error> { + // We don't want users to 
confuse this with a general way to list all sagas. + // Such a command would read database state and it would go under "omdb db". + eprintln!( + "{}", + textwrap::wrap( + "NOTE: This command only reads in-memory state from the targeted \ + Nexus instance. Sagas may be missing if they were run by a \ + different Nexus instance or if they finished before this Nexus \ + instance last started up.", + 80 + ) + .join("\n") + ); + + let saga_stream = client.saga_list_stream(None, None); + let sagas = + saga_stream.try_collect::>().await.context("listing sagas")?; + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct SagaRow { + saga_id: Uuid, + state: &'static str, + } + let rows = sagas.into_iter().map(|saga| SagaRow { + saga_id: saga.id, + state: match saga.state { + SagaState::Running => "running", + SagaState::Succeeded => "succeeded", + SagaState::Failed { .. } => "failed", + SagaState::Stuck { .. } => "stuck", + }, + }); + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", table); + Ok(()) +} + +/// Runs `omdb nexus sagas demo-create` +async fn cmd_nexus_sagas_demo_create( + client: &nexus_client::Client, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + let demo_saga = + client.saga_demo_create().await.context("creating demo saga")?; + println!("saga id: {}", demo_saga.saga_id); + println!( + "demo saga id: {} (use this with `demo-complete`)", + demo_saga.demo_saga_id, + ); + Ok(()) +} + +/// Runs `omdb nexus sagas demo-complete` +async fn cmd_nexus_sagas_demo_complete( + client: &nexus_client::Client, + args: &DemoSagaIdArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + if let Err(error) = client + .saga_demo_complete(&args.demo_saga_id) + .await + .context("completing demo saga") + { + eprintln!("error: {:#}", error); + eprintln!( + "note: 
`demo-complete` must be run against the same Nexus \ + instance that is currently running that saga." + ); + eprintln!( + "note: Be sure that you're using the demo_saga_id, not the saga_id." + ); + Err(error) + } else { + Ok(()) + } +} + /// Runs `omdb nexus sleds list-uninitialized` async fn cmd_nexus_sleds_list_uninitialized( client: &nexus_client::Client, diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 67f113a801..ec407cd123 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -127,6 +127,14 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + +task: "region_snapshot_replacement_start" + detect if region snapshots need replacement and begin the process + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -276,6 +284,14 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + +task: "region_snapshot_replacement_start" + detect if region snapshots need replacement and begin the process + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -412,6 +428,14 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + +task: "region_snapshot_replacement_start" + detect if region snapshots need replacement and begin the process + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -472,6 +496,22 @@ note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=d note: database schema version matches expected () note: listing all commissioned sleds (use -F to filter, e.g. 
-F in-service) ============================================= +EXECUTING COMMAND: omdb ["nexus", "sagas", "list"] +termination: Exited(0) +--------------------------------------------- +stdout: +SAGA_ID STATE +--------------------------------------------- +stderr: +note: Nexus URL not specified. Will pick one from DNS. +note: using Nexus URL http://[::ffff:127.0.0.1]:REDACTED_PORT +WARNING: A Nexus instance was selected from DNS because a specific one was not +specified. But the `omdb nexus sagas` commands usually only make sense when +targeting a specific Nexus instance. +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +============================================= EXECUTING COMMAND: omdb ["oximeter", "--oximeter-url", "junk", "list-producers"] termination: Exited(1) --------------------------------------------- diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index d4c07899f4..2a9c9c8051 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -328,6 +328,14 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + +task: "region_snapshot_replacement_start" + detect if region snapshots need replacement and begin the process + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -566,6 +574,23 @@ task: "region_replacement_driver" number of region replacement finish sagas started ok: 0 number of errors: 0 +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 0 + errors: 0 + +task: 
"region_snapshot_replacement_start" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 + task: "saga_recovery" configured period: every 10m currently executing: no @@ -625,6 +650,449 @@ warning: unknown background task: "vpc_route_manager" (don't know how to interpr stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "saga_recovery"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "blueprint_loader", "blueprint_executor"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "blueprint_loader" + configured period: every 1m s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + 
currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_internal"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_external"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + 
DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "all"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , 
triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "nat_v4_garbage_collector" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "blueprint_loader" + configured period: every 1m s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "abandoned_vmm_reaper" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total abandoned VMMs found: 0 + VMM records deleted: 0 + VMM records already deleted by another Nexus: 0 + sled resource reservations deleted: 0 + +task: "bfd_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "crdb_node_id_collector" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer 
firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "decommissioned_disk_cleaner" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "decommissioned_disk_cleaner" (don't know how to interpret details: Object {"deleted": Number(0), "error": Null, "error_count": Number(0), "found": Number(0), "not_ready_to_be_deleted": Number(0)}) + +task: "external_endpoints" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + external API endpoints: 2 ('*' below marks default) + + SILO_ID DNS_NAME + ..................... default-silo.sys.oxide-dev.test + * ..................... test-suite-silo.sys.oxide-dev.test + + warnings: 2 + warning: silo ..................... with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates + warning: silo ..................... 
with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates + + TLS certificates: 0 + +task: "instance_updater" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances in need of updates: 0 + instances with destroyed active VMMs: 0 + instances with terminated active migrations: 0 + update sagas started: 0 + update sagas completed successfully: 0 + +task: "instance_watcher" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances checked: 0 + checks completed: 0 + successful checks: 0 + update sagas queued: 0 + failed checks: 0 + checks that could not be completed: 0 + stale instance metrics pruned: 0 + +task: "inventory_collection" + configured period: every 10m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last collection id: ..................... 
+ last collection started: + last collection done: + +task: "lookup_region_port" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total filled in ports: 0 + errors: 0 + +task: "metrics_producer_gc" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) + +task: "phantom_disks" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of phantom disks deleted: 0 + number of phantom disk delete errors: 0 + +task: "physical_disk_adoption" + configured period: every s + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + last completion reported error: task disabled + +task: "region_replacement" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacements started ok: 0 + number of region replacement start errors: 0 + +task: "region_replacement_driver" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacement drive sagas started ok: 0 + number of region replacement finish sagas started ok: 0 + number of errors: 0 + +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 
0 + errors: 0 + +task: "region_snapshot_replacement_start" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 + +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +task: "service_firewall_rule_propagation" + configured period: every 5m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + +task: "service_zone_nat_tracker" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + +task: "switch_port_config_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) + +task: "v2p_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) + +task: "vpc_route_manager" + configured period: every s + currently 
executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "sagas", "list"] +termination: Exited(0) +--------------------------------------------- +stdout: +SAGA_ID STATE +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +============================================= +EXECUTING COMMAND: omdb ["--destructive", "nexus", "sagas", "demo-create"] +termination: Exited(0) +--------------------------------------------- +stdout: +saga id: ..................... +demo saga id: ..................... (use this with `demo-complete`) +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "sagas", "list"] +termination: Exited(0) +--------------------------------------------- +stdout: +SAGA_ID STATE +..................... running +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. 
+============================================= EXECUTING COMMAND: omdb ["--destructive", "nexus", "background-tasks", "activate", "inventory_collection"] termination: Exited(0) --------------------------------------------- @@ -667,8 +1135,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -705,8 +1173,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -746,8 +1214,8 @@ to: blueprint ............. COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged) - cluster.preserve_downgrade_option: "22.1" (unchanged) + state fingerprint::::::::::::::::: (unchanged) + cluster.preserve_downgrade_option: (unchanged) METADATA: internal DNS version: 1 (unchanged) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 6a959d726a..d266e59ce8 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -7,9 +7,12 @@ //! Feel free to change the tool's output. This test just makes it easy to make //! sure you're only breaking what you intend. 
+use dropshot::Method; use expectorate::assert_contents; +use http::StatusCode; use nexus_test_utils::{OXIMETER_UUID, PRODUCER_UUID}; use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::Blueprint; use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; @@ -56,6 +59,7 @@ fn assert_oximeter_list_producers_output( #[tokio::test] async fn test_omdb_usage_errors() { + clear_omdb_env(); let cmd_path = path_to_executable(CMD_OMDB); let mut output = String::new(); let invocations: &[&[&'static str]] = &[ @@ -80,7 +84,18 @@ async fn test_omdb_usage_errors() { &["mgs"], &["nexus"], &["nexus", "background-tasks"], + &["nexus", "background-tasks", "show", "--help"], &["nexus", "blueprints"], + &["nexus", "sagas"], + // Missing "--destructive" flag. The URL is bogus but just ensures that + // we get far enough to hit the error we care about. + &[ + "nexus", + "--nexus-internal-url", + "http://[::1]:111", + "sagas", + "demo-create", + ], &["nexus", "sleds"], &["sled-agent"], &["sled-agent", "zones"], @@ -100,6 +115,8 @@ async fn test_omdb_usage_errors() { #[nexus_test] async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let gwtestctx = gateway_test_utils::setup::test_setup( "test_omdb_success_case", gateway_messages::SpPort::One, @@ -120,6 +137,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let tmppath = tmpdir.path().join("reconfigurator-save.out"); let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string(); + // Get the CockroachDB metadata from the blueprint so we can redact it + let initial_blueprint: Blueprint = dropshot::test_util::read_json( + &mut cptestctx + .internal_client + .make_request_no_body( + Method::GET, + &format!("/deployment/blueprints/all/{initial_blueprint_id}"), + StatusCode::OK, + ) + .await + .unwrap(), + ) + .await; + let mut output = String::new(); 
let invocations: &[&[&str]] = &[ @@ -134,6 +165,22 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + // background tasks: test picking out specific names + &["nexus", "background-tasks", "show", "saga_recovery"], + &[ + "nexus", + "background-tasks", + "show", + "blueprint_loader", + "blueprint_executor", + ], + // background tasks: test recognized group names + &["nexus", "background-tasks", "show", "dns_internal"], + &["nexus", "background-tasks", "show", "dns_external"], + &["nexus", "background-tasks", "show", "all"], + &["nexus", "sagas", "list"], + &["--destructive", "nexus", "sagas", "demo-create"], + &["nexus", "sagas", "list"], &[ "--destructive", "nexus", @@ -156,6 +203,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // ControlPlaneTestContext. ]; + let mut redactions = ExtraRedactions::new(); + redactions + .variable_length("tmp_path", tmppath.as_str()) + .fixed_length("blueprint_id", &initial_blueprint_id) + .variable_length( + "cockroachdb_fingerprint", + &initial_blueprint.cockroachdb_fingerprint, + ); + let crdb_version = + initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string(); + if initial_blueprint.cockroachdb_setting_preserve_downgrade.is_set() { + redactions.variable_length("cockroachdb_version", &crdb_version); + } for args in invocations { println!("running commands with args: {:?}", args); let p = postgres_url.to_string(); @@ -174,11 +234,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { }, &cmd_path, args, - Some( - ExtraRedactions::new() - .variable_length("tmp_path", tmppath.as_str()) - .fixed_length("blueprint_id", &initial_blueprint_id), - ), + Some(&redactions), ) .await; } @@ -244,6 +300,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { /// that's covered by the success tests above. 
#[nexus_test] async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let cmd_path = path_to_executable(CMD_OMDB); let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = @@ -326,6 +384,16 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { let args = &["--dns-server", &dns_sockaddr.to_string(), "db", "sleds"]; do_run(&mut output, move |exec| exec, &cmd_path, args).await; + // That said, the "sagas" command prints an extra warning in this case. + let args = &["nexus", "sagas", "list"]; + do_run( + &mut output, + move |exec| exec.env("OMDB_DNS_SERVER", &dns_sockaddr.to_string()), + &cmd_path, + args, + ) + .await; + // Case: specified in multiple places (command-line argument wins) let args = &["oximeter", "--oximeter-url", "junk", "list-producers"]; let ox = ox_url.clone(); @@ -467,3 +535,22 @@ async fn do_run_extra( write!(output, "=============================================\n").unwrap(); } + +// We're testing behavior that can be affected by OMDB-related environment +// variables. Clear all of them from the current process so that all child +// processes don't have them. OMDB environment variables can affect even the +// help output provided by clap. See clap-rs/clap#5673 for an example. +fn clear_omdb_env() { + // Rust documents that it's not safe to manipulate the environment in a + // multi-threaded process outside of Windows because it's possible that + // other threads are reading or writing the environment and most systems do + // not support this. On illumos, the underlying interfaces are broadly + // thread-safe. Further, Omicron only supports running tests under `cargo + // nextest`, in which case there are no threads running concurrently here + // that may be reading or modifying the environment. 
+ for (env_var, _) in std::env::vars().filter(|(k, _)| k.starts_with("OMDB_")) + { + eprintln!("removing {:?} from environment", env_var); + std::env::remove_var(env_var); + } +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 85f6d32018..380f0ec9e5 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -104,23 +104,25 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: - rack Print information about the rack - disks Print information about virtual disks - dns Print information about internal and external DNS - inventory Print information about collected hardware/software inventory - physical-disks Print information about physical disks - reconfigurator-save Save the current Reconfigurator inputs to a file - region Print information about regions - region-replacement Query for information about region replacements, optionally manually - triggering one - sleds Print information about sleds - instances Print information about customer instances - network Print information about the network - migrations Print information about migrations - snapshots Print information about snapshots - validate Validate the contents of the database - volumes Print information about volumes - help Print this message or the help of the given subcommand(s) + rack Print information about the rack + disks Print information about virtual disks + dns Print information about internal and external DNS + inventory Print information about collected hardware/software inventory + physical-disks Print information about physical disks + reconfigurator-save Save the current Reconfigurator inputs to a file + region Print information about regions + region-replacement Query for information about region replacements, optionally manually + triggering one + region-snapshot-replacement Query for information about region snapshot replacements, optionally + manually triggering one + 
sleds Print information about sleds + instances Print information about customer instances + network Print information about the network + migrations Print information about migrations + snapshots Print information about snapshots + validate Validate the contents of the database + volumes Print information about volumes + help Print this message or the help of the given subcommand(s) Options: --log-level log level filter [env: LOG_LEVEL=] [default: warn] @@ -148,23 +150,25 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: - rack Print information about the rack - disks Print information about virtual disks - dns Print information about internal and external DNS - inventory Print information about collected hardware/software inventory - physical-disks Print information about physical disks - reconfigurator-save Save the current Reconfigurator inputs to a file - region Print information about regions - region-replacement Query for information about region replacements, optionally manually - triggering one - sleds Print information about sleds - instances Print information about customer instances - network Print information about the network - migrations Print information about migrations - snapshots Print information about snapshots - validate Validate the contents of the database - volumes Print information about volumes - help Print this message or the help of the given subcommand(s) + rack Print information about the rack + disks Print information about virtual disks + dns Print information about internal and external DNS + inventory Print information about collected hardware/software inventory + physical-disks Print information about physical disks + reconfigurator-save Save the current Reconfigurator inputs to a file + region Print information about regions + region-replacement Query for information about region replacements, optionally manually + triggering one + region-snapshot-replacement Query for information about region 
snapshot replacements, optionally + manually triggering one + sleds Print information about sleds + instances Print information about customer instances + network Print information about the network + migrations Print information about migrations + snapshots Print information about snapshots + validate Validate the contents of the database + volumes Print information about volumes + help Print this message or the help of the given subcommand(s) Options: --log-level log level filter [env: LOG_LEVEL=] [default: warn] @@ -442,6 +446,7 @@ Usage: omdb nexus [OPTIONS] Commands: background-tasks print information about background tasks blueprints interact with blueprints + sagas view sagas, create and complete demo sagas sleds interact with sleds help Print this message or the help of the given subcommand(s) @@ -486,6 +491,46 @@ Connection Options: Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "--help"] +termination: Exited(0) +--------------------------------------------- +stdout: +Print human-readable summary of the status of each background task + +Usage: omdb nexus background-tasks show [OPTIONS] [TASK_NAME]... + +Arguments: + [TASK_NAME]... + Names of background tasks to show (default: all) + + You can use any background task name here or one of the special strings "all", + "dns_external", or "dns_internal". 
+ +Options: + --log-level + log level filter + + [env: LOG_LEVEL=] + [default: warn] + + -h, --help + Print help (see a summary with '-h') + +Connection Options: + --nexus-internal-url + URL of the Nexus internal API + + [env: OMDB_NEXUS_URL=] + + --dns-server + [env: OMDB_DNS_SERVER=] + +Safety Options: + -w, --destructive + Allow potentially-destructive subcommands +--------------------------------------------- +stderr: +============================================= EXECUTING COMMAND: omdb ["nexus", "blueprints"] termination: Exited(2) --------------------------------------------- @@ -518,6 +563,43 @@ Connection Options: Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= +EXECUTING COMMAND: omdb ["nexus", "sagas"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +view sagas, create and complete demo sagas + +Usage: omdb nexus sagas [OPTIONS] + +Commands: + list List sagas run by this Nexus + demo-create Create a "demo" saga + demo-complete Complete a demo saga started with "demo-create" + help Print this message or the help of the given subcommand(s) + +Options: + --log-level log level filter [env: LOG_LEVEL=] [default: warn] + -h, --help Print help + +Connection Options: + --nexus-internal-url URL of the Nexus internal API [env: + OMDB_NEXUS_URL=] + --dns-server [env: OMDB_DNS_SERVER=] + +Safety Options: + -w, --destructive Allow potentially-destructive subcommands +============================================= +EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://[::1]:111", "sagas", "demo-create"] +termination: Exited(1) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +note: using Nexus URL http://[::1]:111 +Error: This command is potentially destructive. Pass the `-w` / `--destructive` flag to allow it. 
+============================================= EXECUTING COMMAND: omdb ["nexus", "sleds"] termination: Exited(2) --------------------------------------------- diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index e60000cc06..2ca1bc3e4d 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true atomicwrites.workspace = true bootstrap-agent-api.workspace = true camino.workspace = true +clickhouse-admin-api.workspace = true cockroach-admin-api.workspace = true clap.workspace = true dns-server-api.workspace = true @@ -24,9 +25,11 @@ nexus-internal-api.workspace = true omicron-workspace-hack.workspace = true openapiv3.workspace = true openapi-lint.workspace = true +openapi-manager-types.workspace = true owo-colors.workspace = true oximeter-api.workspace = true serde_json.workspace = true +sled-agent-api.workspace = true similar.workspace = true supports-color.workspace = true wicketd-api.workspace = true diff --git a/dev-tools/openapi-manager/src/check.rs b/dev-tools/openapi-manager/src/check.rs index 182ed9fb19..b43e43e7e5 100644 --- a/dev-tools/openapi-manager/src/check.rs +++ b/dev-tools/openapi-manager/src/check.rs @@ -5,17 +5,16 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; use similar::TextDiff; use crate::{ output::{ - display_api_spec, display_error, display_summary, headers::*, plural, - write_diff, OutputOpts, Styles, + display_api_spec, display_api_spec_file, display_error, + display_summary, headers::*, plural, write_diff, OutputOpts, Styles, }, - spec::{all_apis, CheckStatus}, + spec::{all_apis, CheckStale, Environment}, FAILURE_EXIT_CODE, NEEDS_UPDATE_EXIT_CODE, }; @@ -37,7 +36,7 @@ impl CheckResult { } pub(crate) fn check_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); 
@@ -48,6 +47,7 @@ pub(crate) fn check_impl( let all_apis = all_apis(); let total = all_apis.len(); let count_width = total.to_string().len(); + let count_section_indent = count_section_indent(count_width); let continued_indent = continued_indent(count_width); eprintln!("{:>HEADER_WIDTH$}", SEPARATOR); @@ -58,57 +58,89 @@ pub(crate) fn check_impl( total.style(styles.bold), plural::documents(total), ); - let mut num_up_to_date = 0; + let mut num_fresh = 0; let mut num_stale = 0; - let mut num_missing = 0; let mut num_failed = 0; for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.check(&dir) { - Ok(status) => match status { - CheckStatus::Ok(summary) => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", - UP_TO_DATE.style(styles.success_header), - display_api_spec(spec, &styles), - display_summary(&summary, &styles), - ); + match spec.check(env) { + Ok(status) => { + let total_errors = status.total_errors(); + let total_errors_width = total_errors.to_string().len(); + + if total_errors == 0 { + // Success case. + let extra = if status.extra_files_len() > 0 { + format!( + ", {} extra files", + status.extra_files_len().style(styles.bold) + ) + } else { + "".to_string() + }; - num_up_to_date += 1; - } - CheckStatus::Stale { full_path, actual, expected } => { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - STALE.style(styles.warning_header), + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}{extra}", + FRESH.style(styles.success_header), display_api_spec(spec, &styles), + display_summary(&status.summary, &styles), ); - let diff = TextDiff::from_lines(&actual, &expected); - write_diff( - &diff, - &full_path, - &styles, - // Add an indent to align diff with the status message. 
- &mut IndentWriter::new( - &continued_indent, - std::io::stderr(), - ), - )?; - - num_stale += 1; + num_fresh += 1; + continue; } - CheckStatus::Missing => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - MISSING.style(styles.warning_header), - display_api_spec(spec, &styles), - ); - num_missing += 1; + // Out of date: print errors. + eprintln!( + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", + STALE.style(styles.warning_header), + display_api_spec(spec, &styles), + ); + num_stale += 1; + + for (error_ix, (spec_file, error)) in + status.iter_errors().enumerate() + { + let error_count = error_ix + 1; + + let display_heading = |heading: &str| { + eprintln!( + "{:>HEADER_WIDTH$}{count_section_indent}\ + ({error_count:>total_errors_width$}/{total_errors}) {}", + heading.style(styles.warning_header), + display_api_spec_file(spec, spec_file, &styles), + ); + }; + + match error { + CheckStale::Modified { + full_path, + actual, + expected, + } => { + display_heading(MODIFIED); + + let diff = + TextDiff::from_lines(&**actual, &**expected); + write_diff( + &diff, + &full_path, + &styles, + // Add an indent to align diff with the status message. 
+ &mut IndentWriter::new( + &continued_indent, + std::io::stderr(), + ), + )?; + } + CheckStale::New => { + display_heading(NEW); + } + } } - }, + } Err(error) => { eprint!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", @@ -138,13 +170,12 @@ pub(crate) fn check_impl( }; eprintln!( - "{:>HEADER_WIDTH$} {} {} checked: {} up-to-date, {} stale, {} missing, {} failed", + "{:>HEADER_WIDTH$} {} {} checked: {} fresh, {} stale, {} failed", status_header, total.style(styles.bold), plural::documents(total), - num_up_to_date.style(styles.bold), + num_fresh.style(styles.bold), num_stale.style(styles.bold), - num_missing.style(styles.bold), num_failed.style(styles.bold), ); if num_failed > 0 { @@ -170,14 +201,14 @@ pub(crate) fn check_impl( mod tests { use std::process::ExitCode; - use crate::spec::find_openapi_dir; + use crate::spec::Environment; use super::*; #[test] fn check_apis_up_to_date() -> Result { let output = OutputOpts { color: clap::ColorChoice::Auto }; - let dir = find_openapi_dir()?; + let dir = Environment::new(None)?; let result = check_impl(&dir, &output)?; Ok(result.to_exit_code()) diff --git a/dev-tools/openapi-manager/src/dispatch.rs b/dev-tools/openapi-manager/src/dispatch.rs index 937a8b485f..ca2989396f 100644 --- a/dev-tools/openapi-manager/src/dispatch.rs +++ b/dev-tools/openapi-manager/src/dispatch.rs @@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand}; use crate::{ check::check_impl, generate::generate_impl, list::list_impl, - output::OutputOpts, spec::openapi_dir, + output::OutputOpts, spec::Environment, }; /// Manage OpenAPI specifications. 
@@ -73,7 +73,7 @@ pub struct GenerateArgs { impl GenerateArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; + let dir = Environment::new(self.dir)?; Ok(generate_impl(&dir, output)?.to_exit_code()) } } @@ -87,8 +87,8 @@ pub struct CheckArgs { impl CheckArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; - Ok(check_impl(&dir, output)?.to_exit_code()) + let env = Environment::new(self.dir)?; + Ok(check_impl(&env, output)?.to_exit_code()) } } diff --git a/dev-tools/openapi-manager/src/generate.rs b/dev-tools/openapi-manager/src/generate.rs index f776ff2709..1cf9ebbb61 100644 --- a/dev-tools/openapi-manager/src/generate.rs +++ b/dev-tools/openapi-manager/src/generate.rs @@ -5,7 +5,6 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; @@ -14,7 +13,7 @@ use crate::{ display_api_spec, display_error, display_summary, headers::*, plural, OutputOpts, Styles, }, - spec::{all_apis, OverwriteStatus}, + spec::{all_apis, Environment}, FAILURE_EXIT_CODE, }; @@ -34,7 +33,7 @@ impl GenerateResult { } pub(crate) fn generate_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -62,27 +61,30 @@ pub(crate) fn generate_impl( for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.overwrite(&dir) { - Ok((status, summary)) => match status { - OverwriteStatus::Updated => { + match spec.overwrite(env) { + Ok(status) => { + let updated_count = status.updated_count(); + + if updated_count > 0 { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {} ({} {} updated)", UPDATED.style(styles.success_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), + 
updated_count.style(styles.bold), + plural::files(updated_count), ); num_updated += 1; - } - OverwriteStatus::Unchanged => { + } else { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", UNCHANGED.style(styles.unchanged_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), ); num_unchanged += 1; } - }, + } Err(err) => { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", diff --git a/dev-tools/openapi-manager/src/output.rs b/dev-tools/openapi-manager/src/output.rs index 6cd578e778..fee7f0f15c 100644 --- a/dev-tools/openapi-manager/src/output.rs +++ b/dev-tools/openapi-manager/src/output.rs @@ -10,7 +10,7 @@ use indent_write::fmt::IndentWriter; use owo_colors::{OwoColorize, Style}; use similar::{ChangeTag, DiffableStr, TextDiff}; -use crate::spec::{ApiSpec, DocumentSummary}; +use crate::spec::{ApiSpec, ApiSpecFile, DocumentSummary}; #[derive(Debug, Args)] #[clap(next_help_heading = "Global options")] @@ -123,6 +123,21 @@ pub(crate) fn display_api_spec(spec: &ApiSpec, styles: &Styles) -> String { ) } +pub(crate) fn display_api_spec_file( + spec: &ApiSpec, + spec_file: ApiSpecFile<'_>, + styles: &Styles, +) -> String { + match spec_file { + ApiSpecFile::Openapi => { + format!("OpenAPI document {}", spec.filename.style(styles.filename)) + } + ApiSpecFile::Extra(path) => { + format!("Extra file {}", path.style(styles.filename)) + } + } +} + pub(crate) fn display_summary( summary: &DocumentSummary, styles: &Styles, @@ -201,9 +216,14 @@ pub(crate) mod headers { pub(crate) static CHECKING: &str = "Checking"; pub(crate) static GENERATING: &str = "Generating"; - pub(crate) static UP_TO_DATE: &str = "Up-to-date"; + pub(crate) static FRESH: &str = "Fresh"; + + // Stale encompasses: + // - Stale: the file on disk is different from what we generated. + // - Missing: the file on disk does not exist. 
pub(crate) static STALE: &str = "Stale"; - pub(crate) static MISSING: &str = "Missing"; + pub(crate) static NEW: &str = "-> New"; + pub(crate) static MODIFIED: &str = "-> Modified"; pub(crate) static UPDATED: &str = "Updated"; pub(crate) static UNCHANGED: &str = "Unchanged"; @@ -211,22 +231,38 @@ pub(crate) mod headers { pub(crate) static SUCCESS: &str = "Success"; pub(crate) static FAILURE: &str = "Failure"; - pub(crate) fn continued_indent(count_width: usize) -> String { + fn count_section_width(count_width: usize) -> usize { // Status strings are of the form: // // Generated [ 1/12] api.json: 1 path, 1 schema + // ^^^^^^^^^ // - // So the continued indent is: - // - // HEADER_WIDTH for the status string - // + (count_width * 2) for current and total counts + // So the width of the count section is: + // (count_width * 2) for current and total counts // + 3 for '[/]' // + 2 for spaces on either side. - " ".repeat(HEADER_WIDTH + count_width * 2 + 3 + 2) + count_width * 2 + 3 + 2 + } + + pub(crate) fn count_section_indent(count_width: usize) -> String { + " ".repeat(count_section_width(count_width)) + } + + pub(crate) fn continued_indent(count_width: usize) -> String { + // HEADER_WIDTH for the status string + count_section_width + " ".repeat(HEADER_WIDTH + count_section_width(count_width)) } } pub(crate) mod plural { + pub(crate) fn files(count: usize) -> &'static str { + if count == 1 { + "file" + } else { + "files" + } + } + pub(crate) fn documents(count: usize) -> &'static str { if count == 1 { "document" diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index f991d35ec4..e74cf7ed7a 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -9,6 +9,7 @@ use atomicwrites::AtomicFile; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::{ApiDescription, ApiDescriptionBuildErrors, StubContext}; use fs_err as fs; +use openapi_manager_types::{ValidationBackend, ValidationContext}; use 
openapiv3::OpenAPI; /// All APIs managed by openapi-manager. @@ -24,6 +25,17 @@ pub fn all_apis() -> Vec { filename: "bootstrap-agent.json", extra_validation: None, }, + ApiSpec { + title: "ClickHouse Cluster Admin API", + version: "0.0.1", + description: "API for interacting with the Oxide \ + control plane's ClickHouse cluster", + boundary: ApiBoundary::Internal, + api_description: + clickhouse_admin_api::clickhouse_admin_api_mod::stub_api_description, + filename: "clickhouse-admin.json", + extra_validation: None, + }, ApiSpec { title: "CockroachDB Cluster Admin API", version: "0.0.1", @@ -87,6 +99,16 @@ pub fn all_apis() -> Vec { filename: "oximeter.json", extra_validation: None, }, + ApiSpec { + title: "Oxide Sled Agent API", + version: "0.0.1", + description: "API for interacting with individual sleds", + boundary: ApiBoundary::Internal, + api_description: + sled_agent_api::sled_agent_api_mod::stub_api_description, + filename: "sled-agent.json", + extra_validation: None, + }, ApiSpec { title: "Oxide Technician Port Control Service", version: "0.0.1", @@ -122,47 +144,64 @@ pub struct ApiSpec { pub filename: &'static str, /// Extra validation to perform on the OpenAPI spec, if any. 
- pub extra_validation: Option anyhow::Result<()>>, + pub extra_validation: Option)>, } impl ApiSpec { pub(crate) fn overwrite( &self, - dir: &Utf8Path, - ) -> Result<(OverwriteStatus, DocumentSummary)> { + env: &Environment, + ) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let status = overwrite_file(&full_path, &contents)?; - - Ok((status, summary)) + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = overwrite_file(&full_path, &contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + let status = overwrite_file(&full_path, &contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecOverwriteStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } - pub(crate) fn check(&self, dir: &Utf8Path) -> Result { + pub(crate) fn check(&self, env: &Environment) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let existing_contents = - read_opt(&full_path).context("failed to read contents on disk")?; - - match existing_contents { - Some(existing_contents) if existing_contents == contents => { - Ok(CheckStatus::Ok(summary)) - } - Some(existing_contents) => Ok(CheckStatus::Stale { - full_path, - actual: existing_contents, - expected: contents, - }), - None => Ok(CheckStatus::Missing), - } + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = check_file(full_path, contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + 
let status = check_file(full_path, contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecCheckStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } pub(crate) fn to_openapi_doc(&self) -> Result { @@ -195,7 +234,10 @@ impl ApiSpec { Ok(contents) } - fn validate_json(&self, contents: &[u8]) -> Result { + fn validate_json( + &self, + contents: &[u8], + ) -> Result<(DocumentSummary, ValidationResult)> { let openapi_doc = contents_to_openapi(contents) .context("JSON returned by ApiDescription is not valid OpenAPI")?; @@ -210,11 +252,51 @@ impl ApiSpec { return Err(anyhow::anyhow!("{}", errors.join("\n\n"))); } - if let Some(extra_validation) = self.extra_validation { - extra_validation(&openapi_doc)?; - } + let extra_files = if let Some(extra_validation) = self.extra_validation + { + let mut validation_context = + ValidationContextImpl { errors: Vec::new(), files: Vec::new() }; + extra_validation( + &openapi_doc, + ValidationContext::new(&mut validation_context), + ); + + if !validation_context.errors.is_empty() { + return Err(anyhow::anyhow!( + "OpenAPI document extended validation failed:\n{}", + validation_context + .errors + .iter() + .map(|e| e.to_string()) + .collect::>() + .join("\n") + )); + } + + validation_context.files + } else { + Vec::new() + }; + + Ok(( + DocumentSummary::new(&openapi_doc), + ValidationResult { extra_files }, + )) + } +} + +struct ValidationContextImpl { + errors: Vec, + files: Vec<(Utf8PathBuf, Vec)>, +} - Ok(DocumentSummary::new(&openapi_doc)) +impl ValidationBackend for ValidationContextImpl { + fn report_error(&mut self, error: anyhow::Error) { + self.errors.push(error); + } + + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec) { + self.files.push((path, contents)); } } @@ -239,6 +321,32 @@ impl fmt::Display for ApiBoundary { } } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecOverwriteStatus { + pub(crate) summary: DocumentSummary, + openapi_doc: OverwriteStatus, + 
extra_files: Vec<(Utf8PathBuf, OverwriteStatus)>, +} + +impl SpecOverwriteStatus { + pub(crate) fn updated_count(&self) -> usize { + self.iter() + .filter(|(_, status)| matches!(status, OverwriteStatus::Updated)) + .count() + } + + fn iter( + &self, + ) -> impl Iterator, &OverwriteStatus)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)).chain( + self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + }), + ) + } +} + #[derive(Debug)] #[must_use] pub(crate) enum OverwriteStatus { @@ -246,12 +354,58 @@ pub(crate) enum OverwriteStatus { Unchanged, } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecCheckStatus { + pub(crate) summary: DocumentSummary, + pub(crate) openapi_doc: CheckStatus, + pub(crate) extra_files: Vec<(Utf8PathBuf, CheckStatus)>, +} + +impl SpecCheckStatus { + pub(crate) fn total_errors(&self) -> usize { + self.iter_errors().count() + } + + pub(crate) fn extra_files_len(&self) -> usize { + self.extra_files.len() + } + + pub(crate) fn iter_errors( + &self, + ) -> impl Iterator, &CheckStale)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)) + .chain(self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + })) + .filter_map(|(spec_file, status)| { + if let CheckStatus::Stale(e) = status { + Some((spec_file, e)) + } else { + None + } + }) + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum ApiSpecFile<'a> { + Openapi, + Extra(&'a Utf8Path), +} + #[derive(Debug)] #[must_use] pub(crate) enum CheckStatus { - Ok(DocumentSummary), - Stale { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, - Missing, + Fresh, + Stale(CheckStale), +} + +#[derive(Debug)] +#[must_use] +pub(crate) enum CheckStale { + Modified { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, + New, } #[derive(Debug)] @@ -274,31 +428,45 @@ impl DocumentSummary { } } -pub(crate) fn openapi_dir(dir: Option) -> Result { - match dir { - Some(dir) => 
Ok(dir.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize directory: {}", dir) - })?), - None => find_openapi_dir().context("failed to find openapi directory"), - } +#[derive(Debug)] +#[must_use] +struct ValidationResult { + // Extra files recorded by the validation context. + extra_files: Vec<(Utf8PathBuf, Vec)>, +} + +pub(crate) struct Environment { + pub(crate) workspace_root: Utf8PathBuf, + pub(crate) openapi_dir: Utf8PathBuf, } -pub(crate) fn find_openapi_dir() -> Result { - let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); - // This crate is two levels down from the root of omicron, so go up twice. - root.pop(); - root.pop(); +impl Environment { + pub(crate) fn new(openapi_dir: Option) -> Result { + let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); + // This crate is two levels down from the root of omicron, so go up twice. + root.pop(); + root.pop(); - root.push("openapi"); - let root = root.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize openapi directory: {}", root) - })?; + let workspace_root = root.canonicalize_utf8().with_context(|| { + format!("failed to canonicalize workspace root: {}", root) + })?; - if !root.is_dir() { - anyhow::bail!("openapi root is not a directory: {}", root); - } + let openapi_dir = + openapi_dir.unwrap_or_else(|| workspace_root.join("openapi")); + let openapi_dir = + openapi_dir.canonicalize_utf8().with_context(|| { + format!( + "failed to canonicalize openapi directory: {}", + openapi_dir + ) + })?; + + if !openapi_dir.is_dir() { + anyhow::bail!("openapi root is not a directory: {}", root); + } - Ok(root) + Ok(Self { workspace_root, openapi_dir }) + } } /// Overwrite a file with new contents, if the contents are different. @@ -323,6 +491,29 @@ fn overwrite_file(path: &Utf8Path, contents: &[u8]) -> Result { Ok(OverwriteStatus::Updated) } +/// Check a file against expected contents. 
+fn check_file( + full_path: Utf8PathBuf, + contents: Vec, +) -> Result { + let existing_contents = + read_opt(&full_path).context("failed to read contents on disk")?; + + match existing_contents { + Some(existing_contents) if existing_contents == contents => { + Ok(CheckStatus::Fresh) + } + Some(existing_contents) => { + Ok(CheckStatus::Stale(CheckStale::Modified { + full_path, + actual: existing_contents, + expected: contents, + })) + } + None => Ok(CheckStatus::Stale(CheckStale::New)), + } +} + fn read_opt(path: &Utf8Path) -> std::io::Result>> { match fs::read(path) { Ok(contents) => Ok(Some(contents)), diff --git a/dev-tools/openapi-manager/types/Cargo.toml b/dev-tools/openapi-manager/types/Cargo.toml new file mode 100644 index 0000000000..262529f1a9 --- /dev/null +++ b/dev-tools/openapi-manager/types/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "openapi-manager-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +omicron-workspace-hack.workspace = true diff --git a/dev-tools/openapi-manager/types/src/lib.rs b/dev-tools/openapi-manager/types/src/lib.rs new file mode 100644 index 0000000000..b48ea03e74 --- /dev/null +++ b/dev-tools/openapi-manager/types/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Shared types for the OpenAPI manager. +//! +//! API trait crates can depend on this crate to get access to interfaces +//! exposed by the OpenAPI manager. 
+ +mod validation; + +pub use validation::*; diff --git a/dev-tools/openapi-manager/types/src/validation.rs b/dev-tools/openapi-manager/types/src/validation.rs new file mode 100644 index 0000000000..6f22228f4d --- /dev/null +++ b/dev-tools/openapi-manager/types/src/validation.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8PathBuf; + +/// Context for validation of OpenAPI specifications. +pub struct ValidationContext<'a> { + backend: &'a mut dyn ValidationBackend, +} + +impl<'a> ValidationContext<'a> { + /// Note part of the public API -- only called by the OpenAPI manager. + #[doc(hidden)] + pub fn new(backend: &'a mut dyn ValidationBackend) -> Self { + Self { backend } + } + + /// Reports a validation error. + pub fn report_error(&mut self, error: anyhow::Error) { + self.backend.report_error(error); + } + + /// Records that the file has the given contents. + /// + /// In check mode, if the files differ, an error is logged. + /// + /// In generate mode, the file is overwritten with the given contents. + /// + /// The path is treated as relative to the root of the repository. + pub fn record_file_contents( + &mut self, + path: impl Into, + contents: Vec, + ) { + self.backend.record_file_contents(path.into(), contents); + } +} + +/// The backend for validation. +/// +/// Not part of the public API -- only implemented by the OpenAPI manager. 
+#[doc(hidden)] +pub trait ValidationBackend { + fn report_error(&mut self, error: anyhow::Error); + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec); +} diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs index ee649e79b2..264eec2503 100644 --- a/dev-tools/releng/src/main.rs +++ b/dev-tools/releng/src/main.rs @@ -143,6 +143,10 @@ struct Args { /// Path to a pre-built omicron-package binary (skips building if set) #[clap(long, env = "OMICRON_PACKAGE")] omicron_package_bin: Option, + + /// Build the helios OS image from local sources. + #[clap(long)] + helios_local: bool, } impl Args { @@ -286,7 +290,7 @@ async fn main() -> Result<()> { logger, "helios checkout at {0} is out-of-date; run \ `git pull -C {0}`, or run omicron-releng with \ - --ignore-helios-origin or --helios-path", + --ignore-helios-origin or --helios-dir", shell_words::quote(args.helios_dir.as_str()) ); preflight_ok = false; @@ -496,39 +500,42 @@ async fn main() -> Result<()> { Utc::now().format("%Y-%m-%d %H:%M") ); - // helios-build experiment-image - jobs.push_command( - format!("{}-image", target), - Command::new("ptime") - .arg("-m") - .arg(args.helios_dir.join("helios-build")) - .arg("experiment-image") - .arg("-o") // output directory for image - .arg(args.output_dir.join(format!("os-{}", target))) + let mut image_cmd = Command::new("ptime") + .arg("-m") + .arg(args.helios_dir.join("helios-build")) + .arg("experiment-image") + .arg("-o") // output directory for image + .arg(args.output_dir.join(format!("os-{}", target))) + .arg("-F") // pass extra image builder features + .arg(format!("optever={}", opte_version.trim())) + .arg("-P") // include all files from extra proto area + .arg(proto_dir.join("root")) + .arg("-N") // image name + .arg(image_name) + .arg("-s") // tempdir name suffix + .arg(target.as_str()) + .args(target.image_build_args()) + .current_dir(&args.helios_dir) + .env( + "IMAGE_DATASET", + match target { + Target::Host => 
&args.host_dataset, + Target::Recovery => &args.recovery_dataset, + }, + ) + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"); + + if !args.helios_local { + image_cmd = image_cmd .arg("-p") // use an external package repository - .arg(format!("helios-dev={}", HELIOS_REPO)) - .arg("-F") // pass extra image builder features - .arg(format!("optever={}", opte_version.trim())) - .arg("-P") // include all files from extra proto area - .arg(proto_dir.join("root")) - .arg("-N") // image name - .arg(image_name) - .arg("-s") // tempdir name suffix - .arg(target.as_str()) - .args(target.image_build_args()) - .current_dir(&args.helios_dir) - .env( - "IMAGE_DATASET", - match target { - Target::Host => &args.host_dataset, - Target::Recovery => &args.recovery_dataset, - }, - ) - .env_remove("CARGO") - .env_remove("RUSTUP_TOOLCHAIN"), - ) - .after("helios-setup") - .after(format!("{}-proto", target)); + .arg(format!("helios-dev={HELIOS_REPO}")) + } + + // helios-build experiment-image + jobs.push_command(format!("{}-image", target), image_cmd) + .after("helios-setup") + .after(format!("{}-proto", target)); } // Build the recovery target after we build the host target. 
Only one // of these will build at a time since Cargo locks its target directory; diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index d11dabaf85..b4516b8b77 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -15,24 +15,24 @@ clap.workspace = true dns-server-api.workspace = true dns-service-client.workspace = true dropshot.workspace = true +hickory-client.workspace = true +hickory-proto.workspace = true +hickory-resolver.workspace = true +hickory-server.workspace = true http.workspace = true pretty-hex.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true sled.workspace = true -slog.workspace = true -slog-term.workspace = true slog-async.workspace = true slog-envlogger.workspace = true +slog-term.workspace = true +slog.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true -trust-dns-client.workspace = true -trust-dns-proto.workspace = true -trust-dns-resolver.workspace = true -trust-dns-server.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true @@ -44,4 +44,3 @@ openapiv3.workspace = true openapi-lint.workspace = true serde_json.workspace = true subprocess.workspace = true -trust-dns-resolver.workspace = true diff --git a/dns-server/src/bin/dns-server.rs b/dns-server/src/bin/dns-server.rs index 52a9c17c0d..9e8d098ee2 100644 --- a/dns-server/src/bin/dns-server.rs +++ b/dns-server/src/bin/dns-server.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Executable that starts the HTTP-configurable DNS server used for both -//! internal DNS (RFD 248) and extenral DNS (RFD 357) for the Oxide system +//! 
internal DNS (RFD 248) and external DNS (RFD 357) for the Oxide system use anyhow::anyhow; use anyhow::Context; diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 5c761f2aa3..4ecbe382c8 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -13,6 +13,19 @@ use crate::storage::Store; use anyhow::anyhow; use anyhow::Context; use dns_server_api::DnsRecord; +use hickory_proto::op::Header; +use hickory_proto::op::ResponseCode; +use hickory_proto::rr::rdata::SRV; +use hickory_proto::rr::RData; +use hickory_proto::rr::Record; +use hickory_proto::rr::RecordType; +use hickory_proto::serialize::binary::BinDecodable; +use hickory_proto::serialize::binary::BinDecoder; +use hickory_proto::serialize::binary::BinEncoder; +use hickory_resolver::Name; +use hickory_server::authority::MessageRequest; +use hickory_server::authority::MessageResponse; +use hickory_server::authority::MessageResponseBuilder; use pretty_hex::*; use serde::Deserialize; use slog::{debug, error, info, o, trace, Logger}; @@ -21,17 +34,6 @@ use std::str::FromStr; use std::sync::Arc; use thiserror::Error; use tokio::net::UdpSocket; -use trust_dns_proto::op::header::Header; -use trust_dns_proto::op::response_code::ResponseCode; -use trust_dns_proto::rr::rdata::SRV; -use trust_dns_proto::rr::record_data::RData; -use trust_dns_proto::rr::record_type::RecordType; -use trust_dns_proto::rr::{Name, Record}; -use trust_dns_proto::serialize::binary::{ - BinDecodable, BinDecoder, BinEncoder, -}; -use trust_dns_server::authority::MessageResponse; -use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; use uuid::Uuid; /// Configuration related to the DNS server @@ -167,7 +169,10 @@ async fn handle_dns_packet(request: Request) { Err(error) => { let header = Header::response_from_request(mr.header()); let rb_servfail = MessageResponseBuilder::from_message_request(&mr); - error!(log, "failed to handle incoming DNS message: {:#}", error); + error!( + log, 
+ "failed to handle incoming DNS message: {:#?} {:#}", mr, error + ); match error { RequestError::NxDomain(_) => { let rb_nxdomain = @@ -222,7 +227,7 @@ fn dns_record_to_record( let mut a = Record::new(); a.set_name(name.clone()) .set_rr_type(RecordType::A) - .set_data(Some(RData::A(addr))); + .set_data(Some(RData::A(addr.into()))); Ok(a) } @@ -230,7 +235,7 @@ fn dns_record_to_record( let mut aaaa = Record::new(); aaaa.set_name(name.clone()) .set_rr_type(RecordType::AAAA) - .set_data(Some(RData::AAAA(addr))); + .set_data(Some(RData::AAAA(addr.into()))); Ok(aaaa) } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index 424159e41d..8abd3b945e 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -47,13 +47,13 @@ pub mod http_server; pub mod storage; use anyhow::{anyhow, Context}; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use slog::o; use std::net::SocketAddr; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; /// Starts both the HTTP and DNS servers over a given store. 
pub async fn start_servers( @@ -167,12 +167,14 @@ impl TransientServer { socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok(resolver) } } diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs index 85b2e79b8b..b3141f6751 100644 --- a/dns-server/src/storage.rs +++ b/dns-server/src/storage.rs @@ -95,6 +95,8 @@ use anyhow::{anyhow, Context}; use camino::Utf8PathBuf; use dns_server_api::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord}; +use hickory_proto::rr::LowerName; +use hickory_resolver::Name; use serde::{Deserialize, Serialize}; use sled::transaction::ConflictableTransactionError; use slog::{debug, error, info, o, warn}; @@ -104,8 +106,6 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use thiserror::Error; use tokio::sync::Mutex; -use trust_dns_client::rr::LowerName; -use trust_dns_client::rr::Name; const KEY_CONFIG: &'static str = "config"; @@ -586,7 +586,7 @@ impl Store { /// If the returned set would have been empty, returns `QueryError::NoName`. 
pub(crate) fn query( &self, - mr: &trust_dns_server::authority::MessageRequest, + mr: &hickory_server::authority::MessageRequest, ) -> Result, QueryError> { let name = mr.query().name(); let orig_name = mr.query().original().name(); @@ -784,14 +784,14 @@ mod test { use dns_server_api::DnsConfigParams; use dns_server_api::DnsConfigZone; use dns_server_api::DnsRecord; + use hickory_proto::rr::LowerName; + use hickory_resolver::Name; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; use std::collections::HashMap; use std::net::Ipv6Addr; use std::str::FromStr; use std::sync::Arc; - use trust_dns_client::rr::LowerName; - use trust_dns_client::rr::Name; /// As usual, `TestContext` groups the various pieces we need in a bunch of /// our tests and helps make sure they get cleaned up properly. diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index b3b7f37378..fa5bfea468 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -9,6 +9,12 @@ use dns_service_client::{ Client, }; use dropshot::{test_util::LogContext, HandlerTaskMode}; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; +use hickory_resolver::{ + config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, + proto::op::ResponseCode, +}; use omicron_test_utils::dev::test_setup_log; use slog::o; use std::{ @@ -16,12 +22,6 @@ use std::{ net::Ipv6Addr, net::{IpAddr, Ipv4Addr}, }; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; -use trust_dns_resolver::{ - config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, - proto::op::ResponseCode, -}; const TEST_ZONE: &'static str = "oxide.internal"; @@ -374,17 +374,19 @@ async fn init_client_server( ) .await?; - let mut rc = ResolverConfig::new(); - rc.add_name_server(NameServerConfig { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { 
socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = - TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); let client = Client::new(&format!("http://{}", dropshot_server.local_addr()), log); diff --git a/docs/architecture-rev-2024-08-01.svg b/docs/architecture-rev-2024-08-01.svg new file mode 100644 index 0000000000..a952297de4 --- /dev/null +++ b/docs/architecture-rev-2024-08-01.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/control-plane-architecture.adoc b/docs/control-plane-architecture.adoc new file mode 100644 index 0000000000..931da9ce02 --- /dev/null +++ b/docs/control-plane-architecture.adoc @@ -0,0 +1,254 @@ +:showtitle: +:numbered: +:toc: left + +// +// The sources for the diagrams in this document are in the Oxide Google Drive +// folder for Control Plane Architecture: +// https://drive.google.com/open?id=1OI-QxmapK7oYGFRGp0suJdpQDft-qVAz +// + += Control plane architecture + +NOTE: Much of this material originally came from <> and <>. This is now the living documentation for all the material covered here. + +NOTE: The RFD references in this documentation may be Oxide-internal. Where possible, we're trying to move relevant documentation from those RFDs into docs here. + +== What is the control plane + +In software systems the terms **data plane** and **control plane** are often used to refer to the parts of the system that directly provide resources to users (the data plane) and the parts that support the configuration, control, monitoring, and operation of the system (the control plane). 
Within the Oxide system, we say that the data plane comprises those parts that provide CPU resources (including both the host CPU and hypervisor software), storage resources, and network resources. The control plane provides the APIs through which users provision, configure, and monitor these resources and the mechanisms through which these APIs are implemented. Also part of the control plane are the APIs and facilities through which operators manage the system itself, including fault management, alerting, software updates for various components of the system, and so on. + +Broadly, the control plane must provide: + +* an externally-facing API endpoint described in <> through which users can provision elastic infrastructure backed by the system. This includes APIs for compute instances, storage, networking, as well as supporting resources like organizations, users, groups, ssh keys, tags, and so on. This API may be used by developers directly as well as the developer console backend. See <>. +* an externally-facing API endpoint for all operator functions. This is a long list, including configuration and management of hardware and software components and monitoring. +* implementation of lifecycle activities, like initial system setup; adding, removing, or replacing servers or other components; and the like. +* facilities for remote support by Oxide, including secure access to crash dumps, core files, log files, and system consoles. + +== Fundamental properties + +NOTE: These are design goals. They have not all been fully implemented yet. + +**Availability.** Availability of the control plane refers to the property that requests to provision resources succeed when the underlying resources are available within the system and requests to reconfigure or monitor resources succeed as long as they are well-formed. Unavailability refers to request failure due to hardware or software failure. 
+ +IMPORTANT: Generally, the control plane is expected to remain **available** in the face of any two hardware or software failures, including transient failures of individual compute sleds, power rectifiers, switches, or the like. + +**Durability.** Along the same lines, resources created in the control plane are expected to be durable unless otherwise specified. That is, if the whole system is powered off and on again ("cold start"), the system should converge to a point where all instances, disks, and networking resources that were running before the power outage are available as they were from the user's perspective before the event. Similarly, if a compute server is lost (either through graceful decommissioning or otherwise), it should be possible to resume service of resources that were running on that server (e.g., instances, disks) on other servers in the system. There may be additional constraints on how many servers can fail permanently before data is lost, but in no case should it be possible to permanently lose an instance, disk, or other resource after the permanent failure of two compute sleds. + +IMPORTANT: Resources created by users should generally survive permanent failure of any two hardware or software components. + +**Consistency.** Generally, users can expect strong consistency for resources within some namespace. The bounds of the namespace for a particular resource may vary as described in <>. For example, if a user creates an instance, another user with appropriate permissions should immediately see that instance. In terms of https://en.wikipedia.org/wiki/CAP_theorem[**CAP**], the system is generally CP, with an emphasis on avoiding partitions through reliable software and hardware. + +IMPORTANT: The API namespace is generally expected to provide strong consistency. 
+ +**Scalability and performance.** The API is designed with a scheme for naming and pagination that supports operating on arbitrarily large collections, so in principle it's expected to support arbitrary numbers of most resources. In practice, the system is intended to support on the order of 100 servers in a rack and 10,000 VMs in a rack. While these numbers are unlikely to change drastically in the future, the long-term goal of providing a single view over multiple racks means the system will need to support much larger numbers of servers and other resources. To avoid catastrophic degradation in performance (to the point of unavailability) as the system is scaled, aggressive limits will be imposed on the numbers of most resources. Operators may choose to raise these limits but will be advised to test the system's performance at the new scale. + +IMPORTANT: The API should support arbitrarily large systems. The system itself should be clear about its target scale and avoid catastrophic degradation due to users consuming too many resources. + +**Security.** Older versions of <> discussed control plane security in great detail. That content needs to be extracted from the history and probably put here. + +**Supportability and debuggability.** Effective customer support includes rapidly diagnosing issues and releasing fixes with low-risk updates. To achieve this, all the software in the system, including the control plane, must be built with supportability in mind, which means being able to collect enough information about failures to diagnose them from their first occurrence in the field as much as possible and being able to update software with low risk to the system. Details will be covered in an RFD to-be-named-later. + +== Parts of the control plane + +=== Crash course on hardware architecture + +For our purposes, an Oxide rack comprises three types of boards (systems): + +* Up to 32 compute **sleds** (servers). 
These are sometimes called **Gimlets**, though "Gimlet" technically refers to a particular hardware generation. Within the sled, the **host system** is the x86 box we generally think of as "the server". +* 1 or 2 **switches**, each attached via PCIe to one of the 32 compute sleds. (The switches are _also_ connected to each of the 32 sleds for networking. This PCIe connection we're talking about is for control of the switch itself, which is only done by one sled.) The chassis that house the switches are sometimes called **Sidecars**, though "Sidecar" technically refers to a particular hardware generation. Sleds that are attached to switches are often called **Scrimlets** (which is a little unfortunate since the name obviously comes from "Gimlet", but it might not be a Gimlet (since Gimlet refers to a specific hardware generation)). +* 1-2 power shelves, each with a **Power Shelf Controller (PSC)** that provides basic monitoring and control for the rectifiers that make up the power shelf. + +Each type of system (Gimlet, Sidecar, and PSC) contains a **service processor** (SP) that's responsible for basic monitoring and control, typically including power control and thermal management. + +<> discusses service processors in more detail. + +=== Components that run alongside specific hardware + +.Overview of the control plane +image::architecture-rev-2024-08-01.svg[Control Plane Architecture] + +At the "bottom" of the stack, we have a few basic components that reside alongside the specific pieces of hardware that they manage: + +* On each sled, the **sled agent** manages instances, storage, networking, and the sled's other resources. Sled agent also collects information about hardware and reports it to Nexus. Each sled also runs either a **boundary NTP** or **internal NTP** service to synchronize the sled's clock. More on boundary NTP below. 
+* On the two Scrimlets, a "switch zone" provides additional functionality related to the switch: +** **Dendrite** provides APIs for configuring the switch itself (e.g., populating various tables used for packet forwarding, NAT, etc.). +** **Management Gateway Service (MGS)** provides APIs for communicating with all the rack's service processors (including those on the sleds, Sidecars, and PSCs). See <> for details. +** **Wicket** and its associated service **wicketd** provide a text user interface (TUI) that's accessible over the rack's technician ports. Wicket is used for initial system setup (before networking has been configured) and for support. +** **Boundary NTP** provides NTP service for all sleds in the rack based on upstream NTP servers provided by the customer. + +.Components deployed alongside specific hardware +[cols="1h,2,4",stripes="none",options="header"] +|=== +| Component +| How it's deployed +| Availability/scalability + +| Sled agent +| One per sled, tied to that specific sled +| N/A + +| Internal DNS +| One zone per non-Scrimlet sled +| N/A + +| Boundary NTP +| One zone per Scrimlet. Both instances within a rack are fungible. +| There are two. Short-term failure (order of hours or even days) is unlikely to affect anything since sled clocks do not drift that quickly. + +| Dendrite +| Part of the switch zone (one per Scrimlet), tied to that specific switch +| Unavailability of either instance results in loss of ability to configure and monitor the corresponding switch. + +| Management Gateway +| Part of the switch zone (one per Scrimlet). Both instances within one rack are fungible. +| Only one of the two instances is generally required to maintain service. + +| Wicket +| Part of the switch zone (one per Scrimlet). Both instances within one rack are fungible. +| Wickets operate independently. Failure of one means unavailability of the TUI over that technician port. 
+ +|=== + +=== Higher-level components + +Most other components: + +* are deployed in illumos zones +* don't care where they run and can even be deployed multiple times on the same sled +* can be deployed multiple times for availability, horizontal scalability, or both + +They are: + +* **Nexus** provides primary control for the whole control plane. Nexus hosts all user-facing APIs (both operator and customer), the web console, and internal APIs for other control plane components to report inventory, generate alerts, and so on. Nexus is also responsible for background control plane activity, including utilization management, server failure detection and recovery, and the like. Persistent state is stored elsewhere (in CockroachDB), which allows Nexus to be scaled separately. +* **CockroachDB** provides a replicated, strongly-consistent, horizontally scalable database that stores virtually all control plane data. See <> and <> for details. +* **Clickhouse** provides storage and querying services for metric data collected from all components in the rack. See <> for more information. +* **Oximeter** collects metric data from the other components and stores it into Clickhouse. See <> for more information. +* **External DNS** operates authoritative DNS nameservers for end users and operators. These are authoritative nameservers for whatever DNS name the customer specifies. They currently just provide DNS names for the external API and web console. +* **Internal DNS** provides DNS names for all control plane components. This is how most of the control plane discovers its dependencies. (See <> and <>.) + + +.Hardware-agnostic components +[cols="1h,2,4,4",stripes="none",options="header"] +|=== +| Component +| How it's deployed +| Horizontal scalability +| Availability + +| Nexus +| Using zones, as many as needed. Instances are fungible. +| Not architecturally limited. State provided by CockroachDB. 
+| With N instances needed to handle load, and M instances deployed, can survive M - N failures. + +| CockroachDB +| Using zones, as many as needed. Instances are fungible. +| Required, provided by CockroachDB cluster expansion. +| Required, provided by CockroachDB range replication. + +| Clickhouse +| Using zones, as many as needed. Instances are fungible. +| TBD +| Required, provided by Clickhouse replication (see <>). + +| Oximeter +| Using zones, as many as needed. +| Yes. Configuration managed by Nexus, stored in CockroachDB, and cached in local storage for improved availability when other components are down +| TBD. + +| External DNS +| Using zones, as many as needed. Instances are fungible. +| Not architecturally limited. Generally limited by the number of external DNS server IP addresses provided by the customer, which is usually 2-5. +| Generally, only one is needed for service. + +| Internal DNS +| Using zones, as many as needed. Instances are fungible. +| Hardcoded limit of 5. +| With N instances needed to handle load, and M instances deployed, can survive M - N failures. + +|=== + +== Design principles + +=== Basics + +As much as possible, components are deployed in illumos zones. These are lightweight containers that act as their own complete systems (e.g., with their own dedicated networking stack with its own interfaces, IPs, etc.). + +Oxide-produced components are written in Rust. They communicate over HTTP using APIs managed via OpenAPI using Dropshot. HTTP may not provide the best latency, but we don't expect the throughput of API requests to be so high or the target latency so low that the overhead of HTTP internally will noticeably impact the customer experience. Using OpenAPI enables us to leverage investments in OpenAPI libraries, tooling, and documentation that we need for the external API. 
Rigorous use of OpenAPI, including automatically generating OpenAPI specifications from server implementations, allows us to automatically identify potentially breaking API changes. This information will eventually be included in metadata associated with each component's update images so that the upgrade software can use this to ensure that only compatible combinations of components are deployed. + +Service discovery happens via DNS. See <> and <>. + +=== Nexus, data flow + +Nexus is the place where system-wide decisions get made. CockroachDB is the source of truth for all configuration. + +Nexus stores all of its state in CockroachDB. It's the only component that communicates directly with CockroachDB. + +Nexus instances operate independently, without directly coordinating with each other except through CockroachDB. + +Generally, when a change gets made, the process is: + +1. Nexus receives a request to make the change (e.g., via the external API) +2. Nexus validates the requested change +3. Nexus stores the information into CockroachDB. (This is the point where change is serialized against any concurrent changes.) +4. Nexus propagates the change to other components that need to know about it. + +There are a few basic contexts in Nexus: + +* **API requests** from either the external or internal API. Here, Nexus is latency-sensitive. When we make database queries or other requests in this context, we usually do _not_ retry transient failures, but leave that to callers (See https://en.wikipedia.org/wiki/End-to-end_principle["end-to-end principle"]). API request handlers may kick off sagas or activate background tasks. +* **Distributed sagas** are a https://www.youtube.com/watch?v=0UTOLRTwOX0[design pattern] for carrying out multi-step operations in a distributed system. Saga actions generally _do_ retry transient errors indefinitely. 
+* **Background tasks** are periodic or event-triggered activities that manage everything else that has to happen in the system (e.g., change propagation, CockroachDB cluster management, fault tolerance, etc.). Nexus has a framework for background tasks that's oriented around the "reconciler" pattern (see <>). In this context, we also usually don't retry individual operations -- instead, the entire activity will be retried on a periodic basis. Background tasks are structured to re-evaluate the state of the world each time they're run and then determine what to do, on the assumption that things may have changed since the last time they ran. + +It's essential that components provide visibility into what they're doing for debugging and support. Software should be able to exonerate itself when things are broken. + +* API requests are short-lived. The Nexus log is currently the only real way to see what these have done. +* Sagas are potentially long-lived. Without needing any per-saga work, the saga log provides detailed information about which steps have run, which steps are in-progress, and the results of each step that completed. +* Background tasks are continuous processes. They can provide whatever detailed status they want to, including things like: activity counters, error counters, ringbuffers of recent events, data produced by the task, etc. These can be viewed with `omdb`. + +== Cold start + +"Cold start" refers to starting the control plane from a rack that's completely powered off. Achieving this requires careful consideration of where configuration is stored and how configuration changes flow through the system. + +We'll start from the point where sleds are powered on, even though a lot happens with the rectifiers, service processors, Sidecars, etc. before that point. Once host systems are powered on: + +* Sled agents start up, communicate with each other, and form a trust quorum that enables each of them to decrypt their local storage. 
This local storage includes: +** a **bootstore** containing basic network configuration needed to bring up the rack +** information about what control plane services are running on this sled +* Sled agents apply any needed network configuration and start any services they're supposed to be running: +** On Scrimlets, the switch zone and boundary NTP are started. Boundary NTP synchronizes time from the customer-provided NTP servers. +** On non-Scrimlets, internal DNS is started. The rest of cold boot waits until time has been synchronized from the boundary NTP instances. +** Once time is synchronized, internal DNS services are started so that components can find each other. +** Once internal DNS is available, all other services are started concurrently. +*** CockroachDB nodes start up, discover the rest of the cluster via DNS, and form a cluster. +*** Nexus starts up and waits for CockroachDB to become available. +*** All other services start up and wait for their dependencies to become available. + +For this to work: + +* **Bootstore** must contain enough information to configure networking on the switches and each host to reach other services within the rack as well as the outside world (for NTP). +* **Internal DNS** must be able to come up without any external dependencies, meaning it stores a complete copy of all DNS data locally. + +However, Nexus is the place where all _changes_ to configuration are made, and CockroachDB is the source of truth for all configuration. As a result, when changing bootstore contents or internal DNS, the change is first made at Nexus, stored into CockroachDB, and then propagated to all sleds and internal DNS instances for local persistent storage so that it's available on cold start (of the _sled_) without the rest of the control plane being up. + +This is a very rough approximation, but gives an idea of the dependencies associated with cold start. 
+ +[bibliography] +== References + +Unfortunately, most of these RFDs are not yet public. + +* [[[rfd4, RFD 4]]] https://rfd.shared.oxide.computer/rfd/4/[RFD 4 User Facing API] +* [[[rfd6, RFD 6]]] https://rfd.shared.oxide.computer/rfd/6/[RFD 6 Threat Model]. Note the reference above comes from an earlier version of RFD 6 (7e44771b239c0458aea2b6e2045294d41b79cb22 or earlier). +* [[[rfd24, RFD 24]]] https://rfd.shared.oxide.computer/rfd/24/[RFD 24 Multi-Rack Oxide Deployments] +* [[[rfd30, RFD 30]]] https://rfd.shared.oxide.computer/rfd/30/[RFD 30 Oxide Console Prototype] +* [[[rfd48, RFD 48]]] https://rfd.shared.oxide.computer/rfd/48/[RFD 48 Control Plane Requirements] +* [[[rfd53, RFD 53]]] https://rfd.shared.oxide.computer/rfd/53/[RFD 53 Control plane data storage requirements] +* [[[rfd61, RFD 61]]] https://rfd.shared.oxide.computer/rfd/61/[RFD 61 Control Plane Architecture and Design] +* [[[rfd110, RFD 110]]] https://rfd.shared.oxide.computer/rfd/110/[RFD 110 CockroachDB for the control plane database] +* [[[rfd125, RFD 125]]] https://rfd.shared.oxide.computer/rfd/125/[RFD 125 Telemetry requirements and building blocks] +* [[[rfd162, RFD 162]]] https://rfd.shared.oxide.computer/rfd/162/[RFD 162 Metrics collection architecture and design] +* [[[rfd206, RFD 206]]] https://rfd.shared.oxide.computer/rfd/206/[RFD 206 Service Discovery] +* [[[rfd210, RFD 210]]] https://rfd.shared.oxide.computer/rfd/210/[RFD 210 Omicron, service processors, and power shelf controllers] +* [[[rfd248, RFD 248]]] https://rfd.shared.oxide.computer/rfd/248/[RFD 248 Omicron service discovery: server side] +* [[[rfd373, RFD 373]]] https://rfd.shared.oxide.computer/rfd/373/[RFD 373 Reliable Persistent Workflows] +* [[[rfd468, RFD 468]]] https://rfd.shared.oxide.computer/rfd/468/[RFD 468 Rolling out replicated ClickHouse to new and existing racks] diff --git a/docs/crdb-upgrades.adoc b/docs/crdb-upgrades.adoc index eecfa9194e..52231ee199 100644 --- a/docs/crdb-upgrades.adoc +++ 
b/docs/crdb-upgrades.adoc @@ -60,13 +60,15 @@ a tick, but they must occur in that order.) . Add an enum variant for the new version to `CockroachDbClusterVersion` in `nexus/types/src/deployment/planning_input.rs`, and change the associated constant `NEWLY_INITIALIZED` to that value. -. Run the test suite, which should catch any unexpected SQL +. Regenerate the Nexus internal OpenAPI document, which contains an enum + of CockroachDB versions: ++ +.... +EXPECTORATE=overwrite cargo nextest run -p omicron-nexus -- integration_tests::commands::test_nexus_openapi_internal +.... +. Run the full test suite, which should catch any unexpected SQL compatibility issues between releases and help validate that your build works. - * You will need to run the `test_omdb_success_cases` test from - omicron-omdb with `EXPECTORATE=overwrite`; this file contains the - expected output of various omdb commands, including a fingerprint of - CockroachDB's cluster state. . Submit a PR for your changes to garbage-compactor; when merged, publish the final build to the `oxide-cockroachdb-build` S3 bucket. . Update `tools/cockroachdb_checksums`. For non-illumos checksums, use diff --git a/docs/demo-saga.adoc b/docs/demo-saga.adoc new file mode 100644 index 0000000000..316050fc23 --- /dev/null +++ b/docs/demo-saga.adoc @@ -0,0 +1,195 @@ +:showtitle: +:numbered: +:toc: left + += Demo saga + +Nexus ships with a "demo" saga that can be used to interactively experiment with sagas, saga recovery, and saga transfer (after Nexus zone expungement). The demo saga consists of a single action that blocks until it's instructed to proceed. You instruct it to proceed using a request to the Nexus _internal_ API. + +In the example below, we'll: + +. Use `omicron-dev run-all` to run a simulated control plane stack +. Start a second Nexus whose execution we can control precisely +. Use the `omdb nexus sagas demo-create` command to kick off a demo saga +. 
Use the `omdb nexus sagas demo-complete` command to instruct that saga to finish + +For steps 1-2, we're just following the https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run-simulated.adoc#using-both-omicron-dev-run-all-and-running-nexus-manually[docs for running a simulated stack and a second Nexus]. First, run `omicron-dev run-all`: + +```terminal +$ cargo xtask omicron-dev run-all +... +omicron-dev: setting up all services ... +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.0.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.0.log" +DB URL: postgresql://root@[::1]:43428/omicron?sslmode=disable +DB address: [::1]:43428 +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.2.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.2.log" +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.3.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.7162.3.log" +omicron-dev: services are running. +omicron-dev: nexus external API: 127.0.0.1:12220 +omicron-dev: nexus internal API: [::1]:12221 +omicron-dev: cockroachdb pid: 7166 +omicron-dev: cockroachdb URL: postgresql://root@[::1]:43428/omicron?sslmode=disable +omicron-dev: cockroachdb directory: /dangerzone/omicron_tmp/.tmpkzPi6h +omicron-dev: internal DNS HTTP: http://[::1]:55952 +omicron-dev: internal DNS: [::1]:36474 +omicron-dev: external DNS name: oxide-dev.test +omicron-dev: external DNS HTTP: http://[::1]:64396 +omicron-dev: external DNS: [::1]:35977 +omicron-dev: e.g. 
`dig @::1 -p 35977 test-suite-silo.sys.oxide-dev.test` +omicron-dev: management gateway: http://[::1]:33325 (switch0) +omicron-dev: management gateway: http://[::1]:61144 (switch1) +omicron-dev: silo name: test-suite-silo +omicron-dev: privileged user name: test-privileged +``` + +Then follow those docs to configure and start a second Nexus: + +```terminal +$ cargo run --bin=nexus -- config-second.toml +... +Aug 12 20:16:25.405 INFO listening, local_addr: [::1]:12223, component: dropshot_internal, name: a4ef738a-1fb0-47b1-9da2-4919c7ec7c7f, file: /home/dap/.cargo/git/checkouts/dropshot-a4a923d29dccc492/52d900a/dropshot/src/server.rs:205 +... +``` + +The rest of these instructions will use `omdb` pointed at the second Nexus instance, so we'll set OMDB_NEXUS_URL in the environment: + +```terminal +$ export OMDB_NEXUS_URL=http://[::1]:12223 +``` + +Now we can use `omdb nexus sagas list` to list the sagas that have run _in that second Nexus process_ only: + +```terminal +$ cargo run --bin=omdb -- nexus sagas list +... +note: using Nexus URL http://[::1]:12223 +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +SAGA_ID STATE +``` + +Now we can create a demo saga: + +```terminal +$ cargo run --bin=omdb -- --destructive nexus sagas demo-create +... +note: using Nexus URL http://[::1]:12223 +saga id: f7765d6a-6e45-4c13-8904-2677b79a97eb +demo saga id: 88eddf09-dda3-4d70-8d99-1d3b441c57da (use this with `demo-complete`) +``` + +We have to use the `--destructive` option because this command by nature changes state in Nexus and `omdb` won't allow commands that change state by default. + +We can see the new saga in the list of sagas now. It's running: + +```terminal +$ cargo run --bin=omdb -- nexus sagas list +... 
+note: using Nexus URL http://[::1]:12223 +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +SAGA_ID STATE +f7765d6a-6e45-4c13-8904-2677b79a97eb running +``` + +and it will stay running indefinitely until we run `demo-complete`. Let's do that: + +```terminal +$ cargo run --bin=omdb -- --destructive nexus sagas demo-complete 88eddf09-dda3-4d70-8d99-1d3b441c57da +... +note: using Nexus URL http://[::1]:12223 +``` + +and then list sagas again: + +```terminal +$ cargo run --bin=omdb -- nexus sagas list +... +note: using Nexus URL http://[::1]:12223 +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +SAGA_ID STATE +f7765d6a-6e45-4c13-8904-2677b79a97eb succeeded +``` + +It works across recovery, too. You can go through the same loop again, but this time kill Nexus and start it again: + +```terminal +$ cargo run --bin=omdb -- --destructive nexus sagas demo-create +... +note: using Nexus URL http://[::1]:12223 +saga id: 65253cb6-4428-4aa7-9afc-bf9b42166cb5 +demo saga id: 208ebc89-acc6-42d3-9f40-7f5567c8a39b (use this with `demo-complete`) +``` + +Now restart Nexus (^C the second invocation and run it again). Now if we use `omdb` we don't see the earlier saga because it was finished when this new Nexus process started. But we see the one we created later because it was recovered: + +```terminal +$ cargo run --bin=omdb -- nexus sagas list +... +note: using Nexus URL http://[::1]:12223 +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. 
+SAGA_ID STATE +65253cb6-4428-4aa7-9afc-bf9b42166cb5 running +``` + +Side note: we can see it was recovered: + +```terminal +$ cargo run --bin=omdb -- nexus background-tasks show +... +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: iter 1, triggered by a periodic timer firing + started at 2024-08-12T20:20:41.714Z (44s ago) and ran for 79ms + since Nexus started: + sagas recovered: 1 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 1 (in-progress, assigned to this Nexus) + recovered: 1 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + recently recovered sagas (1): + TIME SAGA_ID + 2024-08-12T20:20:41Z 65253cb6-4428-4aa7-9afc-bf9b42166cb5 + no saga recovery failures +... +``` + +Now we can complete that saga: + +```terminal +$ cargo run --bin=omdb -- --destructive nexus sagas demo-complete 208ebc89-acc6-42d3-9f40-7f5567c8a39b +... +note: using Nexus URL http://[::1]:12223 +``` + +and see it finish: + +``` +$ cargo run --bin=omdb -- nexus sagas list +... +note: using Nexus URL http://[::1]:12223 +NOTE: This command only reads in-memory state from the targeted Nexus instance. +Sagas may be missing if they were run by a different Nexus instance or if they +finished before this Nexus instance last started up. +SAGA_ID STATE +65253cb6-4428-4aa7-9afc-bf9b42166cb5 succeeded +``` + +Note too that the completion is not synchronous with the `demo-complete` command, though it usually _is_ pretty quick. It's possible you'll catch it `running` if you run `nexus sagas list` right after running `nexus sagas demo-complete`, but you should quickly see it `succeeded` if you keep running `nexus sagas list`. 
diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index eb7cd68812..b2400f7603 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -19,14 +19,14 @@ omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest = { workspace = true, features = ["cookies"] } -russh = "0.43.0" -russh-keys = "0.43.0" +russh = "0.44.1" +russh-keys = "0.44.0" serde.workspace = true serde_json.workspace = true sled-agent-types.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true ispf.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index d9a2d7027a..5363557502 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,6 +1,7 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; +use hickory_resolver::error::ResolveErrorKind; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -13,7 +14,6 @@ use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; const RSS_CONFIG_STR: &str = include_str!(concat!( diff --git a/flake.lock b/flake.lock index 5a70a42881..2c0393f722 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1712791164, - "narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=", + "lastModified": 1723175592, + "narHash": "sha256-M0xJ3FbDUc4fRZ84dPGx5VvgFsOzds77KiBMW/mMTnI=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5", + "rev": 
"5e0ca22929f3342b19569b21b2f3462f053e497b", "type": "github" }, "original": { @@ -29,11 +29,11 @@ ] }, "locked": { - "lastModified": 1719368303, - "narHash": "sha256-vhkKOUs9eOZgcPrA6wMw7a7J48pEjVuhzQfitVwVv1g=", + "lastModified": 1723429325, + "narHash": "sha256-4x/32xTCd+xCwFoI/kKSiCr5LQA2ZlyTRYXKEni5HR8=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "32415b22fd3b454e4a1385af64aa5cef9766ff4c", + "rev": "65e3dc0fe079fe8df087cd38f1fe6836a0373aad", "type": "github" }, "original": { diff --git a/gateway/src/error.rs b/gateway/src/error.rs index 5933daa340..ee148e0c98 100644 --- a/gateway/src/error.rs +++ b/gateway/src/error.rs @@ -26,12 +26,8 @@ pub enum StartupError { #[derive(Debug, Error, SlogInlineError)] pub enum SpCommsError { - #[error("discovery process not yet complete")] - DiscoveryNotYetComplete, - #[error("location discovery failed: {reason}")] - DiscoveryFailed { reason: String }, - #[error("nonexistent SP {0:?}")] - SpDoesNotExist(SpIdentifier), + #[error(transparent)] + Discovery(#[from] SpLookupError), #[error("unknown socket address for SP {0:?}")] SpAddressUnknown(SpIdentifier), #[error( @@ -52,13 +48,22 @@ pub enum SpCommsError { }, } +/// Errors returned by attempts to look up a SP in the management switch's +/// discovery map. 
+#[derive(Debug, Error, SlogInlineError)] +pub enum SpLookupError { + #[error("discovery process not yet complete")] + DiscoveryNotYetComplete, + #[error("location discovery failed: {reason}")] + DiscoveryFailed { reason: String }, + #[error("nonexistent SP {0:?}")] + SpDoesNotExist(SpIdentifier), +} + impl From for HttpError { fn from(error: SpCommsError) -> Self { match error { - SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request( - Some("InvalidSp".to_string()), - InlineErrorChain::new(&error).to_string(), - ), + SpCommsError::Discovery(err) => HttpError::from(err), SpCommsError::SpCommunicationFailed { err: CommunicationError::SpError( @@ -124,21 +129,11 @@ impl From for HttpError { "UpdateInProgress", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryNotYetComplete => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryNotYetComplete", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::SpAddressUnknown(_) => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "SpAddressUnknown", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryFailed { .. } => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryFailed ", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::Timeout { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "Timeout ", @@ -160,6 +155,27 @@ impl From for HttpError { } } +impl From for HttpError { + fn from(error: SpLookupError) -> Self { + match error { + SpLookupError::SpDoesNotExist(_) => HttpError::for_bad_request( + Some("InvalidSp".to_string()), + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryNotYetComplete => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryNotYetComplete", + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryFailed { .. 
} => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryFailed ", + InlineErrorChain::new(&error).to_string(), + ), + } + } +} + // Helper function to return an `HttpError` with the same internal and external // message. MGS is an "internal" service - even when we return a 500-level // status code, we want to give our caller some information about what is going diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs index a93c44d62c..23dfbe01a8 100644 --- a/gateway/src/management_switch.rs +++ b/gateway/src/management_switch.rs @@ -20,6 +20,7 @@ pub use self::location_map::SwitchPortConfig; pub use self::location_map::SwitchPortDescription; use self::location_map::ValidatedLocationConfig; use crate::error::SpCommsError; +use crate::error::SpLookupError; use crate::error::StartupError; use gateway_messages::IgnitionState; use gateway_sp_comms::default_discovery_addr; @@ -316,18 +317,18 @@ impl ManagementSwitch { self.location_map.get().is_some() } - fn location_map(&self) -> Result<&LocationMap, SpCommsError> { + fn location_map(&self) -> Result<&LocationMap, SpLookupError> { let discovery_result = self .location_map .get() - .ok_or(SpCommsError::DiscoveryNotYetComplete)?; + .ok_or(SpLookupError::DiscoveryNotYetComplete)?; discovery_result .as_ref() - .map_err(|s| SpCommsError::DiscoveryFailed { reason: s.clone() }) + .map_err(|s| SpLookupError::DiscoveryFailed { reason: s.clone() }) } /// Get the identifier of our local switch. - pub fn local_switch(&self) -> Result { + pub fn local_switch(&self) -> Result { let location_map = self.location_map()?; Ok(location_map.port_to_id(self.local_ignition_controller_port)) } @@ -347,11 +348,11 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. 
- fn get_port(&self, id: SpIdentifier) -> Result { + fn get_port(&self, id: SpIdentifier) -> Result { let location_map = self.location_map()?; let port = location_map .id_to_port(id) - .ok_or(SpCommsError::SpDoesNotExist(id))?; + .ok_or(SpLookupError::SpDoesNotExist(id))?; Ok(port) } @@ -362,7 +363,7 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. - pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpCommsError> { + pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpLookupError> { let port = self.get_port(id)?; Ok(self.port_to_sp(port)) } @@ -377,7 +378,7 @@ impl ManagementSwitch { pub fn ignition_target( &self, id: SpIdentifier, - ) -> Result { + ) -> Result { let port = self.get_port(id)?; Ok(self.port_to_ignition_target[port.0]) } @@ -389,7 +390,7 @@ impl ManagementSwitch { /// therefore can't map our switch ports to SP identities). pub(crate) fn all_sps( &self, - ) -> Result, SpCommsError> + ) -> Result, SpLookupError> { let location_map = self.location_map()?; Ok(location_map diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs index 4dcb390e9e..26ab4d6218 100644 --- a/illumos-utils/src/opte/firewall_rules.rs +++ b/illumos-utils/src/opte/firewall_rules.rs @@ -5,7 +5,6 @@ //! Convert Omicron VPC firewall rules to OPTE firewall rules. 
use super::net_to_cidr; -use crate::opte::params::VpcFirewallRule; use crate::opte::Vni; use macaddr::MacAddr6; use omicron_common::api::external::VpcFirewallRuleAction; @@ -13,6 +12,7 @@ use omicron_common::api::external::VpcFirewallRuleDirection; use omicron_common::api::external::VpcFirewallRuleProtocol; use omicron_common::api::external::VpcFirewallRuleStatus; use omicron_common::api::internal::nexus::HostIdentifier; +use omicron_common::api::internal::shared::ResolvedVpcFirewallRule; use oxide_vpc::api::Address; use oxide_vpc::api::Direction; use oxide_vpc::api::Filters; @@ -34,7 +34,7 @@ trait FromVpcFirewallRule { fn protos(&self) -> Vec; } -impl FromVpcFirewallRule for VpcFirewallRule { +impl FromVpcFirewallRule for ResolvedVpcFirewallRule { fn action(&self) -> FirewallAction { match self.action { VpcFirewallRuleAction::Allow => FirewallAction::Allow, @@ -118,7 +118,7 @@ impl FromVpcFirewallRule for VpcFirewallRule { /// a single host address and protocol, so we must unroll rules with multiple /// hosts/protocols. pub fn opte_firewall_rules( - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], vni: &Vni, mac: &MacAddr6, ) -> Vec { diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index d7fd96b0c0..9a86711ae6 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -13,7 +13,6 @@ cfg_if::cfg_if! { } mod firewall_rules; -pub mod params; mod port; mod port_manager; diff --git a/illumos-utils/src/opte/params.rs b/illumos-utils/src/opte/params.rs deleted file mode 100644 index 17c61d680f..0000000000 --- a/illumos-utils/src/opte/params.rs +++ /dev/null @@ -1,65 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use omicron_common::api::external; -use omicron_common::api::internal::nexus::HostIdentifier; -use omicron_common::api::internal::shared::NetworkInterface; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::net::IpAddr; -use std::net::Ipv6Addr; - -/// Update firewall rules for a VPC -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct VpcFirewallRulesEnsureBody { - pub vni: external::Vni, - pub rules: Vec, -} - -/// VPC firewall rule after object name resolution has been performed by Nexus -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct VpcFirewallRule { - pub status: external::VpcFirewallRuleStatus, - pub direction: external::VpcFirewallRuleDirection, - pub targets: Vec, - pub filter_hosts: Option>, - pub filter_ports: Option>, - pub filter_protocols: Option>, - pub action: external::VpcFirewallRuleAction, - pub priority: external::VpcFirewallRulePriority, -} - -/// A mapping from a virtual NIC to a physical host -#[derive( - Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash, -)] -pub struct VirtualNetworkInterfaceHost { - pub virtual_ip: IpAddr, - pub virtual_mac: external::MacAddr, - pub physical_host_ip: Ipv6Addr, - pub vni: external::Vni, -} - -/// DHCP configuration for a port -/// -/// Not present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we -/// use `InstanceRuntimeState::hostname` for this value. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct DhcpConfig { - /// DNS servers to send to the instance - /// - /// (DHCPv4 option 6; DHCPv6 option 23) - pub dns_servers: Vec, - - /// DNS zone this instance's hostname belongs to (e.g. 
the `project.example` - /// part of `instance1.project.example`) - /// - /// (DHCPv4 option 15; used in DHCPv6 option 39) - pub host_domain: Option, - - /// DNS search domains - /// - /// (DHCPv4 option 119; DHCPv6 option 24) - pub search_domains: Vec, -} diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 93c646cfab..735428907e 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -6,8 +6,6 @@ use crate::dladm::OPTE_LINK_PREFIX; use crate::opte::opte_firewall_rules; -use crate::opte::params::VirtualNetworkInterfaceHost; -use crate::opte::params::VpcFirewallRule; use crate::opte::port::PortData; use crate::opte::Error; use crate::opte::Gateway; @@ -17,6 +15,7 @@ use ipnetwork::IpNetwork; use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::ResolvedVpcFirewallRule; use omicron_common::api::internal::shared::ResolvedVpcRoute; use omicron_common::api::internal::shared::ResolvedVpcRouteSet; use omicron_common::api::internal::shared::ResolvedVpcRouteState; @@ -24,6 +23,7 @@ use omicron_common::api::internal::shared::RouterId; use omicron_common::api::internal::shared::RouterTarget as ApiRouterTarget; use omicron_common::api::internal::shared::RouterVersion; use omicron_common::api::internal::shared::SourceNatConfig; +use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DhcpCfg; @@ -96,7 +96,7 @@ pub struct PortCreateParams<'a> { pub source_nat: Option, pub ephemeral_ip: Option, pub floating_ips: &'a [IpAddr], - pub firewall_rules: &'a [VpcFirewallRule], + pub firewall_rules: &'a [ResolvedVpcFirewallRule], pub dhcp_config: DhcpCfg, pub is_service: bool, } @@ -664,7 +664,7 @@ impl PortManager { pub fn 
firewall_rules_ensure( &self, vni: external::Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { use opte_ioctl::OpteHdl; @@ -705,7 +705,7 @@ impl PortManager { pub fn firewall_rules_ensure( &self, vni: external::Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { info!( self.inner.log, diff --git a/illumos-utils/src/smf_helper.rs b/illumos-utils/src/smf_helper.rs index 2c24ceaa4d..2d29376950 100644 --- a/illumos-utils/src/smf_helper.rs +++ b/illumos-utils/src/smf_helper.rs @@ -77,7 +77,7 @@ impl<'t> SmfHelper<'t> { "addpropvalue", &prop.to_string(), &format!("{}:", valtype.to_string()), - &val.to_string(), + &format!("\"{}\"", val.to_string()), ]) .map_err(|err| Error::ZoneCommand { intent: format!("add {} smf property value", prop.to_string()), diff --git a/installinator-common/Cargo.toml b/installinator-common/Cargo.toml index 4c5560148f..039304c9de 100644 --- a/installinator-common/Cargo.toml +++ b/installinator-common/Cargo.toml @@ -11,6 +11,7 @@ workspace = true anyhow.workspace = true camino.workspace = true illumos-utils.workspace = true +omicron-common.workspace = true libc.workspace = true schemars.workspace = true serde.workspace = true diff --git a/installinator-common/src/progress.rs b/installinator-common/src/progress.rs index 900fe70028..9078da6ba5 100644 --- a/installinator-common/src/progress.rs +++ b/installinator-common/src/progress.rs @@ -4,9 +4,9 @@ use std::{collections::BTreeSet, fmt, net::SocketAddr}; -use anyhow::bail; use camino::Utf8PathBuf; use illumos_utils::zpool; +use omicron_common::disk::M2Slot; use schemars::{ gen::SchemaGenerator, schema::{Schema, SchemaObject}, @@ -165,47 +165,6 @@ impl WriteOutput { } } -/// An M.2 slot that was written. 
-#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Deserialize, - Serialize, - JsonSchema, -)] -pub enum M2Slot { - A, - B, -} - -impl fmt::Display for M2Slot { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::A => f.write_str("A"), - Self::B => f.write_str("B"), - } - } -} - -impl TryFrom for M2Slot { - type Error = anyhow::Error; - - fn try_from(value: i64) -> Result { - match value { - // Gimlet should have 2 M.2 drives: drive A is assigned slot 17, and - // drive B is assigned slot 18. - 17 => Ok(Self::A), - 18 => Ok(Self::B), - _ => bail!("unexpected M.2 slot {value}"), - } - } -} - /// The specification for write events. #[derive(JsonSchema)] pub enum WriteSpec {} diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index 00dfb6440b..0d59950a2a 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -13,6 +13,7 @@ async-trait.workspace = true buf-list.workspace = true bytes.workspace = true camino.workspace = true +camino-tempfile.workspace = true cancel-safe-futures.workspace = true clap.workspace = true display-error-chain.workspace = true @@ -37,7 +38,6 @@ slog-async.workspace = true slog-envlogger.workspace = true slog-term.workspace = true smf.workspace = true -tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tufaceous-lib.workspace = true @@ -50,7 +50,6 @@ omicron-test-utils.workspace = true hex-literal.workspace = true partial-io.workspace = true proptest.workspace = true -tempfile.workspace = true test-strategy.workspace = true tokio = { workspace = true, features = ["test-util"] } tokio-stream.workspace = true diff --git a/installinator/src/async_temp_file.rs b/installinator/src/async_temp_file.rs index c884908ac8..168fffa2aa 100644 --- a/installinator/src/async_temp_file.rs +++ b/installinator/src/async_temp_file.rs @@ -3,13 +3,13 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use camino::Utf8PathBuf; +use camino_tempfile::NamedUtf8TempFile; +use camino_tempfile::Utf8PathPersistError; +use camino_tempfile::Utf8TempPath; use std::io; use std::pin::Pin; use std::task::Context; use std::task::Poll; -use tempfile::NamedTempFile; -use tempfile::PathPersistError; -use tempfile::TempPath; use tokio::fs::File; use tokio::io::AsyncWrite; @@ -18,7 +18,7 @@ pub(crate) struct AsyncNamedTempFile { // in our `persist()` method below. This allows us to drop the temp path // (deleting the temporary file) if we're dropped before `persist()` is // called. - temp_path: Option, + temp_path: Option, destination: Utf8PathBuf, inner: File, } @@ -41,7 +41,7 @@ impl AsyncNamedTempFile { .to_owned(); let temp_file = - tokio::task::spawn_blocking(|| NamedTempFile::new_in(parent)) + tokio::task::spawn_blocking(|| NamedUtf8TempFile::new_in(parent)) .await .unwrap()?; let temp_path = temp_file.into_temp_path(); @@ -62,7 +62,7 @@ impl AsyncNamedTempFile { tokio::task::spawn_blocking(move || temp_path.persist(&destination)) .await .unwrap() - .map_err(|PathPersistError { error, .. }| error) + .map_err(|Utf8PathPersistError { error, .. 
}| error) } } diff --git a/installinator/src/write.rs b/installinator/src/write.rs index c7710baff7..583c5a7b51 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -16,11 +16,14 @@ use bytes::Buf; use camino::{Utf8Path, Utf8PathBuf}; use illumos_utils::zpool::{Zpool, ZpoolName}; use installinator_common::{ - ControlPlaneZonesSpec, ControlPlaneZonesStepId, M2Slot, RawDiskWriter, - StepContext, StepProgress, StepResult, StepSuccess, UpdateEngine, - WriteComponent, WriteError, WriteOutput, WriteSpec, WriteStepId, + ControlPlaneZonesSpec, ControlPlaneZonesStepId, RawDiskWriter, StepContext, + StepProgress, StepResult, StepSuccess, UpdateEngine, WriteComponent, + WriteError, WriteOutput, WriteSpec, WriteStepId, +}; +use omicron_common::{ + disk::M2Slot, + update::{ArtifactHash, ArtifactHashId}, }; -use omicron_common::update::{ArtifactHash, ArtifactHashId}; use sha2::{Digest, Sha256}; use slog::{info, warn, Logger}; use tokio::{ @@ -915,6 +918,7 @@ mod tests { use anyhow::Result; use bytes::{Buf, Bytes}; use camino::Utf8Path; + use camino_tempfile::tempdir; use futures::StreamExt; use installinator_common::{ Event, InstallinatorCompletionMetadata, InstallinatorComponent, @@ -931,7 +935,6 @@ mod tests { PartialAsyncWrite, PartialOp, }; use proptest::prelude::*; - use tempfile::tempdir; use test_strategy::proptest; use tokio::io::AsyncReadExt; use tokio::sync::Mutex; @@ -1029,7 +1032,7 @@ mod tests { ) -> Result<()> { let logctx = test_setup_log("test_write_artifact"); let tempdir = tempdir()?; - let tempdir_path: &Utf8Path = tempdir.path().try_into()?; + let tempdir_path = tempdir.path(); let destination_host = tempdir_path.join("test-host.bin"); let destination_control_plane = diff --git a/internal-dns-cli/Cargo.toml b/internal-dns-cli/Cargo.toml index dae0af0280..3e34c21622 100644 --- a/internal-dns-cli/Cargo.toml +++ b/internal-dns-cli/Cargo.toml @@ -11,9 +11,9 @@ workspace = true anyhow.workspace = true clap.workspace = true dropshot.workspace 
= true +hickory-resolver.workspace = true internal-dns.workspace = true omicron-common.workspace = true slog.workspace = true tokio.workspace = true -trust-dns-resolver.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index 9e003ed14f..f9875e71a0 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -36,15 +36,17 @@ struct Opt { #[value(rename_all = "kebab-case")] enum ServiceName { Cockroach, - Clickhouse, ClickhouseKeeper, + ClickhouseServer, } impl From for internal_dns::ServiceName { fn from(value: ServiceName) -> Self { match value { ServiceName::Cockroach => internal_dns::ServiceName::Cockroach, - ServiceName::Clickhouse => internal_dns::ServiceName::Clickhouse, + ServiceName::ClickhouseServer => { + internal_dns::ServiceName::ClickhouseServer + } ServiceName::ClickhouseKeeper => { internal_dns::ServiceName::ClickhouseKeeper } @@ -65,10 +67,8 @@ async fn main() -> Result<()> { let resolver = if opt.nameserver_addresses.is_empty() { info!(&log, "using system configuration"); - let async_resolver = - trust_dns_resolver::AsyncResolver::tokio_from_system_conf() - .context("initializing resolver from system configuration")?; - Resolver::new_with_resolver(log.clone(), async_resolver) + Resolver::new_from_system_conf(log.clone()) + .context("initializing resolver from system configuration")? 
} else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index c08cc012c1..c12035e2cb 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -18,7 +18,7 @@ omicron-uuid-kinds.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index a9ff664030..e9d7ed873d 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -510,6 +510,10 @@ mod test { ServiceName::ClickhouseKeeper.dns_name(), "_clickhouse-keeper._tcp", ); + assert_eq!( + ServiceName::ClickhouseServer.dns_name(), + "_clickhouse-server._tcp", + ); assert_eq!(ServiceName::Cockroach.dns_name(), "_cockroach._tcp",); assert_eq!(ServiceName::InternalDns.dns_name(), "_nameservice._tcp",); assert_eq!(ServiceName::Nexus.dns_name(), "_nexus._tcp",); diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index f975029d69..a9fe1a36bf 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -25,6 +25,7 @@ pub const DNS_ZONE_EXTERNAL_TESTING: &str = "oxide-dev.test"; pub enum ServiceName { Clickhouse, ClickhouseKeeper, + ClickhouseServer, Cockroach, InternalDns, ExternalDns, @@ -48,6 +49,7 @@ impl ServiceName { match self { ServiceName::Clickhouse => "clickhouse", ServiceName::ClickhouseKeeper => "clickhouse-keeper", + ServiceName::ClickhouseServer => "clickhouse-server", ServiceName::Cockroach => "cockroach", ServiceName::ExternalDns => "external-dns", ServiceName::InternalDns => "nameservice", @@ -73,6 +75,7 @@ impl ServiceName { match self { ServiceName::Clickhouse | ServiceName::ClickhouseKeeper + | ServiceName::ClickhouseServer | ServiceName::Cockroach | 
ServiceName::InternalDns | ServiceName::ExternalDns diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index fdd5dce428..5d3832a417 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -2,24 +2,24 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use hickory_resolver::config::{ + LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::lookup::SrvLookup; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, error, info, trace}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; -use trust_dns_resolver::config::{ - LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::lookup::SrvLookup; -use trust_dns_resolver::TokioAsyncResolver; pub type DnsError = dns_service_client::Error; #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { #[error(transparent)] - Resolve(#[from] trust_dns_resolver::error::ResolveError), + Resolve(#[from] hickory_resolver::error::ResolveError), #[error("Record not found for SRV key: {}", .0.dns_name())] NotFound(crate::ServiceName), @@ -52,6 +52,19 @@ impl reqwest::dns::Resolve for Resolver { } impl Resolver { + /// Construct a new DNS resolver from the system configuration. + pub fn new_from_system_conf( + log: slog::Logger, + ) -> Result { + let (rc, mut opts) = hickory_resolver::system_conf::read_system_conf()?; + // Enable edns for potentially larger records + opts.edns0 = true; + + let resolver = TokioAsyncResolver::tokio(rc, opts); + + Ok(Self { log, resolver }) + } + /// Construct a new DNS resolver from specific DNS server addresses. 
pub fn new_from_addrs( log: slog::Logger, @@ -66,18 +79,20 @@ impl Resolver { socket_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; opts.num_concurrent_reqs = dns_server_count; // The underlay is IPv6 only, so this helps avoid needless lookups of // the IPv4 variant. opts.ip_strategy = LookupIpStrategy::Ipv6Only; opts.negative_max_ttl = Some(std::time::Duration::from_secs(15)); - let resolver = TokioAsyncResolver::tokio(rc, opts)?; + let resolver = TokioAsyncResolver::tokio(rc, opts); Ok(Self { log, resolver }) } @@ -145,27 +160,6 @@ impl Resolver { self.resolver.clear_cache(); } - /// Looks up a single [`Ipv6Addr`] based on the SRV name. - /// Returns an error if the record does not exist. - // TODO: There are lots of ways this API can expand: Caching, - // actually respecting TTL, looking up ports, etc. - // - // For now, however, it serves as a very simple "get everyone using DNS" - // API that can be improved upon later. - pub async fn lookup_ipv6( - &self, - srv: crate::ServiceName, - ) -> Result { - let name = srv.srv_name(); - debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name); - let response = self.resolver.ipv6_lookup(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(*address) - } - /// Returns the targets of the SRV records for a DNS name /// /// The returned values are generally other DNS names that themselves would @@ -220,6 +214,12 @@ impl Resolver { // TODO-robustness: any callers of this should probably be using // all the targets for a given SRV and not just the first one // we get, see [`Resolver::lookup_all_socket_v6`]. + // + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. 
+ // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. pub async fn lookup_socket_v6( &self, service: crate::ServiceName, @@ -313,7 +313,7 @@ impl Resolver { // (1) it returns `IpAddr`'s rather than `SocketAddr`'s // (2) it doesn't actually return all the addresses from the Additional // section of the DNS server's response. - // See bluejekyll/trust-dns#1980 + // See hickory-dns/hickory-dns#1980 // // (1) is not a huge deal as we can try to match up the targets ourselves // to grab the port for creating a `SocketAddr` but (2) means we need to do @@ -350,10 +350,9 @@ impl Resolver { .await .into_iter() .flat_map(move |target| match target { - Ok((ips, port)) => Some( - ips.into_iter() - .map(move |ip| SocketAddrV6::new(ip, port, 0, 0)), - ), + Ok((ips, port)) => Some(ips.into_iter().map(move |aaaa| { + SocketAddrV6::new(aaaa.into(), port, 0, 0) + })), Err((target, err)) => { error!( log, @@ -511,7 +510,7 @@ mod test { assert!( matches!( dns_error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. 
}, ), "Saw error: {dns_error}", ); @@ -535,11 +534,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); let resolver = dns_server.resolver().unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip,); + assert_eq!(found_addr.ip(), &ip,); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -617,11 +616,13 @@ mod test { // Look up Cockroach let resolver = dns_server.resolver().unwrap(); - let ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let resolved_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + assert!(cockroach_addrs + .iter() + .any(|addr| addr.ip() == resolved_addr.ip())); // Look up all the Cockroach addresses. let mut ips = @@ -635,18 +636,18 @@ mod test { ); // Look up Clickhouse - let ip = resolver - .lookup_ipv6(ServiceName::Clickhouse) + let addr = resolver + .lookup_socket_v6(ServiceName::Clickhouse) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, clickhouse_addr.ip()); + assert_eq!(addr.ip(), clickhouse_addr.ip()); // Look up Backend Service - let ip = resolver - .lookup_ipv6(srv_backend) + let addr = resolver + .lookup_socket_v6(srv_backend) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, crucible_addr.ip()); + assert_eq!(addr.ip(), crucible_addr.ip()); // If we deploy a new generation that removes all records, then we don't // find anything any more. @@ -657,14 +658,14 @@ mod test { // If we remove the records for all services, we won't find them any // more. 
(e.g., there's no hidden caching going on) let error = resolver - .lookup_ipv6(ServiceName::Cockroach) + .lookup_socket_v6(ServiceName::Cockroach) .await .expect_err("unexpectedly found records"); assert_matches!( error, ResolveError::Resolve(error) if matches!(error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. } + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. } ) ); @@ -694,11 +695,11 @@ mod test { dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip1); + assert_eq!(found_addr.ip(), &ip1); // If we insert the same record with a new address, it should be // updated. @@ -712,11 +713,11 @@ mod test { dns_builder.build_full_config_for_initial_generation(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip2); + assert_eq!(found_addr.ip(), &ip2); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -847,11 +848,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); // Confirm that we can access this record manually. - let found_ip = resolver - .lookup_ipv6(ServiceName::Nexus) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Nexus) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip); + assert_eq!(found_addr.ip(), &ip); // Confirm that the progenitor client can access this record too. 
let value = client.test_endpoint().await.unwrap(); diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 9d8bf1ac9b..f6e60bb558 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -391,6 +391,11 @@ pub struct BackgroundTaskConfig { pub saga_recovery: SagaRecoveryConfig, /// configuration for lookup region port task pub lookup_region_port: LookupRegionPortConfig, + /// configuration for region snapshot replacement starter task + pub region_snapshot_replacement_start: RegionSnapshotReplacementStartConfig, + /// configuration for region snapshot replacement garbage collection + pub region_snapshot_replacement_garbage_collection: + RegionSnapshotReplacementGarbageCollectionConfig, } #[serde_as] @@ -627,6 +632,22 @@ pub struct LookupRegionPortConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegionSnapshotReplacementStartConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegionSnapshotReplacementGarbageCollectionConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -874,6 +895,8 @@ mod test { abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 60 lookup_region_port.period_secs = 60 + region_snapshot_replacement_start.period_secs = 30 + region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -1036,6 +1059,14 @@ mod test { lookup_region_port: LookupRegionPortConfig { period_secs: Duration::from_secs(60), }, + 
region_snapshot_replacement_start: + RegionSnapshotReplacementStartConfig { + period_secs: Duration::from_secs(30), + }, + region_snapshot_replacement_garbage_collection: + RegionSnapshotReplacementGarbageCollectionConfig { + period_secs: Duration::from_secs(30), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -1112,6 +1143,8 @@ mod test { abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 60 lookup_region_port.period_secs = 60 + region_snapshot_replacement_start.period_secs = 30 + region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus-sled-agent-shared/Cargo.toml b/nexus-sled-agent-shared/Cargo.toml index 98218523c8..504cd92c37 100644 --- a/nexus-sled-agent-shared/Cargo.toml +++ b/nexus-sled-agent-shared/Cargo.toml @@ -12,7 +12,11 @@ omicron-common.workspace = true omicron-passwords.workspace = true omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true +# TODO: replace uses of propolis_client with local types schemars.workspace = true serde.workspace = true +serde_json.workspace = true sled-hardware-types.workspace = true +strum.workspace = true +thiserror.workspace = true uuid.workspace = true diff --git a/nexus-sled-agent-shared/README.md b/nexus-sled-agent-shared/README.md index eeb3492eea..77b4d64486 100644 --- a/nexus-sled-agent-shared/README.md +++ b/nexus-sled-agent-shared/README.md @@ -3,7 +3,15 @@ Internal types shared between Nexus and sled-agent, with extra dependencies not in omicron-common. -**This crate should only be used for internal types and data structures.** +## Guidelines + +This crate should only be used for **internal types and data structures.** + +It should only be used for types that are used by **both `sled-agent-types` and `nexus-types`**. Prefer to put types in `sled-agent-types` or `nexus-types` if possible. 
+ +- If a type is used by `sled-agent-api`, as well as any part of Nexus except `nexus-types`, put it in `sled-agent-types`. +- If a type is used by `nexus-internal-api`, as well as any part of sled-agent except `sled-agent-types`, put it in `nexus-types`. +- Only if a type is used by both `sled-agent-types` and `nexus-types` should it go here. ## Why not omicron-common? @@ -28,9 +36,10 @@ tokio-postgres, a dependency that is not a necessary component of sled-agent. ## Why not sled-agent-types or nexus-types? Types that are primarily used by sled-agent or nexus should continue to go in -those crates. However, types shared by both should go here. `sled-agent-types` -and `nexus-types` can thus avoid a dependency on each other: they're both "on -the same level" and neither dependency direction is clearly correct. +those crates. However, types used by both `nexus-types` and `sled-agent-types` +should go here. `sled-agent-types` and `nexus-types` can thus avoid a +dependency on each other: they're both "on the same level" and neither +dependency direction is clearly correct. ## Why not Progenitor-generated types? diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index fcb3d4edd4..faee944105 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize}; // Export this type for convenience -- this way, dependents don't have to // depend on sled-hardware-types. pub use sled_hardware_types::Baseboard; +use strum::EnumIter; use uuid::Uuid; /// Identifies information about disks which may be attached to Sleds. 
@@ -179,15 +180,26 @@ pub enum OmicronZoneType { snat_cfg: SourceNatConfig, }, + /// Type of clickhouse zone used for a single node clickhouse deployment Clickhouse { address: SocketAddrV6, dataset: OmicronZoneDataset, }, + /// A zone used to run a Clickhouse Keeper node + /// + /// Keepers are only used in replicated clickhouse setups ClickhouseKeeper { address: SocketAddrV6, dataset: OmicronZoneDataset, }, + + /// A zone used to run a Clickhouse Server in a replicated deployment + ClickhouseServer { + address: SocketAddrV6, + dataset: OmicronZoneDataset, + }, + CockroachDb { address: SocketAddrV6, dataset: OmicronZoneDataset, @@ -257,6 +269,9 @@ impl OmicronZoneType { OmicronZoneType::ClickhouseKeeper { .. } => { ZoneKind::ClickhouseKeeper } + OmicronZoneType::ClickhouseServer { .. } => { + ZoneKind::ClickhouseServer + } OmicronZoneType::CockroachDb { .. } => ZoneKind::CockroachDb, OmicronZoneType::Crucible { .. } => ZoneKind::Crucible, OmicronZoneType::CruciblePantry { .. } => ZoneKind::CruciblePantry, @@ -297,6 +312,7 @@ impl OmicronZoneType { OmicronZoneType::Clickhouse { .. } | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::ClickhouseServer { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } | OmicronZoneType::CruciblePantry { .. } @@ -316,6 +332,7 @@ impl OmicronZoneType { | OmicronZoneType::InternalNtp { .. } | OmicronZoneType::Clickhouse { .. } | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::ClickhouseServer { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } | OmicronZoneType::CruciblePantry { .. } @@ -334,6 +351,7 @@ impl OmicronZoneType { | OmicronZoneType::InternalNtp { .. } | OmicronZoneType::Clickhouse { .. } | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::ClickhouseServer { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::CruciblePantry { .. } | OmicronZoneType::ExternalDns { .. 
} @@ -355,6 +373,7 @@ impl OmicronZoneType { OmicronZoneType::InternalNtp { .. } | OmicronZoneType::Clickhouse { .. } | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::ClickhouseServer { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } | OmicronZoneType::CruciblePantry { .. } @@ -373,6 +392,7 @@ impl OmicronZoneType { OmicronZoneType::InternalNtp { .. } | OmicronZoneType::Clickhouse { .. } | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::ClickhouseServer { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } | OmicronZoneType::CruciblePantry { .. } @@ -407,11 +427,14 @@ impl OmicronZoneType { /// the four representations if at all possible. If you must add a new one, /// please add it here rather than doing something ad-hoc in the calling code /// so it's more legible. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, EnumIter, +)] pub enum ZoneKind { BoundaryNtp, Clickhouse, ClickhouseKeeper, + ClickhouseServer, CockroachDb, Crucible, CruciblePantry, @@ -435,6 +458,7 @@ impl ZoneKind { ZoneKind::BoundaryNtp | ZoneKind::InternalNtp => Self::NTP_PREFIX, ZoneKind::Clickhouse => "clickhouse", ZoneKind::ClickhouseKeeper => "clickhouse_keeper", + ZoneKind::ClickhouseServer => "clickhouse_server", // Note "cockroachdb" for historical reasons. ZoneKind::CockroachDb => "cockroachdb", ZoneKind::Crucible => "crucible", @@ -454,6 +478,7 @@ impl ZoneKind { ZoneKind::BoundaryNtp | ZoneKind::InternalNtp => Self::NTP_PREFIX, ZoneKind::Clickhouse => "clickhouse", ZoneKind::ClickhouseKeeper => "clickhouse_keeper", + ZoneKind::ClickhouseServer => "clickhouse_server", // Note "cockroachdb" for historical reasons. 
ZoneKind::CockroachDb => "cockroachdb", ZoneKind::Crucible => "crucible", @@ -476,6 +501,7 @@ impl ZoneKind { ZoneKind::BoundaryNtp | ZoneKind::InternalNtp => Self::NTP_PREFIX, ZoneKind::Clickhouse => "clickhouse", ZoneKind::ClickhouseKeeper => "clickhouse-keeper", + ZoneKind::ClickhouseServer => "clickhouse-server", // Note "cockroach" for historical reasons. ZoneKind::CockroachDb => "cockroach", ZoneKind::Crucible => "crucible", @@ -496,6 +522,7 @@ impl ZoneKind { ZoneKind::BoundaryNtp => "boundary_ntp", ZoneKind::Clickhouse => "clickhouse", ZoneKind::ClickhouseKeeper => "clickhouse_keeper", + ZoneKind::ClickhouseServer => "clickhouse_server", ZoneKind::CockroachDb => "cockroach_db", ZoneKind::Crucible => "crucible", ZoneKind::CruciblePantry => "crucible_pantry", @@ -507,3 +534,24 @@ impl ZoneKind { } } } + +#[cfg(test)] +mod tests { + use omicron_common::api::external::Name; + use strum::IntoEnumIterator; + + use super::*; + + #[test] + fn test_name_prefixes() { + for zone_kind in ZoneKind::iter() { + let name_prefix = zone_kind.name_prefix(); + name_prefix.parse::().unwrap_or_else(|e| { + panic!( + "failed to parse name prefix {:?} for zone kind {:?}: {}", + name_prefix, zone_kind, e + ); + }); + } + } +} diff --git a/nexus-sled-agent-shared/src/lib.rs b/nexus-sled-agent-shared/src/lib.rs index 6781568d62..12fc040bbb 100644 --- a/nexus-sled-agent-shared/src/lib.rs +++ b/nexus-sled-agent-shared/src/lib.rs @@ -5,6 +5,14 @@ //! Internal types shared between Nexus and sled-agent, with extra dependencies //! not in omicron-common. //! +//! Only types that are shared between `nexus-types` and `sled-agent-types` +//! should go here. +//! +//! - If a type is used by `sled-agent-api` and Nexus, but is not required by +//! `nexus-types`, it should go in `sled-agent-types` instead. +//! - If a type is used by `nexus-internal-api` and Nexus, but is not required +//! by `sled-agent-types`, it should go in `nexus-types` instead. +//! //! 
For more information, see the crate [README](../README.md). pub mod inventory; diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index a949b31f0d..5b181c7fa0 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -25,6 +25,7 @@ chrono.workspace = true cockroach-admin-client.workspace = true crucible-agent-client.workspace = true crucible-pantry-client.workspace = true +crucible-common.workspace = true dns-service-client.workspace = true dpd-client.workspace = true mg-admin-client.workspace = true @@ -34,6 +35,7 @@ futures.workspace = true gateway-client.workspace = true headers.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true @@ -55,6 +57,7 @@ openssl.workspace = true oximeter-client.workspace = true oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] } oxnet.workspace = true +oxql-types.workspace = true parse-display.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
@@ -86,7 +89,6 @@ tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } tokio-util = { workspace = true, features = ["codec"] } tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true nexus-auth.workspace = true @@ -137,11 +139,12 @@ pretty_assertions.workspace = true rcgen.workspace = true regex.workspace = true similar-asserts.workspace = true +sled-agent-types.workspace = true sp-sim.workspace = true rustls.workspace = true subprocess.workspace = true term.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true tufaceous.workspace = true tufaceous-lib.workspace = true httptest.workspace = true diff --git a/nexus/auth/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs index ccb7218285..5c7fc7af05 100644 --- a/nexus/auth/src/authn/external/mod.rs +++ b/nexus/auth/src/authn/external/mod.rs @@ -13,7 +13,6 @@ use slog::trace; use std::borrow::Borrow; use uuid::Uuid; -pub mod cookies; pub mod session_cookie; pub mod spoof; pub mod token; diff --git a/nexus/auth/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs index 7811bf2826..f6b23308a0 100644 --- a/nexus/auth/src/authn/external/session_cookie.rs +++ b/nexus/auth/src/authn/external/session_cookie.rs @@ -4,7 +4,6 @@ //! 
authn scheme for console that looks up cookie values in a session table -use super::cookies::parse_cookies; use super::{HttpAuthnScheme, Reason, SchemeResult}; use crate::authn; use crate::authn::{Actor, Details}; @@ -13,6 +12,7 @@ use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; use dropshot::HttpError; use http::HeaderValue; +use nexus_types::authn::cookies::parse_cookies; use slog::debug; use uuid::Uuid; diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs index 0ab61c8024..fe782dd21f 100644 --- a/nexus/db-model/src/dataset_kind.rs +++ b/nexus/db-model/src/dataset_kind.rs @@ -21,6 +21,7 @@ impl_enum_type!( Cockroach => b"cockroach" Clickhouse => b"clickhouse" ClickhouseKeeper => b"clickhouse_keeper" + ClickhouseServer => b"clickhouse_server" ExternalDns => b"external_dns" InternalDns => b"internal_dns" ZoneRoot => b"zone_root" @@ -39,6 +40,7 @@ impl DatasetKind { (Self::Cockroach, None) => ApiKind::Cockroach, (Self::Clickhouse, None) => ApiKind::Clickhouse, (Self::ClickhouseKeeper, None) => ApiKind::ClickhouseKeeper, + (Self::ClickhouseServer, None) => ApiKind::ClickhouseServer, (Self::ExternalDns, None) => ApiKind::ExternalDns, (Self::InternalDns, None) => ApiKind::InternalDns, (Self::ZoneRoot, None) => ApiKind::ZoneRoot, @@ -67,6 +69,9 @@ impl From<&internal::shared::DatasetKind> for DatasetKind { internal::shared::DatasetKind::ClickhouseKeeper => { DatasetKind::ClickhouseKeeper } + internal::shared::DatasetKind::ClickhouseServer => { + DatasetKind::ClickhouseServer + } internal::shared::DatasetKind::ExternalDns => { DatasetKind::ExternalDns } diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 6f17045bd1..9fae3cb9e7 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -1036,6 +1036,7 @@ impl_enum_type!( BoundaryNtp => b"boundary_ntp" Clickhouse => b"clickhouse" ClickhouseKeeper => b"clickhouse_keeper" + ClickhouseServer => b"clickhouse_server" 
CockroachDb => b"cockroach_db" Crucible => b"crucible" CruciblePantry => b"crucible_pantry" @@ -1052,6 +1053,7 @@ impl From for ServiceKind { ZoneType::BoundaryNtp | ZoneType::InternalNtp => Self::Ntp, ZoneType::Clickhouse => Self::Clickhouse, ZoneType::ClickhouseKeeper => Self::ClickhouseKeeper, + ZoneType::ClickhouseServer => Self::ClickhouseServer, ZoneType::CockroachDb => Self::Cockroach, ZoneType::Crucible => Self::Crucible, ZoneType::CruciblePantry => Self::CruciblePantry, @@ -1071,6 +1073,7 @@ impl From for nexus_sled_agent_shared::inventory::ZoneKind { ZoneType::BoundaryNtp => BoundaryNtp, ZoneType::Clickhouse => Clickhouse, ZoneType::ClickhouseKeeper => ClickhouseKeeper, + ZoneType::ClickhouseServer => ClickhouseServer, ZoneType::CockroachDb => CockroachDb, ZoneType::Crucible => Crucible, ZoneType::CruciblePantry => CruciblePantry, @@ -1091,6 +1094,7 @@ impl From for ZoneType { BoundaryNtp => ZoneType::BoundaryNtp, Clickhouse => ZoneType::Clickhouse, ClickhouseKeeper => ZoneType::ClickhouseKeeper, + ClickhouseServer => ZoneType::ClickhouseServer, CockroachDb => ZoneType::CockroachDb, Crucible => ZoneType::Crucible, CruciblePantry => ZoneType::CruciblePantry, diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index 9236fc9407..23e1ef2dd9 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -109,6 +109,9 @@ impl OmicronZone { OmicronZoneType::ClickhouseKeeper { address, dataset } => { (ZoneType::ClickhouseKeeper, address, Some(dataset)) } + OmicronZoneType::ClickhouseServer { address, dataset } => { + (ZoneType::ClickhouseServer, address, Some(dataset)) + } OmicronZoneType::CockroachDb { address, dataset } => { (ZoneType::CockroachDb, address, Some(dataset)) } @@ -258,6 +261,12 @@ impl OmicronZone { dataset: common.dataset?, }, ), + ZoneType::ClickhouseServer => BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address, + 
dataset: common.dataset?, + }, + ), ZoneType::CockroachDb => BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, @@ -392,6 +401,10 @@ impl OmicronZone { address, dataset: common.dataset?, }, + ZoneType::ClickhouseServer => OmicronZoneType::ClickhouseServer { + address, + dataset: common.dataset?, + }, ZoneType::CockroachDb => OmicronZoneType::CockroachDb { address, dataset: common.dataset?, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index d391ec72ae..0780189503 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -139,35 +139,28 @@ table! { table! { switch_port_settings_link_config (port_settings_id, link_name) { port_settings_id -> Uuid, - lldp_service_config_id -> Uuid, link_name -> Text, mtu -> Int4, fec -> crate::SwitchLinkFecEnum, speed -> crate::SwitchLinkSpeedEnum, autoneg -> Bool, + lldp_link_config_id -> Uuid, } } table! { - lldp_service_config (id) { + lldp_link_config (id) { id -> Uuid, enabled -> Bool, - lldp_config_id -> Nullable, - } -} - -table! { - lldp_config (id) { - id -> Uuid, - name -> Text, - description -> Text, + link_name -> Nullable, + link_description -> Nullable, + chassis_id -> Nullable, + system_name -> Nullable, + system_description -> Nullable, + management_ip -> Nullable, time_created -> Timestamptz, time_modified -> Timestamptz, time_deleted -> Nullable, - chassis_id -> Text, - system_name -> Text, - system_description -> Text, - management_ip -> Inet, } } @@ -195,6 +188,7 @@ table! 
{ dst -> Inet, gw -> Inet, vid -> Nullable, + local_pref -> Nullable, } } @@ -1944,7 +1938,8 @@ allow_tables_to_appear_in_same_query!( allow_tables_to_appear_in_same_query!( switch_port, - switch_port_settings_bgp_peer_config + switch_port_settings_bgp_peer_config, + bgp_config ); allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource); diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 55fe268db0..a3001f204e 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(89, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(93, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,9 +29,13 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), - KnownVersion::new(89, "blueprint-dataset"), - KnownVersion::new(88, "inv-dataset"), - KnownVersion::new(87, "dataset-kinds-zone-and-debug"), + KnownVersion::new(93, "blueprint-dataset"), + KnownVersion::new(92, "inv-dataset"), + KnownVersion::new(91, "dataset-kinds-zone-and-debug"), + KnownVersion::new(90, "lookup-bgp-config-by-asn"), + KnownVersion::new(89, "collapse_lldp_settings"), + KnownVersion::new(88, "route-local-pref"), + KnownVersion::new(87, "add-clickhouse-server-enum-variants"), KnownVersion::new(86, "snapshot-replacement"), KnownVersion::new(85, "add-migrations-by-time-created-index"), KnownVersion::new(84, "region-read-only"), diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 016de9c44e..04fbab20b2 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -20,6 +20,7 @@ impl_enum_type!( // Enum values Clickhouse => b"clickhouse" ClickhouseKeeper => b"clickhouse_keeper" + ClickhouseServer => b"clickhouse_server" Cockroach => b"cockroach" Crucible => b"crucible" CruciblePantry => b"crucible_pantry" diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index 48afd7b52a..09f1327be2 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use crate::schema::{ - lldp_config, lldp_service_config, switch_port, switch_port_settings, + lldp_link_config, switch_port, switch_port_settings, switch_port_settings_address_config, switch_port_settings_bgp_peer_config, switch_port_settings_bgp_peer_config_allow_export, switch_port_settings_bgp_peer_config_allow_import, @@ -14,6 +14,7 @@ use crate::schema::{ }; use crate::{impl_enum_type, SqlU32}; use crate::{SqlU16, SqlU8}; +use chrono::{DateTime, Utc}; use db_macros::Resource; use diesel::AsChangeset; use ipnetwork::IpNetwork; @@ -380,7 +381,7 @@ impl Into for SwitchPortConfig { #[diesel(table_name = switch_port_settings_link_config)] pub struct SwitchPortLinkConfig { pub port_settings_id: Uuid, - pub lldp_service_config_id: Uuid, + pub lldp_link_config_id: Uuid, pub link_name: String, pub mtu: SqlU16, pub fec: SwitchLinkFec, @@ -391,7 +392,7 @@ pub struct SwitchPortLinkConfig { impl SwitchPortLinkConfig { pub fn new( port_settings_id: Uuid, - lldp_service_config_id: Uuid, + lldp_link_config_id: Uuid, link_name: String, mtu: u16, fec: SwitchLinkFec, @@ -400,7 +401,7 @@ impl SwitchPortLinkConfig { ) -> Self { Self { port_settings_id, - lldp_service_config_id, + lldp_link_config_id, link_name, fec, speed, @@ -414,7 +415,7 @@ impl Into for SwitchPortLinkConfig { fn into(self) -> external::SwitchPortLinkConfig { external::SwitchPortLinkConfig { port_settings_id: self.port_settings_id, - lldp_service_config_id: self.lldp_service_config_id, + lldp_link_config_id: self.lldp_link_config_id, link_name: self.link_name.clone(), mtu: self.mtu.into(), fec: self.fec.into(), @@ -434,57 +435,61 @@ impl Into for SwitchPortLinkConfig { Deserialize, AsChangeset, )] -#[diesel(table_name = lldp_service_config)] -pub struct LldpServiceConfig { +#[diesel(table_name = lldp_link_config)] +pub struct LldpLinkConfig { pub id: Uuid, pub enabled: bool, - pub lldp_config_id: Option, -} - -impl LldpServiceConfig { - pub fn new(enabled: bool, lldp_config_id: Option) -> Self { - Self { id: 
Uuid::new_v4(), enabled, lldp_config_id } + pub link_name: Option, + pub link_description: Option, + pub chassis_id: Option, + pub system_name: Option, + pub system_description: Option, + pub management_ip: Option, + pub time_created: DateTime, + pub time_modified: DateTime, + pub time_deleted: Option>, +} + +impl LldpLinkConfig { + pub fn new( + enabled: bool, + link_name: Option, + link_description: Option, + chassis_id: Option, + system_name: Option, + system_description: Option, + management_ip: Option, + ) -> Self { + let now = Utc::now(); + Self { + id: Uuid::new_v4(), + enabled, + link_name, + link_description, + chassis_id, + system_name, + system_description, + management_ip, + time_created: now, + time_modified: now, + time_deleted: None, + } } } -impl Into for LldpServiceConfig { - fn into(self) -> external::LldpServiceConfig { - external::LldpServiceConfig { +// This converts the internal database version of the config into the +// user-facing version. +impl Into for LldpLinkConfig { + fn into(self) -> external::LldpLinkConfig { + external::LldpLinkConfig { id: self.id, - lldp_config_id: self.lldp_config_id, enabled: self.enabled, - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Resource, - Serialize, - Deserialize, -)] -#[diesel(table_name = lldp_config)] -pub struct LldpConfig { - #[diesel(embed)] - pub identity: LldpConfigIdentity, - pub chassis_id: String, - pub system_name: String, - pub system_description: String, - pub management_ip: IpNetwork, -} - -impl Into for LldpConfig { - fn into(self) -> external::LldpConfig { - external::LldpConfig { - identity: self.identity(), + link_name: self.link_name.clone(), + link_description: self.link_description.clone(), chassis_id: self.chassis_id.clone(), system_name: self.system_name.clone(), system_description: self.system_description.clone(), - management_ip: self.management_ip.into(), + management_ip: self.management_ip.map(|a| a.into()), } } } @@ -554,6 +559,7 @@ pub 
struct SwitchPortRouteConfig { pub dst: IpNetwork, pub gw: IpNetwork, pub vid: Option, + pub local_pref: Option, } impl SwitchPortRouteConfig { @@ -563,8 +569,9 @@ impl SwitchPortRouteConfig { dst: IpNetwork, gw: IpNetwork, vid: Option, + local_pref: Option, ) -> Self { - Self { port_settings_id, interface_name, dst, gw, vid } + Self { port_settings_id, interface_name, dst, gw, vid, local_pref } } } @@ -576,6 +583,7 @@ impl Into for SwitchPortRouteConfig { dst: self.dst.into(), gw: self.gw.into(), vlan_id: self.vid.map(Into::into), + local_pref: self.local_pref.map(Into::into), } } } @@ -642,9 +650,13 @@ pub struct SwitchPortBgpPeerConfigCommunity { )] #[diesel(table_name = switch_port_settings_bgp_peer_config_allow_export)] pub struct SwitchPortBgpPeerConfigAllowExport { + /// Parent switch port configuration pub port_settings_id: Uuid, + /// Interface peer is reachable on pub interface_name: String, + /// Peer Address pub addr: IpNetwork, + /// Allowed Prefix pub prefix: IpNetwork, } @@ -660,9 +672,13 @@ pub struct SwitchPortBgpPeerConfigAllowExport { )] #[diesel(table_name = switch_port_settings_bgp_peer_config_allow_import)] pub struct SwitchPortBgpPeerConfigAllowImport { + /// Parent switch port configuration pub port_settings_id: Uuid, + /// Interface peer is reachable on pub interface_name: String, + /// Peer Address pub addr: IpNetwork, + /// Allowed Prefix pub prefix: IpNetwork, } diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index 1244184c1d..fdb9629543 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -28,7 +28,7 @@ use ref_cast::RefCast; use uuid::Uuid; impl DataStore { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: &OpContext, config: ¶ms::BgpConfigCreate, @@ -37,80 +37,187 @@ impl DataStore { use db::schema::{ bgp_announce_set, bgp_announce_set::dsl as announce_set_dsl, }; - use diesel::sql_types; - use 
diesel::IntoSql; let conn = self.pool_connection_authorized(opctx).await?; - self.transaction_retry_wrapper("bgp_config_set") - .transaction(&conn, |conn| async move { - let announce_set_id: Uuid = match &config.bgp_announce_set_id { - NameOrId::Name(name) => { - announce_set_dsl::bgp_announce_set + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_config_create") + .transaction(&conn, |conn| { + + let err = err.clone(); + async move { + let announce_set_id = match config.bgp_announce_set_id.clone() { + // Resolve Name to UUID + NameOrId::Name(name) => announce_set_dsl::bgp_announce_set .filter(bgp_announce_set::time_deleted.is_null()) .filter(bgp_announce_set::name.eq(name.to_string())) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) - .await? + .await + .map_err(|e| { + let msg = "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + + // We cannot assume that the provided UUID is actually real. + // Lookup the parent record by UUID to verify that it is valid. 
+ NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; + + let config = + BgpConfig::from_config_create(config, announce_set_id); + + // Idempotency: + // Check to see if an exact match for the config already exists + let query = dsl::bgp_config + .filter(dsl::name.eq(config.name().to_string())) + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::bgp_announce_set_id.eq(config.bgp_announce_set_id)) + .into_boxed(); + + let query = match config.vrf.clone() { + Some(v) => query.filter(dsl::vrf.eq(v)), + None => query.filter(dsl::vrf.is_null()), + }; + + let query = match config.shaper.clone() { + Some(v) => query.filter(dsl::shaper.eq(v)), + None => query.filter(dsl::shaper.is_null()), + }; + + let query = match config.checker.clone() { + Some(v) => query.filter(dsl::checker.eq(v)), + None => query.filter(dsl::checker.is_null()), + }; + + let matching_config = match query + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + match e { + diesel::result::Error::NotFound => { + info!(opctx.log, "no matching bgp config found"); + Ok(None) + } + _ => { + let msg = "error while checking if bgp config exists"; + error!(opctx.log, "{msg}"; "error" => ?e); + Err(err.bail(Error::internal_error(msg))) + } + } + } + }?; + + // If so, we're done! 
+ if let Some(existing_config) = matching_config { + return Ok(existing_config); } - NameOrId::Id(id) => *id, - }; - let config = - BgpConfig::from_config_create(config, announce_set_id); - - let matching_entry_subquery = dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(dsl::name); - - // SELECT exactly the values we're trying to INSERT, but only - // if it does not already exist. - let new_entry_subquery = diesel::dsl::select(( - config.id().into_sql::(), - config.name().to_string().into_sql::(), - config - .description() - .to_string() - .into_sql::(), - config.asn.into_sql::(), - config.bgp_announce_set_id.into_sql::(), - config - .vrf - .clone() - .into_sql::>(), - Utc::now().into_sql::(), - Utc::now().into_sql::(), - )) - .filter(diesel::dsl::not(diesel::dsl::exists( - matching_entry_subquery, - ))); - - diesel::insert_into(dsl::bgp_config) - .values(new_entry_subquery) - .into_columns(( - dsl::id, - dsl::name, - dsl::description, - dsl::asn, - dsl::bgp_announce_set_id, - dsl::vrf, - dsl::time_created, - dsl::time_modified, - )) - .execute_async(&conn) - .await?; + // TODO: remove once per-switch-multi-asn support is added + // Bail if a conflicting config for this ASN already exists. + // This is a temporary measure until multi-asn-per-switch is supported. 
+ let configs_with_asn: Vec = dsl::bgp_config + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !configs_with_asn.is_empty() { + error!( + opctx.log, + "different config for asn already exists"; + "asn" => ?config.asn, + "requested_config" => ?config, + "conflicting_configs" => ?configs_with_asn + ); + return Err(err.bail(Error::conflict("cannot have more than one configuration per ASN"))); + } - dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(BgpConfig::as_select()) - .limit(1) - .first_async(&conn) - .await + diesel::insert_into(dsl::bgp_config) + .values(config.clone()) + .returning(BgpConfig::as_returning()) + .get_result_async(&conn) + .await + .map_err(|e | { + let msg = "failed to insert bgp config"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::DatabaseError(kind, _) => { + match kind { + diesel::result::DatabaseErrorKind::UniqueViolation => { + err.bail(Error::conflict("a field that must be unique conflicts with an existing record")) + }, + // technically we don't use Foreign Keys but it doesn't hurt to match on them + // instead of returning a 500 by default in the event that we do switch to Foreign Keys + diesel::result::DatabaseErrorKind::ForeignKeyViolation => { + err.bail(Error::conflict("an id field references an object that does not exist")) + } + diesel::result::DatabaseErrorKind::NotNullViolation => { + err.bail(Error::invalid_request("a required field was not provided")) + } + diesel::result::DatabaseErrorKind::CheckViolation => { + err.bail(Error::invalid_request("one or more fields are not valid values")) + }, + _ => err.bail(Error::internal_error(msg)), + } + } + _ => err.bail(Error::internal_error(msg)), + } + }) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e|{ + let msg = 
"bgp_config_create failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + // The transaction handler errors along with any errors emitted via "?" + // will fall through to here. These errors should truly be 500s + // because they are an internal hiccup that likely was not triggered by + // user input. + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn bgp_config_delete( @@ -124,11 +231,6 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config as sps_bgp_peer_config; use db::schema::switch_port_settings_bgp_peer_config::dsl as sps_bgp_peer_config_dsl; - #[derive(Debug)] - enum BgpConfigDeleteError { - ConfigInUse, - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_config_delete") @@ -138,26 +240,60 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let id: Uuid = match name_or_id { - NameOrId::Id(id) => id, - NameOrId::Name(name) => { + NameOrId::Id(id) => bgp_config_dsl::bgp_config + .filter(bgp_config::id.eq(id)) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpConfig, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + NameOrId::Name(name) => bgp_config_dsl::bgp_config - .filter(bgp_config::name.eq(name.to_string())) - .select(bgp_config::id) - .limit(1) - .first_async::(&conn) - .await? 
- } - }; + .filter(bgp_config::name.eq(name.to_string())) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; let count = sps_bgp_peer_config_dsl::switch_port_settings_bgp_peer_config - .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) - .count() - .execute_async(&conn) - .await?; + .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) + .count() + .execute_async(&conn) + .await?; if count > 0 { - return Err(err.bail(BgpConfigDeleteError::ConfigInUse)); + return Err(err.bail(Error::conflict("BGP Config is in use and cannot be deleted"))); } diesel::update(bgp_config_dsl::bgp_config) @@ -171,13 +307,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_config_delete failed"; if let Some(err) = err.take() { - match err { - BgpConfigDeleteError::ConfigInUse => { - Error::invalid_request("BGP config in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -194,24 +329,45 @@ impl DataStore { let name_or_id = name_or_id.clone(); - let config = match name_or_id { + match name_or_id { NameOrId::Name(name) => dsl::bgp_config .filter(bgp_config::name.eq(name.to_string())) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + ) + } + _ => Error::internal_error(msg), + } + }), NameOrId::Id(id) => 
dsl::bgp_config .filter(bgp_config::id.eq(id)) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), - }?; + .map_err(|e| { + let msg = "failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); - Ok(config) + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_id(ResourceType::BgpConfig, &id) + } + _ => Error::internal_error(msg), + } + }), + } } pub async fn bgp_config_list( @@ -237,10 +393,42 @@ impl DataStore { .select(BgpConfig::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + error!(opctx.log, "bgp_config_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) + } + + pub async fn bgp_announce_set_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + use db::schema::bgp_announce_set::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::bgp_announce_set, dsl::id, &pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::bgp_announce_set, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::time_deleted.is_null()) + .select(BgpAnnounceSet::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + error!(opctx.log, "bgp_announce_set_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } - pub async fn bgp_announce_list( + pub async fn bgp_announcement_list( &self, opctx: &OpContext, sel: ¶ms::BgpAnnounceSetSelector, @@ -250,11 +438,6 @@ impl DataStore { bgp_announcement::dsl as announce_dsl, }; - #[derive(Debug)] - enum BgpAnnounceListError { - AnnounceSetNotFound(Name), - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_announce_list") @@ -264,7 
+447,26 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let announce_id: Uuid = match name_or_id { - NameOrId::Id(id) => id, + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set .filter( @@ -278,15 +480,23 @@ impl DataStore { .first_async::(&conn) .await .map_err(|e| { - err.bail_retryable_or( - e, - BgpAnnounceListError::AnnounceSetNotFound( - Name::from(name.clone()), - ) - ) - })? + let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - }; + }?; let result = announce_dsl::bgp_announcement .filter(announce_dsl::announce_set_id.eq(announce_id)) @@ -299,21 +509,18 @@ impl DataStore { }) .await .map_err(|e| { + error!(opctx.log, "bgp_announce_list failed"; "error" => ?e); if let Some(err) = err.take() { - match err { - BgpAnnounceListError::AnnounceSetNotFound(name) => { - Error::not_found_by_name( - ResourceType::BgpAnnounceSet, - &name, - ) - } - } + err } else { public_error_from_diesel(e, ErrorHandler::Server) } }) } + // TODO: it seems this logic actually performs a find OR create for an announce set, and then replaces its child announcements. 
+ // This might be changed in omicron#6016 to an api that creates an announce set then allows adding / removal of announcements + // to match how our other APIs work. pub async fn bgp_update_announce_set( &self, opctx: &OpContext, @@ -383,9 +590,16 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_update_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } + // TODO: it seems this logic actually performs a create OR update of an announce set and its child announcements + // (for example, it will add missing announcements). This might be changed in omicron#6016 to an api that creates an announce set + // then allows adding / removal of announcements to match how our other APIs work. pub async fn bgp_create_announce_set( &self, opctx: &OpContext, @@ -466,7 +680,11 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_create_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } pub async fn bgp_delete_announce_set( @@ -481,11 +699,6 @@ impl DataStore { use db::schema::bgp_config; use db::schema::bgp_config::dsl as bgp_config_dsl; - #[derive(Debug)] - enum BgpAnnounceSetDeleteError { - AnnounceSetInUse, - } - let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = sel.name_or_id.clone(); @@ -496,18 +709,56 @@ impl DataStore { let name_or_id = name_or_id.clone(); async move { let id: Uuid = match name_or_id { + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + 
error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set + .filter( + bgp_announce_set::time_deleted.is_null(), + ) .filter( bgp_announce_set::name.eq(name.to_string()), ) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) - .await? + .await + .map_err(|e| { + let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - NameOrId::Id(id) => id, - }; + }?; let count = bgp_config_dsl::bgp_config .filter(bgp_config::bgp_announce_set_id.eq(id)) @@ -516,9 +767,9 @@ impl DataStore { .await?; if count > 0 { - return Err(err.bail( - BgpAnnounceSetDeleteError::AnnounceSetInUse, - )); + return Err( + err.bail(Error::conflict("announce set in use")) + ); } diesel::update(announce_set_dsl::bgp_announce_set) @@ -537,13 +788,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_delete_announce_set failed"; if let Some(err) = err.take() { - match err { - BgpAnnounceSetDeleteError::AnnounceSetInUse => { - Error::invalid_request("BGP announce set in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -563,7 +813,11 @@ impl DataStore { .select(BgpPeerView::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) 
.await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "bgp_peer_configs failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -583,7 +837,11 @@ impl DataStore { .filter(dsl::addr.eq(addr)) .load_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "communities_for_peer failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -601,23 +859,40 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_export::dsl; let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .select(db_peer::allow_export_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_allow_export_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_export_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup export settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return 
Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_export - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_export + .filter( + db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -626,12 +901,20 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_export_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn allow_import_for_peer( @@ -646,24 +929,42 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_import as db_allow; use db::schema::switch_port_settings_bgp_peer_config_allow_import::dsl; + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .select(db_peer::allow_import_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + self + .transaction_retry_wrapper("bgp_allow_import_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_import_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup import settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); 
+ + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_import - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_import + .filter( + db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -672,11 +973,19 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_import_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } } diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs index e7a975fa69..a38cfb8935 100644 --- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs +++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs @@ -153,10 +153,22 @@ mod test { ); let settings = datastore.cockroachdb_settings(&opctx).await.unwrap(); - // With a fresh cluster, this is the expected state - let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(); - assert_eq!(settings.version, version); - assert_eq!(settings.preserve_downgrade, ""); + let version: CockroachDbClusterVersion = + settings.version.parse().expect("unexpected cluster version"); + if settings.preserve_downgrade == "" { + // This is the expected value while running tests normally. 
+ assert_eq!(version, CockroachDbClusterVersion::NEWLY_INITIALIZED); + } else if settings.preserve_downgrade == version.to_string() { + // This is the expected value if the cluster was created on a + // previous version and `cluster.preserve_downgrade_option` was set. + assert_eq!(version, CockroachDbClusterVersion::POLICY); + } else { + panic!( + "`cluster.preserve_downgrade_option` is {:?}, + but it should be empty or \"{}\"", + settings.preserve_downgrade, version + ); + } // Verify that if a fingerprint is wrong, we get the expected SQL error // back. @@ -165,7 +177,7 @@ mod test { &opctx, String::new(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await else { @@ -190,7 +202,7 @@ mod test { &opctx, settings.state_fingerprint.clone(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await .unwrap(); @@ -198,8 +210,8 @@ mod test { datastore.cockroachdb_settings(&opctx).await.unwrap(), CockroachDbSettings { state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: version.clone(), + version: version.to_string(), + preserve_downgrade: version.to_string(), } ); } @@ -215,14 +227,24 @@ mod test { ) .await .unwrap(); - assert_eq!( - datastore.cockroachdb_settings(&opctx).await.unwrap(), - CockroachDbSettings { - state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: String::new(), - } - ); + let settings = + datastore.cockroachdb_settings(&opctx).await.unwrap(); + if version == CockroachDbClusterVersion::NEWLY_INITIALIZED { + assert_eq!( + settings, + CockroachDbSettings { + state_fingerprint: settings.state_fingerprint.clone(), + version: version.to_string(), + preserve_downgrade: String::new(), + } + ); + } else { + // Resetting it permits auto-finalization, so the state + // fingerprint and version are not predictable until that + // completes, but we can still verify that the variable was + // 
reset. + assert!(settings.preserve_downgrade.is_empty()); + } } db.cleanup().await.unwrap(); diff --git a/nexus/db-queries/src/db/datastore/deployment/external_networking.rs b/nexus/db-queries/src/db/datastore/deployment/external_networking.rs index b6ced8e2c5..7ace07305d 100644 --- a/nexus/db-queries/src/db/datastore/deployment/external_networking.rs +++ b/nexus/db-queries/src/db/datastore/deployment/external_networking.rs @@ -327,6 +327,7 @@ impl DataStore { ZoneKind::Nexus => &*NEXUS_VPC_SUBNET, ZoneKind::Clickhouse | ZoneKind::ClickhouseKeeper + | ZoneKind::ClickhouseServer | ZoneKind::CockroachDb | ZoneKind::Crucible | ZoneKind::CruciblePantry diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index acf80829cf..4a54c1992b 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -127,8 +127,12 @@ pub use vmm::VmmStateUpdateResult; pub use volume::read_only_resources_associated_with_volume; pub use volume::CrucibleResources; pub use volume::CrucibleTargets; +pub use volume::ExistingTarget; +pub use volume::ReplacementTarget; pub use volume::VolumeCheckoutReason; pub use volume::VolumeReplacementParams; +pub use volume::VolumeToDelete; +pub use volume::VolumeWithTarget; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. 
@@ -362,6 +366,7 @@ impl DataStore { } } +#[derive(Clone, Copy, Debug)] pub enum UpdatePrecondition { DontCare, Null, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 17a1fcf15f..584d1aa464 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -595,6 +595,7 @@ impl DataStore { BlueprintZoneType::InternalNtp(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::Crucible(_) | BlueprintZoneType::CruciblePantry(_) diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 939929e665..0b626804e1 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -9,7 +9,6 @@ use super::SQL_BATCH_SIZE; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::model::Generation; use crate::db::pagination::paginated; use crate::db::pagination::paginated_multicolumn; use crate::db::pagination::Paginator; @@ -17,10 +16,12 @@ use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_auth::authz; use nexus_auth::context::OpContext; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::ops::Add; impl DataStore { pub async fn saga_create( @@ -80,21 +81,15 @@ impl DataStore { /// now, we're implementing saga adoption only in cases where the original /// SEC/Nexus has been expunged.) /// - /// However, in the future, it may be possible for multiple SECs to try and - /// update the same saga, and overwrite each other's state. 
For example, - /// one SEC might try and update the state to Running while the other one - /// updates it to Done. That case would have to be carefully considered and - /// tested here, probably using the (currently unused) - /// `current_adopt_generation` field to enable optimistic concurrency. - /// - /// To reiterate, we are *not* considering the case where several SECs try - /// to update the same saga. That will be a future enhancement. + /// It's conceivable that multiple SECs do try to udpate the same saga + /// concurrently. That would be a bug. This is noticed and prevented by + /// making this query conditional on current_sec and failing with a conflict + /// if the current SEC has changed. pub async fn saga_update_state( &self, saga_id: steno::SagaId, new_state: steno::SagaCachedState, current_sec: db::saga_types::SecId, - current_adopt_generation: Generation, ) -> Result<(), Error> { use db::schema::saga::dsl; @@ -102,7 +97,6 @@ impl DataStore { let result = diesel::update(dsl::saga) .filter(dsl::id.eq(saga_id)) .filter(dsl::current_sec.eq(current_sec)) - .filter(dsl::adopt_generation.eq(current_adopt_generation)) .set(dsl::saga_state.eq(db::saga_types::SagaCachedState(new_state))) .check_if_exists::(saga_id) .execute_and_check(&*self.pool_connection_unauthorized().await?) 
@@ -119,20 +113,19 @@ impl DataStore { match result.status { UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => Err(Error::invalid_request( - format!( - "failed to update saga {:?} with state {:?}: preconditions not met: \ - expected current_sec = {:?}, adopt_generation = {:?}, \ - but found current_sec = {:?}, adopt_generation = {:?}, state = {:?}", + UpdateStatus::NotUpdatedButExists => { + Err(Error::invalid_request(format!( + "failed to update saga {:?} with state {:?}:\ + preconditions not met: \ + expected current_sec = {:?}, \ + but found current_sec = {:?}, state = {:?}", saga_id, new_state, current_sec, - current_adopt_generation, result.found.current_sec, - result.found.adopt_generation, result.found.saga_state, - ) - )), + ))) + } } } @@ -207,16 +200,75 @@ impl DataStore { Ok(events) } + + /// Updates all sagas that are currently assigned to any of the SEC ids in + /// `sec_ids`, assigning them to `new_sec_id` instead. + /// + /// Generally, an SEC id corresponds to a Nexus id. This change causes the + /// Nexus instance `new_sec_id` to discover these sagas and resume executing + /// them the next time it performs saga recovery (which is normally on + /// startup and periodically). Generally, `new_sec_id` is the _current_ + /// Nexus instance and the caller should activate the saga recovery + /// background task after calling this function to immediately resume the + /// newly-assigned sagas. + /// + /// **Warning:** This operation is only safe if the other SECs `sec_ids` are + /// not currently running. If those SECs are still running, then two (or + /// more) SECs may wind up running the same saga concurrently. This would + /// likely violate implicit assumptions made by various saga actions, + /// leading to hard-to-debug errors and state corruption. 
+ pub async fn sagas_reassign_sec( + &self, + opctx: &OpContext, + sec_ids: &[db::saga_types::SecId], + new_sec_id: db::saga_types::SecId, + ) -> Result { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let now = chrono::Utc::now(); + let conn = self.pool_connection_authorized(opctx).await?; + + // It would be more robust to do this in batches. However, Diesel does + // not appear to support the UPDATE ... LIMIT syntax using the normal + // builder. In practice, it's extremely unlikely we'd have so many + // in-progress sagas that this would be a problem. + use db::schema::saga::dsl; + diesel::update( + dsl::saga + .filter(dsl::current_sec.is_not_null()) + .filter( + dsl::current_sec.eq_any( + sec_ids.into_iter().cloned().collect::>(), + ), + ) + .filter(dsl::saga_state.ne(db::saga_types::SagaCachedState( + steno::SagaCachedState::Done, + ))), + ) + .set(( + dsl::current_sec.eq(Some(new_sec_id)), + dsl::adopt_generation.eq(dsl::adopt_generation.add(1)), + dsl::adopt_time.eq(now), + )) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] mod test { use super::*; use crate::db::datastore::test_utils::datastore_test; + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncSimpleConnection; + use db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_model::{SagaNodeEvent, SecId}; use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::Generation; use omicron_test_utils::dev; use rand::seq::SliceRandom; + use std::collections::BTreeSet; use uuid::Uuid; // Tests pagination in listing sagas that are candidates for recovery @@ -440,7 +492,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Running, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Running again"); @@ -451,7 +502,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await 
.expect("updating state to Done"); @@ -463,7 +513,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Done again"); @@ -509,4 +558,156 @@ mod test { SagaNodeEvent::new(event, self.sec_id) } } + + #[tokio::test] + async fn test_saga_reassignment() { + // Test setup + let logctx = dev::test_setup_log("test_saga_reassignment"); + let mut db = test_setup_database(&logctx.log).await; + let (_, datastore) = datastore_test(&logctx, &db).await; + let opctx = OpContext::for_tests(logctx.log.clone(), datastore.clone()); + + // Populate the database with a few different sagas: + // + // - assigned to SEC A: done, running, and unwinding + // - assigned to SEC B: done, running, and unwinding + // - assigned to SEC C: done, running, and unwinding + // - assigned to SEC D: done, running, and unwinding + // + // Then we'll reassign SECs B's and C's sagas to SEC A and check exactly + // which sagas were changed by this. 
This exercises: + // - that we don't touch A's sagas (the one we're assigning *to*) + // - that we do touch both B's and C's sagas (the ones we're assigning + // *from*) + // - that we don't touch D's sagas (some other SEC) + // - that we don't touch any "done" sagas + // - that we do touch both running and unwinding sagas + let mut sagas_to_insert = Vec::new(); + let sec_a = SecId(Uuid::new_v4()); + let sec_b = SecId(Uuid::new_v4()); + let sec_c = SecId(Uuid::new_v4()); + let sec_d = SecId(Uuid::new_v4()); + + for sec_id in [sec_a, sec_b, sec_c, sec_d] { + for state in [ + steno::SagaCachedState::Running, + steno::SagaCachedState::Unwinding, + steno::SagaCachedState::Done, + ] { + let params = steno::SagaCreateParams { + id: steno::SagaId(Uuid::new_v4()), + name: steno::SagaName::new("tewst saga"), + dag: serde_json::value::Value::Null, + state, + }; + + sagas_to_insert + .push(db::model::saga_types::Saga::new(sec_id, params)); + } + } + println!("sagas to insert: {:?}", sagas_to_insert); + + // These two sets are complements, but we write out the conditions to + // double-check that we've got it right. + let sagas_affected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + ((saga.creator == sec_b || saga.creator == sec_c) + && (saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding)) + .then(|| saga.id) + }) + .collect(); + let sagas_unaffected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + (saga.creator == sec_a + || saga.creator == sec_d + || saga.saga_state.0 == steno::SagaCachedState::Done) + .then(|| saga.id) + }) + .collect(); + println!("sagas affected: {:?}", sagas_affected); + println!("sagas UNaffected: {:?}", sagas_unaffected); + assert_eq!(sagas_affected.intersection(&sagas_unaffected).count(), 0); + assert_eq!( + sagas_affected.len() + sagas_unaffected.len(), + sagas_to_insert.len() + ); + + // Insert the sagas. 
+ let count = { + use db::schema::saga::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + diesel::insert_into(dsl::saga) + .values(sagas_to_insert) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .expect("successful insertion") + }; + assert_eq!(count, sagas_affected.len() + sagas_unaffected.len()); + + // Reassign uncompleted sagas from SECs B and C to SEC A. + let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + + // Fetch all the sagas and check their states. + let all_sagas: Vec<_> = datastore + .pool_connection_for_tests() + .await + .unwrap() + .transaction_async(|conn| async move { + use db::schema::saga::dsl; + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + dsl::saga + .select(nexus_db_model::Saga::as_select()) + .load_async(&conn) + .await + }) + .await + .unwrap(); + + for saga in all_sagas { + println!("checking saga: {:?}", saga); + let current_sec = saga.current_sec.unwrap(); + if sagas_affected.contains(&saga.id) { + assert!(saga.creator == sec_b || saga.creator == sec_c); + assert_eq!(current_sec, sec_a); + assert_eq!(*saga.adopt_generation, Generation::from(2)); + assert!( + saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding + ); + } else if sagas_unaffected.contains(&saga.id) { + assert_eq!(current_sec, saga.creator); + assert_eq!(*saga.adopt_generation, Generation::from(1)); + // Its SEC and state could be anything since we've deliberately + // included sagas with various states and SECs that should not + // be affected by the reassignment. + } else { + println!( + "ignoring saga that was not created by this test: {:?}", + saga + ); + } + } + + assert_eq!(nreassigned, sagas_affected.len()); + + // If we do it again, we should make no changes. 
+ let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + assert_eq!(nreassigned, 0); + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 159933dce0..2e09c1ac13 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -15,7 +15,7 @@ use crate::db::datastore::UpdatePrecondition; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::{ - LldpServiceConfig, Name, SwitchInterfaceConfig, SwitchPort, + LldpLinkConfig, Name, SwitchInterfaceConfig, SwitchPort, SwitchPortAddressConfig, SwitchPortBgpPeerConfig, SwitchPortConfig, SwitchPortLinkConfig, SwitchPortRouteConfig, SwitchPortSettings, SwitchPortSettingsGroup, SwitchPortSettingsGroups, @@ -31,7 +31,7 @@ use diesel::{ use diesel_dtrace::DTraceConnection; use ipnetwork::IpNetwork; use nexus_db_model::{ - SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, + BgpConfig, SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, SwitchPortBgpPeerConfigAllowImport, SwitchPortBgpPeerConfigCommunity, }; use nexus_types::external_api::params; @@ -101,7 +101,7 @@ pub struct SwitchPortSettingsCombinedResult { pub groups: Vec, pub port: SwitchPortConfig, pub links: Vec, - pub link_lldp: Vec, + pub link_lldp: Vec, pub interfaces: Vec, pub vlan_interfaces: Vec, pub routes: Vec, @@ -333,6 +333,7 @@ impl DataStore { SwitchPortSettingsCreateError::ReserveBlock( ReserveBlockError::AddressNotInLot, ) => Error::invalid_request("address not in lot"), + } } else { @@ -451,19 +452,18 @@ impl DataStore { .load_async::(&conn) .await?; - let lldp_svc_ids: Vec = result + let lldp_link_ids: Vec = result .links .iter() - .map(|link| link.lldp_service_config_id) + .map(|link| link.lldp_link_config_id) 
.collect(); - use db::schema::lldp_service_config as lldp_config; - use db::schema::lldp_service_config::dsl as lldp_dsl; - result.link_lldp = lldp_dsl::lldp_service_config - .filter(lldp_config::id.eq_any(lldp_svc_ids)) - .select(LldpServiceConfig::as_select()) + use db::schema::lldp_link_config; + result.link_lldp = lldp_link_config::dsl::lldp_link_config + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) + .select(LldpLinkConfig::as_select()) .limit(1) - .load_async::(&conn) + .load_async::(&conn) .await?; // get the interface configs @@ -829,45 +829,158 @@ impl DataStore { port_settings_id: Option, current: UpdatePrecondition, ) -> UpdateResult<()> { + use db::schema::bgp_config::dsl as bgp_config_dsl; use db::schema::switch_port; use db::schema::switch_port::dsl as switch_port_dsl; + use db::schema::switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl; let conn = self.pool_connection_authorized(opctx).await?; - match current { - UpdatePrecondition::DontCare => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Null => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.is_null()) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Value(current_id) => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.eq(current_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - } + let err = 
OptionalError::new(); + self.transaction_retry_wrapper("switch_port_set_settings_id") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // TODO: remove once per-switch-multi-asn support is added + // Bail if user attempts to assign multiple ASNs to a switch via switch port settings + // This is a temporary measure until multi-asn-per-switch is supported. + + // what switch are we adding a configuration to? + let switch = switch_port_dsl::switch_port + .filter(switch_port_dsl::id.eq(switch_port_id)) + .select(switch_port_dsl::switch_location) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e: diesel::result::Error| { + let msg = "failed to look up switch port by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::SwitchPort, + &switch_port_id, + )) + } + _ => err.bail(Error::internal_error(msg)), + } + })?; + + // if we're setting a port settings id (and therefore activating a configuration + // on a port) we need to make sure there aren't any conflicting bgp configurations + if let Some(psid) = port_settings_id { + let bgp_config: Option = + match bgp_peer_dsl::switch_port_settings_bgp_peer_config + .inner_join( + bgp_config_dsl::bgp_config + .on(bgp_peer_dsl::bgp_config_id + .eq(bgp_config_dsl::id)), + ) + .filter( + bgp_peer_dsl::port_settings_id + .eq(psid), + ) + .select(BgpConfig::as_select()) + .limit(1) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + let msg = "failed to check if bgp peer exists in switch port settings"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + Ok(None) + } + _ => Err(err.bail(Error::internal_error(msg))), + } + } + }?; + + // find all port settings for the targeted switch + // switch port + // inner join bgp peer on port settings id + // inner join bgp config on bgp config id + // filter switch location eq switch + // filter 
port settings id not null + // filter asn doesn't equal our asn + + if let Some(config) = bgp_config { + let conflicting_bgp_configs: Vec = switch_port_dsl::switch_port + .inner_join( + bgp_peer_dsl::switch_port_settings_bgp_peer_config + .on(bgp_peer_dsl::port_settings_id + .nullable() + .eq(switch_port_dsl::port_settings_id)), + ) + .inner_join(bgp_config_dsl::bgp_config.on( + bgp_peer_dsl::bgp_config_id.eq(bgp_config_dsl::id), + )) + .filter(switch_port_dsl::switch_location.eq(switch)) + .filter(switch_port_dsl::port_settings_id.is_not_null()) + .filter(bgp_config_dsl::asn.ne(config.asn)) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !conflicting_bgp_configs.is_empty() { + return Err(err.bail(Error::conflict("a different asn is already configured on this switch"))); + } + } + + } + + // perform the requested update + match current { + UpdatePrecondition::DontCare => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + UpdatePrecondition::Null => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter(switch_port::port_settings_id.is_null()) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + UpdatePrecondition::Value(current_id) => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter( + switch_port::port_settings_id + .eq(current_id), + ) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + } + } + }) + .await + .map_err(|e| { + let msg = "switch_port_set_settings_id failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; 
Ok(()) } @@ -946,10 +1059,10 @@ impl DataStore { .eq(route_config_dsl::port_settings_id.nullable())), ) .select(SwitchPort::as_select()) - // TODO: #3592 Correctness - // In single rack deployments there are only 64 ports. We'll need - // pagination in the future, or maybe a way to constrain the query to - // a rack? + // TODO: #3592 Correctness + // In single rack deployments there are only 64 ports. We'll need + // pagination in the future, or maybe a way to constrain the query to + // a rack? .limit(64) .union( switch_port_dsl::switch_port @@ -958,7 +1071,7 @@ impl DataStore { bgp_peer_config_dsl::switch_port_settings_bgp_peer_config .on(switch_port_dsl::port_settings_id .eq(bgp_peer_config_dsl::port_settings_id.nullable()), - ), + ), ) .select(SwitchPort::as_select()) .limit(64), @@ -987,7 +1100,7 @@ async fn do_switch_port_settings_create( ) -> Result { use db::schema::{ address_lot::dsl as address_lot_dsl, bgp_config::dsl as bgp_config_dsl, - lldp_service_config::dsl as lldp_config_dsl, + lldp_link_config::dsl as lldp_link_config_dsl, switch_port_settings::dsl as port_settings_dsl, switch_port_settings_address_config::dsl as address_config_dsl, switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl, @@ -1047,17 +1160,21 @@ async fn do_switch_port_settings_create( let mut link_config = Vec::with_capacity(params.links.len()); for (link_name, c) in ¶ms.links { - let lldp_config_id = match c.lldp.lldp_config { - Some(_) => todo!(), // TODO actual lldp support - None => None, - }; - let lldp_svc_config = - LldpServiceConfig::new(c.lldp.enabled, lldp_config_id); + let lldp_link_config = LldpLinkConfig::new( + c.lldp.enabled, + c.lldp.link_name.clone(), + c.lldp.link_description.clone(), + c.lldp.chassis_id.clone(), + c.lldp.system_name.clone(), + c.lldp.system_description.clone(), + c.lldp.management_ip.map(|a| a.into()), + ); + let lldp_config_id = lldp_link_config.id; + lldp_config.push(lldp_link_config); - lldp_config.push(lldp_svc_config.clone()); 
link_config.push(SwitchPortLinkConfig::new( psid, - lldp_svc_config.id, + lldp_config_id, link_name.clone(), c.mtu, c.fec.into(), @@ -1066,9 +1183,9 @@ async fn do_switch_port_settings_create( )); } result.link_lldp = - diesel::insert_into(lldp_config_dsl::lldp_service_config) + diesel::insert_into(lldp_link_config_dsl::lldp_link_config) .values(lldp_config.clone()) - .returning(LldpServiceConfig::as_returning()) + .returning(LldpLinkConfig::as_returning()) .get_results_async(conn) .await?; @@ -1120,6 +1237,7 @@ async fn do_switch_port_settings_create( route.dst.into(), route.gw.into(), route.vid.map(Into::into), + route.local_pref.map(Into::into), )); } } @@ -1144,18 +1262,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); bgp_config_dsl::bgp_config - .filter(bgp_config::time_deleted.is_null()) - .filter(bgp_config::name.eq(name)) - .select(bgp_config::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::BgpConfigNotFound - ) - })? + .filter(bgp_config::time_deleted.is_null()) + .filter(bgp_config::name.eq(name)) + .select(bgp_config::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::BgpConfigNotFound + ) + })? 
} }; @@ -1173,9 +1291,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_import_dsl::switch_port_settings_bgp_peer_config_allow_import) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if let ImportExportPolicy::Allow(list) = &p.allowed_export { @@ -1192,9 +1310,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_export_dsl::switch_port_settings_bgp_peer_config_allow_export) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if !p.communities.is_empty() { @@ -1212,9 +1330,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(bgp_communities_dsl::switch_port_settings_bgp_peer_config_communities) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } bgp_peer_config.push(SwitchPortBgpPeerConfig::new( @@ -1225,6 +1343,7 @@ async fn do_switch_port_settings_create( )); } } + let db_bgp_peers: Vec = diesel::insert_into(bgp_peer_dsl::switch_port_settings_bgp_peer_config) .values(bgp_peer_config) @@ -1278,18 +1397,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); address_lot_dsl::address_lot - .filter(address_lot::time_deleted.is_null()) - .filter(address_lot::name.eq(name)) - .select(address_lot::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::AddressLotNotFound - ) - })? + .filter(address_lot::time_deleted.is_null()) + .filter(address_lot::name.eq(name)) + .select(address_lot::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::AddressLotNotFound + ) + })? 
} }; // TODO: Reduce DB round trips needed for reserving ip blocks @@ -1349,18 +1468,18 @@ async fn do_switch_port_settings_delete( NameOrId::Name(name) => { let name = name.to_string(); port_settings_dsl::switch_port_settings - .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::name.eq(name)) - .select(switch_port_settings::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound - ) - })? + .filter(switch_port_settings::time_deleted.is_null()) + .filter(switch_port_settings::name.eq(name)) + .select(switch_port_settings::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound + ) + })? } }; @@ -1389,13 +1508,12 @@ async fn do_switch_port_settings_delete( .returning(SwitchPortLinkConfig::as_returning()) .get_results_async(conn) .await?; - // delete lldp configs - use db::schema::lldp_service_config::{self, dsl as lldp_config_dsl}; - let lldp_svc_ids: Vec = - links.iter().map(|link| link.lldp_service_config_id).collect(); - diesel::delete(lldp_config_dsl::lldp_service_config) - .filter(lldp_service_config::id.eq_any(lldp_svc_ids)) + use db::schema::lldp_link_config; + let lldp_link_ids: Vec = + links.iter().map(|link| link.lldp_link_config_id).collect(); + diesel::delete(lldp_link_config::dsl::lldp_link_config) + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) .execute_async(conn) .await?; @@ -1556,7 +1674,7 @@ mod test { shaper: None, }; - datastore.bgp_config_set(&opctx, &bgp_config).await.unwrap(); + datastore.bgp_config_create(&opctx, &bgp_config).await.unwrap(); let settings = SwitchPortSettingsCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index f777384b7b..f5c1f121e4 100644 --- 
a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -1795,16 +1795,16 @@ pub struct VolumeReplacementParams { // parameters #[derive(Debug, Clone, Copy)] -pub struct VolumeWithTarget(Uuid); +pub struct VolumeWithTarget(pub Uuid); #[derive(Debug, Clone, Copy)] -pub struct ExistingTarget(SocketAddrV6); +pub struct ExistingTarget(pub SocketAddrV6); #[derive(Debug, Clone, Copy)] -pub struct ReplacementTarget(SocketAddrV6); +pub struct ReplacementTarget(pub SocketAddrV6); #[derive(Debug, Clone, Copy)] -pub struct VolumeToDelete(Uuid); +pub struct VolumeToDelete(pub Uuid); impl DataStore { /// Replace a read-write region in a Volume with a new region. diff --git a/nexus/db-queries/src/db/datastore/volume_repair.rs b/nexus/db-queries/src/db/datastore/volume_repair.rs index 5230e60e3e..c4e9f8b090 100644 --- a/nexus/db-queries/src/db/datastore/volume_repair.rs +++ b/nexus/db-queries/src/db/datastore/volume_repair.rs @@ -13,6 +13,7 @@ use crate::db::error::ErrorHandler; use crate::db::model::VolumeRepair; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; use omicron_common::api::external::Error; use uuid::Uuid; @@ -39,7 +40,18 @@ impl DataStore { .execute_async(&*conn) .await .map(|_| ()) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| match e { + DieselError::DatabaseError( + DatabaseErrorKind::UniqueViolation, + ref error_information, + ) if error_information.constraint_name() + == Some("volume_repair_pkey") => + { + Error::conflict("volume repair lock") + } + + _ => public_error_from_diesel(e, ErrorHandler::Server), + }) } pub(super) fn volume_repair_delete_query( @@ -83,3 +95,35 @@ impl DataStore { .await } } + +#[cfg(test)] +mod test { + use super::*; + + use crate::db::datastore::test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + + 
#[tokio::test] + async fn volume_lock_conflict_error_returned() { + let logctx = dev::test_setup_log("volume_lock_conflict_error_returned"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let lock_1 = Uuid::new_v4(); + let lock_2 = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + datastore.volume_repair_lock(&opctx, volume_id, lock_1).await.unwrap(); + + let err = datastore + .volume_repair_lock(&opctx, volume_id, lock_2) + .await + .unwrap_err(); + + assert!(matches!(err, Error::Conflict { .. })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 0dcc3aa717..920ff3aee1 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -4,7 +4,7 @@ //! Implementation of [`steno::SecStore`] backed by Omicron's database -use crate::db::{self, model::Generation}; +use crate::db; use anyhow::Context; use async_trait::async_trait; use dropshot::HttpError; @@ -102,12 +102,7 @@ impl steno::SecStore for CockroachDbSecStore { &log, || { self.datastore - .saga_update_state( - id, - update, - self.sec_id, - Generation::new(), - ) + .saga_update_state(id, update, self.sec_id) .map_err(backoff::BackoffError::transient) }, "updating saga state", diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index 754f37c064..c87e1255b5 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -139,6 +139,8 @@ v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index bd50e846bd..f844adccbe 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -125,6 +125,8 @@ v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index c6ade3b1a2..7ac3e42f57 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -20,15 +20,15 @@ use nexus_types::{ }, internal_api::{ params::{ - OximeterInfo, RackInitializationRequest, SledAgentInfo, - SwitchPutRequest, SwitchPutResponse, + InstanceMigrateRequest, OximeterInfo, RackInitializationRequest, + SledAgentInfo, SwitchPutRequest, SwitchPutResponse, }, - views::{BackgroundTask, Ipv4NatEntryView, Saga}, + views::{BackgroundTask, DemoSaga, Ipv4NatEntryView, Saga}, }, }; use omicron_common::{ api::{ - external::http_pagination::PaginatedById, + external::{http_pagination::PaginatedById, Instance}, internal::nexus::{ DiskRuntimeState, DownstairsClientStopRequest, DownstairsClientStopped, ProducerEndpoint, @@ -39,7 +39,8 @@ use omicron_common::{ update::ArtifactId, }; use omicron_uuid_kinds::{ - DownstairsKind, SledUuid, TypedUuid, UpstairsKind, UpstairsRepairKind, + DemoSagaUuid, DownstairsKind, SledUuid, TypedUuid, UpstairsKind, + UpstairsRepairKind, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -118,6 +119,16 @@ pub trait NexusInternalApi { new_runtime_state: TypedBody, ) -> Result; + #[endpoint { + method = POST, + path = "/instances/{instance_id}/migrate", + }] + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> 
Result, HttpError>; + /// Report updated state for a disk. #[endpoint { method = PUT, @@ -282,6 +293,31 @@ pub trait NexusInternalApi { path_params: Path, ) -> Result, HttpError>; + /// Kick off an instance of the "demo" saga + /// + /// This saga is used for demo and testing. The saga just waits until you + /// complete using the `saga_demo_complete` API. + #[endpoint { + method = POST, + path = "/demo-saga", + }] + async fn saga_demo_create( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Complete a waiting demo saga + /// + /// Note that the id used here is not the same as the id of the saga. It's + /// the one returned by the `saga_demo_create` API. + #[endpoint { + method = POST, + path = "/demo-saga/{demo_saga_id}/complete", + }] + async fn saga_demo_complete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + // Background Tasks /// List background tasks @@ -565,6 +601,12 @@ pub struct SagaPathParam { pub saga_id: Uuid, } +/// Path parameters for DemoSaga requests +#[derive(Deserialize, JsonSchema)] +pub struct DemoSagaPathParam { + pub demo_saga_id: DemoSagaUuid, +} + /// Path parameters for Background Task requests #[derive(Deserialize, JsonSchema)] pub struct BackgroundTaskPathParam { diff --git a/nexus/networking/src/firewall_rules.rs b/nexus/networking/src/firewall_rules.rs index 4ba66ec9f3..8491092353 100644 --- a/nexus/networking/src/firewall_rules.rs +++ b/nexus/networking/src/firewall_rules.rs @@ -49,7 +49,7 @@ pub async fn resolve_firewall_rules_for_sled_agent( vpc: &db::model::Vpc, rules: &[db::model::VpcFirewallRule], log: &Logger, -) -> Result, Error> { +) -> Result, Error> { // Collect the names of instances, subnets, and VPCs that are either // targets or host filters. 
We have to find the sleds for all the // targets, and we'll need information about the IP addresses or @@ -417,16 +417,18 @@ pub async fn resolve_firewall_rules_for_sled_agent( .as_ref() .map(|protocols| protocols.iter().map(|v| v.0.into()).collect()); - sled_agent_rules.push(sled_agent_client::types::VpcFirewallRule { - status: rule.status.0.into(), - direction: rule.direction.0.into(), - targets, - filter_hosts, - filter_ports, - filter_protocols, - action: rule.action.0.into(), - priority: rule.priority.0 .0, - }); + sled_agent_rules.push( + sled_agent_client::types::ResolvedVpcFirewallRule { + status: rule.status.0.into(), + direction: rule.direction.0.into(), + targets, + filter_hosts, + filter_ports, + filter_protocols, + action: rule.action.0.into(), + priority: rule.priority.0 .0, + }, + ); } debug!( log, diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 498944598d..12ff896d9d 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -34,11 +34,13 @@ pub(crate) async fn ensure_settings( mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::CockroachDbClusterVersion; + use nexus_types::deployment::CockroachDbPreserveDowngrade; use std::sync::Arc; + use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -69,24 +71,26 @@ mod test { .await .expect("failed to get blueprint from datastore"); eprintln!("blueprint: {}", blueprint.display()); - // The initial blueprint should already have these filled in. + // The initial blueprint should already have the state fingerprint + // filled in. 
assert_eq!( blueprint.cockroachdb_fingerprint, settings.state_fingerprint ); - assert_eq!( - blueprint.cockroachdb_setting_preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.into() - ); - // The cluster version, preserve downgrade setting, and - // `NEWLY_INITIALIZED` should all match. - assert_eq!( - settings.version, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() - ); + // The initial blueprint should already have the preserve downgrade + // setting filled in. (It might be the current or previous version, but + // it should be `Set` regardless.) + let CockroachDbPreserveDowngrade::Set(bp_preserve_downgrade) = + blueprint.cockroachdb_setting_preserve_downgrade + else { + panic!("blueprint does not set preserve downgrade option"); + }; + // The cluster version, preserve downgrade setting, and the value in the + // blueprint should all match. + assert_eq!(settings.version, bp_preserve_downgrade.to_string()); assert_eq!( settings.preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() + bp_preserve_downgrade.to_string() ); // Record the zpools so we don't fail to ensure datasets (unrelated to // crdb settings) during blueprint execution. @@ -96,16 +100,17 @@ mod test { .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // The CockroachDB settings should not have changed. 
assert_eq!( settings, diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 542510f39e..c6ab890f21 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -275,6 +275,9 @@ pub fn blueprint_internal_dns_config( BlueprintZoneType::ClickhouseKeeper( blueprint_zone_type::ClickhouseKeeper { address, .. }, ) => (ServiceName::ClickhouseKeeper, address.port()), + BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { address, .. }, + ) => (ServiceName::ClickhouseServer, address.port()), BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, .. }, ) => (ServiceName::Cockroach, address.port()), @@ -455,6 +458,7 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use crate::Sled; use dns_service_client::DnsDiff; use internal_dns::config::Host; @@ -499,13 +503,13 @@ mod test { use omicron_common::address::get_switch_zone_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; - use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; - use omicron_common::address::COCKROACHDB_REDUNDANCY; - use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; + use omicron_common::policy::COCKROACHDB_REDUNDANCY; + use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ExternalIpUuid; @@ -1243,16 +1247,17 @@ mod test { // Now, execute the initial blueprint. 
let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // DNS ought not to have changed. verify_dns_unchanged( @@ -1394,16 +1399,17 @@ mod test { .await .expect("failed to set blueprint as target"); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint"); // Now fetch DNS again. Both should have changed this time. let dns_latest_internal = datastore @@ -1468,16 +1474,17 @@ mod test { } // If we execute it again, we should see no more changes. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1504,16 +1511,17 @@ mod test { // One more time, make sure that executing the blueprint does not do // anything. 
- crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1598,16 +1606,17 @@ mod test { ); // If we execute the blueprint, DNS should not be changed. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute blueprint"); let dns_latest_internal = datastore .dns_config_read(&opctx, DnsGroup::Internal) .await diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 1d03fc03cb..2844d9c86b 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -24,6 +24,7 @@ use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; +use uuid::Uuid; mod cockroachdb; mod datasets; @@ -31,6 +32,7 @@ mod dns; mod omicron_physical_disks; mod omicron_zones; mod overridables; +mod sagas; mod sled_state; pub use dns::blueprint_external_dns_config; @@ -68,43 +70,46 @@ impl From for Sled { } } +/// The result of calling [`realize_blueprint`] or +/// [`realize_blueprint_with_overrides`]. +#[derive(Debug)] +#[must_use = "the output of realize_blueprint should probably be used"] +pub struct RealizeBlueprintOutput { + /// Whether any sagas need to be reassigned to a new Nexus. 
+ pub needs_saga_recovery: bool, +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. -pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, -) -> Result<(), Vec> -where - String: From, -{ + nexus_id: Uuid, +) -> Result> { realize_blueprint_with_overrides( opctx, datastore, resolver, blueprint, - nexus_label, + nexus_id, &Default::default(), ) .await } -pub async fn realize_blueprint_with_overrides( +pub async fn realize_blueprint_with_overrides( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, + nexus_id: Uuid, overrides: &Overridables, -) -> Result<(), Vec> -where - String: From, -{ +) -> Result> { let opctx = opctx.child(BTreeMap::from([( "comment".to_string(), blueprint.comment.clone(), @@ -136,7 +141,7 @@ where }) .collect(); - omicron_physical_disks::deploy_disks( + let deploy_disks_done = omicron_physical_disks::deploy_disks( &opctx, &sleds_by_id, &blueprint.blueprint_disks, @@ -187,7 +192,7 @@ where dns::deploy_dns( &opctx, datastore, - String::from(nexus_label), + nexus_id.to_string(), blueprint, &sleds_by_id, overrides, @@ -214,20 +219,50 @@ where ) .await?; - // This depends on the "deploy_disks" call earlier -- disk expungement is a - // statement of policy, but we need to be assured that the Sled Agent has - // stopped using that disk before we can mark its state as decommissioned. 
- omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) - .await?; + omicron_physical_disks::decommission_expunged_disks( + &opctx, + datastore, + deploy_disks_done, + ) + .await?; + + // From this point on, we'll assume that any errors that we encounter do + // *not* require stopping execution. We'll just accumulate them and return + // them all at the end. + // + // TODO We should probably do this with more of the errors above, too. + let mut errors = Vec::new(); + + // For any expunged Nexus zones, re-assign in-progress sagas to some other + // Nexus. If this fails for some reason, it doesn't affect anything else. + let sec_id = nexus_db_model::SecId(nexus_id); + let reassigned = sagas::reassign_sagas_from_expunged( + &opctx, datastore, blueprint, sec_id, + ) + .await + .context("failed to re-assign sagas"); + let needs_saga_recovery = match reassigned { + Ok(needs_recovery) => needs_recovery, + Err(error) => { + errors.push(error); + false + } + }; // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. 
- cockroachdb::ensure_settings(&opctx, datastore, blueprint) - .await - .map_err(|err| vec![err])?; + if let Err(error) = + cockroachdb::ensure_settings(&opctx, datastore, blueprint).await + { + errors.push(error); + } - Ok(()) + if errors.is_empty() { + Ok(RealizeBlueprintOutput { needs_saga_recovery }) + } else { + Err(errors) + } } #[cfg(test)] diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index 8209577ffd..caf3a3ffee 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -25,7 +25,7 @@ pub(crate) async fn deploy_disks( opctx: &OpContext, sleds_by_id: &BTreeMap, sled_configs: &BTreeMap, -) -> Result<(), Vec> { +) -> Result> { let errors: Vec<_> = stream::iter(sled_configs) .filter_map(|(sled_id, config)| async move { let log = opctx.log.new(o!( @@ -92,16 +92,26 @@ pub(crate) async fn deploy_disks( .await; if errors.is_empty() { - Ok(()) + Ok(DeployDisksDone {}) } else { Err(errors) } } -/// Decommissions all disks which are currently expunged +/// Typestate indicating that the deploy disks step was performed. +#[derive(Debug)] +#[must_use = "this should be passed into decommission_expunged_disks"] +pub(crate) struct DeployDisksDone {} + +/// Decommissions all disks which are currently expunged. pub(crate) async fn decommission_expunged_disks( opctx: &OpContext, datastore: &DataStore, + // This is taken as a parameter to ensure that this depends on a + // "deploy_disks" call made earlier. Disk expungement is a statement of + // policy, but we need to be assured that the Sled Agent has stopped using + // that disk before we can mark its state as decommissioned. 
+ _deploy_disks_done: DeployDisksDone, ) -> Result<(), Vec> { datastore .physical_disk_decommission_all_expunged(&opctx) @@ -113,6 +123,7 @@ pub(crate) async fn decommission_expunged_disks( #[cfg(test)] mod test { use super::deploy_disks; + use super::DeployDisksDone; use crate::DataStore; use crate::Sled; @@ -218,9 +229,13 @@ mod test { // Get a success result back when the blueprint has an empty set of // disks. let (_, blueprint) = create_blueprint(BTreeMap::new()); - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy no disks"); + // Use an explicit type here because not doing so can cause errors to + // be ignored (this behavior is genuinely terrible). Instead, ensure + // that the type has the right result. + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy no disks"); // Disks are updated in a particular order, but each request contains // the full set of disks that must be running. @@ -273,9 +288,10 @@ mod test { } // Execute it. 
- deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy initial disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy initial disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -294,9 +310,10 @@ mod test { )), ); } - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy same disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy same disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -568,7 +585,15 @@ mod test { assert_eq!(d.disk_state, PhysicalDiskState::Active); assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); - super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + super::decommission_expunged_disks( + &opctx, + &datastore, + // This is an internal test, and we're testing decommissioning in + // isolation, so it's okay to create the typestate here. + DeployDisksDone {}, + ) + .await + .unwrap(); // After decommissioning, we see the expunged disk become // decommissioned. The other disk remains in-service. 
diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index c8c02531a0..6336e40879 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -138,6 +138,7 @@ pub(crate) async fn clean_up_expunged_zones( BlueprintZoneType::BoundaryNtp(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::Crucible(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) diff --git a/nexus/reconfigurator/execution/src/sagas.rs b/nexus/reconfigurator/execution/src/sagas.rs new file mode 100644 index 0000000000..458328ef00 --- /dev/null +++ b/nexus/reconfigurator/execution/src/sagas.rs @@ -0,0 +1,71 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Re-assign sagas from expunged Nexus zones + +use nexus_db_model::SecId; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::GenericUuid; +use slog::{debug, info, warn}; + +/// For each expunged Nexus zone, re-assign sagas owned by that Nexus to the +/// specified nexus (`nexus_id`). +pub(crate) async fn reassign_sagas_from_expunged( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + nexus_id: SecId, +) -> Result { + let log = &opctx.log; + + // Identify any Nexus zones that have been expunged and need to have sagas + // re-assigned. + // + // TODO: Currently, we take any expunged Nexus instances and attempt to + // assign all their sagas to ourselves. 
Per RFD 289, we can only re-assign + // sagas between two instances of Nexus that are at the same version. Right + // now this can't happen so there's nothing to do here to ensure that + // constraint. However, once we support allowing the control plane to be + // online _during_ an upgrade, there may be multiple different Nexus + // instances running at the same time. At that point, we will need to make + // sure that we only ever try to assign ourselves sagas from other Nexus + // instances that we know are running the same version as ourselves. + let nexus_zone_ids: Vec<_> = blueprint + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .filter_map(|(_, z)| { + z.zone_type + .is_nexus() + .then(|| nexus_db_model::SecId(z.id.into_untyped_uuid())) + }) + .collect(); + + debug!(log, "re-assign sagas: found Nexus instances"; + "nexus_zone_ids" => ?nexus_zone_ids); + + let result = + datastore.sagas_reassign_sec(opctx, &nexus_zone_ids, nexus_id).await; + + match result { + Ok(count) => { + info!(log, "re-assigned sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + "count" => count, + ); + + Ok(count != 0) + } + Err(error) => { + warn!(log, "failed to re-assign sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + &error, + ); + + Err(error) + } + } +} diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index c08f30124c..2fb60e66f8 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -31,6 +31,7 @@ impl DiscretionaryOmicronZone { // Zones that we should place but don't yet. 
BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::InternalDns(_) diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index e953910e92..aaeaf606fb 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -33,7 +33,6 @@ use nexus_types::inventory::SpType; use omicron_common::address::get_sled_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; @@ -41,6 +40,7 @@ use omicron_common::api::external::Generation; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -329,6 +329,7 @@ impl SystemDescription { target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 1be25f6c6c..00cdb45594 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,14 +33,14 @@ use nexus_types::identity::Resource; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use 
omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DiskIdentity; +use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -84,6 +84,7 @@ impl PlanningInputFromDb<'_> { target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 850e63443a..37c276fa07 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -108,6 +108,8 @@ use super::tasks::phantom_disks; use super::tasks::physical_disk_adoption; use super::tasks::region_replacement; use super::tasks::region_replacement_driver; +use super::tasks::region_snapshot_replacement_garbage_collect::*; +use super::tasks::region_snapshot_replacement_start::*; use super::tasks::saga_recovery; use super::tasks::service_firewall_rules; use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; @@ -161,6 +163,8 @@ pub struct BackgroundTasks { pub task_vpc_route_manager: Activator, pub task_saga_recovery: Activator, pub task_lookup_region_port: Activator, + pub task_region_snapshot_replacement_start: Activator, + pub task_region_snapshot_replacement_garbage_collection: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. 
These background tasks are implementation details as far as @@ -242,6 +246,9 @@ impl BackgroundTasksInitializer { task_vpc_route_manager: Activator::new(), task_saga_recovery: Activator::new(), task_lookup_region_port: Activator::new(), + task_region_snapshot_replacement_start: Activator::new(), + task_region_snapshot_replacement_garbage_collection: Activator::new( + ), task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), @@ -303,6 +310,8 @@ impl BackgroundTasksInitializer { task_vpc_route_manager, task_saga_recovery, task_lookup_region_port, + task_region_snapshot_replacement_start, + task_region_snapshot_replacement_garbage_collection, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. @@ -439,7 +448,8 @@ impl BackgroundTasksInitializer { datastore.clone(), resolver.clone(), rx_blueprint.clone(), - nexus_id.to_string(), + nexus_id, + task_saga_recovery.clone(), ); let rx_blueprint_exec = blueprint_executor.watcher(); driver.register(TaskDefinition { @@ -721,13 +731,44 @@ impl BackgroundTasksInitializer { description: "fill in missing ports for region records", period: config.lookup_region_port.period_secs, task_impl: Box::new(lookup_region_port::LookupRegionPort::new( - datastore, + datastore.clone(), )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_lookup_region_port, }); + driver.register(TaskDefinition { + name: "region_snapshot_replacement_start", + description: + "detect if region snapshots need replacement and begin the \ + process", + period: config.region_snapshot_replacement_start.period_secs, + task_impl: Box::new(RegionSnapshotReplacementDetector::new( + datastore.clone(), + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_start, + }); + + driver.register(TaskDefinition 
{ + name: "region_snapshot_replacement_garbage_collection", + description: + "clean up all region snapshot replacement step volumes", + period: config + .region_snapshot_replacement_garbage_collection + .period_secs, + task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new( + datastore, + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_garbage_collection, + }); + driver } } diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index 415372031d..bfaca2e4ec 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,16 +4,18 @@ //! Background task for realizing a plan blueprint -use crate::app::background::BackgroundTask; +use crate::app::background::{Activator, BackgroundTask}; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_reconfigurator_execution::RealizeBlueprintOutput; use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; use std::sync::Arc; use tokio::sync::watch; +use uuid::Uuid; /// Background task that takes a [`Blueprint`] and realizes the change to /// the state of the system based on the `Blueprint`. 
@@ -21,8 +23,9 @@ pub struct BlueprintExecutor { datastore: Arc, resolver: Resolver, rx_blueprint: watch::Receiver>>, - nexus_label: String, + nexus_id: Uuid, tx: watch::Sender, + saga_recovery: Activator, } impl BlueprintExecutor { @@ -32,10 +35,18 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, - nexus_label: String, + nexus_id: Uuid, + saga_recovery: Activator, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); - BlueprintExecutor { datastore, resolver, rx_blueprint, nexus_label, tx } + BlueprintExecutor { + datastore, + resolver, + rx_blueprint, + nexus_id, + tx, + saga_recovery, + } } pub fn watcher(&self) -> watch::Receiver { @@ -81,7 +92,7 @@ impl BlueprintExecutor { &self.datastore, &self.resolver, blueprint, - &self.nexus_label, + self.nexus_id, ) .await; @@ -90,7 +101,19 @@ impl BlueprintExecutor { // Return the result as a `serde_json::Value` match result { - Ok(()) => json!({}), + Ok(RealizeBlueprintOutput { needs_saga_recovery }) => { + // If executing the blueprint requires activating the saga + // recovery background task, do that now. + if needs_saga_recovery { + info!(&opctx.log, "activating saga recovery task"); + self.saga_recovery.activate(); + } + + json!({ + "target_id": blueprint.id.to_string(), + "needs_saga_recovery": needs_saga_recovery, + }) + } Err(errors) => { let errors: Vec<_> = errors.into_iter().map(|e| format!("{:#}", e)).collect(); @@ -115,7 +138,7 @@ impl BackgroundTask for BlueprintExecutor { #[cfg(test)] mod test { use super::BlueprintExecutor; - use crate::app::background::BackgroundTask; + use crate::app::background::{Activator, BackgroundTask}; use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; @@ -264,7 +287,8 @@ mod test { datastore.clone(), resolver.clone(), blueprint_rx, - String::from("test-suite"), + Uuid::new_v4(), + Activator::new(), ); // Now we're ready. 
@@ -288,10 +312,17 @@ mod test { ) .await, ); + let blueprint_id = blueprint.1.id; blueprint_tx.send(Some(blueprint)).unwrap(); let value = task.activate(&opctx).await; println!("activating with no zones: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint_id, + "needs_saga_recovery": false, + }) + ); // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and @@ -380,7 +411,13 @@ mod test { // Activate the task to trigger zone configuration on the sled-agents let value = task.activate(&opctx).await; println!("activating two sled agents: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint.1.id.to_string(), + "needs_saga_recovery": false, + }) + ); s1.verify_and_clear(); s2.verify_and_clear(); diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs index fbfc5c5af2..df501fe6b1 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -53,7 +53,6 @@ impl BackgroundTask for LookupRegionPort { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - info!(&log, "lookup region port task started"); let mut status = LookupRegionPortStatus::default(); @@ -147,8 +146,6 @@ impl BackgroundTask for LookupRegionPort { } } - info!(&log, "lookup region port task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index fe041a6daa..7ba68d0b80 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -25,6 +25,8 @@ pub mod phantom_disks; pub mod physical_disk_adoption; pub mod region_replacement; pub mod region_replacement_driver; +pub mod region_snapshot_replacement_garbage_collect; +pub mod region_snapshot_replacement_start; pub mod 
saga_recovery; pub mod service_firewall_rules; pub mod sync_service_zone_nat; diff --git a/nexus/src/app/background/tasks/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs index 4b0d8bec38..7f3fceab1c 100644 --- a/nexus/src/app/background/tasks/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -43,7 +43,6 @@ impl BackgroundTask for PhantomDiskDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "phantom disk task started"); let phantom_disks = match self.datastore.find_phantom_disks().await { @@ -83,14 +82,13 @@ impl BackgroundTask for PhantomDiskDetector { } else { info!( &log, - "phandom disk {} un-deleted andset to faulted ok", + "phandom disk {} un-deleted and set to faulted ok", disk.id(), ); phantom_disk_deleted_ok += 1; } } - warn!(&log, "phantom disk task done"); json!({ "phantom_disk_deleted_ok": phantom_disk_deleted_ok, "phantom_disk_deleted_err": phantom_disk_deleted_err, diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index f3b9e8ac62..b1eceed0b6 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -96,8 +96,6 @@ impl BackgroundTask for PhysicalDiskAdoption { } let mut disks_added = 0; - let log = &opctx.log; - warn!(&log, "physical disk adoption task started"); let collection_id = *self.rx_inventory_collection.borrow(); let Some(collection_id) = collection_id else { @@ -171,7 +169,6 @@ impl BackgroundTask for PhysicalDiskAdoption { ); } - warn!(&log, "physical disk adoption task done"); json!({ "physical_disks_added": disks_added, }) diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f852f21734..ba0e7f86fb 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ 
-61,7 +61,6 @@ impl BackgroundTask for RegionReplacementDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "region replacement task started"); let mut ok = 0; let mut err = 0; @@ -182,8 +181,6 @@ impl BackgroundTask for RegionReplacementDetector { } } - warn!(&log, "region replacement task done"); - json!({ "region_replacement_started_ok": ok, "region_replacement_started_err": err, diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 284ed2c368..02db86eab3 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -227,16 +227,11 @@ impl BackgroundTask for RegionReplacementDriver { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region replacement driver task started"); - let mut status = RegionReplacementDriverStatus::default(); self.drive_running_replacements_forward(opctx, &mut status).await; self.complete_done_replacements(opctx, &mut status).await; - info!(&log, "region replacement driver task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 0000000000..77dc87c060 --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,254 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Background task for deleting volumes that stash a replaced region snapshot + +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_garbage_collect::*; +use crate::app::sagas::NexusSaga; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementGarbageCollect { + datastore: Arc, + sagas: Arc, +} + +impl RegionSnapshotReplacementGarbageCollect { + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementGarbageCollect { datastore, sagas } + } + + async fn send_garbage_collect_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id + else { + // This state is illegal! 
+ let s = format!( + "request {} old snapshot volume id is None!", + request.id, + ); + + return Err(omicron_common::api::external::Error::internal_error( + &s, + )); + }; + + let params = + sagas::region_snapshot_replacement_garbage_collect::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + old_snapshot_volume_id, + request, + }; + + let saga_dag = + SagaRegionSnapshotReplacementGarbageCollect::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + async fn clean_up_region_snapshot_replacement_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementGarbageCollectStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .get_replacement_done_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!("querying for requests to collect failed! {e}"); + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = + self.send_garbage_collect_request(opctx, request.clone()).await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement garbage collect request \ + ok for {request_id}" + ); + + info!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.garbage_collect_requested.push(s); + } + + Err(e) => { + let s = format!( + "sending region snapshot replacement garbage collect \ + request failed: {e}", + ); + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + 
async move { + let mut status = + RegionSnapshotReplacementGarbageCollectStatus::default(); + + self.clean_up_region_snapshot_replacement_volumes( + opctx, + &mut status, + ) + .await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::init::test::NoopStartSaga; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_test_utils_macros::nexus_test; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_task( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementGarbageCollect::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementGarbageCollectStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!( + result, + RegionSnapshotReplacementGarbageCollectStatus::default() + ); + assert_eq!(starter.count_reset(), 0); + + // Add two region snapshot requests that need garbage collection + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_1_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + 
RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_2_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Activate the task - it should pick up the two requests + + let result: RegionSnapshotReplacementGarbageCollectStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + for error in &result.errors { + eprintln!("{error}"); + } + + assert_eq!(result.garbage_collect_requested.len(), 2); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_1_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_2_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } +} diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs new file mode 100644 index 0000000000..1fdc17690d --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -0,0 +1,507 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting region snapshots that need replacing and +//! beginning that process +//! +//! This task's responsibility is to create region snapshot replacement requests +//! when physical disks are expunged, and trigger the region snapshot +//! replacement start saga for any requests that are in state "Requested". See +//! the documentation in that saga's docstring for more information. 
+ +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_start::*; +use crate::app::sagas::NexusSaga; +use crate::app::RegionAllocationStrategy; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementDetector { + datastore: Arc, + sagas: Arc, +} + +impl RegionSnapshotReplacementDetector { + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementDetector { datastore, sagas } + } + + async fn send_start_request( + &self, + serialized_authn: authn::saga::Serialized, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_snapshot_replacement_start::Params { + serialized_authn, + request, + allocation_strategy: + RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, + }; + + let saga_dag = SagaRegionSnapshotReplacementStart::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + /// Find region snapshots on expunged physical disks and create region + /// snapshot replacement requests for them. 
+ async fn create_requests_for_region_snapshots_on_expunged_disks( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStartStatus, + ) { + let log = &opctx.log; + + // Find region snapshots on expunged physical disks + let region_snapshots_to_be_replaced = match self + .datastore + .find_region_snapshots_on_expunged_physical_disks(opctx) + .await + { + Ok(region_snapshots) => region_snapshots, + + Err(e) => { + let s = format!( + "find_region_snapshots_on_expunged_physical_disks \ + failed: {e}", + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for region_snapshot in region_snapshots_to_be_replaced { + // If no request exists yet, create one. + let existing_request = match self + .datastore + .lookup_region_snapshot_replacement_request( + opctx, + ®ion_snapshot, + ) + .await + { + Ok(existing_request) => existing_request, + + Err(e) => { + let s = + format!("error looking up replacement request: {e}"); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.errors.push(s); + continue; + } + }; + + if existing_request.is_none() { + match self + .datastore + .create_region_snapshot_replacement_request( + opctx, + ®ion_snapshot, + ) + .await + { + Ok(request_id) => { + let s = format!( + "created region snapshot replacement request \ + {request_id}" + ); + + info!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.requests_created_ok.push(s); + } + + Err(e) => { + let s = + format!("error creating replacement request: {e}"); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.errors.push(s); + } + } + } + } + } + + /// For each region snapshot 
replacement request in state "Requested", run + /// the start saga. + async fn start_requested_region_snapshot_replacements( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStartStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .get_requested_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "query for region snapshot replacement requests failed: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = self + .send_start_request( + authn::saga::Serialized::for_opctx(opctx), + request.clone(), + ) + .await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement start invoked ok for \ + {request_id}" + ); + + info!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.start_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "invoking region snapshot replacement start for \ + {request_id} failed: {e}", + ); + + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementDetector { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let mut status = RegionSnapshotReplacementStartStatus::default(); + + self.create_requests_for_region_snapshots_on_expunged_disks( + opctx, + &mut status, + ) + .await; + + self.start_requested_region_snapshot_replacements( + opctx, + &mut status, + ) + .await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use 
crate::app::background::init::test::NoopStartSaga; + use crate::app::MIN_DISK_SIZE_BYTES; + use chrono::Utc; + use nexus_db_model::BlockSize; + use nexus_db_model::Generation; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::RegionSnapshot; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::Snapshot; + use nexus_db_model::SnapshotIdentity; + use nexus_db_model::SnapshotState; + use nexus_db_queries::authz; + use nexus_db_queries::db::lookup::LookupPath; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external; + use omicron_uuid_kinds::GenericUuid; + use std::collections::BTreeMap; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, crate::Server>; + + #[nexus_test(server = crate::Server)] + async fn test_add_region_snapshot_replacement_causes_start( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add a region snapshot replacement request for a fake region snapshot + + let request = RegionSnapshotReplacement::new( + Uuid::new_v4(), // dataset id + Uuid::new_v4(), // region id + Uuid::new_v4(), // snapshot id + ); + + let request_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), 
+ ) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the + // region snapshot replacement start saga + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result, + RegionSnapshotReplacementStartStatus { + requests_created_ok: vec![], + start_invoked_ok: vec![format!( + "region snapshot replacement start invoked ok for \ + {request_id}" + )], + errors: vec![], + }, + ); + + assert_eq!(starter.count_reset(), 1); + } + + #[nexus_test(server = crate::Server)] + async fn test_expunge_disk_causes_region_snapshot_replacement_start( + cptestctx: &ControlPlaneTestContext, + ) { + let disk_test = DiskTest::new(cptestctx).await; + + let client = &cptestctx.external_client; + let project = create_project(&client, "testing").await; + let project_id = project.identity.id; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add three region snapshots for each dataset + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + let mut dataset_to_zpool: BTreeMap = + BTreeMap::default(); + + for zpool in disk_test.zpools() { + for dataset in &zpool.datasets { + dataset_to_zpool + .insert(zpool.id.to_string(), dataset.id.to_string()); + + datastore + .region_snapshot_create(RegionSnapshot::new( + dataset.id, + region_id, + snapshot_id, + String::from("[fd00:1122:3344::101]:12345"), + )) + .await + .unwrap(); + } + } + + 
// Create the fake snapshot + + let (.., authz_project) = LookupPath::new(&opctx, &datastore) + .project_id(project_id) + .lookup_for(authz::Action::CreateChild) + .await + .unwrap(); + + datastore + .project_ensure_snapshot( + &opctx, + &authz_project, + Snapshot { + identity: SnapshotIdentity { + id: snapshot_id, + name: external::Name::try_from("snapshot".to_string()) + .unwrap() + .into(), + description: "snapshot".into(), + + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: None, + }, + + project_id, + disk_id: Uuid::new_v4(), + volume_id: Uuid::new_v4(), + destination_volume_id: Uuid::new_v4(), + + gen: Generation::new(), + state: SnapshotState::Creating, + block_size: BlockSize::AdvancedFormat, + + size: external::ByteCount::try_from(MIN_DISK_SIZE_BYTES) + .unwrap() + .into(), + }, + ) + .await + .unwrap(); + + // Expunge one of the physical disks + + let first_zpool = + disk_test.zpools().next().expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(first_zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id, + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // snapshot replacement start saga for the region snapshot on that + // expunged disk + + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + eprintln!("{:?}", &result); + + assert_eq!(result.requests_created_ok.len(), 1); + assert_eq!(result.start_invoked_ok.len(), 1); + assert!(result.errors.is_empty()); + + // The last part of the message is the region snapshot replacement + // request id + let request_created_uuid: Uuid = result.requests_created_ok[0] + .split(" ") + .last() + .unwrap() + .parse() + .unwrap(); + let request_started_uuid: Uuid = result.start_invoked_ok[0] + .split(" 
") + .last() + .unwrap() + .parse() + .unwrap(); + + assert_eq!(request_created_uuid, request_started_uuid); + + assert_eq!(starter.count_reset(), 1); + + let request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + request_created_uuid, + ) + .await + .unwrap(); + + assert_eq!(request.old_snapshot_id, snapshot_id); + assert_eq!(request.old_region_id, region_id); + + let dataset_id = + dataset_to_zpool.get(&first_zpool.id.to_string()).unwrap(); + assert_eq!(&request.old_dataset_id.to_string(), dataset_id); + } +} diff --git a/nexus/src/app/background/tasks/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs index eb9554cff7..4fbef3ae2e 100644 --- a/nexus/src/app/background/tasks/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -239,6 +239,7 @@ impl BackgroundTask for ServiceZoneNatTracker { // well OmicronZoneType::Clickhouse {..} => continue, OmicronZoneType::ClickhouseKeeper {..} => continue, + OmicronZoneType::ClickhouseServer{..} => continue, OmicronZoneType::CockroachDb {..} => continue, OmicronZoneType::Crucible {..} => continue, OmicronZoneType::CruciblePantry {..} => continue, diff --git a/nexus/src/app/background/tasks/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs index 20a12d1127..f86bb1a782 100644 --- a/nexus/src/app/background/tasks/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -51,8 +51,9 @@ use omicron_common::{ use serde_json::json; use sled_agent_client::types::{ BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig, - EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV2, - RackNetworkConfigV2, RouteConfig as SledRouteConfig, UplinkAddressConfig, + EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, + LldpAdminStatus, LldpPortConfig, PortConfigV2, RackNetworkConfigV2, + RouteConfig as SledRouteConfig, UplinkAddressConfig, 
}; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -564,7 +565,7 @@ impl BackgroundTask for SwitchPortSettingsManager { if !bgp_announce_prefixes.contains_key(&bgp_config.bgp_announce_set_id) { let announcements = match self .datastore - .bgp_announce_list( + .bgp_announcement_list( opctx, ¶ms::BgpAnnounceSetSelector { name_or_id: bgp_config @@ -977,6 +978,7 @@ impl BackgroundTask for SwitchPortSettingsManager { destination: r.dst.into(), nexthop: r.gw.ip(), vlan_id: r.vid.map(|x| x.0), + local_pref: r.local_pref.map(|x| x.0), }) .collect(), switch: *location, @@ -992,7 +994,23 @@ impl BackgroundTask for SwitchPortSettingsManager { .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), - }; + lldp: info + .link_lldp + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 + .map(|c| LldpPortConfig { + status: match c.enabled { + true => LldpAdminStatus::Enabled, + false=> LldpAdminStatus::Disabled, + }, + port_id: c.link_name.clone(), + port_description: c.link_description.clone(), + chassis_id: c.chassis_id.clone(), + system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + management_addrs:c.management_ip.map(|a| vec![a.ip()]), + }) + } + ; for peer in port_config.bgp_peers.iter_mut() { peer.communities = match self @@ -1411,6 +1429,29 @@ fn uplinks( let PortSettingsChange::Apply(config) = change else { continue; }; + + let lldp = if config.link_lldp.is_empty() { + None + } else { + let x = &config.link_lldp[0]; + Some(LldpPortConfig { + status: if x.enabled { + LldpAdminStatus::Enabled + } else { + LldpAdminStatus::Disabled + }, + port_id: x.link_name.clone(), + port_description: x.link_description.clone(), + chassis_id: x.chassis_id.clone(), + system_name: x.system_name.clone(), + system_description: x.system_description.clone(), + management_addrs: x.management_ip.map(|a| { + let ip: oxnet::IpNet = a.into(); + vec![ip.addr()] + }), + }) + }; + let config = HostPortConfig { port: 
port.port_name.clone(), addrs: config @@ -1421,6 +1462,7 @@ fn uplinks( vlan_id: a.vlan_id.map(|v| v.into()), }) .collect(), + lldp, }; match uplinks.entry(*location) { @@ -1455,7 +1497,8 @@ fn build_sled_agent_clients( sled_agent_clients } -type SwitchStaticRoutes = HashSet<(Ipv4Addr, Prefix4, Option)>; +type SwitchStaticRoutes = + HashSet<(Ipv4Addr, Prefix4, Option, Option)>; fn static_routes_to_del( current_static_routes: HashMap, @@ -1471,10 +1514,11 @@ fn static_routes_to_del( // if it's on the switch but not desired (in our db), it should be removed let stale_routes = routes_on_switch .difference(routes_wanted) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1488,10 +1532,11 @@ fn static_routes_to_del( // if no desired routes are present, all routes on this switch should be deleted let stale_routes = routes_on_switch .iter() - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1538,10 +1583,11 @@ fn static_routes_to_add( }; let missing_routes = routes_wanted .difference(routes_on_switch) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1590,7 +1636,12 @@ fn static_routes_in_db( } IpAddr::V6(_) => continue, }; - routes.insert((nexthop, prefix, route.vid.map(|x| x.0))); + routes.insert(( + nexthop, + prefix, + route.vid.map(|x| x.0), + route.local_pref.map(|x| x.0), + )); } match routes_from_db.entry(*location) { @@ -1768,44 +1819,46 @@ async fn static_routes_on_switch<'a>( let mut routes_on_switch = HashMap::new(); for (location, client) in mgd_clients { - let 
static_routes: SwitchStaticRoutes = - match client.static_list_v4_routes().await { - Ok(routes) => { - let mut flattened = HashSet::new(); - for (destination, paths) in routes.iter() { - let Ok(dst) = destination.parse() else { - error!( - log, - "failed to parse static route destination: \ + let static_routes: SwitchStaticRoutes = match client + .static_list_v4_routes() + .await + { + Ok(routes) => { + let mut flattened = HashSet::new(); + for (destination, paths) in routes.iter() { + let Ok(dst) = destination.parse() else { + error!( + log, + "failed to parse static route destination: \ {destination}" - ); - continue; + ); + continue; + }; + for p in paths.iter() { + let nh = match p.nexthop { + IpAddr::V4(addr) => addr, + IpAddr::V6(addr) => { + error!( + log, + "ipv6 nexthops not supported: {addr}" + ); + continue; + } }; - for p in paths.iter() { - let nh = match p.nexthop { - IpAddr::V4(addr) => addr, - IpAddr::V6(addr) => { - error!( - log, - "ipv6 nexthops not supported: {addr}" - ); - continue; - } - }; - flattened.insert((nh, dst, p.vlan_id)); - } + flattened.insert((nh, dst, p.vlan_id, p.local_pref)); } - flattened } - Err(_) => { - error!( - &log, - "unable to retrieve routes from switch"; - "switch_location" => ?location, - ); - continue; - } - }; + flattened + } + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; routes_on_switch.insert(*location, static_routes); } routes_on_switch diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index 118011500a..31a0faa663 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -9,19 +9,20 @@ use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use nexus_db_queries::context::OpContext; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - self, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, CreateResult, - DeleteResult, ListResultVec, LookupResult, 
NameOrId, SwitchBgpHistory, + self, BgpExported, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, + CreateResult, DeleteResult, ListResultVec, LookupResult, NameOrId, + SwitchBgpHistory, }; use std::net::IpAddr; impl super::Nexus { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: &OpContext, config: ¶ms::BgpConfigCreate, ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - let result = self.db_datastore.bgp_config_set(opctx, config).await?; + let result = self.db_datastore.bgp_config_create(opctx, config).await?; Ok(result) } @@ -68,13 +69,13 @@ impl super::Nexus { Ok(result) } - pub async fn bgp_announce_list( + pub async fn bgp_announce_set_list( &self, opctx: &OpContext, - sel: ¶ms::BgpAnnounceSetSelector, - ) -> ListResultVec { + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - self.db_datastore.bgp_announce_list(opctx, sel).await + self.db_datastore.bgp_announce_set_list(opctx, pagparams).await } pub async fn bgp_delete_announce_set( @@ -88,6 +89,15 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_announcement_list( + &self, + opctx: &OpContext, + sel: ¶ms::BgpAnnounceSetSelector, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + self.db_datastore.bgp_announcement_list(opctx, sel).await + } + pub async fn bgp_peer_status( &self, opctx: &OpContext, @@ -145,6 +155,74 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_exported( + &self, + opctx: &OpContext, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let mut result = BgpExported::default(); + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? 
{ + let router_info = match client.read_routers().await { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get routers from {switch}: {e}" + ); + continue; + } + }; + for r in &router_info { + let asn = r.asn; + + let exported = match client + .get_exported(&mg_admin_client::types::AsnSelector { asn }) + .await + { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get exports for asn {asn} from {switch}: {e}" + ); + continue; + } + }; + for (addr, exports) in exported { + let mut xps = Vec::new(); + for ex in exports.iter() { + let net = match ex { + mg_admin_client::types::Prefix::V4(v4) => { + oxnet::Ipv4Net::new_unchecked( + v4.value, v4.length, + ) + } + mg_admin_client::types::Prefix::V6(v6) => { + let v6 = oxnet::IpNet::V6( + oxnet::Ipv6Net::new_unchecked( + v6.value, v6.length, + ), + ); + warn!( + self.log, + "{v6}: ipv6 exports not supported yet" + ); + continue; + } + }; + xps.push(net); + } + result.exports.insert(addr.to_string(), xps); + } + } + } + Ok(result) + } + pub async fn bgp_message_history( &self, opctx: &OpContext, diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index b8fca26c14..86de328355 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -150,7 +150,7 @@ impl super::Nexus { } /// Call out to Crucible agent and perform region creation. Optionally, - /// supply a read-only source to invoke a clone. + /// supply a read-only source's repair address to invoke a clone. 
pub async fn ensure_region_in_dataset( &self, log: &Logger, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 1591690efa..640c14190e 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -17,9 +17,6 @@ use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -28,6 +25,9 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; +use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use slog_error_chain::InlineErrorChain; use uuid::Uuid; diff --git a/nexus/src/app/external_dns.rs b/nexus/src/app/external_dns.rs index c6a8d833c2..4732146ce2 100644 --- a/nexus/src/app/external_dns.rs +++ b/nexus/src/app/external_dns.rs @@ -5,15 +5,15 @@ use std::net::IpAddr; use std::net::SocketAddr; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::DNS_PORT; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; -/// Wrapper around trust-dns-resolver to 
provide name resolution +/// Wrapper around hickory-resolver to provide name resolution /// using a given set of DNS servers for use with reqwest. pub struct Resolver(TokioAsyncResolver); @@ -26,18 +26,17 @@ impl Resolver { socket_addr: SocketAddr::new(*addr, DNS_PORT), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; // Do as many requests in parallel as we have configured servers opts.num_concurrent_reqs = dns_servers.len(); - Resolver( - TokioAsyncResolver::tokio(rc, opts) - .expect("creating resovler shouldn't fail"), - ) + Resolver(TokioAsyncResolver::tokio(rc, opts)) } } @@ -48,7 +47,7 @@ impl reqwest::dns::Resolve for Resolver { let ips = resolver.lookup_ip(name.as_str()).await?; let addrs = ips .into_iter() - // trust-dns-resolver returns `IpAddr`s but reqwest wants + // hickory-resolver returns `IpAddr`s but reqwest wants // `SocketAddr`s (useful if you have a custom resolver that // returns a scoped IPv6 address). The port provided here // is ignored in favour of the scheme default (http/80, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 344d2688f7..3106ab9f2a 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -191,6 +191,14 @@ enum InstanceStartDisposition { AlreadyStarted, } +/// The set of API resources needed when ensuring that an instance is registered +/// on a sled. 
+pub(crate) struct InstanceEnsureRegisteredApiResources { + pub(crate) authz_silo: nexus_auth::authz::Silo, + pub(crate) authz_project: nexus_auth::authz::Project, + pub(crate) authz_instance: nexus_auth::authz::Instance, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -473,14 +481,16 @@ impl super::Nexus { Ok(()) } - pub(crate) async fn project_instance_migrate( + pub(crate) async fn instance_migrate( self: &Arc, opctx: &OpContext, - instance_lookup: &lookup::Instance<'_>, - params: params::InstanceMigrate, + id: InstanceUuid, + params: nexus_types::internal_api::params::InstanceMigrateRequest, ) -> UpdateResult { - let (.., authz_instance) = - instance_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(authz::Action::Modify) + .await?; let state = self .db_datastore @@ -867,7 +877,11 @@ impl super::Nexus { pub(crate) async fn instance_ensure_registered( &self, opctx: &OpContext, - authz_instance: &authz::Instance, + InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }: &InstanceEnsureRegisteredApiResources, db_instance: &db::model::Instance, propolis_id: &PropolisUuid, initial_vmm: &db::model::Vmm, @@ -1067,23 +1081,9 @@ impl super::Nexus { let ssh_keys: Vec = ssh_keys.map(|ssh_key| ssh_key.public_key).collect(); - // Construct instance metadata used to track its statistics. - // - // This requires another fetch on the silo and project, to extract their - // IDs. - let (.., db_project) = self - .project_lookup( - opctx, - params::ProjectSelector { - project: NameOrId::Id(db_instance.project_id), - }, - )? 
- .fetch() - .await?; - let (_, db_silo) = self.current_silo_lookup(opctx)?.fetch().await?; let metadata = sled_agent_client::types::InstanceMetadata { - silo_id: db_silo.id(), - project_id: db_project.id(), + silo_id: authz_silo.id(), + project_id: authz_project.id(), }; // Ask the sled agent to begin the state change. Then update the diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 3728a3bdc1..3a6e7e27be 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -14,7 +14,7 @@ use nexus_db_queries::{ }; use omicron_common::api::external::{Error, InternalContext}; use oximeter_db::{ - oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, + Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, }; use std::num::NonZeroU32; @@ -138,7 +138,7 @@ impl super::Nexus { &self, opctx: &OpContext, query: impl AsRef, - ) -> Result, Error> { + ) -> Result, Error> { // Must be a fleet user to list timeseries schema. // // TODO-security: We need to figure out how to implement proper security diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 60ed611bd7..5cfacd0c9c 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -7,6 +7,7 @@ use self::external_endpoints::NexusCertResolver; use self::saga::SagaExecutor; use crate::app::background::BackgroundTasksData; +use crate::app::background::SagaRecoveryHelpers; use crate::app::oximeter::LazyTimeseriesClient; use crate::populate::populate_start; use crate::populate::PopulateArgs; @@ -19,6 +20,7 @@ use nexus_config::NexusConfig; use nexus_config::RegionAllocationStrategy; use nexus_config::Tunables; use nexus_config::UpdatesConfig; +use nexus_db_model::AllSchemaVersions; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -35,6 +37,7 @@ use std::net::SocketAddrV6; use std::net::{IpAddr, Ipv6Addr}; use std::sync::Arc; use std::sync::OnceLock; +use tokio::sync::mpsc; use uuid::Uuid; // The implementation of 
Nexus is large, and split into a number of submodules @@ -89,12 +92,9 @@ pub(crate) mod sagas; // TODO: When referring to API types, we should try to include // the prefix unless it is unambiguous. -pub(crate) use nexus_db_queries::db::queries::disk::MAX_DISKS_PER_INSTANCE; - -use crate::app::background::SagaRecoveryHelpers; -use nexus_db_model::AllSchemaVersions; pub(crate) use nexus_db_model::MAX_NICS_PER_INSTANCE; -use tokio::sync::mpsc; +pub(crate) use nexus_db_queries::db::queries::disk::MAX_DISKS_PER_INSTANCE; +use sagas::demo::CompletingDemoSagas; // XXX: Might want to recast as max *floating* IPs, we have at most one // ephemeral (so bounded in saga by design). @@ -204,6 +204,9 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, + + /// List of demo sagas awaiting a request to complete them + demo_sagas: Arc>, } impl Nexus { @@ -480,6 +483,9 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), + demo_sagas: Arc::new(std::sync::Mutex::new( + CompletingDemoSagas::new(), + )), }; // TODO-cleanup all the extra Arcs here seems wrong @@ -955,6 +961,17 @@ impl Nexus { } Ok(clients.into_iter().collect::>()) } + + pub(crate) fn demo_sagas( + &self, + ) -> Result, Error> { + self.demo_sagas.lock().map_err(|error| { + Error::internal_error(&format!( + "failed to acquire demo_sagas lock: {:#}", + error + )) + }) + } } /// For unimplemented endpoints, indicates whether the resource identified diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index b289e871eb..835541c2ea 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::BgpAnnounceSetCreate; use nexus_types::external_api::params::BgpAnnouncementCreate; use nexus_types::external_api::params::BgpConfigCreate; use nexus_types::external_api::params::LinkConfigCreate; -use nexus_types::external_api::params::LldpServiceConfigCreate; +use 
nexus_types::external_api::params::LldpLinkConfigCreate; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; @@ -61,6 +61,7 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use oxnet::IpNet; @@ -241,22 +242,44 @@ impl super::Nexus { .internal_context( "fetching cockroachdb settings for rack initialization", )?; - self.datastore() - .cockroachdb_setting_set_string( - opctx, - cockroachdb_settings.state_fingerprint.clone(), - "cluster.preserve_downgrade_option", - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(), - ) - .await - .internal_context( - "setting `cluster.preserve_downgrade_option` \ - for rack initialization", - )?; + blueprint.cockroachdb_setting_preserve_downgrade = + if cockroachdb_settings.preserve_downgrade.is_empty() { + // Set the option to the current policy in both the database and + // the blueprint. + self.datastore() + .cockroachdb_setting_set_string( + opctx, + cockroachdb_settings.state_fingerprint.clone(), + "cluster.preserve_downgrade_option", + CockroachDbClusterVersion::NEWLY_INITIALIZED + .to_string(), + ) + .await + .internal_context( + "setting `cluster.preserve_downgrade_option` \ + for rack initialization", + )?; + CockroachDbClusterVersion::NEWLY_INITIALIZED + } else { + // `cluster.preserve_downgrade_option` is set, so fill in the + // blueprint with the current value. This branch should never + // be hit during normal rack initialization; it's here for + // eventual test cases where `cluster.preserve_downgrade_option` + // is set by a test harness prior to rack initialization. 
+ CockroachDbClusterVersion::from_str( + &cockroachdb_settings.preserve_downgrade, + ) + .map_err(|_| { + Error::internal_error(&format!( + "database has `cluster.preserve_downgrade_option` \ + set to invalid version {}", + cockroachdb_settings.preserve_downgrade + )) + })? + } + .into(); blueprint.cockroachdb_fingerprint = cockroachdb_settings.state_fingerprint; - blueprint.cockroachdb_setting_preserve_downgrade = - CockroachDbClusterVersion::NEWLY_INITIALIZED.into(); // Administrators of the Recovery Silo are automatically made // administrators of the Fleet. @@ -487,7 +510,7 @@ impl super::Nexus { match self .db_datastore - .bgp_config_set( + .bgp_config_create( &opctx, &BgpConfigCreate { identity: IdentityMetadataCreateParams { @@ -570,6 +593,7 @@ impl super::Nexus { dst: r.destination, gw: r.nexthop, vid: r.vlan_id, + local_pref: r.local_pref, }) .collect(); @@ -608,15 +632,30 @@ impl super::Nexus { .bgp_peers .insert("phy0".to_string(), BgpPeerConfig { peers }); - let link = LinkConfigCreate { - mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 - lldp: LldpServiceConfigCreate { + let lldp = match &uplink_config.lldp { + None => LldpLinkConfigCreate { enabled: false, - lldp_config: None, + ..Default::default() }, + Some(l) => LldpLinkConfigCreate { + enabled: l.status == LldpAdminStatus::Enabled, + link_name: l.port_id.clone(), + link_description: l.port_description.clone(), + chassis_id: l.chassis_id.clone(), + system_name: l.system_name.clone(), + system_description: l.system_description.clone(), + management_ip: match &l.management_addrs { + Some(a) if !a.is_empty() => Some(a[0]), + _ => None, + }, + }, + }; + let link = LinkConfigCreate { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 fec: uplink_config.uplink_port_fec.into(), speed: uplink_config.uplink_port_speed.into(), autoneg: uplink_config.autoneg, + lldp, }; port_settings_params.links.insert("phy".to_string(), link); diff --git a/nexus/src/app/saga.rs 
b/nexus/src/app/saga.rs index fcdbb0db59..975df7fc3b 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -58,12 +58,14 @@ use futures::FutureExt; use futures::StreamExt; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; +use nexus_types::internal_api::views::DemoSaga; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResult; use omicron_common::api::external::LookupResult; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; +use omicron_uuid_kinds::DemoSagaUuid; use std::sync::Arc; use std::sync::OnceLock; use steno::SagaDag; @@ -296,7 +298,6 @@ pub(crate) struct RunnableSaga { } impl RunnableSaga { - #[cfg(test)] pub(crate) fn id(&self) -> SagaId { self.id } @@ -457,4 +458,29 @@ impl super::Nexus { pub(crate) fn sec(&self) -> &steno::SecClient { &self.sagas.sec_client } + + pub(crate) async fn saga_demo_create(&self) -> Result { + use crate::app::sagas::demo; + let demo_saga_id = DemoSagaUuid::new_v4(); + let saga_params = demo::Params { id: demo_saga_id }; + let saga_dag = create_saga_dag::(saga_params)?; + let runnable_saga = self.sagas.saga_prepare(saga_dag).await?; + let saga_id = runnable_saga.id().0; + // We don't need the handle that runnable_saga.start() returns because + // we're not going to wait for the saga to finish here. 
+ let _ = runnable_saga.start().await?; + + let mut demo_sagas = self.demo_sagas()?; + demo_sagas.preregister(demo_saga_id); + + Ok(DemoSaga { saga_id, demo_saga_id }) + } + + pub(crate) fn saga_demo_complete( + &self, + demo_saga_id: DemoSagaUuid, + ) -> Result<(), Error> { + let mut demo_sagas = self.demo_sagas()?; + demo_sagas.complete(demo_saga_id) + } } diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 592463f5bb..d37370506c 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -15,6 +15,7 @@ use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::Error; use omicron_common::retry_until_known_result; +use slog::Logger; use std::net::SocketAddrV6; // Common Pantry operations @@ -107,3 +108,33 @@ pub(crate) async fn call_pantry_detach_for_disk( Ok(()) } + +pub(crate) fn find_only_new_region( + log: &Logger, + existing_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, + new_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, +) -> Option<(db::model::Dataset, db::model::Region)> { + // Only filter on whether or not a Region is in the existing list! Datasets + // can change values (like size_used) if this saga interleaves with other + // saga runs of the same type. 
+ let mut dataset_and_region: Vec<(db::model::Dataset, db::model::Region)> = + new_datasets_and_regions + .into_iter() + .filter(|(_, r)| { + !existing_datasets_and_regions.iter().any(|(_, er)| er == r) + }) + .collect(); + + if dataset_and_region.len() != 1 { + error!( + log, + "find_only_new_region saw dataset_and_region len {}: {:?}", + dataset_and_region.len(), + dataset_and_region, + ); + + None + } else { + dataset_and_region.pop() + } +} diff --git a/nexus/src/app/sagas/demo.rs b/nexus/src/app/sagas/demo.rs new file mode 100644 index 0000000000..d76a48688d --- /dev/null +++ b/nexus/src/app/sagas/demo.rs @@ -0,0 +1,211 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Demo saga, used for testing and interactive debugging +//! +//! The "Demo" saga exists so that developers and automated tests can create a +//! saga that will not complete until they take some action to complete it. The +//! saga just waits until it gets the message that it should finish. Users +//! create demo sagas and complete them using requests to the internal API. +//! +//! The implementation is entirely in-memory, which means you have to send the +//! completion message to the Nexus that's running the saga. However, it does +//! work across Nexus restarts, so this can be used to exercise the saga +//! recovery path. +//! +//! It's tempting to build this only for development and not official releases, +//! but that'd be more work, there's little downside to always including it, and +//! it's conceivable that it'd be useful for production systems, too. 
+ +use super::NexusActionContext; +use super::{ActionRegistry, NexusSaga, SagaInitError}; +use crate::app::sagas::declare_saga_actions; +use anyhow::Context; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::DemoSagaUuid; +use serde::Deserialize; +use serde::Serialize; +use slog::info; +use std::collections::BTreeMap; +use std::future::Future; +use std::sync::Arc; +use steno::ActionError; +use tokio::sync::Semaphore; + +/// Rendezvous point for demo sagas +/// +/// This is where: +/// +/// - demo sagas wait for a completion message +/// - completion messages are recorded for demo sagas that haven't started +/// waiting yet +/// +/// Nexus maintains one of these structures at the top level. +pub struct CompletingDemoSagas { + sagas: BTreeMap>, +} + +impl CompletingDemoSagas { + pub fn new() -> CompletingDemoSagas { + CompletingDemoSagas { sagas: BTreeMap::new() } + } + + pub fn preregister(&mut self, id: DemoSagaUuid) { + assert!(self.sagas.insert(id, Arc::new(Semaphore::new(0))).is_none()); + } + + pub fn subscribe( + &mut self, + id: DemoSagaUuid, + ) -> impl Future> { + let sem = + self.sagas.entry(id).or_insert_with(|| Arc::new(Semaphore::new(0))); + let sem_clone = sem.clone(); + async move { + sem_clone + .acquire() + .await + // We don't need the Semaphore permit once we've acquired it. + .map(|_| ()) + .context("acquiring demo saga semaphore") + } + } + + pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { + let sem = self.sagas.get_mut(&id).ok_or_else(|| { + Error::non_resourcetype_not_found(format!( + "demo saga with demo saga id {:?}", + id + )) + })?; + sem.add_permits(1); + Ok(()) + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub id: DemoSagaUuid, +} + +declare_saga_actions! 
{ + demo; + DEMO_WAIT -> "demo_wait" { + + demo_wait + } +} + +#[derive(Debug)] +pub(crate) struct SagaDemo; +impl NexusSaga for SagaDemo { + const NAME: &'static str = "demo"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + demo_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(demo_wait_action()); + Ok(builder.build()?) + } +} + +async fn demo_wait(sagactx: NexusActionContext) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let demo_id = sagactx.saga_params::()?.id; + let log = osagactx.log(); + info!(log, "demo saga: begin wait"; "id" => %demo_id); + let rx = { + let mut demo_sagas = osagactx + .nexus() + .demo_sagas() + .map_err(ActionError::action_failed)?; + demo_sagas.subscribe(demo_id) + }; + match rx.await { + Ok(_) => { + info!(log, "demo saga: completing"; "id" => %demo_id); + Ok(()) + } + Err(error) => { + warn!(log, "demo saga: waiting failed (Nexus shutting down?)"; + "id" => %demo_id, + "error" => #?error, + ); + Err(ActionError::action_failed(Error::internal_error(&format!( + "demo saga wait failed: {:#}", + error + )))) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use assert_matches::assert_matches; + + #[tokio::test] + async fn test_demo_saga_rendezvous() { + let mut hub = CompletingDemoSagas::new(); + + // The most straightforward sequence is: + // - create (preregister) demo saga + // - demo saga starts and waits for completion (subscribe) + // - complete demo saga + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo 
saga: {demo_saga_id} done"); + + // It's also possible that the completion request arrives before the + // saga started waiting. In that case, the sequence is: + // + // - create (preregister) demo saga + // - complete demo saga + // - demo saga starts and waits for completion (subscribe) + // + // This should work, too, with no errors. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible to have no preregistration at all. This happens + // if the demo saga was recovered. That's fine, too, but then it will + // only work if the completion arrives after the saga starts waiting. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // If there's no preregistration and we get a completion request, then + // that request should fail. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let error = hub.complete(demo_saga_id).unwrap_err(); + assert_matches!(error, Error::NotFound { .. 
}); + println!("demo saga: {demo_saga_id} complete error: {:#}", error); + } +} diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index b18ac3109f..a14054cf66 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -346,6 +346,7 @@ pub(crate) mod test { }; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::SimpleIdentity; + use sled_agent_types::instance::InstanceExternalIpBody; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -437,14 +438,12 @@ pub(crate) mod test { // Sled agent has a record of the new external IPs. let mut eips = sled_agent.external_ips.lock().await; let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); - assert!(my_eips.iter().any(|v| matches!( - v, - omicron_sled_agent::params::InstanceExternalIpBody::Floating(_) - ))); - assert!(my_eips.iter().any(|v| matches!( - v, - omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_) - ))); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); + assert!(my_eips + .iter() + .any(|v| matches!(v, InstanceExternalIpBody::Ephemeral(_)))); // DB has records for SNAT plus the new IPs. 
let db_eips = datastore diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index bb4bf282e4..19bef2f046 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,15 +4,15 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::instance::{ - InstanceRegisterReason, InstanceStateChangeError, - InstanceStateChangeRequest, + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, InstanceStateChangeRequest, }; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, }; -use crate::external_api::params; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use serde::Deserialize; use serde::Serialize; @@ -30,7 +30,7 @@ pub struct Params { pub serialized_authn: authn::saga::Serialized, pub instance: db::model::Instance, pub src_vmm: db::model::Vmm, - pub migrate_params: params::InstanceMigrate, + pub migrate_params: InstanceMigrateRequest, } // The migration saga is similar to the instance start saga: get a destination @@ -401,11 +401,12 @@ async fn sim_ensure_destination_propolis( "dst_propolis_id" => %vmm.id, "dst_vmm_state" => ?vmm); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; let src_propolis_id = PropolisUuid::from_untyped_uuid(params.src_vmm.id); let dst_propolis_id = PropolisUuid::from_untyped_uuid(vmm.id); @@ -413,7 +414,11 @@ 
async fn sim_ensure_destination_propolis( .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &dst_propolis_id, &vmm, @@ -565,6 +570,7 @@ async fn sim_instance_migrate( mod tests { use super::*; use crate::app::sagas::test_helpers; + use crate::external_api::params; use dropshot::test_util::ClientTestContext; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_project, object_create, @@ -637,7 +643,7 @@ mod tests { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; @@ -706,7 +712,7 @@ mod tests { ), instance: old_instance.clone(), src_vmm: old_vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 9e4e010eea..55fc312ae7 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,8 +10,10 @@ use super::{ instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, }; -use crate::app::instance::InstanceRegisterReason; -use crate::app::instance::InstanceStateChangeError; +use crate::app::instance::{ + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, +}; use crate::app::sagas::declare_saga_actions; use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; @@ -502,17 +504,22 @@ async fn sis_ensure_registered( "instance_id" => %instance_id, "sled_id" => %sled_id); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(instance_id) - .lookup_for(authz::Action::Modify) - 
.await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; osagactx .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &propolis_id, &vmm_record, diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 71abe63bbd..5f226480b8 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1403,6 +1403,7 @@ mod test { create_default_ip_pool, create_project, object_create, }; use nexus_test_utils_macros::nexus_test; + use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::internal::nexus::{ MigrationRuntimeState, MigrationState, Migrations, }; @@ -2358,7 +2359,7 @@ mod test { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 0c57a5b2dc..926b983460 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -22,6 +22,7 @@ use steno::SagaType; use thiserror::Error; use uuid::Uuid; +pub mod demo; pub mod disk_create; pub mod disk_delete; pub mod finalize_disk; @@ -38,6 +39,8 @@ pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; pub mod region_replacement_start; +pub mod region_snapshot_replacement_garbage_collect; +pub mod region_snapshot_replacement_start; pub mod snapshot_create; pub mod snapshot_delete; pub mod test_saga; @@ -134,6 +137,7 @@ fn make_action_registry() -> 
ActionRegistry { let mut registry = steno::ActionRegistry::new(); registry.register(Arc::clone(&*ACTION_GENERATE_ID)); + ::register_actions(&mut registry); ::register_actions(&mut registry); ::register_actions(&mut registry); ::register_actions( @@ -188,6 +192,12 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); #[cfg(test)] ::register_actions(&mut registry); diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index c2b886938a..1bc1491468 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -26,12 +26,13 @@ //! ``` //! //! The first thing this saga does is set itself as the "operating saga" for the -//! request, and change the state to "Allocating". Then, it performs the following -//! steps: +//! request, and change the state to "Allocating". Then, it performs the +//! following steps: //! //! 1. Allocate a new region //! -//! 2. For the affected Volume, swap the region being replaced with the new region. +//! 2. For the affected Volume, swap the region being replaced with the new +//! region. //! //! 3. Create a fake volume that can be later deleted with the region being //! replaced. 
@@ -48,6 +49,7 @@ use super::{ ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::sagas::common_storage::find_only_new_region; use crate::app::sagas::declare_saga_actions; use crate::app::RegionAllocationStrategy; use crate::app::{authn, db}; @@ -57,7 +59,6 @@ use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::CrucibleOpts; use sled_agent_client::types::VolumeConstructionRequest; -use slog::Logger; use std::net::SocketAddrV6; use steno::ActionError; use steno::Node; @@ -285,36 +286,6 @@ async fn srrs_alloc_new_region( Ok(datasets_and_regions) } -fn find_only_new_region( - log: &Logger, - existing_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, - new_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, -) -> Option<(db::model::Dataset, db::model::Region)> { - // Only filter on whether or not a Region is in the existing list! Datasets - // can change values (like size_used) if this saga interleaves with other - // saga runs of the same type. 
- let mut dataset_and_region: Vec<(db::model::Dataset, db::model::Region)> = - new_datasets_and_regions - .into_iter() - .filter(|(_, r)| { - !existing_datasets_and_regions.iter().any(|(_, er)| er == r) - }) - .collect(); - - if dataset_and_region.len() != 1 { - error!( - log, - "find_only_new_region saw dataset_and_region len {}: {:?}", - dataset_and_region.len(), - dataset_and_region, - ); - - None - } else { - dataset_and_region.pop() - } -} - async fn srrs_alloc_new_region_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { diff --git a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 0000000000..e3c5143a68 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,326 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Clean up the volume that stashes the target replaced during the region +//! snapshot replacement start saga. After that's done, change the region +//! snapshot replacement state to Running. This saga handles the following +//! region snapshot replacement request state transitions: +//! +//! ```text +//! ReplacementDone <-- +//! | +//! | | +//! v | +//! | +//! DeletingOldVolume -- +//! +//! | +//! v +//! +//! Running +//! ``` +//! +//! See the documentation for the "region snapshot replacement step" saga for +//! the next step(s) in the process. 
+ +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement garbage collect saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the snapshot that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub old_snapshot_volume_id: Uuid, + pub request: db::model::RegionSnapshotReplacement, +} + +// region snapshot replacement garbage collect saga: actions + +declare_saga_actions! { + region_snapshot_replacement_garbage_collect; + SET_SAGA_ID -> "unused_1" { + + rsrgs_set_saga_id + - rsrgs_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_2" { + + rsrgs_update_request_record + } +} + +// region snapshot replacement garbage collect saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementGarbageCollect; +impl NexusSaga for SagaRegionSnapshotReplacementGarbageCollect { + const NAME: &'static str = "region-snapshot-replacement-garbage-collect"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_garbage_collect_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.old_snapshot_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = 
steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement garbage collect saga: action implementations + +async fn rsrgs_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "deleting old volume" + // state to block out other sagas that will be triggered for the same + // request. 
+ osagactx + .datastore() + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrgs_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn rsrgs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Now that the snapshot volume has been deleted, update the replacement + // request record to 'Running'. There is no undo step for this, it should + // succeed idempotently. 
+ + datastore + .set_region_snapshot_replacement_running( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::app::sagas::region_snapshot_replacement_garbage_collect::{ + Params, SagaRegionSnapshotReplacementGarbageCollect, + }; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_snapshot_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_snapshot_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_snapshot_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_snapshot_volume_id, volume_data)) + 
.await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request.clone(), + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Run the saga + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + old_snapshot_volume_id, + request: request.clone(), + }; + + let _output = nexus + .sagas + .saga_execute::(params) + .await + .unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementState::Running + ); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_snapshot_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs new file mode 100644 index 0000000000..941899d862 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -0,0 +1,1134 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! In the same way that read/write regions need to be replaced when a physical +//! disk is expunged, read-only regions need to be replaced too: Volumes are in +//! a similarly degraded state when the read-only Downstairs have gone away, and +//! remain in this degraded state until a new Region replaces the one that is +//! gone. +//! +//! It's this saga's responsibility to start that replacement process. This saga +//! 
handles the following region snapshot replacement request state transitions: +//! +//! ```text +//! Requested <-- +//! | +//! | | +//! v | +//! | +//! Allocating -- +//! +//! | +//! v +//! +//! ReplacementDone +//! ``` +//! +//! The first thing this saga does is set itself as the "operating saga" for the +//! request, and change the state to "Allocating". Then, it performs the +//! following steps: +//! +//! 1. Allocate a new region +//! +//! 2. Create a blank volume that can be later deleted to stash the snapshot +//! being replaced. This is populated in the `volume_replace_snapshot` +//! transaction so that `volume_references` for the corresponding region +//! snapshot remains accurate. +//! +//! 3. For the affected Volume, swap the snapshot being replaced with the new +//! region. +//! +//! 4. Update the region snapshot replacement request by clearing the operating +//! saga id and changing the state to "ReplacementDone". +//! +//! Any unwind will place the state back into Requested. +//! +//! See the documentation for the "region snapshot replacement garbage collect" +//! saga for the next step in the process. 
+ +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::db::datastore::ExistingTarget; +use crate::app::db::datastore::RegionAllocationFor; +use crate::app::db::datastore::RegionAllocationParameters; +use crate::app::db::datastore::ReplacementTarget; +use crate::app::db::datastore::VolumeToDelete; +use crate::app::db::datastore::VolumeWithTarget; +use crate::app::db::lookup::LookupPath; +use crate::app::sagas::common_storage::find_only_new_region; +use crate::app::sagas::declare_saga_actions; +use crate::app::RegionAllocationStrategy; +use crate::app::{authn, db}; +use nexus_types::identity::Asset; +use nexus_types::identity::Resource; +use omicron_common::api::external::Error; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::types::CrucibleOpts; +use sled_agent_client::types::VolumeConstructionRequest; +use std::net::SocketAddrV6; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement start saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionSnapshotReplacement, + pub allocation_strategy: RegionAllocationStrategy, +} + +// region snapshot replacement start saga: actions + +declare_saga_actions! 
{ + region_snapshot_replacement_start; + SET_SAGA_ID -> "unused_1" { + + rsrss_set_saga_id + - rsrss_set_saga_id_undo + } + GET_ALLOC_REGION_PARAMS -> "alloc_region_params" { + + rsrss_get_alloc_region_params + } + ALLOC_NEW_REGION -> "new_datasets_and_regions" { + + rsrss_alloc_new_region + - rsrss_alloc_new_region_undo + } + FIND_NEW_REGION -> "new_dataset_and_region" { + + rsrss_find_new_region + } + NEW_REGION_ENSURE -> "ensured_dataset_and_region" { + + rsrss_new_region_ensure + - rsrss_new_region_ensure_undo + } + GET_OLD_SNAPSHOT_VOLUME_ID -> "old_snapshot_volume_id" { + + rsrss_get_old_snapshot_volume_id + } + CREATE_FAKE_VOLUME -> "unused_2" { + + rsrss_create_fake_volume + - rsrss_create_fake_volume_undo + } + REPLACE_SNAPSHOT_IN_VOLUME -> "unused_3" { + + rsrss_replace_snapshot_in_volume + - rsrss_replace_snapshot_in_volume_undo + } + UPDATE_REQUEST_RECORD -> "unused_4" { + + rsrss_update_request_record + } +} + +// region snapshot replacement start saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStart; +impl NexusSaga for SagaRegionSnapshotReplacementStart { + const NAME: &'static str = "region-snapshot-replacement-start"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_start_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(Node::action( + "new_volume_id", + "GenerateNewVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + builder.append(get_alloc_region_params_action()); + builder.append(alloc_new_region_action()); + builder.append(find_new_region_action()); + builder.append(new_region_ensure_action()); + builder.append(get_old_snapshot_volume_id_action()); + builder.append(create_fake_volume_action()); + 
builder.append(replace_snapshot_in_volume_action()); + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement start saga: action implementations + +async fn rsrss_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "allocating" state to + // block out other sagas that will be triggered for the same request. This + // avoids Nexus allocating a bunch of replacement read-only regions only to + // unwind all but one. + osagactx + .datastore() + .set_region_snapshot_replacement_allocating( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_allocating( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +#[derive(Debug, Deserialize, Serialize)] +struct AllocRegionParams { + block_size: u64, + blocks_per_extent: u64, + extent_count: u64, + current_allocated_regions: Vec<(db::model::Dataset, db::model::Region)>, + snapshot_id: Uuid, + snapshot_volume_id: Uuid, +} + +async fn rsrss_get_alloc_region_params( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Look up the 
existing snapshot + let (.., db_snapshot) = LookupPath::new(&opctx, &osagactx.datastore()) + .snapshot_id(params.request.old_snapshot_id) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + // Find the region to replace + let db_region = osagactx + .datastore() + .get_region(params.request.old_region_id) + .await + .map_err(ActionError::action_failed)?; + + let current_allocated_regions = osagactx + .datastore() + .get_allocated_regions(db_snapshot.volume_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(AllocRegionParams { + block_size: db_region.block_size().to_bytes(), + blocks_per_extent: db_region.blocks_per_extent(), + extent_count: db_region.extent_count(), + current_allocated_regions, + snapshot_id: db_snapshot.id(), + snapshot_volume_id: db_snapshot.volume_id, + }) +} + +async fn rsrss_alloc_new_region( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + // Request an additional region for this snapshot volume. It's important + // _not_ to delete the existing snapshot first, as (if it's still there) + // then the Crucible agent could reuse the allocated port and cause trouble. 
+ let datasets_and_regions = osagactx + .datastore() + .arbitrary_region_allocate( + &opctx, + RegionAllocationFor::SnapshotVolume { + volume_id: alloc_region_params.snapshot_volume_id, + snapshot_id: alloc_region_params.snapshot_id, + }, + RegionAllocationParameters::FromRaw { + block_size: alloc_region_params.block_size, + blocks_per_extent: alloc_region_params.blocks_per_extent, + extent_count: alloc_region_params.extent_count, + }, + ¶ms.allocation_strategy, + alloc_region_params.current_allocated_regions.len() + 1, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(datasets_and_regions) +} + +async fn rsrss_alloc_new_region_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + let maybe_dataset_and_region = find_only_new_region( + log, + alloc_region_params.current_allocated_regions, + sagactx.lookup::>( + "new_datasets_and_regions", + )?, + ); + + // It should be guaranteed that if rsrss_alloc_new_region succeeded then it + // would have bumped the region redundancy, so we should see something here. + // Guard against the case anyway. 
+ if let Some(dataset_and_region) = maybe_dataset_and_region { + let (_, region) = dataset_and_region; + osagactx + .datastore() + .regions_hard_delete(log, vec![region.id()]) + .await?; + } else { + warn!(&log, "maybe_dataset_and_region is None!"); + } + + Ok(()) +} + +async fn rsrss_find_new_region( + sagactx: NexusActionContext, +) -> Result<(db::model::Dataset, db::model::Region), ActionError> { + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + let maybe_dataset_and_region = find_only_new_region( + log, + alloc_region_params.current_allocated_regions, + sagactx.lookup::>( + "new_datasets_and_regions", + )?, + ); + + let Some(dataset_and_region) = maybe_dataset_and_region else { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "expected dataset and region, saw {:?}!", + maybe_dataset_and_region, + ), + ))); + }; + + Ok(dataset_and_region) +} + +async fn rsrss_new_region_ensure( + sagactx: NexusActionContext, +) -> Result< + (nexus_db_model::Dataset, crucible_agent_client::types::Region), + ActionError, +> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + // With a list of datasets and regions to ensure, other sagas need to have a + // separate no-op forward step for the undo action to ensure that the undo + // step occurs in the case that the ensure partially fails. Here this is not + // required, there's only one dataset and region. 
+ let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ))); + }; + + let (new_dataset, new_region) = new_dataset_and_region; + + // Currently, the repair port is set using a fixed offset above the + // downstairs port. Once this goes away, Nexus will require a way to query + // for the repair port! + + let mut source_repair_addr: SocketAddrV6 = + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + return Err(ActionError::action_failed(format!( + "error parsing region_snapshot.snapshot_addr: {e}" + ))); + } + }; + + source_repair_addr.set_port( + source_repair_addr.port() + crucible_common::REPAIR_PORT_OFFSET, + ); + + let ensured_region = osagactx + .nexus() + .ensure_region_in_dataset( + log, + &new_dataset, + &new_region, + Some(source_repair_addr.to_string()), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok((new_dataset, ensured_region)) +} + +async fn rsrss_new_region_ensure_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + warn!(log, "rsrss_new_region_ensure_undo: Deleting crucible regions"); + + let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + osagactx + .nexus() + .delete_crucible_regions(log, vec![new_dataset_and_region]) + .await?; + + Ok(()) +} + +async fn rsrss_get_old_snapshot_volume_id( + sagactx: 
NexusActionContext, +) -> Result { + // Save the snapshot's original volume ID, because we'll be altering it and + // need the original + + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., db_snapshot) = LookupPath::new(&opctx, &osagactx.datastore()) + .snapshot_id(params.request.old_snapshot_id) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + Ok(db_snapshot.volume_id) +} + +async fn rsrss_create_fake_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // Create a fake volume record for the old snapshot target. This will be + // deleted after snapshot replacement has finished. It can be completely + // blank here, it will be replaced by `volume_replace_snapshot`. + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: new_volume_id, + target: vec![], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) + })?; + + let volume = db::model::Volume::new(new_volume_id, volume_data); + + osagactx + .datastore() + .volume_create(volume) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_create_fake_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + // Delete the fake volume. 
+ + let new_volume_id = sagactx.lookup::("new_volume_id")?; + osagactx.datastore().volume_hard_delete(new_volume_id).await?; + + Ok(()) +} + +#[derive(Debug)] +struct ReplaceParams { + old_volume_id: Uuid, + old_snapshot_address: SocketAddrV6, + new_region_address: SocketAddrV6, + new_volume_id: Uuid, +} + +async fn get_replace_params( + sagactx: &NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ))); + }; + + let old_snapshot_address: SocketAddrV6 = + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + return Err(ActionError::action_failed(format!( + "parsing {} as SocketAddrV6 failed: {e}", + region_snapshot.snapshot_addr, + ))); + } + }; + + let (new_dataset, ensured_region) = sagactx.lookup::<( + db::model::Dataset, + crucible_agent_client::types::Region, + )>( + "ensured_dataset_and_region", + )?; + + let Some(new_dataset_address) = new_dataset.address() else { + return Err(ActionError::action_failed(format!( + "dataset {} does not have an address!", + new_dataset.id(), + ))); + }; + + let new_region_address = SocketAddrV6::new( + *new_dataset_address.ip(), + ensured_region.port_number, + 0, + 0, + ); + + let old_volume_id = sagactx.lookup::("old_snapshot_volume_id")?; + + // Return the replacement parameters for the forward action case - the undo + // will swap the existing and replacement target + Ok(ReplaceParams { + old_volume_id, + 
old_snapshot_address, + new_region_address, + new_volume_id, + }) +} + +async fn rsrss_replace_snapshot_in_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + let replacement_params = get_replace_params(&sagactx).await?; + + info!( + log, + "replacing {} with {} in volume {}", + replacement_params.old_snapshot_address, + replacement_params.new_region_address, + replacement_params.old_volume_id, + ); + + // `volume_replace_snapshot` will swap the old snapshot for the new region. + // No repair or reconcilation needs to occur after this. + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(replacement_params.old_volume_id), + ExistingTarget(replacement_params.old_snapshot_address), + ReplacementTarget(replacement_params.new_region_address), + VolumeToDelete(replacement_params.new_volume_id), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_replace_snapshot_in_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // Undo the forward action's volume_replace_snapshot call by swapping the + // existing target and replacement target parameters. + + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + let replacement_params = get_replace_params(&sagactx).await?; + + // Note the old and new are _not_ swapped in this log message! The intention + // is that someone reviewing the logs could search for "replacing UUID with + // UUID in volume UUID" and get (in the case of no re-execution) two + // results. 
+ info!( + log, + "undo: replacing {} with {} in volume {}", + replacement_params.old_snapshot_address, + replacement_params.new_region_address, + replacement_params.old_volume_id, + ); + + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(replacement_params.old_volume_id), + ExistingTarget(replacement_params.new_region_address), + ReplacementTarget(replacement_params.old_snapshot_address), + VolumeToDelete(replacement_params.new_volume_id), + ) + .await?; + + Ok(()) +} + +async fn rsrss_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + let new_region_id = new_dataset_and_region.1.id(); + + let old_region_volume_id = sagactx.lookup::("new_volume_id")?; + + // Now that the region has been ensured and the construction request has + // been updated, update the replacement request record to 'ReplacementDone' + // and clear the operating saga id. There is no undo step for this, it + // should succeed idempotently. 
+ datastore + .set_region_snapshot_replacement_replacement_done( + &opctx, + params.request.id, + saga_id, + new_region_id, + old_region_volume_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::{ + app::db::lookup::LookupPath, app::db::DataStore, + app::saga::create_saga_dag, + app::sagas::region_snapshot_replacement_start::*, + app::sagas::test_helpers::test_opctx, app::RegionAllocationStrategy, + }; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::create_disk; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils::resource_helpers::create_snapshot; + use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils_macros::nexus_test; + use nexus_types::external_api::views; + use nexus_types::identity::Asset; + use sled_agent_client::types::VolumeConstructionRequest; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const DISK_NAME: &str = "my-disk"; + const SNAPSHOT_NAME: &str = "my-snap"; + const PROJECT_NAME: &str = "springfield-squidport"; + + async fn prepare_for_test( + cptestctx: &ControlPlaneTestContext, + ) -> PrepareResult { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + assert_eq!(region_allocations(&datastore).await, 0); + + let mut disk_test = DiskTest::new(cptestctx).await; + disk_test.add_zpool_with_dataset(cptestctx.first_sled()).await; + + assert_eq!(region_allocations(&datastore).await, 0); + + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + assert_eq!(region_allocations(&datastore).await, 0); + + // Create a disk + let disk = 
create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + assert_eq!(region_allocations(&datastore).await, 3); + + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + // Create a snapshot + let snapshot = + create_snapshot(&client, PROJECT_NAME, DISK_NAME, SNAPSHOT_NAME) + .await; + + assert_eq!(region_allocations(&datastore).await, 6); + + let snapshot_id = snapshot.identity.id; + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_id) + .fetch() + .await + .unwrap_or_else(|_| { + panic!("test snapshot {:?} should exist", snapshot_id) + }); + + PrepareResult { db_disk, snapshot, db_snapshot } + } + + struct PrepareResult { + db_disk: nexus_db_model::Disk, + snapshot: views::Snapshot, + db_snapshot: nexus_db_model::Snapshot, + } + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + // Assert disk has three allocated regions + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + // Assert the snapshot has zero allocated regions + let snapshot_id = snapshot.identity.id; + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Replace one of the snapshot's targets + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + 
.unwrap(); + + // Manually insert the region snapshot replacement request + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Run the region snapshot replacement start saga + let dag = + create_saga_dag::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .unwrap(); + + let runnable_saga = nexus.sagas.saga_prepare(dag).await.unwrap(); + + // Actually run the saga + runnable_saga.run_to_completion().await.unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementState::ReplacementDone + ); + assert!(result.new_region_id.is_some()); + assert!(result.operating_saga_id.is_none()); + + // Validate number of regions for disk didn't change + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + // Validate that the snapshot now has one allocated region + let snapshot_allocated_datasets_and_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_datasets_and_regions.len(), 1); + + let (_, snapshot_allocated_region) = + &snapshot_allocated_datasets_and_regions[0]; + + // Validate that the snapshot's volume contains this newly allocated + // region + + let new_region_addr = datastore + .region_addr(snapshot_allocated_region.id()) + .await + .unwrap() + .unwrap(); + + let volumes = datastore + .find_volumes_referencing_socket_addr( + &opctx, + new_region_addr.into(), + ) + .await + .unwrap(); + + assert_eq!(volumes.len(), 1); + assert_eq!(volumes[0].id(), db_snapshot.volume_id); + } + 
+ fn new_test_params( + opctx: &OpContext, + request: &RegionSnapshotReplacement, + ) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + } + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + request: &RegionSnapshotReplacement, + affected_volume_original: &Volume, + ) { + let datastore = cptestctx.server.server_context().nexus.datastore(); + + crate::app::sagas::test_helpers::assert_no_failed_undo_steps( + &cptestctx.logctx.log, + datastore, + ) + .await; + + // For these tests, six provisioned regions exist: three for the + // original disk, and three for the (currently unused) snapshot + // destination volume + assert_eq!(region_allocations(&datastore).await, 6); + assert_region_snapshot_replacement_request_untouched( + cptestctx, &datastore, &request, + ) + .await; + assert_volume_untouched(&datastore, &affected_volume_original).await; + } + + async fn region_allocations(datastore: &DataStore) -> usize { + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncRunQueryDsl; + use async_bb8_diesel::AsyncSimpleConnection; + use diesel::QueryDsl; + use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; + use nexus_db_queries::db::schema::region::dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + conn.transaction_async(|conn| async move { + // Selecting all regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); + + dsl::region + .count() + .get_result_async(&conn) + .await + .map(|x: i64| x as usize) + }) + .await + .unwrap() + } + + async fn assert_region_snapshot_replacement_request_untouched( + cptestctx: &ControlPlaneTestContext, + datastore: &DataStore, + request: &RegionSnapshotReplacement, + ) { + let opctx = test_opctx(cptestctx); + let db_request = datastore + 
.get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!(db_request.new_region_id, None); + assert_eq!( + db_request.replacement_state, + RegionSnapshotReplacementState::Requested + ); + assert_eq!(db_request.operating_saga_id, None); + } + + async fn assert_volume_untouched( + datastore: &DataStore, + affected_volume_original: &Volume, + ) { + let affected_volume = datastore + .volume_get(affected_volume_original.id()) + .await + .unwrap() + .unwrap(); + + let actual: VolumeConstructionRequest = + serde_json::from_str(&affected_volume.data()).unwrap(); + + let expected: VolumeConstructionRequest = + serde_json::from_str(&affected_volume_original.data()).unwrap(); + + assert_eq!(actual, expected); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot } = + prepare_for_test(cptestctx).await; + + let log = &cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + verify_clean_slate(&cptestctx, &request, &affected_volume_original) + .await; + + 
crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::< + SagaRegionSnapshotReplacementStart, + _, + _ + >( + nexus, + || Box::pin(async { new_test_params(&opctx, &request) }), + || Box::pin(async { + verify_clean_slate( + &cptestctx, + &request, + &affected_volume_original, + ).await; + }), + log + ).await; + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot: _ } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Build the saga DAG with the provided test parameters + let params = new_test_params(&opctx, &request); + let dag = create_saga_dag::(params) + .unwrap(); + crate::app::sagas::test_helpers::actions_succeed_idempotently( + nexus, dag, + ) + .await; + } +} diff --git a/nexus/src/app/vpc.rs b/nexus/src/app/vpc.rs index b3605945d3..56a7777f0e 100644 --- a/nexus/src/app/vpc.rs +++ b/nexus/src/app/vpc.rs @@ -260,7 +260,8 @@ impl super::Nexus { opctx: &OpContext, vpc: &db::model::Vpc, rules: &[db::model::VpcFirewallRule], - ) -> Result, Error> { + ) -> Result, Error> + { nexus_networking::resolve_firewall_rules_for_sled_agent( &self.db_datastore, opctx, diff --git a/nexus/src/external_api/console_api.rs 
b/nexus/src/external_api/console_api.rs index fb0a47bbea..2169b631a7 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -35,15 +35,13 @@ use nexus_db_model::AuthenticationMode; use nexus_db_queries::authn::silos::IdentityProviderType; use nexus_db_queries::context::OpContext; use nexus_db_queries::{ - authn::external::{ - cookies::Cookies, - session_cookie::{ - clear_session_cookie_header_value, session_cookie_header_value, - SessionStore, SESSION_COOKIE_COOKIE_NAME, - }, + authn::external::session_cookie::{ + clear_session_cookie_header_value, session_cookie_header_value, + SessionStore, SESSION_COOKIE_COOKIE_NAME, }, db::identity::Asset, }; +use nexus_types::authn::cookies::Cookies; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a87bdd834d..e11256f06e 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -41,6 +41,7 @@ use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; use nexus_types::external_api::shared::{BfdStatus, ProbeInfo}; +use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; @@ -55,9 +56,11 @@ use omicron_common::api::external::http_pagination::ScanParams; use omicron_common::api::external::AddressLot; use omicron_common::api::external::AddressLotBlock; use omicron_common::api::external::AddressLotCreateResponse; +use omicron_common::api::external::AggregateBgpMessageHistory; use omicron_common::api::external::BgpAnnounceSet; use 
omicron_common::api::external::BgpAnnouncement; use omicron_common::api::external::BgpConfig; +use omicron_common::api::external::BgpExported; use omicron_common::api::external::BgpImportedRouteIpv4; use omicron_common::api::external::BgpPeerStatus; use omicron_common::api::external::DataPageParams; @@ -78,9 +81,6 @@ use omicron_common::api::external::TufRepoGetResponse; use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; -use omicron_common::api::external::{ - http_pagination::data_page_params_for, AggregateBgpMessageHistory, -}; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; use parse_display::Display; @@ -167,7 +167,6 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(instance_view)?; api.register(instance_create)?; api.register(instance_delete)?; - api.register(instance_migrate)?; api.register(instance_reboot)?; api.register(instance_start)?; api.register(instance_stop)?; @@ -278,6 +277,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_config_create)?; api.register(networking_bgp_config_list)?; api.register(networking_bgp_status)?; + api.register(networking_bgp_exported)?; api.register(networking_bgp_imported_routes_ipv4)?; api.register(networking_bgp_config_delete)?; api.register(networking_bgp_announce_set_update)?; @@ -285,6 +285,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_announce_set_delete)?; api.register(networking_bgp_message_history)?; + api.register(networking_bgp_announcement_list)?; + api.register(networking_bfd_enable)?; api.register(networking_bfd_disable)?; api.register(networking_bfd_status)?; @@ -2866,48 +2868,6 @@ async fn instance_delete( .await } -// TODO should this be in the public API? 
-/// Migrate an instance -#[endpoint { - method = POST, - path = "/v1/instances/{instance}/migrate", - tags = ["instances"], -}] -async fn instance_migrate( - rqctx: RequestContext, - query_params: Query, - path_params: Path, - migrate_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let migrate_instance_params = migrate_params.into_inner(); - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, - }; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let instance = nexus - .project_instance_migrate( - &opctx, - &instance_lookup, - migrate_instance_params, - ) - .await?; - Ok(HttpResponseOk(instance.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - /// Reboot an instance #[endpoint { method = POST, @@ -3908,7 +3868,7 @@ async fn networking_bgp_config_create( let nexus = &apictx.context.nexus; let config = config.into_inner(); let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus.bgp_config_set(&opctx, &config).await?; + let result = nexus.bgp_config_create(&opctx, &config).await?; Ok(HttpResponseCreated::(result.into())) }; apictx @@ -3980,6 +3940,30 @@ async fn networking_bgp_status( .await } +//TODO pagination? 
the normal by-name/by-id stuff does not work here +/// Get BGP exported routes +#[endpoint { + method = GET, + path = "/v1/system/networking/bgp-exported", + tags = ["system/networking"], +}] +async fn networking_bgp_exported( + rqctx: RequestContext, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let handler = async { + let nexus = &apictx.context.nexus; + let result = nexus.bgp_exported(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + /// Get BGP router message history #[endpoint { method = GET, @@ -4062,7 +4046,7 @@ async fn networking_bgp_config_delete( /// set with the one specified. #[endpoint { method = PUT, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set", tags = ["system/networking"], }] async fn networking_bgp_announce_set_update( @@ -4084,24 +4068,28 @@ async fn networking_bgp_announce_set_update( .await } -//TODO pagination? 
the normal by-name/by-id stuff does not work here -/// Get originated routes for a BGP configuration +/// List BGP announce sets #[endpoint { method = GET, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set", tags = ["system/networking"], }] async fn networking_bgp_announce_set_list( rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { + query_params: Query< + PaginatedByNameOrId, + >, +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; - let sel = query_params.into_inner(); + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let result = nexus - .bgp_announce_list(&opctx, &sel) + .bgp_announce_set_list(&opctx, &paginated_by) .await? .into_iter() .map(|p| p.into()) @@ -4118,17 +4106,17 @@ async fn networking_bgp_announce_set_list( /// Delete BGP announce set #[endpoint { method = DELETE, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set/{name_or_id}", tags = ["system/networking"], }] async fn networking_bgp_announce_set_delete( rqctx: RequestContext, - selector: Query, + path_params: Path, ) -> Result { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; - let sel = selector.into_inner(); + let sel = path_params.into_inner(); let opctx = crate::context::op_context_for_external_api(&rqctx).await?; nexus.bgp_delete_announce_set(&opctx, &sel).await?; Ok(HttpResponseUpdatedNoContent {}) @@ -4140,6 +4128,40 @@ async fn networking_bgp_announce_set_delete( .await } +// TODO: is pagination necessary here? How large do we expect the list of +// announcements to become in real usage? 
+/// Get originated routes for a specified BGP announce set +#[endpoint { + method = GET, + path = "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement", + tags = ["system/networking"], +}] +async fn networking_bgp_announcement_list( + rqctx: RequestContext, + path_params: Path, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let sel = path_params.into_inner(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + + let result = nexus + .bgp_announcement_list(&opctx, &sel) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + /// Enable a BFD session #[endpoint { method = POST, @@ -6429,7 +6451,7 @@ async fn timeseries_schema_list( async fn timeseries_query( rqctx: RequestContext, body: TypedBody, -) -> Result>, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; @@ -6438,7 +6460,7 @@ async fn timeseries_query( nexus .timeseries_query(&opctx, &query) .await - .map(HttpResponseOk) + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) .map_err(HttpError::from) }; apictx diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 33b626a7fc..9965b6e21e 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -30,17 +30,20 @@ use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::ProbeInfo; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::SledPolicy; +use nexus_types::internal_api::params::InstanceMigrateRequest; use nexus_types::internal_api::params::SledAgentInfo; use nexus_types::internal_api::params::SwitchPutRequest; use 
nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; use nexus_types::internal_api::views::BackgroundTask; +use nexus_types::internal_api::views::DemoSaga; use nexus_types::internal_api::views::Ipv4NatEntryView; use nexus_types::internal_api::views::Saga; use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; +use omicron_common::api::external::Instance; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::DownstairsClientStopRequest; use omicron_common::api::internal::nexus::DownstairsClientStopped; @@ -189,6 +192,33 @@ impl NexusInternalApi for NexusInternalApiImpl { .await } + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> Result, HttpError> { + let apictx = &rqctx.context().context; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let migrate = migrate_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_internal_api(&rqctx).await; + let instance = nexus + .instance_migrate( + &opctx, + InstanceUuid::from_untyped_uuid(path.instance_id), + migrate, + ) + .await?; + Ok(HttpResponseOk(instance.into())) + }; + apictx + .internal_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn cpapi_disks_put( rqctx: RequestContext, path_params: Path, @@ -530,6 +560,40 @@ impl NexusInternalApi for NexusInternalApiImpl { .await } + async fn saga_demo_create( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = &rqctx.context().context; + let handler = async { + let nexus = &apictx.nexus; + let demo_saga = nexus.saga_demo_create().await?; + Ok(HttpResponseOk(demo_saga)) + }; + + apictx + .internal_latencies + 
.instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn saga_demo_complete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = &rqctx.context().context; + let handler = async { + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + nexus.saga_demo_complete(path.demo_saga_id)?; + Ok(HttpResponseUpdatedNoContent()) + }; + + apictx + .internal_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Background Tasks async fn bgtask_list( diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index a883bc83c5..50110ecaca 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -46,7 +46,7 @@ sled-agent-client.workspace = true slog.workspace = true tokio.workspace = true tokio-util.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index edbaa6a786..b714e3fb9d 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -4,6 +4,7 @@ //! 
Integration testing facilities for Nexus +#[cfg(feature = "omicron-dev")] use anyhow::Context; use anyhow::Result; use camino::Utf8Path; @@ -17,6 +18,11 @@ use dropshot::HandlerTaskMode; use futures::future::BoxFuture; use futures::FutureExt; use gateway_test_utils::setup::GatewayTestContext; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -73,11 +79,6 @@ use std::collections::HashMap; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; use uuid::Uuid; pub use sim::TEST_HARDWARE_THREADS; @@ -1428,6 +1429,7 @@ pub async fn start_oximeter( address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, + replicated: false, }; let config = oximeter_collector::Config { nexus_address: Some(nexus_address), @@ -1587,12 +1589,12 @@ pub async fn start_dns_server( socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); - let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + let mut resolver_opts = ResolverOpts::default(); + resolver_opts.edns0 = true; + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok((dns_server, http_server, resolver)) } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index ac7188f232..14180459ab 100644 --- 
a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -432,6 +432,28 @@ pub async fn create_disk( .await } +pub async fn create_snapshot( + client: &ClientTestContext, + project_name: &str, + disk_name: &str, + snapshot_name: &str, +) -> views::Snapshot { + let snapshots_url = format!("/v1/snapshots?project={}", project_name); + + object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: snapshot_name.parse().unwrap(), + description: format!("snapshot {:?}", snapshot_name), + }, + disk: disk_name.to_string().try_into().unwrap(), + }, + ) + .await +} + pub async fn delete_disk( client: &ClientTestContext, project_name: &str, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 8f65a73204..d9cbb5eb34 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -137,6 +137,8 @@ lookup_region_port.period_secs = 60 # Therefore, disable the background task during tests. instance_updater.disable = true instance_updater.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/demo_saga.rs b/nexus/tests/integration_tests/demo_saga.rs new file mode 100644 index 0000000000..888fa35965 --- /dev/null +++ b/nexus/tests/integration_tests/demo_saga.rs @@ -0,0 +1,74 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Smoke test for the demo saga + +use futures::TryStreamExt; +use nexus_client::types::Saga; +use nexus_client::types::SagaState; +use nexus_test_interface::NexusServer; +use nexus_test_utils_macros::nexus_test; +use omicron_test_utils::dev::poll::wait_for_condition; +use omicron_test_utils::dev::poll::CondCheckError; +use std::time::Duration; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +// Tests that we can create a demo saga, then mark it completed, and the actual +// saga's state matches what we expect along the way. +#[nexus_test] +async fn test_demo_saga(cptestctx: &ControlPlaneTestContext) { + let log = &cptestctx.logctx.log; + let nexus_internal_url = format!( + "http://{}", + cptestctx.server.get_http_server_internal_address().await + ); + let nexus_client = + nexus_client::Client::new(&nexus_internal_url, log.clone()); + + let sagas_before = list_sagas(&nexus_client).await; + eprintln!("found sagas (before): {:?}", sagas_before); + let demo_saga = nexus_client.saga_demo_create().await.unwrap(); + let saga_id = demo_saga.saga_id; + assert!(!sagas_before.into_iter().any(|s| s.id == saga_id)); + + let sagas_after = list_sagas(&nexus_client).await; + eprintln!("found sagas (after): {:?}", sagas_after); + let found = sagas_after.into_iter().find(|s| s.id == saga_id).unwrap(); + assert!(matches!(found.state, SagaState::Running)); + + // It is hard to verify that the saga is not going to complete by itself. + // No matter how long we wait and make sure it didn't complete, it might + // have completed after that. And then we've made the test suite take that + // much longer. But we can at least make sure that completing the saga + // does cause it to finish. + nexus_client.saga_demo_complete(&demo_saga.demo_saga_id).await.unwrap(); + + // Completion is not synchronous -- that just unblocked the saga. So we + // need to poll a bit to wait for it to actually finish. 
+ let found = wait_for_condition( + || async { + let sagas = list_sagas(&nexus_client).await; + eprintln!("found sagas (last): {:?}", sagas); + let found = sagas.into_iter().find(|s| s.id == saga_id).unwrap(); + if matches!(found.state, SagaState::Succeeded) { + Ok(found) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &Duration::from_millis(50), + &Duration::from_secs(30), + ) + .await + .unwrap(); + + assert_eq!(found.id, saga_id); + assert!(matches!(found.state, SagaState::Succeeded)); +} + +async fn list_sagas(client: &nexus_client::Client) -> Vec { + client.saga_list_stream(None, None).try_collect::>().await.unwrap() +} diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 6e4e59688a..9703004c73 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -359,12 +359,6 @@ pub static DEMO_INSTANCE_REBOOT_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); -pub static DEMO_INSTANCE_MIGRATE_URL: Lazy = Lazy::new(|| { - format!( - "/v1/instances/{}/migrate?{}", - *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR - ) -}); pub static DEMO_INSTANCE_SERIAL_URL: Lazy = Lazy::new(|| { format!( "/v1/instances/{}/serial-console?{}", @@ -579,7 +573,7 @@ pub static DEMO_BGP_CONFIG: Lazy = shaper: None, }); pub const DEMO_BGP_ANNOUNCE_SET_URL: &'static str = - "/v1/system/networking/bgp-announce?name_or_id=a-bag-of-addrs"; + "/v1/system/networking/bgp-announce-set"; pub static DEMO_BGP_ANNOUNCE: Lazy = Lazy::new(|| params::BgpAnnounceSetCreate { identity: IdentityMetadataCreateParams { @@ -591,8 +585,14 @@ pub static DEMO_BGP_ANNOUNCE: Lazy = network: "10.0.0.0/16".parse().unwrap(), }], }); +pub const DEMO_BGP_ANNOUNCE_SET_DELETE_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs"; +pub const DEMO_BGP_ANNOUNCEMENT_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs/announcement"; pub const 
DEMO_BGP_STATUS_URL: &'static str = "/v1/system/networking/bgp-status"; +pub const DEMO_BGP_EXPORTED_URL: &'static str = + "/v1/system/networking/bgp-exported"; pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = "/v1/system/networking/bgp-routes-ipv4?asn=47"; pub const DEMO_BGP_MESSAGE_HISTORY_URL: &'static str = @@ -1823,18 +1823,6 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Post(serde_json::Value::Null) ], }, - VerifyEndpoint { - url: &DEMO_INSTANCE_MIGRATE_URL, - visibility: Visibility::Protected, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![ - AllowedMethod::Post(serde_json::to_value( - params::InstanceMigrate { - dst_sled_id: uuid::Uuid::new_v4(), - } - ).unwrap()), - ], - }, VerifyEndpoint { url: &DEMO_INSTANCE_SERIAL_URL, visibility: Visibility::Protected, @@ -2290,6 +2278,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::GetNonexistent ], }, + VerifyEndpoint { url: &DEMO_BGP_CONFIG_CREATE_URL, visibility: Visibility::Public, @@ -2311,11 +2300,28 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Put( serde_json::to_value(&*DEMO_BGP_ANNOUNCE).unwrap(), ), - AllowedMethod::GetNonexistent, + AllowedMethod::Get, + ], + }, + + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCE_SET_DELETE_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ AllowedMethod::Delete ], }, + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCEMENT_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_STATUS_URL, visibility: Visibility::Public, @@ -2325,6 +2331,15 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_BGP_EXPORTED_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + 
}, + VerifyEndpoint { url: &DEMO_BGP_ROUTES_IPV4_URL, visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2e41fac3a4..eb3c88eb38 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -48,6 +48,7 @@ use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::external_api::views::SshKey; use nexus_types::external_api::{params, views}; use nexus_types::identity::Resource; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -737,6 +738,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { } let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -791,10 +793,10 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let instance = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -907,6 +909,7 @@ async fn test_instance_migrate_v2p_and_routes( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let datastore = nexus.datastore(); @@ -997,10 +1000,10 @@ async fn test_instance_migrate_v2p_and_routes( // Kick off migration and simulate 
its completion on the target. let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -1293,6 +1296,7 @@ async fn test_instance_metrics_with_migration( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -1381,10 +1385,10 @@ async fn test_instance_metrics_with_migration( }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 9cfa0350e8..3b808984ae 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -19,6 +19,7 @@ use nexus_test_utils::resource_helpers::{ }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views::OxqlQueryResult; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use oximeter::types::Datum; @@ -284,7 +285,7 @@ async fn test_timeseries_schema_list( pub async fn 
timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, -) -> Vec { +) -> Vec { // first, make sure the latest timeseries have been collected. cptestctx.oximeter.force_collect().await; @@ -307,12 +308,14 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body().unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ query: {query}\nresponse: {rsp:#?}" - ); - }) + ); + }) + .tables } #[nexus_test] @@ -429,11 +432,11 @@ async fn test_instance_watcher_metrics( #[track_caller] fn count_state( - table: &oximeter_db::oxql::Table, + table: &oxql_types::Table, instance_id: InstanceUuid, state: &'static str, ) -> i64 { - use oximeter_db::oxql::point::ValueArray; + use oxql_types::point::ValueArray; let uuid = FieldValue::Uuid(instance_id.into_untyped_uuid()); let state = FieldValue::String(state.into()); let mut timeserieses = table.timeseries().filter(|ts| { diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 5054527c63..fdf14dbd07 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -11,6 +11,7 @@ mod basic; mod certificates; mod commands; mod console_api; +mod demo_saga; mod device_auth; mod disks; mod external_ips; diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 2c861ff159..0de4d31395 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -37,6 +37,7 @@ use std::fmt::Write; use std::str::FromStr; use base64::Engine; +use hickory_resolver::error::ResolveErrorKind; use http::method::Method; use http::StatusCode; use httptest::{matchers::*, responders::*, Expectation, Server}; @@ -44,7 +45,6 @@ use nexus_types::external_api::shared::{FleetRole, SiloRole}; 
use std::convert::Infallible; use std::net::Ipv4Addr; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; type ControlPlaneTestContext = @@ -2164,7 +2164,7 @@ pub async fn verify_silo_dns_name( .await { Ok(result) => { - let addrs: Vec<_> = result.iter().collect(); + let addrs: Vec<_> = result.iter().map(|a| &a.0).collect(); if addrs.is_empty() { false } else { diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index 0b71ddb2cf..92c44eddad 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -11,9 +11,9 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::{ Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate, BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, - BgpPeerConfig, LinkConfigCreate, LldpServiceConfigCreate, Route, - RouteConfig, SwitchInterfaceConfigCreate, SwitchInterfaceKind, - SwitchPortApplySettings, SwitchPortSettingsCreate, + BgpPeerConfig, LinkConfigCreate, LldpLinkConfigCreate, Route, RouteConfig, + SwitchInterfaceConfigCreate, SwitchInterfaceKind, SwitchPortApplySettings, + SwitchPortSettingsCreate, }; use nexus_types::external_api::views::Rack; use omicron_common::api::external::ImportExportPolicy; @@ -76,7 +76,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { NexusRequest::objects_post( client, - "/v1/system/networking/bgp-announce", + "/v1/system/networking/bgp-announce-set", &announce_set, ) .authn_as(AuthnMode::PrivilegedUser) @@ -118,7 +118,15 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { "phy0".into(), LinkConfigCreate { mtu: 4700, - lldp: LldpServiceConfigCreate { enabled: false, lldp_config: None }, + lldp: LldpLinkConfigCreate { + enabled: true, + link_name: Some("Link Name".into()), + link_description: Some("Link Description".into()), + chassis_id: Some("Chassis ID".into()), + 
system_name: Some("System Name".into()), + system_description: Some("System Description".into()), + management_ip: None, + }, fec: LinkFec::None, speed: LinkSpeed::Speed100G, autoneg: false, @@ -140,6 +148,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { dst: "1.2.3.0/24".parse().unwrap(), gw: "1.2.3.4".parse().unwrap(), vid: None, + local_pref: None, }], }, ); @@ -176,8 +185,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { assert_eq!(link0.mtu, 4700); let lldp0 = &created.link_lldp[0]; - assert_eq!(lldp0.enabled, false); - assert_eq!(lldp0.lldp_config_id, None); + assert_eq!(lldp0.enabled, true); + assert_eq!(lldp0.link_name, Some("Link Name".to_string())); + assert_eq!(lldp0.link_description, Some("Link Description".to_string())); + assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string())); + assert_eq!(lldp0.system_name, Some("System Name".to_string())); + assert_eq!( + lldp0.system_description, + Some("System Description".to_string()) + ); + assert_eq!(lldp0.management_ip, None); let ifx0 = &created.interfaces[0]; assert_eq!(&ifx0.interface_name, "phy0"); @@ -212,8 +229,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { assert_eq!(link0.mtu, 4700); let lldp0 = &roundtrip.link_lldp[0]; - assert_eq!(lldp0.enabled, false); - assert_eq!(lldp0.lldp_config_id, None); + assert_eq!(lldp0.enabled, true); + assert_eq!(lldp0.link_name, Some("Link Name".to_string())); + assert_eq!(lldp0.link_description, Some("Link Description".to_string())); + assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string())); + assert_eq!(lldp0.system_name, Some("System Name".to_string())); + assert_eq!( + lldp0.system_description, + Some("System Description".to_string()) + ); + assert_eq!(lldp0.management_ip, None); let ifx0 = &roundtrip.interfaces[0]; assert_eq!(&ifx0.interface_name, "phy0"); diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 4af018c5af..bde11e2de3 
100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -51,7 +51,6 @@ instance_ephemeral_ip_attach POST /v1/instances/{instance}/exter instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances -instance_migrate POST /v1/instances/{instance}/migrate instance_network_interface_create POST /v1/network-interfaces instance_network_interface_delete DELETE /v1/network-interfaces/{interface} instance_network_interface_list GET /v1/network-interfaces @@ -179,12 +178,14 @@ networking_allow_list_view GET /v1/system/networking/allow-li networking_bfd_disable POST /v1/system/networking/bfd-disable networking_bfd_enable POST /v1/system/networking/bfd-enable networking_bfd_status GET /v1/system/networking/bfd-status -networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce -networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce -networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce +networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce-set/{name_or_id} +networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce-set +networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce-set +networking_bgp_announcement_list GET /v1/system/networking/bgp-announce-set/{name_or_id}/announcement networking_bgp_config_create POST /v1/system/networking/bgp networking_bgp_config_delete DELETE /v1/system/networking/bgp networking_bgp_config_list GET /v1/system/networking/bgp +networking_bgp_exported GET /v1/system/networking/bgp-exported networking_bgp_imported_routes_ipv4 GET /v1/system/networking/bgp-routes-ipv4 networking_bgp_message_history GET /v1/system/networking/bgp-message-history networking_bgp_status GET /v1/system/networking/bgp-status diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index a4418d2a74..124f0d42c9 
100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,16 +9,22 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true chrono.workspace = true clap.workspace = true +cookie.workspace = true base64.workspace = true derive-where.workspace = true derive_more.workspace = true +dropshot.workspace = true futures.workspace = true +http.workspace = true humantime.workspace = true ipnetwork.workspace = true +newtype_derive.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true +oxql-types.workspace = true oxnet.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } diff --git a/nexus/auth/src/authn/external/cookies.rs b/nexus/types/src/authn/cookies.rs similarity index 100% rename from nexus/auth/src/authn/external/cookies.rs rename to nexus/types/src/authn/cookies.rs diff --git a/nexus/types/src/authn/mod.rs b/nexus/types/src/authn/mod.rs new file mode 100644 index 0000000000..f87935428e --- /dev/null +++ b/nexus/types/src/authn/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Authentication types for the Nexus API. 
+ +pub mod cookies; diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3e7e89763b..e381ebe1a7 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -690,6 +690,11 @@ impl BlueprintZoneConfig { blueprint_zone_type::ClickhouseKeeper { address, dataset }, ) } + OmicronZoneType::ClickhouseServer { address, dataset } => { + BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { address, dataset }, + ) + } OmicronZoneType::CockroachDb { address, dataset } => { BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, dataset }, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 3b45f5ac79..6ad8cd1231 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -281,6 +281,13 @@ pub enum CockroachDbPreserveDowngrade { } impl CockroachDbPreserveDowngrade { + pub fn is_set(self) -> bool { + match self { + CockroachDbPreserveDowngrade::Set(_) => true, + _ => false, + } + } + pub fn from_optional_string( value: &Option, ) -> Result { @@ -723,6 +730,23 @@ pub struct Policy { /// at present this is hardcoded based on the version of CockroachDB we /// presently ship and the tick-tock pattern described in RFD 469. pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, + + /// Policy information for a replicated clickhouse setup + /// + /// If this policy is `None`, then we are using a single node clickhouse + /// setup. Eventually we will only allow multi-node setups and this will no + /// longer be an option. 
+ pub clickhouse_policy: Option, +} + +/// Policy for replicated clickhouse setups +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClickhousePolicy { + /// Desired number of clickhouse servers + pub target_servers: usize, + + /// Desired number of clickhouse keepers + pub target_keepers: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -775,6 +799,7 @@ impl PlanningInputBuilder { target_cockroachdb_zone_count: 0, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, + clickhouse_policy: None, }, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index 789a0215b7..e4958fc3c3 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -25,6 +25,7 @@ pub enum BlueprintZoneType { BoundaryNtp(blueprint_zone_type::BoundaryNtp), Clickhouse(blueprint_zone_type::Clickhouse), ClickhouseKeeper(blueprint_zone_type::ClickhouseKeeper), + ClickhouseServer(blueprint_zone_type::ClickhouseServer), CockroachDb(blueprint_zone_type::CockroachDb), Crucible(blueprint_zone_type::Crucible), CruciblePantry(blueprint_zone_type::CruciblePantry), @@ -60,6 +61,7 @@ impl BlueprintZoneType { } BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::Crucible(_) | BlueprintZoneType::CruciblePantry(_) @@ -78,6 +80,7 @@ impl BlueprintZoneType { | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::Crucible(_) | BlueprintZoneType::CruciblePantry(_) @@ -94,6 +97,7 @@ impl BlueprintZoneType { | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | 
BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::Crucible(_) | BlueprintZoneType::CruciblePantry(_) @@ -110,6 +114,7 @@ impl BlueprintZoneType { BlueprintZoneType::BoundaryNtp(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) @@ -129,6 +134,9 @@ impl BlueprintZoneType { BlueprintZoneType::ClickhouseKeeper( blueprint_zone_type::ClickhouseKeeper { dataset, address }, ) => (dataset, DatasetKind::ClickhouseKeeper, address), + BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { dataset, address }, + ) => (dataset, DatasetKind::ClickhouseServer, address), BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { dataset, address }, ) => (dataset, DatasetKind::Cockroach, address), @@ -185,6 +193,12 @@ impl From for OmicronZoneType { dataset: zone.dataset, } } + BlueprintZoneType::ClickhouseServer(zone) => { + Self::ClickhouseServer { + address: zone.address, + dataset: zone.dataset, + } + } BlueprintZoneType::CockroachDb(zone) => Self::CockroachDb { address: zone.address, dataset: zone.dataset, @@ -235,6 +249,7 @@ impl BlueprintZoneType { Self::BoundaryNtp(_) => ZoneKind::BoundaryNtp, Self::Clickhouse(_) => ZoneKind::Clickhouse, Self::ClickhouseKeeper(_) => ZoneKind::ClickhouseKeeper, + Self::ClickhouseServer(_) => ZoneKind::ClickhouseServer, Self::CockroachDb(_) => ZoneKind::CockroachDb, Self::Crucible(_) => ZoneKind::Crucible, Self::CruciblePantry(_) => ZoneKind::CruciblePantry, @@ -273,6 +288,7 @@ pub mod blueprint_zone_type { pub external_ip: OmicronZoneExternalSnatIp, } + /// Used in single-node clickhouse setups #[derive( Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, )] @@ -289,6 +305,15 @@ pub mod blueprint_zone_type { pub dataset: OmicronZoneDataset, } + /// Used in replicated 
clickhouse setups + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct ClickhouseServer { + pub address: SocketAddrV6, + pub dataset: OmicronZoneDataset, + } + #[derive( Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, )] diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 8dcce913b3..83897cbd1d 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1093,12 +1093,6 @@ impl JsonSchema for UserData { } } -/// Migration parameters for an `Instance` -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrate { - pub dst_sled_id: Uuid, -} - /// Forwarded to a propolis server to request the contents of an Instance's serial console. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct InstanceSerialConsoleRequest { @@ -1506,7 +1500,7 @@ pub struct LinkConfigCreate { pub mtu: u16, /// The link-layer discovery protocol (LLDP) configuration for the link. - pub lldp: LldpServiceConfigCreate, + pub lldp: LldpLinkConfigCreate, /// The forward error correction mode of the link. pub fec: LinkFec, @@ -1518,16 +1512,29 @@ pub struct LinkConfigCreate { pub autoneg: bool, } -/// The LLDP configuration associated with a port. LLDP may be either enabled or -/// disabled, if enabled, an LLDP configuration must be provided by name or id. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct LldpServiceConfigCreate { +/// The LLDP configuration associated with a port. +#[derive(Clone, Debug, Default, Deserialize, Serialize, JsonSchema)] +pub struct LldpLinkConfigCreate { /// Whether or not LLDP is enabled. pub enabled: bool, - /// A reference to the LLDP configuration used. Must not be `None` when - /// `enabled` is `true`. - pub lldp_config: Option, + /// The LLDP link name TLV. + pub link_name: Option, + + /// The LLDP link description TLV. 
+ pub link_description: Option, + + /// The LLDP chassis identifier TLV. + pub chassis_id: Option, + + /// The LLDP system name TLV. + pub system_name: Option, + + /// The LLDP system description TLV. + pub system_description: Option, + + /// The LLDP management IP TLV. + pub management_ip: Option, } /// A layer-3 switch interface configuration. When IPv6 is enabled, a link local @@ -1587,6 +1594,10 @@ pub struct Route { /// VLAN id the gateway is reachable over. pub vid: Option, + + /// Local preference for route. Higher preference indictes precedence + /// within and across protocols. + pub local_pref: Option, } /// Select a BGP config by a name or id. @@ -1618,6 +1629,13 @@ pub struct BgpAnnounceSetCreate { pub announcement: Vec, } +/// Optionally select a BGP announce set by a name or id. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct OptionalBgpAnnounceSetSelector { + /// A name or id to use when s electing BGP port settings + pub name_or_id: Option, +} + /// Select a BGP announce set by a name or id. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct BgpAnnounceSetSelector { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e241f849ee..58c2e560ab 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -971,3 +971,12 @@ pub struct AllowList { /// The allowlist of IPs or subnets. pub allowed_ips: ExternalAllowedSourceIps, } + +// OxQL QUERIES + +/// The result of a successful OxQL query. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct OxqlQueryResult { + /// Tables resulting from the query, each containing timeseries. 
+ pub tables: Vec, +} diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 6463aa8ab6..8e4b6b3013 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -19,3 +19,20 @@ pub struct LookupRegionPortStatus { pub found_port_ok: Vec, pub errors: Vec, } + +/// The status of a `region_snapshot_replacement_start` background task +/// activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementStartStatus { + pub requests_created_ok: Vec, + pub start_invoked_ok: Vec, + pub errors: Vec, +} + +/// The status of a `region_snapshot_replacement_garbage_collect` background +/// task activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementGarbageCollectStatus { + pub garbage_collect_requested: Vec, + pub errors: Vec, +} diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 3a26dde4ba..c803f003f1 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -207,3 +207,10 @@ pub struct OximeterInfo { /// The address on which this oximeter instance listens for requests pub address: SocketAddr, } + +/// Parameters used when migrating an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceMigrateRequest { + /// The ID of the sled to which to migrate the target instance. 
+ pub dst_sled_id: Uuid, +} diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index b71fd04779..a4557ffd31 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -9,6 +9,7 @@ use futures::stream::StreamExt; use omicron_common::api::external::MacAddr; use omicron_common::api::external::ObjectStream; use omicron_common::api::external::Vni; +use omicron_uuid_kinds::DemoSagaUuid; use schemars::JsonSchema; use serde::Serialize; use std::net::Ipv4Addr; @@ -152,6 +153,13 @@ impl From for SagaState { } } +/// Identifies an instance of the demo saga +#[derive(Clone, Debug, Serialize, JsonSchema)] +pub struct DemoSaga { + pub saga_id: Uuid, + pub demo_saga_id: DemoSagaUuid, +} + /// Background tasks /// /// These are currently only intended for observability by developers. We will diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 494573e834..8a0a3ec80e 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -29,6 +29,7 @@ //! rules, so our model layer knows about our views. That seems to be a //! relatively minor offense, so it's the way we leave things for now. +pub mod authn; pub mod deployment; pub mod external_api; pub mod identity; diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 7b4f257670..bd928001bb 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -732,6 +732,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. 
If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", @@ -767,6 +828,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -1183,6 +1253,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/clickhouse-admin.json b/openapi/clickhouse-admin.json new file mode 100644 index 0000000000..6bb5367712 --- /dev/null +++ b/openapi/clickhouse-admin.json @@ -0,0 +1,84 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "ClickHouse Cluster Admin API", + "description": "API for interacting with the Oxide control plane's ClickHouse cluster", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "0.0.1" + }, + "paths": { + "/node/address": { + "get": { + "summary": "Retrieve the address the ClickHouse server or keeper node is listening on", + "operationId": "clickhouse_address", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClickhouseAddress" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "ClickhouseAddress": { + "type": "object", + "properties": { + "clickhouse_address": { + "type": "string" + } + }, + "required": [ + "clickhouse_address" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": 
"string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } +} diff --git a/openapi/installinator.json b/openapi/installinator.json index 0631344b25..6419760fbd 100644 --- a/openapi/installinator.json +++ b/openapi/installinator.json @@ -397,7 +397,7 @@ ] }, "M2Slot": { - "description": "An M.2 slot that was written.", + "description": "Describes an M.2 slot, often in the context of writing a system image to it.", "type": "string", "enum": [ "A", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index ba77f07920..ac86e13cb2 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -364,6 +364,59 @@ } } }, + "/demo-saga": { + "post": { + "summary": "Kick off an instance of the \"demo\" saga", + "description": "This saga is used for demo and testing. The saga just waits until you complete using the `saga_demo_complete` API.", + "operationId": "saga_demo_create", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DemoSaga" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/demo-saga/{demo_saga_id}/complete": { + "post": { + "summary": "Complete a waiting demo saga", + "description": "Note that the id used here is not the same as the id of the saga. 
It's the one returned by the `saga_demo_create` API.", + "operationId": "saga_demo_complete", + "parameters": [ + { + "in": "path", + "name": "demo_saga_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForDemoSagaKind" + } + } + ], + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/deployment/blueprints/all": { "get": { "summary": "Lists blueprints", @@ -731,6 +784,50 @@ } } }, + "/instances/{instance_id}/migrate": { + "post": { + "operationId": "instance_migrate", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMigrateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Instance" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/metrics/collectors": { "post": { "summary": "Accept a notification of a new oximeter collection server.", @@ -2195,6 +2292,7 @@ ] }, { + "description": "Used in single-node clickhouse setups", "type": "object", "properties": { "address": { @@ -2238,6 +2336,29 @@ "type" ] }, + { + "description": "Used in replicated clickhouse setups", + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, { "type": "object", "properties": { @@ -2710,6 +2831,23 @@ "kind" ] }, + "DemoSaga": { + "description": 
"Identifies an instance of the demo saga", + "type": "object", + "properties": { + "demo_saga_id": { + "$ref": "#/components/schemas/TypedUuidForDemoSagaKind" + }, + "saga_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "demo_saga_id", + "saga_id" + ] + }, "DiskIdentity": { "description": "Uniquely identifies a disk.", "type": "object", @@ -3269,6 +3407,179 @@ } ] }, + "Instance": { + "description": "View of an Instance", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "hostname": { + "description": "RFC1035-compliant hostname for the Instance.", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "memory": { + "description": "memory allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "ncpus": { + "description": "number of CPUs allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuCount" + } + ] + }, + "project_id": { + "description": "id for the project containing this Instance", + "type": "string", + "format": "uuid" + }, + "run_state": { + "$ref": "#/components/schemas/InstanceState" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + }, + "time_run_state_updated": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "hostname", + "id", + "memory", + "name", + "ncpus", + "project_id", + "run_state", + "time_created", + 
"time_modified", + "time_run_state_updated" + ] + }, + "InstanceCpuCount": { + "description": "The number of CPUs in an Instance", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "InstanceMigrateRequest": { + "description": "Parameters used when migrating an instance.", + "type": "object", + "properties": { + "dst_sled_id": { + "description": "The ID of the sled to which to migrate the target instance.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dst_sled_id" + ] + }, + "InstanceState": { + "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", + "oneOf": [ + { + "description": "The instance is being created.", + "type": "string", + "enum": [ + "creating" + ] + }, + { + "description": "The instance is currently starting up.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The instance is currently running.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The instance is currently stopped.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The instance is attempting to recover from a failure.", + "type": "string", + "enum": [ + "repairing" + ] + }, + { + 
"description": "The instance has encountered a failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The instance has been deleted.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "IpNet": { "x-rust-type": { "crate": "oxnet", @@ -3501,6 +3812,67 @@ "start_time" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. 
If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -3906,6 +4278,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4417,6 +4798,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -4987,6 +5376,10 @@ "type": "string", "format": "uuid" }, + "TypedUuidForDemoSagaKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForDownstairsRegionKind": { "type": "string", "format": "uuid" diff --git a/openapi/nexus.json b/openapi/nexus.json index ae5eaeae64..285dcd82bb 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2276,62 +2276,6 @@ } } }, - "/v1/instances/{instance}/migrate": { - "post": { - "tags": [ - "instances" - ], - "summary": "Migrate an instance", - "operationId": "instance_migrate", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "path", - "name": "instance", - "description": "Name or ID of the instance", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": 
"#/components/schemas/InstanceMigrate" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Instance" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/v1/instances/{instance}/reboot": { "post": { "tags": [ @@ -6589,22 +6533,48 @@ } } }, - "/v1/system/networking/bgp-announce": { + "/v1/system/networking/bgp-announce-set": { "get": { "tags": [ "system/networking" ], - "summary": "Get originated routes for a BGP configuration", + "summary": "List BGP announce sets", "operationId": "networking_bgp_announce_set_list", "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, { "in": "query", "name": "name_or_id", - "description": "A name or id to use when selecting BGP port settings", - "required": true, + "description": "A name or id to use when s electing BGP port settings", "schema": { "$ref": "#/components/schemas/NameOrId" } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } } ], "responses": { @@ -6613,10 +6583,10 @@ "content": { "application/json": { "schema": { - "title": "Array_of_BgpAnnouncement", + "title": "Array_of_BgpAnnounceSet", "type": "array", "items": { - "$ref": "#/components/schemas/BgpAnnouncement" + "$ref": "#/components/schemas/BgpAnnounceSet" } } } @@ -6628,6 +6598,9 @@ "5XX": { "$ref": "#/components/responses/Error" } + }, + "x-dropshot-pagination": { + "required": [] } }, "put": { @@ -6665,7 
+6638,9 @@ "$ref": "#/components/responses/Error" } } - }, + } + }, + "/v1/system/networking/bgp-announce-set/{name_or_id}": { "delete": { "tags": [ "system/networking" @@ -6674,7 +6649,7 @@ "operationId": "networking_bgp_announce_set_delete", "parameters": [ { - "in": "query", + "in": "path", "name": "name_or_id", "description": "A name or id to use when selecting BGP port settings", "required": true, @@ -6696,6 +6671,75 @@ } } }, + "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get originated routes for a specified BGP announce set", + "operationId": "networking_bgp_announcement_list", + "parameters": [ + { + "in": "path", + "name": "name_or_id", + "description": "A name or id to use when selecting BGP port settings", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_BgpAnnouncement", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpAnnouncement" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/networking/bgp-exported": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get BGP exported routes", + "operationId": "networking_bgp_exported", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BgpExported" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/networking/bgp-message-history": { "get": { "tags": [ @@ -8082,11 +8126,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_Table", - "type": "array", - "items": { - "$ref": 
"#/components/schemas/Table" - } + "$ref": "#/components/schemas/OxqlQueryResult" } } } @@ -10411,6 +10451,25 @@ "items" ] }, + "BgpExported": { + "description": "The current status of a BGP peer.", + "type": "object", + "properties": { + "exports": { + "description": "Exported routes indexed by peer address.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4Net" + } + } + } + }, + "required": [ + "exports" + ] + }, "BgpImportedRouteIpv4": { "description": "A route imported from a BGP peer.", "type": "object", @@ -15271,19 +15330,6 @@ } ] }, - "InstanceMigrate": { - "description": "Migration parameters for an `Instance`", - "type": "object", - "properties": { - "dst_sled_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dst_sled_id" - ] - }, "InstanceNetworkInterface": { "description": "An `InstanceNetworkInterface` represents a virtual network interface device attached to an instance.", "type": "object", @@ -16053,7 +16099,7 @@ "description": "The link-layer discovery protocol (LLDP) configuration for the link.", "allOf": [ { - "$ref": "#/components/schemas/LldpServiceConfigCreate" + "$ref": "#/components/schemas/LldpLinkConfigCreate" } ] }, @@ -16174,10 +16220,15 @@ } ] }, - "LldpServiceConfig": { + "LldpLinkConfig": { "description": "A link layer discovery protocol (LLDP) service configuration.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not the LLDP service is enabled.", "type": "boolean" @@ -16187,11 +16238,34 @@ "type": "string", "format": "uuid" }, - "lldp_config_id": { + "link_description": { "nullable": true, - "description": "The link-layer discovery protocol configuration for this service.", - "type": "string", - "format": "uuid" + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { 
+ "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16199,22 +16273,44 @@ "id" ] }, - "LldpServiceConfigCreate": { - "description": "The LLDP configuration associated with a port. LLDP may be either enabled or disabled, if enabled, an LLDP configuration must be provided by name or id.", + "LldpLinkConfigCreate": { + "description": "The LLDP configuration associated with a port.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not LLDP is enabled.", "type": "boolean" }, - "lldp_config": { + "link_description": { "nullable": true, - "description": "A reference to the LLDP configuration used. 
Must not be `None` when `enabled` is `true`.", - "allOf": [ - { - "$ref": "#/components/schemas/NameOrId" - } - ] + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { + "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "type": "string", + "format": "ip" + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16570,6 +16666,22 @@ } ] }, + "OxqlQueryResult": { + "description": "The result of a successful OxQL query.", + "type": "object", + "properties": { + "tables": { + "description": "Tables resulting from the query, each containing timeseries.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + }, + "required": [ + "tables" + ] + }, "Password": { "title": "A password used to authenticate a user", "description": "Passwords may be subject to additional constraints.", @@ -17237,6 +17349,13 @@ "type": "string", "format": "ip" }, + "local_pref": { + "nullable": true, + "description": "Local preference for route. 
Higher preference indictes precedence within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "vid": { "nullable": true, "description": "VLAN id the gateway is reachable over.", @@ -19215,7 +19334,7 @@ "description": "The name of this link.", "type": "string" }, - "lldp_service_config_id": { + "lldp_link_config_id": { "description": "The link-layer discovery protocol service configuration id for this link.", "type": "string", "format": "uuid" @@ -19244,7 +19363,7 @@ "autoneg", "fec", "link_name", - "lldp_service_config_id", + "lldp_link_config_id", "mtu", "port_settings_id", "speed" @@ -19295,6 +19414,13 @@ "description": "The interface name this route configuration is assigned to.", "type": "string" }, + "local_pref": { + "nullable": true, + "description": "Local preference indicating priority within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "port_settings_id": { "description": "The port settings object this route configuration belongs to.", "type": "string", @@ -19499,7 +19625,7 @@ "description": "Link-layer discovery protocol (LLDP) settings.", "type": "array", "items": { - "$ref": "#/components/schemas/LldpServiceConfig" + "$ref": "#/components/schemas/LldpLinkConfig" } }, "links": { @@ -19805,7 +19931,10 @@ "count", "bytes", "seconds", - "nanoseconds" + "nanoseconds", + "volts", + "amps", + "degrees_celcius" ] }, { @@ -19814,6 +19943,13 @@ "enum": [ "none" ] + }, + { + "description": "Rotations per minute.", + "type": "string", + "enum": [ + "rpm" + ] } ] }, @@ -20190,10 +20326,20 @@ "type": "object", "properties": { "metric_type": { - "$ref": "#/components/schemas/MetricType" + "description": "The type of this metric.", + "allOf": [ + { + "$ref": "#/components/schemas/MetricType" + } + ] }, "values": { - "$ref": "#/components/schemas/ValueArray" + "description": "The data values.", + "allOf": [ + { + "$ref": "#/components/schemas/ValueArray" + } + ] } }, "required": [ diff 
--git a/openapi/sled-agent.json b/openapi/sled-agent.json index db9ea624a3..ff8531a056 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -68,7 +68,6 @@ }, "/boot-disk/{boot_disk}/os/write/status": { "get": { - "summary": "Get the status of writing a new host OS", "operationId": "host_os_write_status_get", "parameters": [ { @@ -2927,6 +2926,14 @@ "$ref": "#/components/schemas/UplinkAddressConfig" } }, + "lldp": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Switchport to use for external connectivity", "type": "string" @@ -3119,7 +3126,7 @@ "firewall_rules": { "type": "array", "items": { - "$ref": "#/components/schemas/VpcFirewallRule" + "$ref": "#/components/schemas/ResolvedVpcFirewallRule" } }, "floating_ips": { @@ -3649,6 +3656,67 @@ "minLength": 1, "maxLength": 11 }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. 
If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -3991,6 +4059,7 @@ ] }, { + "description": "Type of clickhouse zone used for a single node clickhouse deployment", "type": "object", "properties": { "address": { @@ -4013,6 +4082,7 @@ ] }, { + "description": "A zone used to run a Clickhouse Keeper node\n\nKeepers are only used in replicated clickhouse setups", "type": "object", "properties": { "address": { @@ -4034,6 +4104,29 @@ "type" ] }, + { + "description": "A zone used to run a Clickhouse Server in a replicated deployment", + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, { "type": "object", "properties": { @@ -4324,6 +4417,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": 
"#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4470,6 +4572,60 @@ "rack_subnet" ] }, + "ResolvedVpcFirewallRule": { + "description": "VPC firewall rule after object name resolution has been performed by Nexus", + "type": "object", + "properties": { + "action": { + "$ref": "#/components/schemas/VpcFirewallRuleAction" + }, + "direction": { + "$ref": "#/components/schemas/VpcFirewallRuleDirection" + }, + "filter_hosts": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/HostIdentifier" + } + }, + "filter_ports": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/L4PortRange" + } + }, + "filter_protocols": { + "nullable": true, + "type": "array", + "items": { + "$ref": "#/components/schemas/VpcFirewallRuleProtocol" + } + }, + "priority": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "status": { + "$ref": "#/components/schemas/VpcFirewallRuleStatus" + }, + "targets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NetworkInterface" + } + } + }, + "required": [ + "action", + "direction", + "priority", + "status", + "targets" + ] + }, "ResolvedVpcRoute": { "description": "A VPC route resolved into a concrete target.", "type": "object", @@ -4545,6 +4701,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -5324,60 +5488,6 @@ } ] }, - "VpcFirewallRule": { - "description": "VPC firewall rule after object name resolution has been performed by Nexus", - "type": "object", - "properties": { - "action": { - "$ref": "#/components/schemas/VpcFirewallRuleAction" - }, - "direction": { - "$ref": "#/components/schemas/VpcFirewallRuleDirection" - }, - 
"filter_hosts": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/HostIdentifier" - } - }, - "filter_ports": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/L4PortRange" - } - }, - "filter_protocols": { - "nullable": true, - "type": "array", - "items": { - "$ref": "#/components/schemas/VpcFirewallRuleProtocol" - } - }, - "priority": { - "type": "integer", - "format": "uint16", - "minimum": 0 - }, - "status": { - "$ref": "#/components/schemas/VpcFirewallRuleStatus" - }, - "targets": { - "type": "array", - "items": { - "$ref": "#/components/schemas/NetworkInterface" - } - } - }, - "required": [ - "action", - "direction", - "priority", - "status", - "targets" - ] - }, "VpcFirewallRuleAction": { "type": "string", "enum": [ @@ -5415,7 +5525,7 @@ "rules": { "type": "array", "items": { - "$ref": "#/components/schemas/VpcFirewallRule" + "$ref": "#/components/schemas/ResolvedVpcFirewallRule" } }, "vni": { @@ -5540,7 +5650,7 @@ "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" }, "M2Slot": { - "description": "An M.2 slot that was written.", + "description": "Describes an M.2 slot, often in the context of writing a system image to it.", "type": "string", "enum": [ "A", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 757383897b..87cfe045d3 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1773,6 +1773,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. 
If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", @@ -3062,6 +3123,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -6296,6 +6365,15 @@ "$ref": "#/components/schemas/UserSpecifiedBgpPeerConfig" } }, + "lldp": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "routes": { "type": "array", "items": { diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 5da9a1dfa8..b13fbd3938 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -17,8 +17,6 @@ use futures::TryStreamExt; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::IdSortMode; -use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::backoff; use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; @@ -381,6 +379,7 @@ impl OximeterAgent { db_config: DbConfig, resolver: &Resolver, log: &Logger, + replicated: bool, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); let log = log.new(o!( @@ -394,10 +393,15 @@ impl OximeterAgent { // database. let db_address = if let Some(address) = db_config.address { address + } else if replicated { + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::ClickhouseServer) + .await?, + ) } else { - SocketAddr::new( - resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + SocketAddr::V6( + resolver.lookup_socket_v6(ServiceName::Clickhouse).await?, ) }; @@ -423,7 +427,6 @@ impl OximeterAgent { .. 
}) => { debug!(log, "oximeter database does not exist, creating"); - let replicated = client.is_oximeter_cluster().await?; client .initialize_db_with_version( replicated, @@ -816,7 +819,7 @@ async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { async fn resolve_nexus_with_backoff( log: &Logger, resolver: &Resolver, -) -> SocketAddr { +) -> SocketAddrV6 { let log_failure = |error, delay| { warn!( log, @@ -827,12 +830,9 @@ async fn resolve_nexus_with_backoff( }; let do_lookup = || async { resolver - .lookup_ipv6(ServiceName::Nexus) + .lookup_socket_v6(ServiceName::Nexus) .await .map_err(|e| BackoffError::transient(e.to_string())) - .map(|ip| { - SocketAddr::V6(SocketAddrV6::new(ip, NEXUS_INTERNAL_PORT, 0, 0)) - }) }; backoff::retry_notify( backoff::retry_policy_internal_service(), diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 02bf9152f4..0576c7d532 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -14,7 +14,6 @@ use dropshot::HttpServerStarter; use internal_dns::resolver::ResolveError; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use omicron_common::FileKv; @@ -79,12 +78,18 @@ pub struct DbConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub address: Option, - /// Batch size of samples at which to insert + /// Batch size of samples at which to insert. pub batch_size: usize, /// Interval on which to insert data into the database, regardless of the number of collected /// samples. Value is in seconds. pub batch_interval: u64, + + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): This field + // should be removed if single node functionality is removed. + /// Whether ClickHouse is running as a replicated cluster or + /// single-node server. 
+ pub replicated: bool, } impl DbConfig { @@ -96,12 +101,16 @@ impl DbConfig { /// ClickHouse. pub const DEFAULT_BATCH_INTERVAL: u64 = 5; + /// Default ClickHouse topology. + pub const DEFAULT_REPLICATED: bool = false; + // Construct config with an address, using the defaults for other fields fn with_address(address: SocketAddr) -> Self { Self { address: Some(address), batch_size: Self::DEFAULT_BATCH_SIZE, batch_interval: Self::DEFAULT_BATCH_INTERVAL, + replicated: Self::DEFAULT_REPLICATED, } } } @@ -208,6 +217,7 @@ impl Oximeter { config.db, &resolver, &log, + config.db.replicated, ) .await?, )) @@ -251,14 +261,14 @@ impl Oximeter { let nexus_address = if let Some(address) = config.nexus_address { address } else { - SocketAddr::V6(SocketAddrV6::new( - resolver.lookup_ipv6(ServiceName::Nexus).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?, - NEXUS_INTERNAL_PORT, - 0, - 0, - )) + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::Nexus) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + ) }; let client = nexus_client::Client::new( &format!("http://{nexus_address}"), diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json new file mode 100644 index 0000000000..5d325281ab --- /dev/null +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -0,0 +1,91 @@ +{ + "oximeter_collector:collections": { + "timeseries_name": "oximeter_collector:collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + 
"source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.069658599Z" + }, + "oximeter_collector:failed_collections": { + "timeseries_name": "oximeter_collector:failed_collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + }, + { + "name": "reason", + "field_type": "string", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.070765692Z" + } +} \ No newline at end of file diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index e3cf089cb5..2a9c615da2 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -24,6 +24,7 @@ num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true +oxql-types.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true @@ -89,6 +90,7 @@ expectorate.workspace = true indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = true +oximeter-test-utils.workspace = true slog-dtrace.workspace = true sqlformat.workspace = true sqlparser.workspace = true diff --git a/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql new file mode 100644 index 0000000000..04158b36ce --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql new file mode 100644 index 0000000000..2e35dd2793 --- /dev/null +++ b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql new file mode 100644 index 0000000000..25e5303e5a --- /dev/null +++ b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql new file mode 100644 index 0000000000..f26fdedbb6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql new file mode 100644 index 0000000000..1bc623f418 --- /dev/null +++ b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql @@ -0,0 +1 
@@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql new file mode 100644 index 0000000000..b98bba1e88 --- /dev/null +++ b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql new file mode 100644 index 0000000000..bf3c16dde5 --- /dev/null +++ b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql new file mode 100644 index 0000000000..3ddb0eec84 --- /dev/null +++ b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql new file mode 100644 index 0000000000..58d599cf49 --- /dev/null +++ b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..94696b7b06 --- /dev/null +++ b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql new file mode 100644 index 0000000000..f621033d56 --- /dev/null +++ b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..4a01da9e74 --- /dev/null +++ b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql new file mode 100644 index 0000000000..173d803437 --- /dev/null +++ b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git 
a/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql new file mode 100644 index 0000000000..d9fcc84eba --- /dev/null +++ b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql new file mode 100644 index 0000000000..8c9aecca9d --- /dev/null +++ b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql new file mode 100644 index 0000000000..8d071424f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql new file mode 100644 index 0000000000..ac5fa948ae --- /dev/null +++ b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql 
b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql new file mode 100644 index 0000000000..3caa1b93f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql new file mode 100644 index 0000000000..ed6978c7e6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql new file mode 100644 index 0000000000..81ce8626a7 --- /dev/null +++ b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql new file mode 100644 index 0000000000..2a7c757dc8 --- /dev/null +++ b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql new file mode 100644 index 0000000000..cbe0b08fe4 --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql new file mode 100644 index 0000000000..d4854807b7 --- /dev/null +++ b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql new file mode 100644 index 0000000000..c84b634a00 --- /dev/null +++ b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql new file mode 100644 index 0000000000..60c28c0047 --- /dev/null +++ b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql new file mode 100644 index 0000000000..b38cdda831 --- /dev/null +++ b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql @@ -0,0 +1 @@ 
+ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql new file mode 100644 index 0000000000..cd533ffd8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql new file mode 100644 index 0000000000..1ea7093d8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql new file mode 100644 index 0000000000..f9f6464729 --- /dev/null +++ b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql new file mode 100644 index 0000000000..7c37ee9b21 --- /dev/null +++ b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql 
b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql new file mode 100644 index 0000000000..b15eab9387 --- /dev/null +++ b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql new file mode 100644 index 0000000000..caa96ab5eb --- /dev/null +++ b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql new file mode 100644 index 0000000000..25af5ee660 --- /dev/null +++ b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql new file mode 100644 index 0000000000..e85bd845d4 --- /dev/null +++ b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql new file mode 100644 index 
0000000000..d287a02c6f --- /dev/null +++ b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql new file mode 100644 index 0000000000..02eb09c300 --- /dev/null +++ b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/timeseries-to-delete.txt b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/replicated/9/timeseries-to-delete.txt b/oximeter/db/schema/replicated/9/timeseries-to-delete.txt new file mode 100644 index 0000000000..449d2e9155 --- /dev/null +++ b/oximeter/db/schema/replicated/9/timeseries-to-delete.txt @@ -0,0 +1,47 @@ +data_link:abort +data_link:b_e_r_check_done +data_link:b_e_r_check_start +data_link:bad_sync_headers +data_link:disabled +data_link:enabled +data_link:end +data_link:errored_blocks +data_link:fec_align +data_link:fec_corr_cnt +data_link:fec_hi_ser +data_link:fec_ser_lane0 +data_link:fec_ser_lane1 +data_link:fec_ser_lane2 +data_link:fec_ser_lane3 +data_link:fec_ser_lane4 +data_link:fec_ser_lane5 +data_link:fec_ser_lane6 +data_link:fec_ser_lane7 +data_link:fec_uncorr_cnt +data_link:idle +data_link:link_down +data_link:link_up +data_link:monitor_p_r_b_s_errors +data_link:pci_hi_ber +data_link:pcs_block_lock_loss +data_link:pcs_invalid_errors +data_link:pcs_sync_loss +data_link:pcs_unknown_errors +data_link:pcs_valid_errors 
+data_link:remote_fault +data_link:rx_buf_full +data_link:rx_bytes +data_link:rx_crc_errs +data_link:rx_errs +data_link:rx_pkts +data_link:tofino3_states +data_link:tx_bytes +data_link:tx_errs +data_link:tx_pkts +data_link:wait_auto_neg_done +data_link:wait_auto_neg_link_training_done +data_link:wait_d_f_e_done +data_link:wait_p_l_l_ready +data_link:wait_signal_o_k +data_link:wait_test_done +sidecar:sample_time diff --git a/oximeter/db/schema/replicated/db-init-1.sql b/oximeter/db/schema/replicated/db-init-1.sql index 176e5b64f7..4eac2b4e37 100644 --- a/oximeter/db/schema/replicated/db-init-1.sql +++ b/oximeter/db/schema/replicated/db-init-1.sql @@ -78,10 +78,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster AS oximeter.fields_i64_local @@ -93,10 +95,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_uuid_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster AS oximeter.fields_uuid_local diff --git a/oximeter/db/schema/replicated/db-init-2.sql 
b/oximeter/db/schema/replicated/db-init-2.sql index ae0431ec84..51e64e20e0 100644 --- a/oximeter/db/schema/replicated/db-init-2.sql +++ b/oximeter/db/schema/replicated/db-init-2.sql @@ -595,10 +595,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster AS oximeter.fields_bool_local @@ -609,10 +611,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_ipaddr_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster AS oximeter.fields_ipaddr_local @@ -623,10 +627,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_string_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + 
INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster AS oximeter.fields_string_local @@ -637,10 +643,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster AS oximeter.fields_i8_local @@ -651,10 +659,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster AS oximeter.fields_u8_local @@ -665,10 +675,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER 
oximeter_cluster AS oximeter.fields_i16_local @@ -679,10 +691,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster AS oximeter.fields_u16_local @@ -693,10 +707,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster AS oximeter.fields_i32_local @@ -707,10 +723,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster AS oximeter.fields_u32_local @@ -721,10 +739,12 @@ CREATE 
TABLE IF NOT EXISTS oximeter.fields_u64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster AS oximeter.fields_u64_local diff --git a/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql new file mode 100644 index 0000000000..86f46a43bf --- /dev/null +++ b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql new file mode 100644 index 0000000000..6ebec2d506 --- /dev/null +++ b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql new file mode 100644 index 0000000000..cc07b8cd1d --- /dev/null +++ b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql 
b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql new file mode 100644 index 0000000000..884b5ffed6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql new file mode 100644 index 0000000000..ef569d80c3 --- /dev/null +++ b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql new file mode 100644 index 0000000000..adfc3dd1a4 --- /dev/null +++ b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql new file mode 100644 index 0000000000..0f4e43ce2c --- /dev/null +++ b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql new file mode 100644 index 0000000000..8dcbb32bb2 --- /dev/null +++ b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MATERIALIZE COLUMN last_updated_at; diff 
--git a/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql new file mode 100644 index 0000000000..11a83bde7a --- /dev/null +++ b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql new file mode 100644 index 0000000000..d27f38f94f --- /dev/null +++ b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql new file mode 100644 index 0000000000..cd60a2a1e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql new file mode 100644 index 0000000000..5b1b2fcfb6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql new file mode 100644 index 0000000000..a71753f95d --- /dev/null +++ b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 ADD COLUMN IF NOT EXISTS 
last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql new file mode 100644 index 0000000000..c8dbfb494e --- /dev/null +++ b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql new file mode 100644 index 0000000000..30da688c8c --- /dev/null +++ b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql new file mode 100644 index 0000000000..eb0f377e2d --- /dev/null +++ b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql new file mode 100644 index 0000000000..9cd4fa05c8 --- /dev/null +++ b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql new file mode 100644 index 0000000000..5230634097 --- /dev/null +++ b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql @@ -0,0 +1 @@ 
+ALTER TABLE oximeter.fields_i32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql new file mode 100644 index 0000000000..9d967784e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql new file mode 100644 index 0000000000..f625138b59 --- /dev/null +++ b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql new file mode 100644 index 0000000000..fc80ce7102 --- /dev/null +++ b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql new file mode 100644 index 0000000000..26256d3924 --- /dev/null +++ b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql new file mode 100644 index 0000000000..a81294e535 --- 
/dev/null +++ b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql new file mode 100644 index 0000000000..43ca166755 --- /dev/null +++ b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql new file mode 100644 index 0000000000..46074c79ce --- /dev/null +++ b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql new file mode 100644 index 0000000000..a68d449de7 --- /dev/null +++ b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql new file mode 100644 index 0000000000..48afb51bf1 --- /dev/null +++ b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql new file mode 100644 index 
0000000000..d3c6be9072 --- /dev/null +++ b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql new file mode 100644 index 0000000000..5bdffd4b2e --- /dev/null +++ b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql new file mode 100644 index 0000000000..4551db90cd --- /dev/null +++ b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql new file mode 100644 index 0000000000..024c5f8f94 --- /dev/null +++ b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql new file mode 100644 index 0000000000..67d3b7a596 --- /dev/null +++ b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MATERIALIZE COLUMN last_updated_at; diff --git 
a/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql new file mode 100644 index 0000000000..c5272df459 --- /dev/null +++ b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql new file mode 100644 index 0000000000..8d01b382fe --- /dev/null +++ b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql new file mode 100644 index 0000000000..06fbd94d02 --- /dev/null +++ b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql new file mode 100644 index 0000000000..481055d4f5 --- /dev/null +++ b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/timeseries-to-delete.txt b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git 
a/oximeter/db/schema/single-node/9/timeseries-to-delete.txt b/oximeter/db/schema/single-node/9/timeseries-to-delete.txt new file mode 100644 index 0000000000..449d2e9155 --- /dev/null +++ b/oximeter/db/schema/single-node/9/timeseries-to-delete.txt @@ -0,0 +1,47 @@ +data_link:abort +data_link:b_e_r_check_done +data_link:b_e_r_check_start +data_link:bad_sync_headers +data_link:disabled +data_link:enabled +data_link:end +data_link:errored_blocks +data_link:fec_align +data_link:fec_corr_cnt +data_link:fec_hi_ser +data_link:fec_ser_lane0 +data_link:fec_ser_lane1 +data_link:fec_ser_lane2 +data_link:fec_ser_lane3 +data_link:fec_ser_lane4 +data_link:fec_ser_lane5 +data_link:fec_ser_lane6 +data_link:fec_ser_lane7 +data_link:fec_uncorr_cnt +data_link:idle +data_link:link_down +data_link:link_up +data_link:monitor_p_r_b_s_errors +data_link:pci_hi_ber +data_link:pcs_block_lock_loss +data_link:pcs_invalid_errors +data_link:pcs_sync_loss +data_link:pcs_unknown_errors +data_link:pcs_valid_errors +data_link:remote_fault +data_link:rx_buf_full +data_link:rx_bytes +data_link:rx_crc_errs +data_link:rx_errs +data_link:rx_pkts +data_link:tofino3_states +data_link:tx_bytes +data_link:tx_errs +data_link:tx_pkts +data_link:wait_auto_neg_done +data_link:wait_auto_neg_link_training_done +data_link:wait_d_f_e_done +data_link:wait_p_l_l_ready +data_link:wait_signal_o_k +data_link:wait_test_done +sidecar:sample_time diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 38e9d0b70c..184951feeb 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -504,126 +504,158 @@ TTL toDateTime(timestamp) + INTERVAL 30 DAY; * timeseries name and then key, since it would improve lookups where one * already has the key. Realistically though, these tables are quite small and * so performance benefits will be low in absolute terms. + * + * TTL: We use a materialized column to expire old field table records. 
This + * column is generated automatically by the database whenever a new row is + * inserted. It cannot be inserted directly, nor is it returned in a `SELECT *` + * query. Since these tables are `ReplacingMergeTree`s, that means the last + * record will remain during a deduplication, which will have the last + * timestamp. ClickHouse will then expire old data for us, similar to the + * measurement tables. */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, 
field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, 
field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; /* The timeseries schema table stores the extracted schema for the samples * oximeter collects. 
diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 30ae4b68d2..c2b07ebaa6 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -22,8 +22,6 @@ use crate::Error; use crate::Metric; use crate::Target; use crate::Timeseries; -use crate::TimeseriesKey; -use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; @@ -31,7 +29,9 @@ use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; +use oximeter::schema::TimeseriesKey; use oximeter::types::Sample; +use oximeter::TimeseriesName; use regex::Regex; use regex::RegexBuilder; use slog::debug; @@ -1191,7 +1191,6 @@ mod tests { }; use omicron_test_utils::dev::test_setup_log; use oximeter::histogram::Histogram; - use oximeter::test_util; use oximeter::types::MissingDatum; use oximeter::Datum; use oximeter::FieldValue; @@ -1723,7 +1722,7 @@ mod tests { let samples = { let mut s = Vec::with_capacity(8); for _ in 0..s.capacity() { - s.push(test_util::make_hist_sample()) + s.push(oximeter_test_utils::make_hist_sample()) } s }; @@ -1762,7 +1761,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample]).await.unwrap(); let bad_name = name_mismatch::TestTarget { @@ -1770,7 +1769,7 @@ mod tests { name2: "second_name".into(), num: 2, }; - let metric = test_util::TestMetric { + let metric = oximeter_test_utils::TestMetric { id: uuid::Uuid::new_v4(), good: true, datum: 1, @@ -1792,7 +1791,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); // Verify that this sample is considered new, i.e., we return rows to update the timeseries // schema table. 
@@ -1867,7 +1866,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let sample = samples.first().unwrap(); @@ -1956,7 +1955,7 @@ mod tests { // we'd like to exercise the logic of ClickHouse's replacing merge tree engine. let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; async fn assert_table_count( @@ -2631,7 +2630,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let original_schema = client.schema.lock().await.clone(); @@ -2656,7 +2655,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 100u32.try_into().unwrap(); @@ -2691,7 +2690,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 7u32.try_into().unwrap(); @@ -3364,7 +3363,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data hasn't been dropped. 
assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3438,7 +3437,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data gets dropped later. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3464,7 +3463,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; client.insert_samples(&samples).await.unwrap(); // Get the count of schema directly from the DB, which should have just @@ -3549,7 +3548,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; // We're using the components of the `insert_samples()` method here, // which has been refactored explicitly for this test. We need to insert diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 29586b8189..4005fa873e 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -18,7 +18,7 @@ use crate::query::field_table_name; use crate::Error; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; +use oximeter::schema::TimeseriesKey; use oximeter::TimeseriesSchema; use slog::debug; use slog::trace; @@ -68,7 +68,7 @@ pub struct OxqlResult { pub query_summaries: Vec, /// The list of OxQL tables returned from the query. 
- pub tables: Vec, + pub tables: Vec, } /// The maximum number of data values fetched from the database for an OxQL @@ -479,7 +479,9 @@ impl Client { query_id, total_duration: query_start.elapsed(), query_summaries, - tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + tables: vec![oxql_types::Table::new( + schema.timeseries_name.as_str(), + )], }; return Ok(result); } @@ -503,7 +505,7 @@ impl Client { // At this point, let's construct a set of tables and run the results // through the transformation pipeline. - let mut tables = vec![oxql::Table::from_timeseries( + let mut tables = vec![oxql_types::Table::from_timeseries( schema.timeseries_name.as_str(), timeseries_by_key.into_values(), )?]; @@ -553,7 +555,7 @@ impl Client { limit: Option, total_rows_fetched: &mut u64, ) -> Result< - (Vec, BTreeMap), + (Vec, BTreeMap), Error, > { // We'll create timeseries for each key on the fly. To enable computing @@ -624,25 +626,25 @@ impl Client { for (key, measurements) in measurements_by_key.into_iter() { // Constuct a new timeseries, from the target/metric info. let (target, metric) = info.get(&key).unwrap(); - let mut timeseries = oxql::Timeseries::new( + let mut timeseries = oxql_types::Timeseries::new( target .fields .iter() .chain(metric.fields.iter()) .map(|field| (field.name.clone(), field.value.clone())), - oxql::point::DataType::try_from(schema.datum_type)?, + oxql_types::point::DataType::try_from(schema.datum_type)?, if schema.datum_type.is_cumulative() { - oxql::point::MetricType::Delta + oxql_types::point::MetricType::Delta } else { - oxql::point::MetricType::Gauge + oxql_types::point::MetricType::Gauge }, )?; // Covert its oximeter measurements into OxQL data types. let points = if schema.datum_type.is_cumulative() { - oxql::point::Points::delta_from_cumulative(&measurements)? + oxql_types::point::Points::delta_from_cumulative(&measurements)? } else { - oxql::point::Points::gauge_from_gauge(&measurements)? 
+ oxql_types::point::Points::gauge_from_gauge(&measurements)? }; timeseries.points = points; debug!( @@ -1108,10 +1110,7 @@ fn update_total_rows_and_check( mod tests { use super::ConsistentKeyGroup; use crate::client::oxql::chunk_consistent_key_groups_impl; - use crate::{ - oxql::{point::Points, Table, Timeseries}, - Client, DbWrite, - }; + use crate::{Client, DbWrite}; use crate::{Metric, Target}; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; @@ -1119,6 +1118,7 @@ mod tests { use omicron_test_utils::dev::test_setup_log; use oximeter::{types::Cumulative, FieldValue}; use oximeter::{DatumType, Sample}; + use oxql_types::{point::Points, Table, Timeseries}; use std::collections::BTreeMap; use std::time::Duration; diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 9ad382c97d..5d56d802c9 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -14,6 +14,7 @@ use dropshot::EmptyScanParams; use dropshot::PaginationParams; pub use oximeter::schema::FieldSchema; pub use oximeter::schema::FieldSource; +use oximeter::schema::TimeseriesKey; pub use oximeter::schema::TimeseriesName; pub use oximeter::schema::TimeseriesSchema; pub use oximeter::DatumType; @@ -267,8 +268,6 @@ pub async fn make_client( Ok(client) } -pub(crate) type TimeseriesKey = u64; - // TODO-cleanup: Add the timeseries version in to the computation of the key. // This will require a full drop of the database, since we're changing the // sorting key and the timeseries key on each past sample. 
See diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 05667058b5..7608f81e45 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -11,13 +11,13 @@ use crate::FieldSchema; use crate::FieldSource; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; use crate::TimeseriesSchema; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; use num::traits::Zero; use oximeter::histogram::Histogram; +use oximeter::schema::TimeseriesKey; use oximeter::traits; use oximeter::types::Cumulative; use oximeter::types::Datum; @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 8; +pub const OXIMETER_VERSION: u64 = 10; // Wrapper type to represent a boolean in the database. // @@ -1880,7 +1880,6 @@ mod tests { use super::*; use chrono::Timelike; use oximeter::histogram::Record; - use oximeter::test_util; use oximeter::Datum; #[test] @@ -1983,7 +1982,7 @@ mod tests { #[test] fn test_unroll_from_source() { - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); let out = unroll_from_source(&sample); assert_eq!(out["oximeter.fields_string"].len(), 2); assert_eq!(out["oximeter.fields_i64"].len(), 1); @@ -2003,8 +2002,8 @@ mod tests { // datum. 
#[test] fn test_unroll_missing_measurement_row() { - let sample = test_util::make_sample(); - let missing_sample = test_util::make_missing_sample(); + let sample = oximeter_test_utils::make_sample(); + let missing_sample = oximeter_test_utils::make_missing_sample(); let (table_name, row) = unroll_measurement_row(&sample); let (missing_table_name, missing_row) = unroll_measurement_row(&missing_sample); @@ -2022,7 +2021,7 @@ mod tests { #[test] fn test_unroll_measurement_row() { - let sample = test_util::make_hist_sample(); + let sample = oximeter_test_utils::make_hist_sample(); let (table_name, row) = unroll_measurement_row(&sample); assert_eq!(table_name, "oximeter.measurements_histogramf64"); let unpacked: HistogramF64MeasurementRow = diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index a7585402b6..62182ec553 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -189,11 +189,11 @@ peg::parser! { rule dashed_uuid_literal() -> Uuid = s:$( "\"" - ['a'..='f' | '0'..='9']*<8> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<12> + ['a'..='f' | 'A'..='F' | '0'..='9']*<8> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<12> "\"" ) {? let Some(middle) = s.get(1..37) else { @@ -202,7 +202,7 @@ peg::parser! { middle.parse().or(Err("invalid UUID literal")) } rule undashed_uuid_literal() -> Uuid - = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + = s:$("\"" ['a'..='f' | 'A'..='F' | '0'..='9']*<32> "\"") {? let Some(middle) = s.get(1..33) else { return Err("invalid UUID literal"); }; @@ -279,11 +279,27 @@ peg::parser! 
{ pub rule string_literal() -> Literal = s:string_literal_impl() { Literal::String(s) } + pub(super) rule hex_integer_literal_impl() -> i128 + = n:$("0x" ['0'..='9' | 'a'..='f' | 'A'..='F']+ !['.']) + {? + let Some((maybe_sign, digits)) = n.split_once("0x") else { + return Err("hex literals should start with '0x'"); + }; + i128::from_str_radix(digits, 16).map_err(|_| "invalid hex literal") + } + + pub(super) rule dec_integer_literal_impl() -> i128 + = n:$(['0'..='9']+ !['e' | 'E' | '.']) + {? + n.parse().map_err(|_| "integer literal") + } + pub(super) rule integer_literal_impl() -> i128 - = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + = maybe_sign:$("-"?) n:(hex_integer_literal_impl() / dec_integer_literal_impl()) {? - let Ok(x) = n.parse() else { - return Err("integer literal"); + let sign = if maybe_sign == "-" { -1 } else { 1 }; + let Some(x) = n.checked_mul(sign) else { + return Err("negative overflow"); }; if x < i128::from(i64::MIN) { Err("negative overflow") @@ -734,17 +750,49 @@ mod tests { .is_err()); } + #[test] + fn test_uuid_literal_is_case_insensitive() { + const ID: Uuid = uuid::uuid!("880D82A1-102F-4699-BE1A-7E2A6A469E8E"); + let as_str = format!("\"{ID}\""); + let as_lower = as_str.to_lowercase(); + assert_eq!(query_parser::uuid_literal_impl(&as_str).unwrap(), ID,); + assert_eq!(query_parser::uuid_literal_impl(&as_lower).unwrap(), ID,); + } + #[test] fn test_integer_literal() { assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); - assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); assert!(query_parser::integer_literal_impl("-1.0").is_err()); assert!(query_parser::integer_literal_impl("-1.").is_err()); assert!(query_parser::integer_literal_impl("1e3").is_err()); } + #[test] + fn test_hex_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("0x1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-0x1").unwrap(), -1); + 
assert_eq!(query_parser::integer_literal_impl("-0xa").unwrap(), -0xa); + assert_eq!( + query_parser::integer_literal_impl("0xfeed").unwrap(), + 0xfeed + ); + assert_eq!( + query_parser::integer_literal_impl("0xFEED").unwrap(), + 0xfeed + ); + + // Out of range in either direction + assert!(query_parser::integer_literal_impl("0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + assert!(query_parser::integer_literal_impl("-0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + + assert!(query_parser::integer_literal_impl("-0x1.0").is_err()); + assert!(query_parser::integer_literal_impl("-0x1.").is_err()); + } + #[test] fn test_double_literal() { assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs index cf54ebc312..b0cd7d80f1 100644 --- a/oximeter/db/src/oxql/ast/table_ops/align.rs +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -6,19 +6,19 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::query::Alignment; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::TimeDelta; use chrono::Utc; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::point::Values; +use oxql_types::Alignment; +use oxql_types::Table; +use oxql_types::Timeseries; use std::time::Duration; // The maximum factor by which an alignment operation may upsample data. 
@@ -144,7 +144,7 @@ fn align_mean_within( "Alignment by mean requires a gauge or delta metric, not {}", metric_type, ); - verify_max_upsampling_ratio(&points.timestamps, &period)?; + verify_max_upsampling_ratio(points.timestamps(), &period)?; // Always convert the output to doubles, when computing the mean. The // output is always a gauge, so we do not need the start times of the @@ -179,7 +179,7 @@ fn align_mean_within( // - Compute the mean of those. let period_ = TimeDelta::from_std(*period).context("time delta out of range")?; - let first_timestamp = points.timestamps[0]; + let first_timestamp = points.timestamps()[0]; let mut ix: u32 = 0; loop { // Compute the next output timestamp, by shifting the query end time @@ -220,15 +220,15 @@ fn align_mean_within( // entries. let output_value = if matches!(metric_type, MetricType::Gauge) { mean_gauge_value_in_window( - &points.timestamps, + points.timestamps(), &input_points, window_start, output_time, ) } else { mean_delta_value_in_window( - points.start_times.as_ref().unwrap(), - &points.timestamps, + points.start_times().unwrap(), + points.timestamps(), &input_points, window_start, output_time, @@ -255,10 +255,9 @@ fn align_mean_within( ValueArray::Double(output_values.into_iter().rev().collect()); let timestamps = output_timestamps.into_iter().rev().collect(); let values = Values { values, metric_type: MetricType::Gauge }; - new_timeseries.points = - Points { start_times: None, timestamps, values: vec![values] }; - new_timeseries.alignment = - Some(Alignment { end_time: *query_end, period: *period }); + new_timeseries.points = Points::new(None, timestamps, vec![values]); + new_timeseries + .set_alignment(Alignment { end_time: *query_end, period: *period }); output_table.insert(new_timeseries).unwrap(); } Ok(output_table) diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index b6fc533e4d..ad398da983 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ 
b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -12,18 +12,18 @@ use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use crate::shells::special_idents; use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; use oximeter::FieldValue; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use regex::Regex; use std::collections::BTreeSet; use std::fmt; @@ -340,16 +340,13 @@ impl Filter { // Apply the filter to the data points as well. let points = self.filter_points(&input.points)?; - // Similar to above, if the filter removes all data points in - // the timeseries, let's remove the timeseries altogether. - if points.is_empty() { - continue; + if let Some(new_timeseries) = input.copy_with_points(points) { + timeseries.push(new_timeseries); + } else { + // None means that the filter removed all data points in + // the timeseries. In that case, we remove the timeseries + // altogether. 
} - timeseries.push(Timeseries { - fields: input.fields.clone(), - points, - alignment: input.alignment, - }) } output_tables.push(Table::from_timeseries( table.name(), @@ -823,7 +820,7 @@ impl SimpleFilter { ) -> Result, Error> { let ident = self.ident.as_str(); if ident == "timestamp" { - self.filter_points_by_timestamp(negated, &points.timestamps) + self.filter_points_by_timestamp(negated, points.timestamps()) } else if ident == "datum" { anyhow::ensure!( points.dimensionality() == 1, @@ -1151,15 +1148,15 @@ impl SimpleFilter { mod tests { use crate::oxql::ast::grammar::query_parser; use crate::oxql::ast::logical_op::LogicalOp; - use crate::oxql::point::DataType; - use crate::oxql::point::MetricType; - use crate::oxql::point::Points; - use crate::oxql::point::ValueArray; - use crate::oxql::point::Values; - use crate::oxql::Table; - use crate::oxql::Timeseries; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::point::DataType; + use oxql_types::point::MetricType; + use oxql_types::point::Points; + use oxql_types::point::ValueArray; + use oxql_types::point::Values; + use oxql_types::Table; + use oxql_types::Timeseries; use std::time::Duration; use uuid::Uuid; @@ -1172,7 +1169,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); // This filter should remove the first point based on its timestamp. 
let t = Utc::now() + Duration::from_secs(10); @@ -1205,7 +1202,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); let filter = query_parser::filter("filter datum < \"something\"").unwrap(); diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index f40572d762..c48804a788 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -10,13 +10,13 @@ use chrono::DateTime; use chrono::Utc; use crate::oxql::ast::ident::Ident; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::ValueArray; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; -use crate::TimeseriesKey; +use anyhow::Error; +use oximeter::schema::TimeseriesKey; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -98,7 +98,7 @@ impl GroupBy { ValueArray::Double(new_values), ValueArray::Double(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -106,7 +106,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -152,10 +152,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. 
- std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -166,7 +163,7 @@ impl GroupBy { ValueArray::Integer(new_values), ValueArray::Integer(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -174,7 +171,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -220,10 +217,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -286,14 +280,15 @@ impl GroupBy { else { unreachable!(); }; - let new_timestamps = &new_points.timestamps; + let new_timestamps = new_points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree // to which we can easily do this on the `existing` // entry in the output table. Instead, aggregate // everything into a copy of the expected data. - let mut timestamps = existing.points.timestamps.clone(); + let mut timestamps = + existing.points.timestamps().to_owned(); let mut values = existing .points .values(0) @@ -360,10 +355,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -388,7 +380,7 @@ impl GroupBy { // _zero_ for any where the values are none. 
let counts = new_timeseries .points - .timestamps + .timestamps() .iter() .zip(values) .map(|(timestamp, maybe_value)| { @@ -434,16 +426,16 @@ pub enum Reducer { #[cfg(test)] mod tests { use super::{GroupBy, Reducer}; - use crate::oxql::{ - ast::{ - ident::Ident, - table_ops::align::{Align, AlignmentMethod}, - }, - point::{DataType, MetricType, ValueArray}, - Table, Timeseries, + use crate::oxql::ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, }; use chrono::{DateTime, Utc}; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; // Which timeseries the second data point is missing from. @@ -495,8 +487,8 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts0.points.start_times = None; - ts0.points.timestamps.clone_from(×tamps); + ts0.points.clear_start_times(); + ts0.points.set_timestamps(timestamps.clone()); *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(1.0), if matches!( @@ -527,7 +519,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts1.points.start_times = None; + ts1.points.clear_start_times(); // Non-overlapping in this test setup means that we just shift one // value from this array backward in time by one additional second. @@ -538,7 +530,7 @@ mod tests { // // When reducing, t0 is never changed, and t1-t2 are always reduced // together, if the values are present. 
- ts1.points.timestamps = if cfg.overlapping_times { + let new_timestamps = if cfg.overlapping_times { timestamps.clone() } else { let mut new_timestamps = timestamps.clone(); @@ -546,6 +538,7 @@ mod tests { timestamps.insert(0, new_timestamps[0]); new_timestamps }; + ts1.points.set_timestamps(new_timestamps); *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(2.0), if matches!(cfg.missing_value, MissingValue::Both) { @@ -604,11 +597,13 @@ mod tests { let points = &grouped_timeseries.points; assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); assert_eq!( - points.start_times, None, + points.start_times(), + None, "Points should not have start times" ); assert_eq!( - points.timestamps, test.timestamps, + points.timestamps(), + test.timestamps, "Points do not have correct timestamps" ); diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs index 3c150a4acf..2893f6cf3e 100644 --- a/oximeter/db/src/oxql/ast/table_ops/join.rs +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -6,12 +6,10 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; use anyhow::Context; +use anyhow::Error; +use oxql_types::point::MetricType; +use oxql_types::Table; /// An AST node for a natural inner join. #[derive(Clone, Copy, Debug, PartialEq)] @@ -80,10 +78,8 @@ impl Join { // 1. They have the same alignment, and // 2. We merge the timepoints rather than simply creating a // ragged array of points. - timeseries.points = inner_join_point_arrays( - ×eries.points, - &next_timeseries.points, - )?; + timeseries.points = + timeseries.points.inner_join(&next_timeseries.points)?; } // We'll also update the name, to indicate the joined data. out.name.push(','); @@ -93,101 +89,6 @@ impl Join { } } -// Given two arrays of points, stack them together at matching timepoints. 
-// -// For time points in either which do not have a corresponding point in the -// other, the entire time point is elided. -fn inner_join_point_arrays( - left: &Points, - right: &Points, -) -> Result { - // Create an output array with roughly the right capacity, and double the - // number of dimensions. We're trying to stack output value arrays together - // along the dimension axis. - let data_types = - left.data_types().chain(right.data_types()).collect::>(); - let metric_types = - left.metric_types().chain(right.metric_types()).collect::>(); - let mut out = Points::with_capacity( - left.len().max(right.len()), - data_types.iter().copied(), - metric_types.iter().copied(), - )?; - - // Iterate through each array until one is exhausted. We're only inserting - // values from both arrays where the timestamps actually match, since this - // is an inner join. We may want to insert missing values where timestamps - // do not match on either side, when we support an outer join of some kind. - let n_left_dim = left.values.len(); - let mut left_ix = 0; - let mut right_ix = 0; - while left_ix < left.len() && right_ix < right.len() { - let left_timestamp = left.timestamps[left_ix]; - let right_timestamp = right.timestamps[right_ix]; - if left_timestamp == right_timestamp { - out.timestamps.push(left_timestamp); - push_concrete_values( - &mut out.values[..n_left_dim], - &left.values, - left_ix, - ); - push_concrete_values( - &mut out.values[n_left_dim..], - &right.values, - right_ix, - ); - left_ix += 1; - right_ix += 1; - } else if left_timestamp < right_timestamp { - left_ix += 1; - } else { - right_ix += 1; - } - } - Ok(out) -} - -// Push the `i`th value from each dimension of `from` onto `to`. 
-fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { - assert_eq!(to.len(), from.len()); - for (output, input) in to.iter_mut().zip(from.iter()) { - let input_array = &input.values; - let output_array = &mut output.values; - assert_eq!(input_array.data_type(), output_array.data_type()); - if let Ok(ints) = input_array.as_integer() { - output_array.as_integer_mut().unwrap().push(ints[i]); - continue; - } - if let Ok(doubles) = input_array.as_double() { - output_array.as_double_mut().unwrap().push(doubles[i]); - continue; - } - if let Ok(bools) = input_array.as_boolean() { - output_array.as_boolean_mut().unwrap().push(bools[i]); - continue; - } - if let Ok(strings) = input_array.as_string() { - output_array.as_string_mut().unwrap().push(strings[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_integer_distribution() { - output_array - .as_integer_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_double_distribution() { - output_array - .as_double_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - unreachable!(); - } -} - // Return an error if any metric types are not suitable for joining. 
fn ensure_all_metric_types( mut metric_types: impl ExactSizeIterator, @@ -200,186 +101,3 @@ fn ensure_all_metric_types( ); Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::oxql::point::DataType; - use crate::oxql::point::Datum; - use crate::oxql::point::ValueArray; - use chrono::Utc; - use std::time::Duration; - - #[test] - fn test_push_concrete_values() { - let mut points = Points::with_capacity( - 2, - [DataType::Integer, DataType::Double].into_iter(), - [MetricType::Gauge, MetricType::Gauge].into_iter(), - ) - .unwrap(); - - // Push a concrete value for the integer dimension - let from_ints = vec![Values { - values: ValueArray::Integer(vec![Some(1)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[..1], &from_ints, 0); - - // And another for the double dimension. - let from_doubles = vec![Values { - values: ValueArray::Double(vec![Some(2.0)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[1..], &from_doubles, 0); - - assert_eq!( - points.dimensionality(), - 2, - "Points should have 2 dimensions", - ); - let ints = points.values[0].values.as_integer().unwrap(); - assert_eq!( - ints.len(), - 1, - "Should have pushed one point in the first dimension" - ); - assert_eq!( - ints[0], - Some(1), - "Should have pushed 1 onto the first dimension" - ); - let doubles = points.values[1].values.as_double().unwrap(); - assert_eq!( - doubles.len(), - 1, - "Should have pushed one point in the second dimension" - ); - assert_eq!( - doubles[0], - Some(2.0), - "Should have pushed 2.0 onto the second dimension" - ); - } - - #[test] - fn test_join_point_arrays() { - let now = Utc::now(); - - // Create a set of integer points to join with. - // - // This will have two timestamps, one of which will match the points - // below that are merged in. 
- let int_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(2), - now, - ], - values: vec![Values { - values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), - metric_type: MetricType::Gauge, - }], - }; - - // Create an additional set of double points. - // - // This also has two timepoints, one of which matches with the above, - // and one of which does not. - let double_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(1), - now, - ], - values: vec![Values { - values: ValueArray::Double(vec![ - Some(4.0), - Some(5.0), - Some(6.0), - ]), - metric_type: MetricType::Gauge, - }], - }; - - // Merge the arrays. - let merged = - inner_join_point_arrays(&int_points, &double_points).unwrap(); - - // Basic checks that we merged in the right values and have the right - // types and dimensions. - assert_eq!( - merged.dimensionality(), - 2, - "Should have appended the dimensions from each input array" - ); - assert_eq!(merged.len(), 2, "Should have merged two common points",); - assert_eq!( - merged.data_types().collect::>(), - &[DataType::Integer, DataType::Double], - "Should have combined the data types of the input arrays" - ); - assert_eq!( - merged.metric_types().collect::>(), - &[MetricType::Gauge, MetricType::Gauge], - "Should have combined the metric types of the input arrays" - ); - - // Check the actual values of the array. - let mut points = merged.iter_points(); - - // The first and last timepoint overlapped between the two arrays, so we - // should have both of them as concrete samples. 
- let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&1)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&4.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And the next point - let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&3)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&6.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And there should be no other values. 
- assert!(points.next().is_none(), "There should be no more points"); - } -} diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 0205868f5c..89afb31a7c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -6,12 +6,8 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; +use anyhow::Error; +use oxql_types::Table; use std::num::NonZeroUsize; /// The kind of limiting operation @@ -65,58 +61,7 @@ impl Limit { } }; - // Slice the various data arrays. - let start_times = input_points - .start_times - .as_ref() - .map(|s| s[start..end].to_vec()); - let timestamps = - input_points.timestamps[start..end].to_vec(); - let values = input_points - .values - .iter() - .map(|vals| { - let values = match &vals.values { - ValueArray::Integer(inner) => { - ValueArray::Integer( - inner[start..end].to_vec(), - ) - } - ValueArray::Double(inner) => { - ValueArray::Double( - inner[start..end].to_vec(), - ) - } - ValueArray::Boolean(inner) => { - ValueArray::Boolean( - inner[start..end].to_vec(), - ) - } - ValueArray::String(inner) => { - ValueArray::String( - inner[start..end].to_vec(), - ) - } - ValueArray::IntegerDistribution(inner) => { - ValueArray::IntegerDistribution( - inner[start..end].to_vec(), - ) - } - ValueArray::DoubleDistribution(inner) => { - ValueArray::DoubleDistribution( - inner[start..end].to_vec(), - ) - } - }; - Values { values, metric_type: vals.metric_type } - }) - .collect(); - let points = Points { start_times, timestamps, values }; - Timeseries { - fields: timeseries.fields.clone(), - points, - alignment: timeseries.alignment, - } + timeseries.limit(start, end) }); Table::from_timeseries(table.name(), timeseries) }) @@ -127,9 +72,12 @@ impl Limit { #[cfg(test)] mod tests { use 
super::*; - use crate::oxql::point::{DataType, MetricType}; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType}, + Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; fn test_tables() -> Vec { @@ -150,12 +98,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(1), - Some(2), - Some(3), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(1), Some(2), Some(3)]); let table1 = Table::from_timeseries("first", std::iter::once(timeseries)) .unwrap(); @@ -166,12 +116,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(4), - Some(5), - Some(6), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(4), Some(5), Some(6)]); let table2 = Table::from_timeseries("second", std::iter::once(timeseries)) .unwrap(); @@ -223,7 +175,8 @@ mod tests { "Limited table should have the same fields" ); assert_eq!( - timeseries.alignment, limited_timeseries.alignment, + timeseries.alignment(), + limited_timeseries.alignment(), "Limited timeseries should have the same alignment" ); assert_eq!( @@ -237,14 +190,15 @@ mod tests { // These depend on the limit operation. 
let points = ×eries.points; let limited_points = &limited_timeseries.points; - assert_eq!(points.start_times, limited_points.start_times); + assert_eq!(points.start_times(), limited_points.start_times()); assert_eq!( - points.timestamps[start..end], - limited_points.timestamps + &points.timestamps()[start..end], + limited_points.timestamps() ); assert_eq!( - limited_points.values[0].values.as_integer().unwrap(), - &points.values[0].values.as_integer().unwrap()[start..end], + limited_points.values(0).unwrap().as_integer().unwrap(), + &points.values(0).unwrap().as_integer().unwrap() + [start..end], "Points should be limited to [{start}..{end}]", ); } diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index 46f5106a08..8b8d4cbe1b 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -20,10 +20,10 @@ use self::join::Join; use self::limit::Limit; use crate::oxql::ast::Query; use crate::oxql::Error; -use crate::oxql::Table; use chrono::DateTime; use chrono::Utc; use oximeter::TimeseriesName; +use oxql_types::Table; /// A basic table operation, the atoms of an OxQL query. #[derive(Clone, Debug, PartialEq)] diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs index 3961fae1cc..fcdfb783c5 100644 --- a/oximeter/db/src/oxql/mod.rs +++ b/oximeter/db/src/oxql/mod.rs @@ -10,13 +10,9 @@ use peg::error::ParseError as PegError; use peg::str::LineCol; pub mod ast; -pub mod point; pub mod query; -pub mod table; pub use self::query::Query; -pub use self::table::Table; -pub use self::table::Timeseries; pub use anyhow::Error; /// Format a PEG parsing error into a nice anyhow error. 
diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index e1fada9f2a..46c9bbc92c 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -23,7 +23,6 @@ use crate::oxql::Error; use crate::TimeseriesName; use chrono::DateTime; use chrono::Utc; -use std::time::Duration; /// A parsed OxQL query. #[derive(Clone, Debug, PartialEq)] @@ -391,15 +390,6 @@ fn restrict_filter_idents( } } -/// Describes the time alignment for an OxQL query. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Alignment { - /// The end time of the query, which the temporal reference point. - pub end_time: DateTime, - /// The alignment period, the interval on which values are produced. - pub period: Duration, -} - #[cfg(test)] mod tests { use super::Filter; diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index ceabf00888..556ced0437 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -6,11 +6,12 @@ // Copyright 2021 Oxide Computer Company use crate::{ - Error, FieldSchema, FieldSource, TimeseriesKey, TimeseriesSchema, - DATABASE_NAME, DATABASE_SELECT_FORMAT, + Error, FieldSchema, FieldSource, TimeseriesSchema, DATABASE_NAME, + DATABASE_SELECT_FORMAT, }; use chrono::{DateTime, Utc}; use dropshot::PaginationOrder; +use oximeter::schema::TimeseriesKey; use oximeter::types::{DatumType, FieldType, FieldValue}; use oximeter::{Metric, Target}; use regex::Regex; diff --git a/oximeter/db/src/shells/oxql.rs b/oximeter/db/src/shells/oxql.rs index 0f23ea7d64..f46d08c0cf 100644 --- a/oximeter/db/src/shells/oxql.rs +++ b/oximeter/db/src/shells/oxql.rs @@ -7,9 +7,10 @@ // Copyright 2024 Oxide Computer use super::{list_timeseries, prepare_columns}; -use crate::{make_client, oxql::Table, Client, OxqlResult}; +use crate::{make_client, Client, OxqlResult}; use clap::Args; use crossterm::style::Stylize; +use oxql_types::Table; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; 
diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index 732683c414..f5d81d51d1 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -10,7 +10,6 @@ use clickward::{ use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::poll; use omicron_test_utils::dev::test_setup_log; -use oximeter::test_util; use oximeter_db::{Client, DbWrite, OxqlResult, Sample, TestDbWrite}; use slog::{debug, info, Logger}; use std::collections::BTreeSet; @@ -199,7 +198,7 @@ async fn test_cluster() -> anyhow::Result<()> { // Let's write some samples to our first replica and wait for them to show // up on replica 2. let start = tokio::time::Instant::now(); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -261,7 +260,7 @@ async fn test_cluster() -> anyhow::Result<()> { info!(log, "successfully stopped server 1"); // Generate some new samples and insert them at replica3 - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -298,7 +297,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); // We still have a quorum (2 of 3 keepers), so we should be able to insert - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -321,7 +320,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); info!(log, "Attempting to insert samples without keeper quorum"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -350,7 +349,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) 
.await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -370,7 +369,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, diff --git a/oximeter/impl/src/test_util.rs b/oximeter/impl/src/test_util.rs deleted file mode 100644 index c2ac7b34bd..0000000000 --- a/oximeter/impl/src/test_util.rs +++ /dev/null @@ -1,130 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Utilities for testing the oximeter crate. -// Copyright 2024 Oxide Computer Company - -use crate::histogram; -use crate::histogram::{Histogram, Record}; -use crate::types::{Cumulative, Sample}; -use uuid::Uuid; - -#[derive(oximeter::Target)] -pub struct TestTarget { - pub name1: String, - pub name2: String, - pub num: i64, -} - -impl Default for TestTarget { - fn default() -> Self { - TestTarget { - name1: "first_name".into(), - name2: "second_name".into(), - num: 0, - } - } -} - -#[derive(oximeter::Metric)] -pub struct TestMetric { - pub id: Uuid, - pub good: bool, - pub datum: i64, -} - -#[derive(oximeter::Metric)] -pub struct TestCumulativeMetric { - pub id: Uuid, - pub good: bool, - pub datum: Cumulative, -} - -#[derive(oximeter::Metric)] -pub struct TestHistogram { - pub id: Uuid, - pub good: bool, - pub datum: Histogram, -} - -const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); - -pub fn make_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new(&target, &metric).unwrap() -} - -pub fn 
make_missing_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new_missing(&target, &metric).unwrap() -} - -pub fn make_hist_sample() -> Sample { - let target = TestTarget::default(); - let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); - hist.sample(1.0).unwrap(); - hist.sample(2.0).unwrap(); - hist.sample(6.0).unwrap(); - let metric = TestHistogram { id: ID, good: true, datum: hist }; - Sample::new(&target, &metric).unwrap() -} - -/// A target identifying a single virtual machine instance -#[derive(Debug, Clone, Copy, oximeter::Target)] -pub struct VirtualMachine { - pub project_id: Uuid, - pub instance_id: Uuid, -} - -/// A metric recording the total time a vCPU is busy, by its ID -#[derive(Debug, Clone, Copy, oximeter::Metric)] -pub struct CpuBusy { - cpu_id: i64, - datum: Cumulative, -} - -pub fn generate_test_samples( - n_projects: usize, - n_instances: usize, - n_cpus: usize, - n_samples: usize, -) -> Vec { - let n_timeseries = n_projects * n_instances * n_cpus; - let mut samples = Vec::with_capacity(n_samples * n_timeseries); - for _ in 0..n_projects { - let project_id = Uuid::new_v4(); - for _ in 0..n_instances { - let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; - for cpu in 0..n_cpus { - for sample in 0..n_samples { - let cpu_busy = CpuBusy { - cpu_id: cpu as _, - datum: Cumulative::new(sample as f64), - }; - let sample = Sample::new(&vm, &cpu_busy).unwrap(); - samples.push(sample); - } - } - } - } - samples -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gen_test_samples() { - let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); - let samples = - generate_test_samples(n_projects, n_instances, n_cpus, n_samples); - assert_eq!( - samples.len(), - n_projects * n_instances * n_cpus * n_samples - ); - } -} diff --git a/oximeter/instruments/src/http.rs b/oximeter/instruments/src/http.rs index 
6a0a35ce63..2eef327d02 100644 --- a/oximeter/instruments/src/http.rs +++ b/oximeter/instruments/src/http.rs @@ -6,17 +6,14 @@ // Copyright 2024 Oxide Computer Company -use dropshot::{ - HttpError, HttpResponse, RequestContext, RequestInfo, ServerContext, -}; +use dropshot::{HttpError, HttpResponse, RequestContext, ServerContext}; use futures::Future; use http::StatusCode; -use http::Uri; use oximeter::{ histogram::Histogram, histogram::Record, MetricsError, Producer, Sample, }; -use std::borrow::Cow; -use std::collections::BTreeMap; +use std::collections::HashMap; +use std::hash::{DefaultHasher, Hash as _, Hasher}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -24,28 +21,18 @@ oximeter::use_timeseries!("http-service.toml"); pub use http_service::HttpService; pub use http_service::RequestLatencyHistogram; -// Return the route portion of the request, normalized to include a single -// leading slash and no trailing slashes. -fn normalized_uri_path(uri: &Uri) -> Cow<'static, str> { - Cow::Owned(format!( - "/{}", - uri.path().trim_end_matches('/').trim_start_matches('/') - )) -} - impl RequestLatencyHistogram { /// Build a new `RequestLatencyHistogram` with a specified histogram. /// /// Latencies are expressed in seconds. pub fn new( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, histogram: Histogram, ) -> Self { Self { - route: normalized_uri_path(request.uri()), - method: request.method().to_string().into(), - status_code: status_code.as_u16().into(), + operation_id: operation_id.to_string().into(), + status_code: status_code.as_u16(), datum: histogram, } } @@ -59,25 +46,27 @@ impl RequestLatencyHistogram { /// /// Latencies are expressed as seconds. 
pub fn with_latency_decades( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, start_decade: i16, end_decade: i16, ) -> Result { Ok(Self::new( - request, + operation_id, status_code, Histogram::span_decades(start_decade, end_decade)?, )) } - fn key_for(request: &RequestInfo, status_code: StatusCode) -> String { - format!( - "{}:{}:{}", - normalized_uri_path(request.uri()), - request.method(), - status_code.as_u16() - ) + /// Return a key used to ID this histogram. + /// + /// This is a quick way to look up the histogram tracking any particular + /// request and response. + fn key_for(operation_id: &str, status_code: StatusCode) -> u64 { + let mut hasher = DefaultHasher::new(); + operation_id.hash(&mut hasher); + status_code.hash(&mut hasher); + hasher.finish() } } @@ -92,8 +81,19 @@ impl RequestLatencyHistogram { /// The `LatencyTracker` can be used to produce metric data collected by `oximeter`. #[derive(Debug, Clone)] pub struct LatencyTracker { + /// The HTTP service target for which we're tracking request histograms. pub service: HttpService, - latencies: Arc>>, + /// The latency histogram for each request. + /// + /// The map here use a hash of the request fields (operation and status + /// code) as the key to each histogram. It's a bit redundant to then store + /// that in a hashmap, but this lets us avoid creating a new + /// `RequestLatencyHistogram` when handling a request that we already have + /// one for. Instead, we use this key to get the existing entry. + latencies: Arc>>, + /// The histogram used to track each request. + /// + /// We store it here to clone as we see new requests. histogram: Histogram, } @@ -104,7 +104,7 @@ impl LatencyTracker { pub fn new(service: HttpService, histogram: Histogram) -> Self { Self { service, - latencies: Arc::new(Mutex::new(BTreeMap::new())), + latencies: Arc::new(Mutex::new(HashMap::new())), histogram, } } @@ -129,15 +129,15 @@ impl LatencyTracker { /// to which the other arguments belong. 
(One is created if it does not exist.) pub fn update( &self, - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, latency: Duration, ) -> Result<(), MetricsError> { - let key = RequestLatencyHistogram::key_for(request, status_code); + let key = RequestLatencyHistogram::key_for(operation_id, status_code); let mut latencies = self.latencies.lock().unwrap(); let entry = latencies.entry(key).or_insert_with(|| { RequestLatencyHistogram::new( - request, + operation_id, status_code, self.histogram.clone(), ) @@ -170,14 +170,14 @@ impl LatencyTracker { Ok(response) => response.status_code(), Err(ref e) => e.status_code, }; - if let Err(e) = self.update(&context.request, status_code, latency) { + if let Err(e) = self.update(&context.operation_id, status_code, latency) + { slog::error!( &context.log, "error instrumenting dropshot handler"; "error" => ?e, "status_code" => status_code.as_u16(), - "method" => %context.request.method(), - "uri" => %context.request.uri(), + "operation_id" => &context.operation_id, "remote_addr" => context.request.remote_addr(), "latency" => ?latency, ); @@ -220,41 +220,24 @@ mod tests { HttpService { name: "my-service".into(), id: ID.parse().unwrap() }; let hist = Histogram::new(&[0.0, 1.0]).unwrap(); let tracker = LatencyTracker::new(service, hist); - let request = http::request::Builder::new() - .method(http::Method::GET) - .uri("/some/uri") - .body(()) + let status_code0 = StatusCode::OK; + let status_code1 = StatusCode::NOT_FOUND; + let operation_id = "some_operation_id"; + tracker + .update(operation_id, status_code0, Duration::from_secs_f64(0.5)) .unwrap(); - let status_code = StatusCode::OK; tracker - .update( - &RequestInfo::new(&request, "0.0.0.0:0".parse().unwrap()), - status_code, - Duration::from_secs_f64(0.5), - ) + .update(operation_id, status_code1, Duration::from_secs_f64(0.5)) .unwrap(); - - let key = "/some/uri:GET:200"; - let actual_hist = tracker.latencies.lock().unwrap()[key].datum.clone(); - 
assert_eq!(actual_hist.n_samples(), 1); - let bins = actual_hist.iter().collect::>(); - assert_eq!(bins[1].count, 1); - } - - #[test] - fn test_normalize_uri_path() { - const EXPECTED: &str = "/foo/bar"; - const TESTS: &[&str] = &[ - "/foo/bar", - "/foo/bar/", - "//foo/bar", - "//foo/bar/", - "/foo/bar//", - "////foo/bar/////", - ]; - for test in TESTS.iter() { - println!("{test}"); - assert_eq!(normalized_uri_path(&test.parse().unwrap()), EXPECTED); + let key0 = RequestLatencyHistogram::key_for(operation_id, status_code0); + let key1 = RequestLatencyHistogram::key_for(operation_id, status_code1); + let latencies = tracker.latencies.lock().unwrap(); + assert_eq!(latencies.len(), 2); + for key in [key0, key1] { + let actual_hist = &latencies[&key].datum; + assert_eq!(actual_hist.n_samples(), 1); + let bins = actual_hist.iter().collect::>(); + assert_eq!(bins[1].count, 1); } } } diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index c04d1bd3ae..63b370bee6 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -13,9 +13,10 @@ anyhow.workspace = true clap.workspace = true chrono.workspace = true omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true oximeter-macro-impl.workspace = true +oximeter-schema.workspace = true oximeter-timeseries-macro.workspace = true +oximeter-types.workspace = true prettyplease.workspace = true syn.workspace = true toml.workspace = true diff --git a/oximeter/oximeter/schema/http-service.toml b/oximeter/oximeter/schema/http-service.toml index 9098110656..5270f6942c 100644 --- a/oximeter/oximeter/schema/http-service.toml +++ b/oximeter/oximeter/schema/http-service.toml @@ -14,7 +14,7 @@ description = "Duration for the server to handle a request" units = "seconds" datum_type = "histogram_f64" versions = [ - { added_in = 1, fields = [ "route", "method", "status_code" ] } + { added_in = 1, fields = [ "operation_id", "status_code" ] } ] [fields.name] @@ -25,14 +25,15 @@ description 
= "The name of the HTTP server, or program running it" type = "uuid" description = "UUID of the HTTP server" -[fields.route] +[fields.operation_id] type = "string" -description = "HTTP route in the request" +description = """\ +The identifier for the HTTP operation.\ -[fields.method] -type = "string" -description = "HTTP method in the request" +In most cases, this the OpenAPI `operationId` field that uniquely identifies the +endpoint the request is targeted to and the HTTP method used. +""" [fields.status_code] -type = "i64" +type = "u16" description = "HTTP status code in the server's response" diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 5ec6a49e5c..913318b8a8 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -185,14 +185,15 @@ //! `Producer`s may be registered with the same `ProducerServer`, each with potentially different //! sampling intervals. -pub use oximeter_impl::*; +pub use oximeter_macro_impl::{Metric, Target}; pub use oximeter_timeseries_macro::use_timeseries; +pub use oximeter_types::*; #[cfg(test)] mod test { - use oximeter_impl::schema::ir::load_schema; - use oximeter_impl::schema::{FieldSource, SCHEMA_DIRECTORY}; - use oximeter_impl::TimeseriesSchema; + use oximeter_schema::ir::load_schema; + use oximeter_types::schema::{FieldSource, SCHEMA_DIRECTORY}; + use oximeter_types::TimeseriesSchema; use std::collections::BTreeMap; use std::fs; diff --git a/oximeter/oxql-types/Cargo.toml b/oximeter/oxql-types/Cargo.toml new file mode 100644 index 0000000000..da7c7bcd1c --- /dev/null +++ b/oximeter/oxql-types/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "oxql-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +highway.workspace = true +num.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +schemars.workspace = true +serde.workspace = true diff 
--git a/oximeter/oxql-types/src/lib.rs b/oximeter/oxql-types/src/lib.rs new file mode 100644 index 0000000000..00468705a9 --- /dev/null +++ b/oximeter/oxql-types/src/lib.rs @@ -0,0 +1,23 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Core types for OxQL. + +use chrono::{DateTime, Utc}; +use std::time::Duration; + +pub mod point; +pub mod table; + +pub use self::table::Table; +pub use self::table::Timeseries; + +/// Describes the time alignment for an OxQL query. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Alignment { + /// The end time of the query, which the temporal reference point. + pub end_time: DateTime, + /// The alignment period, the interval on which values are produced. + pub period: Duration, +} diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/oxql-types/src/point.rs similarity index 82% rename from oximeter/db/src/oxql/point.rs rename to oximeter/oxql-types/src/point.rs index e04193e8b8..6e3c7143dc 100644 --- a/oximeter/db/src/oxql/point.rs +++ b/oximeter/oxql-types/src/point.rs @@ -6,15 +6,15 @@ // Copyright 2024 Oxide Computer Company -use super::Error; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::Utc; use num::ToPrimitive; -use oximeter::traits::HistogramSupport; -use oximeter::DatumType; -use oximeter::Measurement; -use oximeter::Quantile; +use oximeter_types::traits::HistogramSupport; +use oximeter_types::DatumType; +use oximeter_types::Measurement; +use oximeter_types::Quantile; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -131,32 +131,32 @@ impl CumulativeDatum { // not cumulative. 
fn from_cumulative(meas: &Measurement) -> Result { let datum = match meas.datum() { - oximeter::Datum::CumulativeI64(val) => { + oximeter_types::Datum::CumulativeI64(val) => { CumulativeDatum::Integer(val.value()) } - oximeter::Datum::CumulativeU64(val) => { + oximeter_types::Datum::CumulativeU64(val) => { let int = val .value() .try_into() .context("Overflow converting u64 to i64")?; CumulativeDatum::Integer(int) } - oximeter::Datum::CumulativeF32(val) => { + oximeter_types::Datum::CumulativeF32(val) => { CumulativeDatum::Double(val.value().into()) } - oximeter::Datum::CumulativeF64(val) => { + oximeter_types::Datum::CumulativeF64(val) => { CumulativeDatum::Double(val.value()) } - oximeter::Datum::HistogramI8(hist) => hist.into(), - oximeter::Datum::HistogramU8(hist) => hist.into(), - oximeter::Datum::HistogramI16(hist) => hist.into(), - oximeter::Datum::HistogramU16(hist) => hist.into(), - oximeter::Datum::HistogramI32(hist) => hist.into(), - oximeter::Datum::HistogramU32(hist) => hist.into(), - oximeter::Datum::HistogramI64(hist) => hist.into(), - oximeter::Datum::HistogramU64(hist) => hist.try_into()?, - oximeter::Datum::HistogramF32(hist) => hist.into(), - oximeter::Datum::HistogramF64(hist) => hist.into(), + oximeter_types::Datum::HistogramI8(hist) => hist.into(), + oximeter_types::Datum::HistogramU8(hist) => hist.into(), + oximeter_types::Datum::HistogramI16(hist) => hist.into(), + oximeter_types::Datum::HistogramU16(hist) => hist.into(), + oximeter_types::Datum::HistogramI32(hist) => hist.into(), + oximeter_types::Datum::HistogramU32(hist) => hist.into(), + oximeter_types::Datum::HistogramI64(hist) => hist.into(), + oximeter_types::Datum::HistogramU64(hist) => hist.try_into()?, + oximeter_types::Datum::HistogramF32(hist) => hist.into(), + oximeter_types::Datum::HistogramF64(hist) => hist.into(), other => anyhow::bail!( "Input datum of type {} is not cumulative", other.datum_type(), @@ -169,10 +169,10 @@ impl CumulativeDatum { /// A single list of values, 
for one dimension of a timeseries. #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Values { - // The data values. - pub(super) values: ValueArray, - // The type of this metric. - pub(super) metric_type: MetricType, + /// The data values. + pub values: ValueArray, + /// The type of this metric. + pub metric_type: MetricType, } impl Values { @@ -285,14 +285,23 @@ impl<'a> fmt::Display for Datum<'a> { #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Points { // The start time points for cumulative or delta metrics. - pub(super) start_times: Option>>, + pub(crate) start_times: Option>>, // The timestamp of each value. - pub(super) timestamps: Vec>, + pub(crate) timestamps: Vec>, // The array of data values, one for each dimension. - pub(super) values: Vec, + pub(crate) values: Vec, } impl Points { + /// Construct a new `Points` with the provided data. + pub fn new( + start_times: Option>>, + timestamps: Vec>, + values: Vec, + ) -> Self { + Self { start_times, timestamps, values } + } + /// Construct an empty array of points to hold data of the provided type. pub fn empty(data_type: DataType, metric_type: MetricType) -> Self { Self::with_capacity( @@ -303,8 +312,28 @@ impl Points { .unwrap() } - // Return a mutable reference to the value array of the specified dimension, if any. - pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { + /// Return the start times of the points, if any. + pub fn start_times(&self) -> Option<&[DateTime]> { + self.start_times.as_deref() + } + + /// Clear the start times of the points. + pub fn clear_start_times(&mut self) { + self.start_times = None; + } + + /// Return the timestamps of the points. + pub fn timestamps(&self) -> &[DateTime] { + &self.timestamps + } + + pub fn set_timestamps(&mut self, timestamps: Vec>) { + self.timestamps = timestamps; + } + + /// Return a mutable reference to the value array of the specified + /// dimension, if any. 
+ pub fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { self.values.get_mut(dim).map(|val| &mut val.values) } @@ -563,8 +592,8 @@ impl Points { }) } - // Filter points in self to those where `to_keep` is true. - pub(crate) fn filter(&self, to_keep: Vec) -> Result { + /// Filter points in self to those where `to_keep` is true. + pub fn filter(&self, to_keep: Vec) -> Result { anyhow::ensure!( to_keep.len() == self.len(), "Filter array must be the same length as self", @@ -646,8 +675,8 @@ impl Points { Ok(out) } - // Return a new set of points, with the values casted to the provided types. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + /// Return a new set of points, with the values casted to the provided types. + pub fn cast(&self, types: &[DataType]) -> Result { anyhow::ensure!( types.len() == self.dimensionality(), "Cannot cast to {} types, the data has dimensionality {}", @@ -863,12 +892,104 @@ impl Points { Ok(Self { start_times, timestamps, values: new_values }) } + /// Given two arrays of points, stack them together at matching timepoints. + /// + /// For time points in either which do not have a corresponding point in + /// the other, the entire time point is elided. + pub fn inner_join(&self, right: &Points) -> Result { + // Create an output array with roughly the right capacity, and double the + // number of dimensions. We're trying to stack output value arrays together + // along the dimension axis. + let data_types = + self.data_types().chain(right.data_types()).collect::>(); + let metric_types = + self.metric_types().chain(right.metric_types()).collect::>(); + let mut out = Points::with_capacity( + self.len().max(right.len()), + data_types.iter().copied(), + metric_types.iter().copied(), + )?; + + // Iterate through each array until one is exhausted. We're only inserting + // values from both arrays where the timestamps actually match, since this + // is an inner join. 
We may want to insert missing values where timestamps + // do not match on either side, when we support an outer join of some kind. + let n_left_dim = self.dimensionality(); + let mut left_ix = 0; + let mut right_ix = 0; + while left_ix < self.len() && right_ix < right.len() { + let left_timestamp = self.timestamps()[left_ix]; + let right_timestamp = right.timestamps()[right_ix]; + if left_timestamp == right_timestamp { + out.timestamps.push(left_timestamp); + push_concrete_values( + &mut out.values[..n_left_dim], + &self.values, + left_ix, + ); + push_concrete_values( + &mut out.values[n_left_dim..], + &right.values, + right_ix, + ); + left_ix += 1; + right_ix += 1; + } else if left_timestamp < right_timestamp { + left_ix += 1; + } else { + right_ix += 1; + } + } + Ok(out) + } + /// Return true if self contains no data points. pub fn is_empty(&self) -> bool { self.len() == 0 } } +// Push the `i`th value from each dimension of `from` onto `to`. +fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array 
+ .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + /// List of data values for one timeseries. /// /// Each element is an option, where `None` represents a missing sample. @@ -900,8 +1021,8 @@ impl ValueArray { } } - // Return the data type in self. - pub(super) fn data_type(&self) -> DataType { + /// Return the data type in self. + pub fn data_type(&self) -> DataType { match self { ValueArray::Integer(_) => DataType::Integer, ValueArray::Double(_) => DataType::Double, @@ -947,10 +1068,8 @@ impl ValueArray { Ok(inner) } - // Access the inner array of integers, if possible. - pub(super) fn as_integer_mut( - &mut self, - ) -> Result<&mut Vec>, Error> { + /// Access the inner array of integers, if possible. + pub fn as_integer_mut(&mut self) -> Result<&mut Vec>, Error> { let ValueArray::Integer(inner) = self else { anyhow::bail!( "Cannot access value array as integer type, it has type {}", @@ -1107,91 +1226,97 @@ impl ValueArray { // Push a value directly from a datum, without modification. 
fn push_value_from_datum( &mut self, - datum: &oximeter::Datum, + datum: &oximeter_types::Datum, ) -> Result<(), Error> { match datum { - oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), - oximeter::Datum::I8(i) => { + oximeter_types::Datum::Bool(b) => { + self.as_boolean_mut()?.push(Some(*b)) + } + oximeter_types::Datum::I8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U8(i) => { + oximeter_types::Datum::U8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I16(i) => { + oximeter_types::Datum::I16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U16(i) => { + oximeter_types::Datum::U16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I32(i) => { + oximeter_types::Datum::I32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U32(i) => { + oximeter_types::Datum::U32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), - oximeter::Datum::U64(i) => { + oximeter_types::Datum::I64(i) => { + self.as_integer_mut()?.push(Some(*i)) + } + oximeter_types::Datum::U64(i) => { let i = i.to_i64().context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(i)); } - oximeter::Datum::F32(f) => { + oximeter_types::Datum::F32(f) => { self.as_double_mut()?.push(Some(f64::from(*f))) } - oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), - oximeter::Datum::String(s) => { + oximeter_types::Datum::F64(f) => { + self.as_double_mut()?.push(Some(*f)) + } + oximeter_types::Datum::String(s) => { self.as_string_mut()?.push(Some(s.clone())) } - oximeter::Datum::Bytes(_) => { + oximeter_types::Datum::Bytes(_) => { anyhow::bail!("Bytes data types are not yet supported") } - oximeter::Datum::CumulativeI64(c) => { + oximeter_types::Datum::CumulativeI64(c) => { self.as_integer_mut()?.push(Some(c.value())) } - 
oximeter::Datum::CumulativeU64(c) => { + oximeter_types::Datum::CumulativeU64(c) => { let c = c .value() .to_i64() .context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(c)); } - oximeter::Datum::CumulativeF32(c) => { + oximeter_types::Datum::CumulativeF32(c) => { self.as_double_mut()?.push(Some(f64::from(c.value()))) } - oximeter::Datum::CumulativeF64(c) => { + oximeter_types::Datum::CumulativeF64(c) => { self.as_double_mut()?.push(Some(c.value())) } - oximeter::Datum::HistogramI8(h) => self + oximeter_types::Datum::HistogramI8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU8(h) => self + oximeter_types::Datum::HistogramU8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI16(h) => self + oximeter_types::Datum::HistogramI16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU16(h) => self + oximeter_types::Datum::HistogramU16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI32(h) => self + oximeter_types::Datum::HistogramI32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU32(h) => self + oximeter_types::Datum::HistogramU32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI64(h) => self + oximeter_types::Datum::HistogramI64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU64(h) => self + oximeter_types::Datum::HistogramU64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::try_from(h)?)), - oximeter::Datum::HistogramF32(h) => self + oximeter_types::Datum::HistogramF32(h) => self .as_double_distribution_mut()? 
.push(Some(Distribution::from(h))), - oximeter::Datum::HistogramF64(h) => self + oximeter_types::Datum::HistogramF64(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::Missing(missing) => { + oximeter_types::Datum::Missing(missing) => { self.push_missing(missing.datum_type())? } } @@ -1216,7 +1341,7 @@ impl ValueArray { fn push_diff_from_last_to_datum( &mut self, last_datum: &Option, - new_datum: &oximeter::Datum, + new_datum: &oximeter_types::Datum, data_type: DataType, ) -> Result<(), Error> { match (last_datum.as_ref(), new_datum.is_missing()) { @@ -1253,49 +1378,49 @@ impl ValueArray { match (last_datum, new_datum) { ( CumulativeDatum::Integer(last), - oximeter::Datum::I8(new), + oximeter_types::Datum::I8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U8(new), + oximeter_types::Datum::U8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I16(new), + oximeter_types::Datum::I16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U16(new), + oximeter_types::Datum::U16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I32(new), + oximeter_types::Datum::I32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U32(new), + oximeter_types::Datum::U32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I64(new), + oximeter_types::Datum::I64(new), ) => { let diff = new .checked_sub(*last) @@ -1304,7 +1429,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - 
oximeter::Datum::U64(new), + oximeter_types::Datum::U64(new), ) => { let new = new .to_i64() @@ -1316,20 +1441,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::F32(new), + oximeter_types::Datum::F32(new), ) => { self.as_double_mut()? .push(Some(f64::from(*new) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::F64(new), + oximeter_types::Datum::F64(new), ) => { self.as_double_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeI64(new), + oximeter_types::Datum::CumulativeI64(new), ) => { let new = new.value(); let diff = new @@ -1339,7 +1464,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeU64(new), + oximeter_types::Datum::CumulativeU64(new), ) => { let new = new .value() @@ -1352,20 +1477,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF32(new), + oximeter_types::Datum::CumulativeF32(new), ) => { self.as_double_mut()? .push(Some(f64::from(new.value()) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF64(new), + oximeter_types::Datum::CumulativeF64(new), ) => { self.as_double_mut()?.push(Some(new.value() - last)); } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI8(new), + oximeter_types::Datum::HistogramI8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1373,7 +1498,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU8(new), + oximeter_types::Datum::HistogramU8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1381,7 +1506,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI16(new), + oximeter_types::Datum::HistogramI16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? 
@@ -1389,7 +1514,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU16(new), + oximeter_types::Datum::HistogramU16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1397,7 +1522,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI32(new), + oximeter_types::Datum::HistogramI32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1405,7 +1530,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU32(new), + oximeter_types::Datum::HistogramU32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1413,7 +1538,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI64(new), + oximeter_types::Datum::HistogramI64(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1421,7 +1546,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU64(new), + oximeter_types::Datum::HistogramU64(new), ) => { let new = Distribution::try_from(new)?; self.as_integer_distribution_mut()? @@ -1429,7 +1554,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF32(new), + oximeter_types::Datum::HistogramF32(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1437,7 +1562,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF64(new), + oximeter_types::Datum::HistogramF64(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1486,8 +1611,8 @@ impl ValueArray { } } - // Swap the value in self with other, asserting they're the same type. - pub(crate) fn swap(&mut self, mut values: ValueArray) { + /// Swap the value in self with other, asserting they're the same type. 
+ pub fn swap(&mut self, mut values: ValueArray) { use std::mem::swap; match (self, &mut values) { (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), @@ -1733,8 +1858,10 @@ where macro_rules! i64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(i64::from).collect(), @@ -1750,8 +1877,10 @@ macro_rules! i64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::IntegerDistribution(hist.into()) } } @@ -1766,10 +1895,10 @@ i64_dist_from!(i32); i64_dist_from!(u32); i64_dist_from!(i64); -impl TryFrom<&oximeter::histogram::Histogram> for Distribution { +impl TryFrom<&oximeter_types::histogram::Histogram> for Distribution { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { let (bins, counts) = hist.bins_and_counts(); let bins = bins @@ -1791,10 +1920,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for Distribution { } } -impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { +impl TryFrom<&oximeter_types::histogram::Histogram> for CumulativeDatum { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { hist.try_into().map(CumulativeDatum::IntegerDistribution) } @@ -1802,8 +1931,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { macro_rules! 
f64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(f64::from).collect(), @@ -1819,8 +1950,10 @@ macro_rules! f64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::DoubleDistribution(hist.into()) } } @@ -1833,9 +1966,9 @@ f64_dist_from!(f64); #[cfg(test)] mod tests { use super::{Distribution, MetricType, Points, Values}; - use crate::oxql::point::{DataType, ValueArray}; + use crate::point::{push_concrete_values, DataType, Datum, ValueArray}; use chrono::{DateTime, Utc}; - use oximeter::{ + use oximeter_types::{ histogram::Record, types::Cumulative, Measurement, Quantile, }; use std::time::Duration; @@ -1939,12 +2072,12 @@ mod tests { let now = Utc::now(); let current1 = now + Duration::from_secs(1); let mut hist1 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist1.sample(1).unwrap(); hist1.set_start_time(current1); let current2 = now + Duration::from_secs(2); let mut hist2 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist2.sample(5).unwrap(); hist2.sample(10).unwrap(); hist2.sample(15).unwrap(); @@ -2273,4 +2406,176 @@ mod tests { .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution]) .is_err()); } + + #[test] + fn test_push_concrete_values() { + let mut points = 
Points::with_capacity( + 2, + [DataType::Integer, DataType::Double].into_iter(), + [MetricType::Gauge, MetricType::Gauge].into_iter(), + ) + .unwrap(); + + // Push a concrete value for the integer dimension + let from_ints = vec![Values { + values: ValueArray::Integer(vec![Some(1)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[..1], &from_ints, 0); + + // And another for the double dimension. + let from_doubles = vec![Values { + values: ValueArray::Double(vec![Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[1..], &from_doubles, 0); + + assert_eq!( + points.dimensionality(), + 2, + "Points should have 2 dimensions", + ); + let ints = points.values[0].values.as_integer().unwrap(); + assert_eq!( + ints.len(), + 1, + "Should have pushed one point in the first dimension" + ); + assert_eq!( + ints[0], + Some(1), + "Should have pushed 1 onto the first dimension" + ); + let doubles = points.values[1].values.as_double().unwrap(); + assert_eq!( + doubles.len(), + 1, + "Should have pushed one point in the second dimension" + ); + assert_eq!( + doubles[0], + Some(2.0), + "Should have pushed 2.0 onto the second dimension" + ); + } + + #[test] + fn test_join_point_arrays() { + let now = Utc::now(); + + // Create a set of integer points to join with. + // + // This will have two timestamps, one of which will match the points + // below that are merged in. + let int_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now, + ], + values: vec![Values { + values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), + metric_type: MetricType::Gauge, + }], + }; + + // Create an additional set of double points. + // + // This also has two timepoints, one of which matches with the above, + // and one of which does not. 
+ let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = int_points.inner_join(&double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." 
+ ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. + assert!(points.next().is_none(), "There should be no more points"); + } } diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/oxql-types/src/table.rs similarity index 75% rename from oximeter/db/src/oxql/table.rs rename to oximeter/oxql-types/src/table.rs index 2cd141d2fa..f37992942f 100644 --- a/oximeter/db/src/oxql/table.rs +++ b/oximeter/oxql-types/src/table.rs @@ -6,14 +6,16 @@ // Copyright 2024 Oxide Computer Company -use super::point::DataType; -use super::point::MetricType; -use super::point::Points; -use super::query::Alignment; -use super::Error; -use crate::TimeseriesKey; +use crate::point::DataType; +use crate::point::MetricType; +use crate::point::Points; +use crate::point::ValueArray; +use crate::point::Values; +use crate::Alignment; +use anyhow::Error; use highway::HighwayHasher; -use oximeter::FieldValue; +use oximeter_types::schema::TimeseriesKey; +use oximeter_types::FieldValue; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -67,10 +69,20 @@ impl Timeseries { hasher.finish() } + /// Return the alignment of this timeseries, if any. + pub fn alignment(&self) -> Option { + self.alignment + } + + /// Set the alignment of this timeseries. 
+ pub fn set_alignment(&mut self, alignment: Alignment) { + self.alignment = Some(alignment); + } + /// Return a copy of the timeseries, keeping only the provided fields. /// /// An error is returned if the timeseries does not contain those fields. - pub(crate) fn copy_with_fields( + pub fn copy_with_fields( &self, kept_fields: &[&str], ) -> Result { @@ -88,6 +100,20 @@ impl Timeseries { }) } + /// Return a copy of the timeseries, keeping only the provided points. + /// + /// Returns `None` if `kept_points` is empty. + pub fn copy_with_points(&self, kept_points: Points) -> Option { + if kept_points.is_empty() { + return None; + } + Some(Self { + fields: self.fields.clone(), + points: kept_points, + alignment: self.alignment, + }) + } + // Return `true` if the schema in `other` matches that of `self`. fn matches_schema(&self, other: &Timeseries) -> bool { if self.fields.len() != other.fields.len() { @@ -125,7 +151,7 @@ impl Timeseries { /// This returns an error if the points cannot be so cast, or the /// dimensionality of the types requested differs from the dimensionality of /// the points themselves. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + pub fn cast(&self, types: &[DataType]) -> Result { let fields = self.fields.clone(); Ok(Self { fields, @@ -133,6 +159,49 @@ impl Timeseries { alignment: self.alignment, }) } + + /// Return a new timeseries, with the points limited to the provided range. + pub fn limit(&self, start: usize, end: usize) -> Self { + let input_points = &self.points; + + // Slice the various data arrays. 
+ let start_times = + input_points.start_times().map(|s| s[start..end].to_vec()); + let timestamps = input_points.timestamps()[start..end].to_vec(); + let values = input_points + .values + .iter() + .map(|vals| { + let values = match &vals.values { + ValueArray::Integer(inner) => { + ValueArray::Integer(inner[start..end].to_vec()) + } + ValueArray::Double(inner) => { + ValueArray::Double(inner[start..end].to_vec()) + } + ValueArray::Boolean(inner) => { + ValueArray::Boolean(inner[start..end].to_vec()) + } + ValueArray::String(inner) => { + ValueArray::String(inner[start..end].to_vec()) + } + ValueArray::IntegerDistribution(inner) => { + ValueArray::IntegerDistribution( + inner[start..end].to_vec(), + ) + } + ValueArray::DoubleDistribution(inner) => { + ValueArray::DoubleDistribution( + inner[start..end].to_vec(), + ) + } + }; + Values { values, metric_type: vals.metric_type } + }) + .collect(); + let points = Points::new(start_times, timestamps, values); + Self { fields: self.fields.clone(), points, alignment: self.alignment } + } } /// A table represents one or more timeseries with the same schema. @@ -146,7 +215,7 @@ pub struct Table { // // This starts as the name of the timeseries schema the data is derived // from, but can be modified as operations are done. - pub(super) name: String, + pub name: String, // The set of timeseries in the table, ordered by key. 
timeseries: BTreeMap, } diff --git a/oximeter/schema/Cargo.toml b/oximeter/schema/Cargo.toml new file mode 100644 index 0000000000..fe2e28705a --- /dev/null +++ b/oximeter/schema/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "oximeter-schema" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +clap.workspace = true +heck.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +prettyplease.workspace = true +proc-macro2.workspace = true +quote.workspace = true +schemars.workspace = true +serde.workspace = true +slog-error-chain.workspace = true +syn.workspace = true +toml.workspace = true diff --git a/oximeter/oximeter/src/bin/oximeter-schema.rs b/oximeter/schema/src/bin/oximeter-schema.rs similarity index 93% rename from oximeter/oximeter/src/bin/oximeter-schema.rs rename to oximeter/schema/src/bin/oximeter-schema.rs index 14fb31b1e8..5595a28639 100644 --- a/oximeter/oximeter/src/bin/oximeter-schema.rs +++ b/oximeter/schema/src/bin/oximeter-schema.rs @@ -9,7 +9,7 @@ use anyhow::Context as _; use clap::Parser; use clap::Subcommand; -use oximeter::schema::ir::TimeseriesDefinition; +use oximeter_schema::ir::TimeseriesDefinition; use std::num::NonZeroU8; use std::path::PathBuf; @@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> { println!("{def:#?}"); } Cmd::Schema { timeseries, version } => { - let schema = oximeter_impl::schema::ir::load_schema(&contents)?; + let schema = oximeter_schema::ir::load_schema(&contents)?; match (timeseries, version) { (None, None) => { for each in schema.into_iter() { @@ -87,7 +87,7 @@ fn main() -> anyhow::Result<()> { } } Cmd::Emit => { - let code = oximeter::schema::codegen::use_timeseries(&contents)?; + let code = oximeter_schema::codegen::use_timeseries(&contents)?; let formatted = prettyplease::unparse(&syn::parse_file(&format!("{code}"))?); println!("{formatted}"); diff --git 
a/oximeter/impl/src/schema/codegen.rs b/oximeter/schema/src/codegen.rs similarity index 73% rename from oximeter/impl/src/schema/codegen.rs rename to oximeter/schema/src/codegen.rs index d433441718..0429cf0534 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/schema/src/codegen.rs @@ -6,18 +6,18 @@ //! Generate Rust types and code from oximeter schema definitions. -use crate::schema::ir::find_schema_version; -use crate::schema::ir::load_schema; -use crate::schema::AuthzScope; -use crate::schema::FieldSource; -use crate::schema::Units; -use crate::DatumType; -use crate::FieldSchema; -use crate::FieldType; -use crate::MetricsError; -use crate::TimeseriesSchema; +use crate::ir::find_schema_version; +use crate::ir::load_schema; use chrono::prelude::DateTime; use chrono::prelude::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use proc_macro2::TokenStream; use quote::quote; @@ -34,7 +34,7 @@ pub fn use_timeseries(contents: &str) -> Result { let latest = find_schema_version(schema.iter().cloned(), None); let mod_name = quote::format_ident!("{}", latest[0].target_name()); let types = emit_schema_types(latest); - let func = emit_schema_function(schema.into_iter()); + let func = emit_schema_function(schema.iter()); Ok(quote! { pub mod #mod_name { #types @@ -43,9 +43,10 @@ pub fn use_timeseries(contents: &str) -> Result { }) } -fn emit_schema_function( - list: impl Iterator, +fn emit_schema_function<'a>( + list: impl Iterator, ) -> TokenStream { + let list = list.map(quote_timeseries_schema); quote! 
{ pub fn timeseries_schema() -> Vec<::oximeter::schema::TimeseriesSchema> { vec![ @@ -310,66 +311,63 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { // This is used so that we can emit a function that will return the same data as // we parse from the TOML file with the timeseries definition, as a way to // export the definitions without needing that actual file at runtime. -impl quote::ToTokens for DatumType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, - DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, - DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, - DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, - DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, - DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, - DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, - DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, - DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, - DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, - DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, - DatumType::String => quote! { ::oximeter::DatumType::String }, - DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, - DatumType::CumulativeI64 => { - quote! { ::oximeter::DatumType::CumulativeI64 } - } - DatumType::CumulativeU64 => { - quote! { ::oximeter::DatumType::CumulativeU64 } - } - DatumType::CumulativeF32 => { - quote! { ::oximeter::DatumType::CumulativeF32 } - } - DatumType::CumulativeF64 => { - quote! { ::oximeter::DatumType::CumulativeF64 } - } - DatumType::HistogramI8 => { - quote! { ::oximeter::DatumType::HistogramI8 } - } - DatumType::HistogramU8 => { - quote! { ::oximeter::DatumType::HistogramU8 } - } - DatumType::HistogramI16 => { - quote! { ::oximeter::DatumType::HistogramI16 } - } - DatumType::HistogramU16 => { - quote! 
{ ::oximeter::DatumType::HistogramU16 } - } - DatumType::HistogramI32 => { - quote! { ::oximeter::DatumType::HistogramI32 } - } - DatumType::HistogramU32 => { - quote! { ::oximeter::DatumType::HistogramU32 } - } - DatumType::HistogramI64 => { - quote! { ::oximeter::DatumType::HistogramI64 } - } - DatumType::HistogramU64 => { - quote! { ::oximeter::DatumType::HistogramU64 } - } - DatumType::HistogramF32 => { - quote! { ::oximeter::DatumType::HistogramF32 } - } - DatumType::HistogramF64 => { - quote! { ::oximeter::DatumType::HistogramF64 } - } - }; - toks.to_tokens(tokens); +fn quote_datum_type(datum_type: DatumType) -> TokenStream { + match datum_type { + DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, + DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, + DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, + DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, + DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, + DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, + DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, + DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, + DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, + DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, + DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, + DatumType::String => quote! { ::oximeter::DatumType::String }, + DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, + DatumType::CumulativeI64 => { + quote! { ::oximeter::DatumType::CumulativeI64 } + } + DatumType::CumulativeU64 => { + quote! { ::oximeter::DatumType::CumulativeU64 } + } + DatumType::CumulativeF32 => { + quote! { ::oximeter::DatumType::CumulativeF32 } + } + DatumType::CumulativeF64 => { + quote! { ::oximeter::DatumType::CumulativeF64 } + } + DatumType::HistogramI8 => { + quote! { ::oximeter::DatumType::HistogramI8 } + } + DatumType::HistogramU8 => { + quote! { ::oximeter::DatumType::HistogramU8 } + } + DatumType::HistogramI16 => { + quote! 
{ ::oximeter::DatumType::HistogramI16 } + } + DatumType::HistogramU16 => { + quote! { ::oximeter::DatumType::HistogramU16 } + } + DatumType::HistogramI32 => { + quote! { ::oximeter::DatumType::HistogramI32 } + } + DatumType::HistogramU32 => { + quote! { ::oximeter::DatumType::HistogramU32 } + } + DatumType::HistogramI64 => { + quote! { ::oximeter::DatumType::HistogramI64 } + } + DatumType::HistogramU64 => { + quote! { ::oximeter::DatumType::HistogramU64 } + } + DatumType::HistogramF32 => { + quote! { ::oximeter::DatumType::HistogramF32 } + } + DatumType::HistogramF64 => { + quote! { ::oximeter::DatumType::HistogramF64 } + } } } @@ -452,55 +450,46 @@ fn emit_rust_type_for_field(field_type: FieldType) -> TokenStream { } } -impl quote::ToTokens for FieldSource { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldSource::Target => { - quote! { ::oximeter::schema::FieldSource::Target } - } - FieldSource::Metric => { - quote! { ::oximeter::schema::FieldSource::Metric } - } - }; - toks.to_tokens(tokens); +fn quote_field_source(source: FieldSource) -> TokenStream { + match source { + FieldSource::Target => { + quote! { ::oximeter::schema::FieldSource::Target } + } + FieldSource::Metric => { + quote! { ::oximeter::schema::FieldSource::Metric } + } } } -impl quote::ToTokens for FieldType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldType::String => quote! { ::oximeter::FieldType::String }, - FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, - FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, - FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, - FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, - FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, - FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, - FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, - FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, - FieldType::IpAddr => quote! 
{ ::oximeter::FieldType::IpAddr }, - FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, - FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, - }; - toks.to_tokens(tokens); +fn quote_field_type(field_type: FieldType) -> TokenStream { + match field_type { + FieldType::String => quote! { ::oximeter::FieldType::String }, + FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, + FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, + FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, + FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, + FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, + FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, + FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, + FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, + FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, + FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, + FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, } } -impl quote::ToTokens for AuthzScope { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - AuthzScope::Fleet => { - quote! { ::oximeter::schema::AuthzScope::Fleet } - } - AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, - AuthzScope::Project => { - quote! { ::oximeter::schema::AuthzScope::Project } - } - AuthzScope::ViewableToAll => { - quote! { ::oximeter::schema::AuthzScope::ViewableToAll } - } - }; - toks.to_tokens(tokens); +fn quote_authz_scope(authz_scope: AuthzScope) -> TokenStream { + match authz_scope { + AuthzScope::Fleet => { + quote! { ::oximeter::schema::AuthzScope::Fleet } + } + AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, + AuthzScope::Project => { + quote! { ::oximeter::schema::AuthzScope::Project } + } + AuthzScope::ViewableToAll => { + quote! 
{ ::oximeter::schema::AuthzScope::ViewableToAll } + } } } @@ -512,79 +501,79 @@ fn quote_creation_time(created: DateTime) -> TokenStream { } } -impl quote::ToTokens for Units { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - Units::None => quote! { ::oximeter::schema::Units::None }, - Units::Count => quote! { ::oximeter::schema::Units::Count }, - Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, - Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, - Units::Nanoseconds => { - quote! { ::oximeter::schema::Units::Nanoseconds } - } - }; - toks.to_tokens(tokens); +fn quote_units(units: Units) -> TokenStream { + match units { + Units::None => quote! { ::oximeter::schema::Units::None }, + Units::Count => quote! { ::oximeter::schema::Units::Count }, + Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, + Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, + Units::Nanoseconds => { + quote! { ::oximeter::schema::Units::Nanoseconds } + } + Units::Amps => quote! { ::oximeter::schema::Units::Amps }, + Units::Volts => quote! { ::oximeter::schema::Units::Volts }, + Units::DegreesCelcius => { + quote! { ::oximeter::schema::Units::DegreesCelcius } + } + Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, } } -impl quote::ToTokens for FieldSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let name = self.name.as_str(); - let field_type = self.field_type; - let source = self.source; - let description = self.description.as_str(); - let toks = quote! 
{ - ::oximeter::FieldSchema { - name: String::from(#name), - field_type: #field_type, - source: #source, - description: String::from(#description), - } - }; - toks.to_tokens(tokens); +fn quote_field_schema(field_schema: &FieldSchema) -> TokenStream { + let name = field_schema.name.as_str(); + let field_type = quote_field_type(field_schema.field_type); + let source = quote_field_source(field_schema.source); + let description = field_schema.description.as_str(); + quote! { + ::oximeter::FieldSchema { + name: String::from(#name), + field_type: #field_type, + source: #source, + description: String::from(#description), + } } } -impl quote::ToTokens for TimeseriesSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let field_schema = &self.field_schema; - let timeseries_name = self.timeseries_name.to_string(); - let target_description = self.description.target.as_str(); - let metric_description = self.description.metric.as_str(); - let authz_scope = self.authz_scope; - let units = self.units; - let datum_type = self.datum_type; - let ver = self.version.get(); - let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; - let created = quote_creation_time(self.created); - let toks = quote! 
{ - ::oximeter::schema::TimeseriesSchema { - timeseries_name: - <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( - #timeseries_name - ).unwrap(), - description: ::oximeter::schema::TimeseriesDescription { - target: String::from(#target_description), - metric: String::from(#metric_description), - }, - authz_scope: #authz_scope, - units: #units, - field_schema: ::std::collections::BTreeSet::from([ - #(#field_schema),* - ]), - datum_type: #datum_type, - version: #version, - created: #created, - } - }; - toks.to_tokens(tokens); +fn quote_timeseries_schema( + timeseries_schema: &TimeseriesSchema, +) -> TokenStream { + let field_schema = + timeseries_schema.field_schema.iter().map(quote_field_schema); + let timeseries_name = timeseries_schema.timeseries_name.to_string(); + let target_description = timeseries_schema.description.target.as_str(); + let metric_description = timeseries_schema.description.metric.as_str(); + let authz_scope = quote_authz_scope(timeseries_schema.authz_scope); + let units = quote_units(timeseries_schema.units); + let datum_type = quote_datum_type(timeseries_schema.datum_type); + let ver = timeseries_schema.version.get(); + let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; + let created = quote_creation_time(timeseries_schema.created); + quote! 
{ + ::oximeter::schema::TimeseriesSchema { + timeseries_name: + <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( + #timeseries_name + ).unwrap(), + description: ::oximeter::schema::TimeseriesDescription { + target: String::from(#target_description), + metric: String::from(#metric_description), + }, + authz_scope: #authz_scope, + units: #units, + field_schema: ::std::collections::BTreeSet::from([ + #(#field_schema),* + ]), + datum_type: #datum_type, + version: #version, + created: #created, + } } } #[cfg(test)] mod tests { use super::*; - use crate::schema::TimeseriesDescription; + use oximeter_types::TimeseriesDescription; use std::{collections::BTreeSet, num::NonZeroU8}; #[test] diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/schema/src/ir.rs similarity index 99% rename from oximeter/impl/src/schema/ir.rs rename to oximeter/schema/src/ir.rs index f7a209294f..370236000a 100644 --- a/oximeter/impl/src/schema/ir.rs +++ b/oximeter/schema/src/ir.rs @@ -11,17 +11,17 @@ //! inspected or used to generate code that contains the equivalent Rust types //! and trait implementations. 
-use crate::schema::AuthzScope; -use crate::schema::DatumType; -use crate::schema::FieldSource; -use crate::schema::FieldType; -use crate::schema::TimeseriesDescription; -use crate::schema::Units; -use crate::FieldSchema; -use crate::MetricsError; -use crate::TimeseriesName; -use crate::TimeseriesSchema; use chrono::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesDescription; +use oximeter_types::TimeseriesName; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use serde::Deserialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; diff --git a/oximeter/schema/src/lib.rs b/oximeter/schema/src/lib.rs new file mode 100644 index 0000000000..b1ce73a940 --- /dev/null +++ b/oximeter/schema/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Tools for working with schemas for fields and timeseries. +//! +//! The actual schema type definitions are in [`oximeter_types::schema`]. 
+ +pub mod codegen; +pub mod ir; diff --git a/oximeter/test-utils/Cargo.toml b/oximeter/test-utils/Cargo.toml new file mode 100644 index 0000000000..f463e74aca --- /dev/null +++ b/oximeter/test-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "oximeter-test-utils" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +omicron-workspace-hack.workspace = true +oximeter-macro-impl.workspace = true +oximeter-types.workspace = true +uuid.workspace = true diff --git a/oximeter/test-utils/src/lib.rs b/oximeter/test-utils/src/lib.rs new file mode 100644 index 0000000000..04c49add65 --- /dev/null +++ b/oximeter/test-utils/src/lib.rs @@ -0,0 +1,295 @@ +// Copyright 2024 Oxide Computer Company + +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for testing the oximeter crate. + +// Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate +// code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's +// fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. +// +// See https://github.com/rust-lang/rust/pull/55275 for the PR introducing this fix, which links to +// lots of related issues and discussion. 
+extern crate self as oximeter; + +use oximeter_macro_impl::{Metric, Target}; +use oximeter_types::histogram; +use oximeter_types::histogram::{Histogram, Record}; +use oximeter_types::traits; +use oximeter_types::types::{ + Cumulative, Datum, DatumType, FieldType, FieldValue, Measurement, Sample, +}; +use oximeter_types::{Metric, Target}; +use uuid::Uuid; + +#[derive(Target)] +pub struct TestTarget { + pub name1: String, + pub name2: String, + pub num: i64, +} + +impl Default for TestTarget { + fn default() -> Self { + TestTarget { + name1: "first_name".into(), + name2: "second_name".into(), + num: 0, + } + } +} + +#[derive(Metric)] +pub struct TestMetric { + pub id: Uuid, + pub good: bool, + pub datum: i64, +} + +#[derive(Metric)] +pub struct TestCumulativeMetric { + pub id: Uuid, + pub good: bool, + pub datum: Cumulative, +} + +#[derive(Metric)] +pub struct TestHistogram { + pub id: Uuid, + pub good: bool, + pub datum: Histogram, +} + +const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); + +pub fn make_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new(&target, &metric).unwrap() +} + +pub fn make_missing_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new_missing(&target, &metric).unwrap() +} + +pub fn make_hist_sample() -> Sample { + let target = TestTarget::default(); + let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); + hist.sample(1.0).unwrap(); + hist.sample(2.0).unwrap(); + hist.sample(6.0).unwrap(); + let metric = TestHistogram { id: ID, good: true, datum: hist }; + Sample::new(&target, &metric).unwrap() +} + +/// A target identifying a single virtual machine instance +#[derive(Debug, Clone, Copy, oximeter::Target)] +pub struct VirtualMachine { + pub project_id: Uuid, + pub instance_id: Uuid, +} + +/// A metric recording the total time a vCPU is busy, by its 
ID +#[derive(Debug, Clone, Copy, oximeter::Metric)] +pub struct CpuBusy { + cpu_id: i64, + datum: Cumulative, +} + +pub fn generate_test_samples( + n_projects: usize, + n_instances: usize, + n_cpus: usize, + n_samples: usize, +) -> Vec { + let n_timeseries = n_projects * n_instances * n_cpus; + let mut samples = Vec::with_capacity(n_samples * n_timeseries); + for _ in 0..n_projects { + let project_id = Uuid::new_v4(); + for _ in 0..n_instances { + let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; + for cpu in 0..n_cpus { + for sample in 0..n_samples { + let cpu_busy = CpuBusy { + cpu_id: cpu as _, + datum: Cumulative::new(sample as f64), + }; + let sample = Sample::new(&vm, &cpu_busy).unwrap(); + samples.push(sample); + } + } + } + } + samples +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use oximeter_types::{ + schema::{ + default_schema_version, AuthzScope, FieldSchema, FieldSource, + TimeseriesSchema, Units, + }, + TimeseriesName, + }; + + use super::*; + + #[test] + fn test_gen_test_samples() { + let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); + let samples = + generate_test_samples(n_projects, n_instances, n_cpus, n_samples); + assert_eq!( + samples.len(), + n_projects * n_instances * n_cpus * n_samples + ); + } + + #[test] + fn test_sample_struct() { + let t = TestTarget::default(); + let m = TestMetric { id: Uuid::new_v4(), good: true, datum: 1i64 }; + let sample = Sample::new(&t, &m).unwrap(); + assert_eq!( + sample.timeseries_name, + format!("{}:{}", t.name(), m.name()) + ); + assert!(sample.measurement.start_time().is_none()); + assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); + + let m = TestCumulativeMetric { + id: Uuid::new_v4(), + good: true, + datum: 1i64.into(), + }; + let sample = Sample::new(&t, &m).unwrap(); + assert!(sample.measurement.start_time().is_some()); + } + + #[derive(Target)] + struct MyTarget { + id: Uuid, + name: String, + } + + const ID: Uuid = 
uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); + + impl Default for MyTarget { + fn default() -> Self { + Self { id: ID, name: String::from("name") } + } + } + + #[derive(Metric)] + struct MyMetric { + happy: bool, + datum: u64, + } + + impl Default for MyMetric { + fn default() -> Self { + Self { happy: true, datum: 0 } + } + } + + #[test] + fn test_timeseries_schema_from_parts() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + + assert_eq!(schema.timeseries_name, "my_target:my_metric"); + let f = schema.schema_for_field("id").unwrap(); + assert_eq!(f.name, "id"); + assert_eq!(f.field_type, FieldType::Uuid); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("name").unwrap(); + assert_eq!(f.name, "name"); + assert_eq!(f.field_type, FieldType::String); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("happy").unwrap(); + assert_eq!(f.name, "happy"); + assert_eq!(f.field_type, FieldType::Bool); + assert_eq!(f.source, FieldSource::Metric); + assert_eq!(schema.datum_type, DatumType::U64); + } + + #[test] + fn test_timeseries_schema_from_sample() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let sample = Sample::new(&target, &metric).unwrap(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + let schema_from_sample = TimeseriesSchema::from(&sample); + assert_eq!(schema, schema_from_sample); + } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. 
+ // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. + #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + field_type: FieldType::U64, + source: FieldSource::Target, + description: String::new(), + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + field_type: FieldType::U64, + source: FieldSource::Metric, + description: String::new(), + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + description: Default::default(), + field_schema, + datum_type, + version: default_schema_version(), + authz_scope: AuthzScope::Fleet, + units: Units::Count, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = TimeseriesSchema::from(&sample); + assert_eq!(derived_schema, expected_schema); + } +} diff --git a/oximeter/timeseries-macro/Cargo.toml b/oximeter/timeseries-macro/Cargo.toml index db591aed06..2fb8b8f312 100644 --- a/oximeter/timeseries-macro/Cargo.toml +++ b/oximeter/timeseries-macro/Cargo.toml @@ -8,7 +8,8 @@ proc-macro = true [dependencies] omicron-workspace-hack.workspace = 
true -oximeter-impl.workspace = true +oximeter-schema.workspace = true +oximeter-types.workspace = true proc-macro2.workspace = true quote.workspace = true syn.workspace = true diff --git a/oximeter/timeseries-macro/src/lib.rs b/oximeter/timeseries-macro/src/lib.rs index 317a8533a4..12ec2cc417 100644 --- a/oximeter/timeseries-macro/src/lib.rs +++ b/oximeter/timeseries-macro/src/lib.rs @@ -8,7 +8,7 @@ extern crate proc_macro; -use oximeter_impl::schema::SCHEMA_DIRECTORY; +use oximeter_types::schema::SCHEMA_DIRECTORY; /// Generate code to use the timeseries from one target. /// @@ -45,7 +45,7 @@ pub fn use_timeseries( .into(); } }; - match oximeter_impl::schema::codegen::use_timeseries(&contents) { + match oximeter_schema::codegen::use_timeseries(&contents) { Ok(toks) => { let path_ = path.display().to_string(); return quote::quote! { @@ -59,9 +59,8 @@ pub fn use_timeseries( Err(e) => { let msg = format!( "Failed to generate timeseries types \ - from '{}': {:?}", + from '{}': {e}", path.display(), - e, ); return syn::Error::new(token.span(), msg) .into_compile_error() diff --git a/oximeter/impl/Cargo.toml b/oximeter/types/Cargo.toml similarity index 78% rename from oximeter/impl/Cargo.toml rename to oximeter/types/Cargo.toml index 91277d9d47..6d6bbc07e6 100644 --- a/oximeter/impl/Cargo.toml +++ b/oximeter/types/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "oximeter-impl" +name = "oximeter-types" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -11,22 +11,13 @@ workspace = true bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true float-ord.workspace = true -heck.workspace = true num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true -oximeter-macro-impl.workspace = true -prettyplease.workspace = true -proc-macro2.workspace = true -quote.workspace = true regex.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true -serde_json.workspace = 
true -slog-error-chain.workspace = true strum.workspace = true -syn.workspace = true -toml.workspace = true thiserror.workspace = true uuid.workspace = true @@ -34,6 +25,7 @@ uuid.workspace = true approx.workspace = true # For benchmark criterion.workspace = true +oximeter-macro-impl.workspace = true rand = { workspace = true, features = ["std_rng"] } rand_distr.workspace = true rstest.workspace = true diff --git a/oximeter/impl/benches/quantile.rs b/oximeter/types/benches/quantile.rs similarity index 97% rename from oximeter/impl/benches/quantile.rs rename to oximeter/types/benches/quantile.rs index 4540ba8f6a..b88cb211e6 100644 --- a/oximeter/impl/benches/quantile.rs +++ b/oximeter/types/benches/quantile.rs @@ -8,7 +8,7 @@ // Copyright 2024 Oxide Computer Company use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use oximeter_impl::Quantile; +use oximeter_types::Quantile; use rand_distr::{Distribution, Normal}; /// Emulates baseline code in a Python implementation of the P² diff --git a/oximeter/impl/src/histogram.rs b/oximeter/types/src/histogram.rs similarity index 99% rename from oximeter/impl/src/histogram.rs rename to oximeter/types/src/histogram.rs index 40df0a1b41..0b85727ee0 100644 --- a/oximeter/impl/src/histogram.rs +++ b/oximeter/types/src/histogram.rs @@ -523,9 +523,9 @@ where /// Example /// ------- /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
- /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::Histogram; /// /// let hist = Histogram::with_bins(&[(0..10).into(), (10..100).into()]).unwrap(); @@ -922,9 +922,9 @@ where /// ------- /// /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::{Histogram, BinRange}; /// use std::ops::{RangeBounds, Bound}; /// diff --git a/oximeter/impl/src/lib.rs b/oximeter/types/src/lib.rs similarity index 92% rename from oximeter/impl/src/lib.rs rename to oximeter/types/src/lib.rs index 5acbeb9422..7a1a480f8d 100644 --- a/oximeter/impl/src/lib.rs +++ b/oximeter/types/src/lib.rs @@ -4,8 +4,6 @@ // Copyright 2024 Oxide Computer Company -pub use oximeter_macro_impl::*; - // Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate // code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's // fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. 
@@ -17,15 +15,18 @@ extern crate self as oximeter; pub mod histogram; pub mod quantile; pub mod schema; -pub mod test_util; pub mod traits; pub mod types; pub use quantile::Quantile; pub use quantile::QuantileError; +pub use schema::AuthzScope; pub use schema::FieldSchema; +pub use schema::FieldSource; +pub use schema::TimeseriesDescription; pub use schema::TimeseriesName; pub use schema::TimeseriesSchema; +pub use schema::Units; pub use traits::Metric; pub use traits::Producer; pub use traits::Target; diff --git a/oximeter/impl/src/quantile.rs b/oximeter/types/src/quantile.rs similarity index 97% rename from oximeter/impl/src/quantile.rs rename to oximeter/types/src/quantile.rs index fafe9c9ece..40777217e5 100644 --- a/oximeter/impl/src/quantile.rs +++ b/oximeter/types/src/quantile.rs @@ -78,9 +78,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::new(0.5).unwrap(); /// @@ -116,9 +116,9 @@ impl Quantile { /// /// # Examples /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::from_parts( /// 0.5, @@ -200,9 +200,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
- /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.5).unwrap(); /// for o in 1..=100 { @@ -243,9 +243,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.9).unwrap(); /// q.append(10).unwrap(); diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/types/src/schema.rs similarity index 75% rename from oximeter/impl/src/schema/mod.rs rename to oximeter/types/src/schema.rs index 7743034e31..80aaa6f101 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/types/src/schema.rs @@ -6,9 +6,6 @@ //! Tools for working with schema for fields and timeseries. -pub mod codegen; -pub mod ir; - use crate::types::DatumType; use crate::types::FieldType; use crate::types::MetricsError; @@ -31,6 +28,8 @@ use std::num::NonZeroU8; pub const SCHEMA_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../oximeter/schema"); +pub type TimeseriesKey = u64; + /// The name and type information for a field of a timeseries schema. #[derive( Clone, @@ -179,7 +178,6 @@ pub struct TimeseriesDescription { /// Measurement units for timeseries samples. #[derive(Clone, Copy, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] #[serde(rename_all = "snake_case")] -// TODO-completeness: Include more units, such as power / temperature. // TODO-completeness: Decide whether and how to handle dimensional analysis // during queries, if needed. pub enum Units { @@ -189,6 +187,11 @@ pub enum Units { Bytes, Seconds, Nanoseconds, + Volts, + Amps, + DegreesCelcius, + /// Rotations per minute. + Rpm, } /// The schema for a timeseries. 
@@ -398,7 +401,6 @@ pub enum AuthzScope { mod tests { use super::*; use std::convert::TryFrom; - use uuid::Uuid; #[test] fn test_timeseries_name() { @@ -422,127 +424,6 @@ mod tests { assert!(TimeseriesName::try_from("x.a:b").is_err()); } - #[derive(Target)] - struct MyTarget { - id: Uuid, - name: String, - } - - const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); - - impl Default for MyTarget { - fn default() -> Self { - Self { id: ID, name: String::from("name") } - } - } - - #[derive(Metric)] - struct MyMetric { - happy: bool, - datum: u64, - } - - impl Default for MyMetric { - fn default() -> Self { - Self { happy: true, datum: 0 } - } - } - - #[test] - fn test_timeseries_schema_from_parts() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - - assert_eq!(schema.timeseries_name, "my_target:my_metric"); - let f = schema.schema_for_field("id").unwrap(); - assert_eq!(f.name, "id"); - assert_eq!(f.field_type, FieldType::Uuid); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("name").unwrap(); - assert_eq!(f.name, "name"); - assert_eq!(f.field_type, FieldType::String); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("happy").unwrap(); - assert_eq!(f.name, "happy"); - assert_eq!(f.field_type, FieldType::Bool); - assert_eq!(f.source, FieldSource::Metric); - assert_eq!(schema.datum_type, DatumType::U64); - } - - #[test] - fn test_timeseries_schema_from_sample() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let sample = Sample::new(&target, &metric).unwrap(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - let schema_from_sample = TimeseriesSchema::from(&sample); - assert_eq!(schema, schema_from_sample); - } - - // Test that we correctly order field across a target and metric. 
- // - // In an earlier commit, we switched from storing fields in an unordered Vec - // to using a BTree{Map,Set} to ensure ordering by name. However, the - // `TimeseriesSchema` type stored all its fields by chaining the sorted - // fields from the target and metric, without then sorting _across_ them. - // - // This was exacerbated by the error reporting, where we did in fact sort - // all fields across the target and metric, making it difficult to tell how - // the derived schema was different, if at all. - // - // This test generates a sample with a schema where the target and metric - // fields are sorted within them, but not across them. We check that the - // derived schema are actually equal, which means we've imposed that - // ordering when deriving the schema. - #[test] - fn test_schema_field_ordering_across_target_metric() { - let target_field = FieldSchema { - name: String::from("later"), - field_type: FieldType::U64, - source: FieldSource::Target, - description: String::new(), - }; - let metric_field = FieldSchema { - name: String::from("earlier"), - field_type: FieldType::U64, - source: FieldSource::Metric, - description: String::new(), - }; - let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); - let datum_type = DatumType::U64; - let field_schema = - [target_field.clone(), metric_field.clone()].into_iter().collect(); - let expected_schema = TimeseriesSchema { - timeseries_name, - description: Default::default(), - field_schema, - datum_type, - version: default_schema_version(), - authz_scope: AuthzScope::Fleet, - units: Units::Count, - created: Utc::now(), - }; - - #[derive(oximeter::Target)] - struct Foo { - later: u64, - } - #[derive(oximeter::Metric)] - struct Bar { - earlier: u64, - datum: u64, - } - - let target = Foo { later: 1 }; - let metric = Bar { earlier: 2, datum: 10 }; - let sample = Sample::new(&target, &metric).unwrap(); - let derived_schema = TimeseriesSchema::from(&sample); - assert_eq!(derived_schema, 
expected_schema); - } - #[test] fn test_field_schema_ordering() { let mut fields = BTreeSet::new(); diff --git a/oximeter/impl/src/traits.rs b/oximeter/types/src/traits.rs similarity index 96% rename from oximeter/impl/src/traits.rs rename to oximeter/types/src/traits.rs index 16baa4f619..91ecca817d 100644 --- a/oximeter/impl/src/traits.rs +++ b/oximeter/types/src/traits.rs @@ -45,9 +45,9 @@ use std::ops::AddAssign; /// -------- /// /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{traits::Target, types::FieldType}; /// use uuid::Uuid; @@ -75,9 +75,9 @@ use std::ops::AddAssign; /// supported types. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(oximeter::Target)] /// struct Bad { @@ -160,9 +160,9 @@ pub trait Target { /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use chrono::Utc; /// use oximeter::Metric; @@ -185,9 +185,9 @@ pub trait Target { /// an unsupported type. 
/// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(Metric)] /// pub struct BadType { @@ -364,9 +364,9 @@ pub use crate::histogram::HistogramSupport; /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{Datum, MetricsError, Metric, Producer, Target}; /// use oximeter::types::{Measurement, Sample, Cumulative}; @@ -464,6 +464,8 @@ pub trait Producer: Send + Sync + std::fmt::Debug + 'static { #[cfg(test)] mod tests { + use oximeter_macro_impl::{Metric, Target}; + use crate::types; use crate::{ Datum, DatumType, FieldType, FieldValue, Metric, MetricsError, diff --git a/oximeter/impl/src/types.rs b/oximeter/types/src/types.rs similarity index 97% rename from oximeter/impl/src/types.rs rename to oximeter/types/src/types.rs index 370557f7f7..60260e3649 100644 --- a/oximeter/impl/src/types.rs +++ b/oximeter/types/src/types.rs @@ -850,7 +850,7 @@ pub struct Sample { /// The version of the timeseries this sample belongs to // // TODO-cleanup: This should be removed once schema are tracked in CRDB. 
- #[serde(default = "::oximeter::schema::default_schema_version")] + #[serde(default = "crate::schema::default_schema_version")] pub timeseries_version: NonZeroU8, // Target name and fields @@ -1104,15 +1104,10 @@ mod tests { use super::Measurement; use super::MetricsError; use super::Sample; - use crate::test_util; - use crate::types; - use crate::Metric; - use crate::Target; use bytes::Bytes; use std::collections::BTreeMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; - use uuid::Uuid; #[test] fn test_cumulative_i64() { @@ -1176,31 +1171,6 @@ mod tests { assert!(measurement.timestamp() >= measurement.start_time().unwrap()); } - #[test] - fn test_sample_struct() { - let t = test_util::TestTarget::default(); - let m = test_util::TestMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64, - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert_eq!( - sample.timeseries_name, - format!("{}:{}", t.name(), m.name()) - ); - assert!(sample.measurement.start_time().is_none()); - assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); - - let m = test_util::TestCumulativeMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64.into(), - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert!(sample.measurement.start_time().is_some()); - } - #[rstest::rstest] #[case::as_string("some string", FieldValue::String("some string".into()))] #[case::as_i8("2", FieldValue::I8(2))] diff --git a/oximeter/impl/tests/fail/failures.rs b/oximeter/types/tests/fail/failures.rs similarity index 100% rename from oximeter/impl/tests/fail/failures.rs rename to oximeter/types/tests/fail/failures.rs diff --git a/oximeter/impl/tests/fail/failures.stderr b/oximeter/types/tests/fail/failures.stderr similarity index 100% rename from oximeter/impl/tests/fail/failures.stderr rename to oximeter/types/tests/fail/failures.stderr diff --git a/oximeter/impl/tests/test_compilation.rs b/oximeter/types/tests/test_compilation.rs similarity index 100% rename from 
oximeter/impl/tests/test_compilation.rs rename to oximeter/types/tests/test_compilation.rs diff --git a/package-manifest.toml b/package-manifest.toml index 24f848c31a..125861f610 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -140,19 +140,24 @@ source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true source.paths = [ - { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, + { from = "smf/oximeter/{{clickhouse-topology}}/config.toml", to = "/var/svc/manifest/site/oximeter/config.toml" }, + { from = "smf/oximeter/manifest.xml", to = "/var/svc/manifest/site/oximeter/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true [package.clickhouse] +# This service runs a single-node ClickHouse server. service_name = "clickhouse" only_for_targets.image = "standard" source.type = "composite" source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", + # TODO: This package is for solely for testing purposes. + # Remove once replicated clickhouse is up and running. + "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -166,19 +171,52 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse" }, { from = "smf/clickhouse/manifest.xml", to = "/var/svc/manifest/site/clickhouse/manifest.xml" }, { from = "smf/clickhouse/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse.sh" }, - { from = "smf/clickhouse/config_replica.xml", to = "/opt/oxide/clickhouse/config.d/config_replica.xml" }, +] +output.type = "zone" +output.intermediate_only = true +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" + +[package.clickhouse_server] +# This service runs a server for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_keeper service. +# One cannot be run without the other. 
+service_name = "clickhouse_server" +only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ + "clickhouse_server_svc.tar.gz", + "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" +] +output.type = "zone" + +[package.clickhouse_server_svc] +service_name = "clickhouse_server_svc" +only_for_targets.image = "standard" +source.type = "local" +source.paths = [ + { from = "out/clickhouse", to = "/opt/oxide/clickhouse_server" }, + { from = "smf/clickhouse_server/manifest.xml", to = "/var/svc/manifest/site/clickhouse_server/manifest.xml" }, + { from = "smf/clickhouse_server/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse_server.sh" }, + { from = "smf/clickhouse_server/config_replica.xml", to = "/opt/oxide/clickhouse_server/config.d/config_replica.xml" }, ] output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.clickhouse_keeper] +# This service runs a keeper for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_server service. +# One cannot be run without the other. 
service_name = "clickhouse_keeper" only_for_targets.image = "standard" source.type = "composite" source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -198,6 +236,18 @@ output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" +[package.omicron-clickhouse-admin] +service_name = "clickhouse-admin" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickhouse-admin"] +source.rust.release = true +source.paths = [ + { from = "smf/clickhouse-admin", to = "/var/svc/manifest/site/clickhouse-admin" }, +] +output.type = "zone" +output.intermediate_only = true + [package.cockroachdb] service_name = "cockroachdb" only_for_targets.image = "standard" @@ -578,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "220dd026e83142b83bd93123f465a64dd4600201" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "58c8fcec6b932f7e602ac82cc28460aa557cabae1b66947ab3cb7334b87c35d4" +source.sha256 = "c33915998894dd36a2d1078f7e13717aa20760924c30640d7647d4791dd5f2ee" output.type = "tarball" [package.mg-ddm] @@ -594,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "220dd026e83142b83bd93123f465a64dd4600201" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "69fa43393a77f19713c7d76a320064e3eb58b3ea0b2953d2079a5c3edebc172e" +source.sha256 = "be9d657ec22a69468b18f2b4d48e55621538eade8b8d3e367a1d8d5cc686cfbe" output.type = "zone" output.intermediate_only = true @@ -609,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "220dd026e83142b83bd93123f465a64dd4600201" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "f1103de5dda4830eb653f4d555995d08c31253116448387399a77392c08dfb54" +source.sha256 = "e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" output.type = "zone" output.intermediate_only = true @@ -620,8 +670,8 @@ output.intermediate_only = true service_name = "lldp" source.type = "prebuilt" source.repo = "lldp" -source.commit = "30e5d89fae9190c69258ca77d5d5a1acec064742" -source.sha256 = "f58bfd1b77748544b5b1a99a07e52bab8dc5673b9bd3a745ebbfdd614d492328" +source.commit = "188f0f6d4c066f1515bd707050407cedd790fcf1" +source.sha256 = "132d0760be5208f60b58bcaed98fa6384b09f41dd5febf51970f5cbf46138ecf" output.type = "zone" output.intermediate_only = true @@ -660,8 +710,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "9811438cc91c6ec4e8a8ca12479c920bb25fec81" -source.sha256 = "4b09ea6d89af353fd4240a3cfde8655c555f6f42e05c6fc4a4e32724f86bb749" +source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" +source.sha256 = "3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" output.type = "zone" output.intermediate_only = true @@ -687,8 +737,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "9811438cc91c6ec4e8a8ca12479c920bb25fec81" -source.sha256 = "224ff076a3031d5b913e40084a48fce7bdd08e8ef1abd1ab74df0058963bb3b2" +source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" +source.sha256 = "ad02632713a57fe8c5371316320309e1fad52f0ce2f7e6f768859aa94dfbb1d9" output.type = "zone" output.intermediate_only = true @@ -707,8 +757,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "9811438cc91c6ec4e8a8ca12479c920bb25fec81" -source.sha256 = "66b04128c41ad9cd26ca3746d51fff5d295ca65f48e7aabee616026934cc8d5e" +source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" +source.sha256 = "23bca3873cdb0441cd18c0cf071b86d49755be06837479661876ac95d2f10f27" output.type = "zone" output.intermediate_only = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index b2b8703015..cd88345d0a 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -265,12 +265,19 @@ async fn do_target( format!("failed to create directory {}", target_dir) })?; match subcommand { - TargetCommand::Create { image, machine, switch, rack_topology } => { + TargetCommand::Create { + image, + machine, + switch, + rack_topology, + clickhouse_topology, + } => { let target = KnownTarget::new( image.clone(), machine.clone(), switch.clone(), rack_topology.clone(), + clickhouse_topology.clone(), )?; let path = 
get_single_target(&target_dir, name).await?; diff --git a/package/src/lib.rs b/package/src/lib.rs index 2009de9dfe..b37c1774fd 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -68,6 +68,21 @@ pub enum TargetCommand { /// fail in a single-sled environment. `single-sled` relaxes this /// requirement. rack_topology: crate::target::RackTopology, + + #[clap( + short, + long, + default_value = Some("single-node"), + required = false + )] + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): Remove + // once single-node functionality is removed. + /// Specify whether clickhouse will be deployed as a replicated cluster + /// or single-node configuration. + /// + /// Replicated cluster configuration is an experimental feature to be + /// used only for testing. + clickhouse_topology: crate::target::ClickhouseTopology, }, /// List all existing targets List, diff --git a/package/src/target.rs b/package/src/target.rs index 589dba7870..6a6cbd32d8 100644 --- a/package/src/target.rs +++ b/package/src/target.rs @@ -62,6 +62,18 @@ pub enum RackTopology { SingleSled, } +/// Topology of the ClickHouse installation within the rack. +#[derive(Clone, Debug, strum::EnumString, strum::Display, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +#[clap(rename_all = "kebab-case")] +pub enum ClickhouseTopology { + /// Use configurations suitable for a replicated ClickHouse cluster deployment. + ReplicatedCluster, + + /// Use configurations suitable for a single-node ClickHouse deployment. + SingleNode, +} + /// A strongly-typed variant of [Target]. 
#[derive(Clone, Debug)] pub struct KnownTarget { @@ -69,6 +81,7 @@ pub struct KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, } impl KnownTarget { @@ -77,6 +90,7 @@ impl KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, ) -> Result { if matches!(image, Image::Trampoline) { if machine.is_some() { @@ -93,7 +107,7 @@ impl KnownTarget { bail!("'switch=asic' is only valid with 'machine=gimlet'"); } - Ok(Self { image, machine, switch, rack_topology }) + Ok(Self { image, machine, switch, rack_topology, clickhouse_topology }) } } @@ -104,6 +118,7 @@ impl Default for KnownTarget { machine: Some(Machine::NonGimlet), switch: Some(Switch::Stub), rack_topology: RackTopology::MultiSled, + clickhouse_topology: ClickhouseTopology::SingleNode, } } } @@ -119,6 +134,10 @@ impl From for Target { map.insert("switch".to_string(), switch.to_string()); } map.insert("rack-topology".to_string(), kt.rack_topology.to_string()); + map.insert( + "clickhouse-topology".to_string(), + kt.clickhouse_topology.to_string(), + ); Target(map) } } @@ -140,6 +159,7 @@ impl std::str::FromStr for KnownTarget { let mut machine = None; let mut switch = None; let mut rack_topology = None; + let mut clickhouse_topology = None; for (k, v) in target.0.into_iter() { match k.as_str() { @@ -155,6 +175,9 @@ impl std::str::FromStr for KnownTarget { "rack-topology" => { rack_topology = Some(v.parse()?); } + "clickhouse-topology" => { + clickhouse_topology = Some(v.parse()?); + } _ => { bail!( "Unknown target key {k}\nValid keys include: [{}]", @@ -173,6 +196,7 @@ impl std::str::FromStr for KnownTarget { machine, switch, rack_topology.unwrap_or(RackTopology::MultiSled), + clickhouse_topology.unwrap_or(ClickhouseTopology::SingleNode), ) } } diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 910feb8c74..4d20959ad1 100644 --- 
a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -353,6 +353,7 @@ } }, { + "description": "Type of clickhouse zone used for a single node clickhouse deployment", "type": "object", "required": [ "address", @@ -375,6 +376,7 @@ } }, { + "description": "A zone used to run a Clickhouse Keeper node\n\nKeepers are only used in replicated clickhouse setups", "type": "object", "required": [ "address", @@ -396,6 +398,29 @@ } } }, + { + "description": "A zone used to run a Clickhouse Server in a replicated deployment", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + } + }, { "type": "object", "required": [ diff --git a/schema/crdb/add-clickhouse-server-enum-variants/up1.sql b/schema/crdb/add-clickhouse-server-enum-variants/up1.sql new file mode 100644 index 0000000000..9f1b4e419c --- /dev/null +++ b/schema/crdb/add-clickhouse-server-enum-variants/up1.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind ADD VALUE IF NOT EXISTS 'clickhouse_server' AFTER 'clickhouse_keeper'; diff --git a/schema/crdb/add-clickhouse-server-enum-variants/up2.sql b/schema/crdb/add-clickhouse-server-enum-variants/up2.sql new file mode 100644 index 0000000000..b94a4df0cf --- /dev/null +++ b/schema/crdb/add-clickhouse-server-enum-variants/up2.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'clickhouse_server' AFTER 'clickhouse_keeper'; diff --git a/schema/crdb/add-clickhouse-server-enum-variants/up3.sql b/schema/crdb/add-clickhouse-server-enum-variants/up3.sql new file mode 100644 index 0000000000..874ccec8f2 --- /dev/null +++ b/schema/crdb/add-clickhouse-server-enum-variants/up3.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.zone_type ADD VALUE IF NOT EXISTS 'clickhouse_server' AFTER 'clickhouse_keeper'; diff --git 
a/schema/crdb/collapse_lldp_settings/up1.sql b/schema/crdb/collapse_lldp_settings/up1.sql new file mode 100644 index 0000000000..f7fb05d726 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up1.sql @@ -0,0 +1,4 @@ +/* + * The old lldp_service_config_id is being replaced with lldp_link_config_id. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config DROP COLUMN IF EXISTS lldp_service_config_id; diff --git a/schema/crdb/collapse_lldp_settings/up2.sql b/schema/crdb/collapse_lldp_settings/up2.sql new file mode 100644 index 0000000000..b2d884d068 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up2.sql @@ -0,0 +1,4 @@ +/* + * Add a pointer to this link's LLDP config settings. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS lldp_link_config_id UUID NOT NULL; diff --git a/schema/crdb/collapse_lldp_settings/up3.sql b/schema/crdb/collapse_lldp_settings/up3.sql new file mode 100644 index 0000000000..9c4ef8549b --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up3.sql @@ -0,0 +1,5 @@ +/* + * Drop the old lldp_service_config table, which has been incorporated into the + * new lldp_link_config. + */ +DROP TABLE IF EXISTS omicron.public.lldp_service_config; diff --git a/schema/crdb/collapse_lldp_settings/up4.sql b/schema/crdb/collapse_lldp_settings/up4.sql new file mode 100644 index 0000000000..3c8d4e86cf --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up4.sql @@ -0,0 +1,4 @@ +/* + * Drop the old lldp_config table, which has been replaced by lldp_link_config. 
+ */ +DROP TABLE IF EXISTS omicron.public.lldp_config; diff --git a/schema/crdb/collapse_lldp_settings/up5.sql b/schema/crdb/collapse_lldp_settings/up5.sql new file mode 100644 index 0000000000..50dcd618d8 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up5.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config ( + id UUID PRIMARY KEY, + enabled BOOL NOT NULL, + link_name STRING(63), + link_description STRING(512), + chassis_id STRING(63), + system_name STRING(63), + system_description STRING(512), + management_ip TEXT, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/collapse_lldp_settings/up6.sql b/schema/crdb/collapse_lldp_settings/up6.sql new file mode 100644 index 0000000000..3b16af6f4b --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up6.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS lldp_config_by_name; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 08f14dbaaf..7216211fb4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -288,6 +288,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_switch_by_rack ON omicron.public.switch CREATE TYPE IF NOT EXISTS omicron.public.service_kind AS ENUM ( 'clickhouse', 'clickhouse_keeper', + 'clickhouse_server', 'cockroach', 'crucible', 'crucible_pantry', @@ -506,6 +507,7 @@ CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM ( 'cockroach', 'clickhouse', 'clickhouse_keeper', + 'clickhouse_server', 'external_dns', 'internal_dns', 'zone_root', @@ -2663,40 +2665,30 @@ CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM ( CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_link_config ( port_settings_id UUID, - lldp_service_config_id UUID NOT NULL, link_name TEXT, mtu INT4, fec omicron.public.switch_link_fec, speed omicron.public.switch_link_speed, autoneg BOOL NOT NULL DEFAULT false, + lldp_link_config_id UUID NOT NULL, PRIMARY KEY (port_settings_id, 
link_name) ); -CREATE TABLE IF NOT EXISTS omicron.public.lldp_service_config ( +CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config ( id UUID PRIMARY KEY, - lldp_config_id UUID, - enabled BOOL NOT NULL -); - -CREATE TABLE IF NOT EXISTS omicron.public.lldp_config ( - id UUID PRIMARY KEY, - name STRING(63) NOT NULL, - description STRING(512) NOT NULL, + enabled BOOL NOT NULL, + link_name STRING(63), + link_description STRING(512), + chassis_id STRING(63), + system_name STRING(63), + system_description STRING(612), + management_ip TEXT, time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, - time_deleted TIMESTAMPTZ, - chassis_id TEXT, - system_name TEXT, - system_description TEXT, - management_ip TEXT + time_deleted TIMESTAMPTZ ); -CREATE UNIQUE INDEX IF NOT EXISTS lldp_config_by_name ON omicron.public.lldp_config ( - name -) WHERE - time_deleted IS NULL; - CREATE TYPE IF NOT EXISTS omicron.public.switch_interface_kind AS ENUM ( 'primary', 'vlan', @@ -2728,6 +2720,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_route_config ( dst INET, gw INET, vid INT4, + local_pref INT8, /* TODO https://github.com/oxidecomputer/omicron/issues/3013 */ PRIMARY KEY (port_settings_id, interface_name, dst, gw) @@ -2803,6 +2796,10 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_bgp_config_by_name ON omicron.public.bg ) WHERE time_deleted IS NULL; +CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config ( + asn +) WHERE time_deleted IS NULL; + CREATE TABLE IF NOT EXISTS omicron.public.bgp_announce_set ( id UUID PRIMARY KEY, name STRING(63) NOT NULL, @@ -3249,6 +3246,7 @@ CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( 'boundary_ntp', 'clickhouse', 'clickhouse_keeper', + 'clickhouse_server', 'cockroach_db', 'crucible', 'crucible_pantry', @@ -4305,7 +4303,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '89.0.0', NULL) + (TRUE, NOW(), NOW(), '93.0.0', NULL) ON 
CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/lookup-bgp-config-by-asn/up01.sql b/schema/crdb/lookup-bgp-config-by-asn/up01.sql new file mode 100644 index 0000000000..e886015a29 --- /dev/null +++ b/schema/crdb/lookup-bgp-config-by-asn/up01.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config ( + asn +) WHERE time_deleted IS NULL; diff --git a/schema/crdb/route-local-pref/up.sql b/schema/crdb/route-local-pref/up.sql new file mode 100644 index 0000000000..d1051ccd0c --- /dev/null +++ b/schema/crdb/route-local-pref/up.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.switch_port_settings_route_config ADD COLUMN IF NOT EXISTS local_pref INT8; diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json index 04cf473ccb..58f1877b80 100644 --- a/schema/rss-service-plan-v3.json +++ b/schema/rss-service-plan-v3.json @@ -587,6 +587,7 @@ } }, { + "description": "Type of clickhouse zone used for a single node clickhouse deployment", "type": "object", "required": [ "address", @@ -609,6 +610,7 @@ } }, { + "description": "A zone used to run a Clickhouse Keeper node\n\nKeepers are only used in replicated clickhouse setups", "type": "object", "required": [ "address", @@ -630,6 +632,29 @@ } } }, + { + "description": "A zone used to run a Clickhouse Server in a replicated deployment", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + } + }, { "type": "object", "required": [ diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index a3d3425870..b0abc8c67e 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -604,6 +604,79 @@ } } }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + 
"enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "required": [ + "status" + ], + "properties": { + "chassis_id": { + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "management_addrs": { + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "array", + "null" + ], + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": [ + "string", + "null" + ] + }, + "port_id": { + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": [ + "string", + "null" + ] + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/definitions/LldpAdminStatus" + } + ] + }, + "system_description": { + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "system_name": { + "description": "System name to advertise. 
If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + } + } + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", @@ -648,6 +721,17 @@ "$ref": "#/definitions/BgpPeerConfig" } }, + "lldp": { + "description": "LLDP configuration for this port", + "anyOf": [ + { + "$ref": "#/definitions/LldpPortConfig" + }, + { + "type": "null" + } + ] + }, "port": { "description": "Name of the port this config applies to.", "type": "string" }, @@ -894,6 +978,16 @@ } ] }, + "local_pref": { + "description": "The local preference associated with this route.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 52889d8fa2..2aefd8f464 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -69,6 +69,7 @@ serde.workspace = true serde_human_bytes.workspace = true serde_json = { workspace = true, features = ["raw_value"] } sha3.workspace = true +sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true sled-hardware.workspace = true @@ -103,8 +104,6 @@ guppy.workspace = true http.workspace = true hyper.workspace = true omicron-test-utils.workspace = true -openapi-lint.workspace = true -openapiv3.workspace = true pretty_assertions.workspace = true rcgen.workspace = true subprocess.workspace = true diff --git a/sled-agent/api/Cargo.toml b/sled-agent/api/Cargo.toml new file mode 100644 index 0000000000..046f17574b --- /dev/null +++ b/sled-agent/api/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "sled-agent-api" +version 
= "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +camino.workspace = true +dropshot.workspace = true +nexus-sled-agent-shared.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +sled-agent-types.workspace = true +sled-hardware-types.workspace = true +uuid.workspace = true diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs new file mode 100644 index 0000000000..4f8f95a726 --- /dev/null +++ b/sled-agent/api/src/lib.rs @@ -0,0 +1,571 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::{collections::BTreeMap, time::Duration}; + +use camino::Utf8PathBuf; +use dropshot::{ + FreeformBody, HttpError, HttpResponseCreated, HttpResponseDeleted, + HttpResponseHeaders, HttpResponseOk, HttpResponseUpdatedNoContent, Path, + Query, RequestContext, StreamingBody, TypedBody, +}; +use nexus_sled_agent_shared::inventory::{ + Inventory, OmicronZonesConfig, SledRole, +}; +use omicron_common::{ + api::internal::{ + nexus::{DiskRuntimeState, SledInstanceState, UpdateArtifactId}, + shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, + SwitchPorts, VirtualNetworkInterfaceHost, + }, + }, + disk::{ + DatasetsConfig, DatasetsManagementResult, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, + }, +}; +use omicron_uuid_kinds::{InstanceUuid, ZpoolUuid}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_agent_types::{ + boot_disk::{ + BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, + BootDiskWriteStartQueryParams, + }, + bootstore::BootstoreStatus, + disk::DiskEnsureBody, + early_networking::EarlyNetworkConfig, + firewall_rules::VpcFirewallRulesEnsureBody, + 
instance::{ + InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, + }, + sled::AddSledRequest, + time_sync::TimeSync, + zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, PriorityOrder, + ZoneBundleId, ZoneBundleMetadata, + }, +}; +use uuid::Uuid; + +#[dropshot::api_description] +pub trait SledAgentApi { + type Context; + + /// List all zone bundles that exist, even for now-deleted zones. + #[endpoint { + method = GET, + path = "/zones/bundles", + }] + async fn zone_bundle_list_all( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError>; + + /// List the zone bundles that are available for a running zone. + #[endpoint { + method = GET, + path = "/zones/bundles/{zone_name}", + }] + async fn zone_bundle_list( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError>; + + /// Ask the sled agent to create a zone bundle. + #[endpoint { + method = POST, + path = "/zones/bundles/{zone_name}", + }] + async fn zone_bundle_create( + rqctx: RequestContext, + params: Path, + ) -> Result, HttpError>; + + /// Fetch the binary content of a single zone bundle. + #[endpoint { + method = GET, + path = "/zones/bundles/{zone_name}/{bundle_id}", + }] + async fn zone_bundle_get( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError>; + + /// Delete a zone bundle. + #[endpoint { + method = DELETE, + path = "/zones/bundles/{zone_name}/{bundle_id}", + }] + async fn zone_bundle_delete( + rqctx: RequestContext, + params: Path, + ) -> Result; + + /// Return utilization information about all zone bundles. + #[endpoint { + method = GET, + path = "/zones/bundle-cleanup/utilization", + }] + async fn zone_bundle_utilization( + rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// Return context used by the zone-bundle cleanup task. 
+ #[endpoint { + method = GET, + path = "/zones/bundle-cleanup/context", + }] + async fn zone_bundle_cleanup_context( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Update context used by the zone-bundle cleanup task. + #[endpoint { + method = PUT, + path = "/zones/bundle-cleanup/context", + }] + async fn zone_bundle_cleanup_context_update( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Trigger a zone bundle cleanup. + #[endpoint { + method = POST, + path = "/zones/bundle-cleanup", + }] + async fn zone_bundle_cleanup( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// List the zones that are currently managed by the sled agent. + #[endpoint { + method = GET, + path = "/zones", + }] + async fn zones_list( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + #[endpoint { + method = GET, + path = "/omicron-zones", + }] + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/omicron-zones", + }] + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Configures datasets to be used on this sled + #[endpoint { + method = PUT, + path = "/datasets", + }] + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + /// Lists the datasets that this sled is configured to use + #[endpoint { + method = GET, + path = "/datasets", + }] + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/omicron-physical-disks", + }] + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/omicron-physical-disks", + }] + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/zpools", + }] + async fn zpools_get( + rqctx: RequestContext, + ) -> Result>, HttpError>; + 
+ #[endpoint { + method = GET, + path = "/sled-role", + }] + async fn sled_role_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Initializes a CockroachDB cluster + #[endpoint { + method = POST, + path = "/cockroachdb", + }] + async fn cockroachdb_init( + rqctx: RequestContext, + ) -> Result; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}", + }] + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = DELETE, + path = "/instances/{instance_id}", + }] + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}/state", + }] + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/instances/{instance_id}/state", + }] + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", + }] + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", + }] + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + #[endpoint { + method = PUT, + path = "/disks/{disk_id}", + }] + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; + + #[endpoint { + method = POST, + path = "/update" + }] + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result; + + /// Take a snapshot of a disk that is attached to an instance + #[endpoint { + method = POST, + path = "/instances/{instance_id}/disks/{disk_id}/snapshot", + }] + async fn 
instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + >; + + #[endpoint { + method = PUT, + path = "/vpc/{vpc_id}/firewall/rules", + }] + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result; + + /// Create a mapping from a virtual NIC to a physical host + // Keep interface_id to maintain parity with the simulated sled agent, which + // requires interface_id on the path. + #[endpoint { + method = PUT, + path = "/v2p/", + }] + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Delete a mapping from a virtual NIC to a physical host + // Keep interface_id to maintain parity with the simulated sled agent, which + // requires interface_id on the path. + #[endpoint { + method = DELETE, + path = "/v2p/", + }] + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// List v2p mappings present on sled + // Used by nexus background task + #[endpoint { + method = GET, + path = "/v2p/", + }] + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + #[endpoint { + method = GET, + path = "/timesync", + }] + async fn timesync_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = POST, + path = "/switch-ports", + }] + async fn uplink_ensure( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// This API endpoint is only reading the local sled agent's view of the + /// bootstore. The bootstore is a distributed data store that is eventually + /// consistent. Reads from individual nodes may not represent the latest state. 
+ #[endpoint { + method = GET, + path = "/network-bootstore-config", + }] + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = PUT, + path = "/network-bootstore-config", + }] + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Add a sled to a rack that was already initialized via RSS + #[endpoint { + method = PUT, + path = "/sleds" + }] + async fn sled_add( + rqctx: RequestContext, + body: TypedBody, + ) -> Result; + + /// Write a new host OS image to the specified boot disk + #[endpoint { + method = POST, + path = "/boot-disk/{boot_disk}/os/write", + }] + async fn host_os_write_start( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + body: StreamingBody, + ) -> Result; + + #[endpoint { + method = GET, + path = "/boot-disk/{boot_disk}/os/write/status", + }] + async fn host_os_write_status_get( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Clear the status of a completed write of a new host OS + #[endpoint { + method = DELETE, + path = "/boot-disk/{boot_disk}/os/write/status/{update_id}", + }] + async fn host_os_write_status_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Fetch basic information about this sled + #[endpoint { + method = GET, + path = "/inventory", + }] + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Fetch sled identifiers + #[endpoint { + method = GET, + path = "/sled-identifiers", + }] + async fn sled_identifiers( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Get the internal state of the local bootstore node + #[endpoint { + method = GET, + path = "/bootstore/status", + }] + async fn bootstore_status( + request_context: RequestContext, + ) -> Result, HttpError>; + + /// Get the current versions of VPC routing rules. 
+ #[endpoint { + method = GET, + path = "/vpc-routes", + }] + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Update VPC routing rules. + #[endpoint { + method = PUT, + path = "/vpc-routes", + }] + async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, + ) -> Result; +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ZoneBundleFilter { + /// An optional substring used to filter zone bundles. + pub filter: Option, +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ZonePathParam { + /// The name of the zone. + pub zone_name: String, +} + +/// Parameters used to update the zone bundle cleanup context. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct CleanupContextUpdate { + /// The new period on which automatic cleanups are run. + pub period: Option, + /// The priority ordering for preserving old zone bundles. + pub priority: Option, + /// The new limit on the underlying dataset quota allowed for bundles. 
+ pub storage_limit: Option, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: ZpoolUuid, + pub disk_type: DiskType, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub enum DiskType { + U2, + M2, +} + +impl From for DiskType { + fn from(v: DiskVariant) -> Self { + match v { + DiskVariant::U2 => Self::U2, + DiskVariant::M2 => Self::M2, + } + } +} + +/// Path parameters for Instance requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct InstancePathParam { + pub instance_id: InstanceUuid, +} + +/// Path parameters for Disk requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct DiskPathParam { + pub disk_id: Uuid, +} + +#[derive(Deserialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestPathParam { + pub instance_id: Uuid, + pub disk_id: Uuid, +} + +#[derive(Deserialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestBody { + pub snapshot_id: Uuid, +} + +#[derive(Serialize, JsonSchema)] +pub struct InstanceIssueDiskSnapshotRequestResponse { + pub snapshot_id: Uuid, +} + +/// Path parameters for VPC requests (sled agent API) +#[derive(Deserialize, JsonSchema)] +pub struct VpcPathParam { + pub vpc_id: Uuid, +} diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 6feeffd302..446103e982 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -6,20 +6,14 @@ use anyhow::anyhow; use camino::Utf8PathBuf; -use clap::{Parser, Subcommand}; +use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; use omicron_sled_agent::bootstrap::server as bootstrap_server; use omicron_sled_agent::bootstrap::RssAccessError; -use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; +use omicron_sled_agent::config::Config as SledConfig; use sled_agent_types::rack_init::RackInitializeRequest; -#[derive(Subcommand, Debug)] -enum 
OpenapiFlavor { - /// Generates sled agent openapi spec - Sled, -} - #[derive(Debug, Parser)] #[clap( name = "sled_agent", @@ -27,10 +21,6 @@ enum OpenapiFlavor { version )] enum Args { - /// Generates the OpenAPI specification. - #[command(subcommand)] - Openapi(OpenapiFlavor), - /// Runs the Sled Agent server. Run { #[clap(name = "CONFIG_FILE_PATH", action)] @@ -49,10 +39,6 @@ async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); match args { - Args::Openapi(flavor) => match flavor { - OpenapiFlavor::Sled => sled_server::run_openapi() - .map_err(|err| CmdError::Failure(anyhow!(err))), - }, Args::Run { config_path } => { let config = SledConfig::from_file(&config_path) .map_err(|e| CmdError::Failure(anyhow!(e)))?; diff --git a/sled-agent/src/boot_disk_os_writer.rs b/sled-agent/src/boot_disk_os_writer.rs index a0798ed174..59e79c418f 100644 --- a/sled-agent/src/boot_disk_os_writer.rs +++ b/sled-agent/src/boot_disk_os_writer.rs @@ -5,8 +5,6 @@ //! This module provides `BootDiskOsWriter`, via which sled-agent can write new //! OS images to its boot disks. 
-use crate::http_entrypoints::BootDiskOsWriteProgress; -use crate::http_entrypoints::BootDiskOsWriteStatus; use async_trait::async_trait; use bytes::Bytes; use camino::Utf8PathBuf; @@ -14,10 +12,12 @@ use display_error_chain::DisplayErrorChain; use dropshot::HttpError; use futures::Stream; use futures::TryStreamExt; -use installinator_common::M2Slot; use installinator_common::RawDiskWriter; +use omicron_common::disk::M2Slot; use sha3::Digest; use sha3::Sha3_256; +use sled_agent_types::boot_disk::BootDiskOsWriteProgress; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; use slog::Logger; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -37,18 +37,16 @@ use tokio::sync::oneshot::error::TryRecvError; use tokio::sync::watch; use uuid::Uuid; -impl BootDiskOsWriteStatus { - fn from_result( - update_id: Uuid, - result: &Result<(), Arc>, - ) -> Self { - match result { - Ok(()) => Self::Complete { update_id }, - Err(err) => Self::Failed { - update_id, - message: DisplayErrorChain::new(err).to_string(), - }, - } +fn to_boot_disk_status( + update_id: Uuid, + result: &Result<(), Arc>, +) -> BootDiskOsWriteStatus { + match result { + Ok(()) => BootDiskOsWriteStatus::Complete { update_id }, + Err(err) => BootDiskOsWriteStatus::Failed { + update_id, + message: DisplayErrorChain::new(err).to_string(), + }, } } @@ -393,9 +391,7 @@ impl BootDiskOsWriter { match running.complete_rx.try_recv() { Ok(result) => { let update_id = running.update_id; - let status = BootDiskOsWriteStatus::from_result( - update_id, &result, - ); + let status = to_boot_disk_status(update_id, &result); slot.insert(WriterState::Complete(TaskCompleteState { update_id, result, @@ -413,9 +409,7 @@ impl BootDiskOsWriter { let update_id = running.update_id; let result = Err(Arc::new(BootDiskOsWriteError::TaskPanic)); - let status = BootDiskOsWriteStatus::from_result( - update_id, &result, - ); + let status = to_boot_disk_status(update_id, &result); 
slot.insert(WriterState::Complete(TaskCompleteState { update_id, result, @@ -425,10 +419,7 @@ impl BootDiskOsWriter { } } WriterState::Complete(complete) => { - BootDiskOsWriteStatus::from_result( - complete.update_id, - &complete.result, - ) + to_boot_disk_status(complete.update_id, &complete.result) } } } diff --git a/sled-agent/src/bootstrap/client.rs b/sled-agent/src/bootstrap/client.rs index 10f1ab6f25..bfdaf6e6d4 100644 --- a/sled-agent/src/bootstrap/client.rs +++ b/sled-agent/src/bootstrap/client.rs @@ -7,10 +7,10 @@ use super::params::version; use super::params::Request; use super::params::RequestEnvelope; -use super::params::StartSledAgentRequest; use super::views::SledAgentResponse; use crate::bootstrap::views::Response; use crate::bootstrap::views::ResponseEnvelope; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::borrow::Cow; use std::io; diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 95a1f873f6..abc88d67c1 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -631,7 +631,8 @@ impl<'a> EarlyNetworkSetup<'a> { IpAddr::V6(_) => continue, }; let vlan_id = r.vlan_id; - let sr = StaticRoute4 { nexthop, prefix, vlan_id }; + let local_pref = r.local_pref; + let sr = StaticRoute4 { nexthop, prefix, vlan_id, local_pref }; rq.routes.list.push(sr); } } diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 9fe399419f..5aedf848fe 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,181 +4,9 @@ //! 
Request types for the bootstrap agent -use anyhow::Result; -use async_trait::async_trait; -use omicron_common::address::{self, Ipv6Subnet, SLED_PREFIX}; -use omicron_common::ledger::Ledgerable; -use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use sha3::{Digest, Sha3_256}; +use sled_agent_types::sled::StartSledAgentRequest; use std::borrow::Cow; -use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; -use uuid::Uuid; - -/// A representation of a Baseboard ID as used in the inventory subsystem -/// This type is essentially the same as a `Baseboard` except it doesn't have a -/// revision or HW type (Gimlet, PC, Unknown). -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct BaseboardId { - /// Oxide Part Number - pub part_number: String, - /// Serial number (unique for a given part number) - pub serial_number: String, -} - -/// A request to Add a given sled after rack initialization has occurred -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct AddSledRequest { - pub sled_id: BaseboardId, - pub start_request: StartSledAgentRequest, -} - -// A wrapper around StartSledAgentRequestV0 that was used -// for the ledger format. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -struct PersistentSledAgentRequest { - request: StartSledAgentRequestV0, -} - -/// The version of `StartSledAgentRequest` we originally shipped with. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequestV0 { - /// Uuid of the Sled Agent to be created. - pub id: Uuid, - - /// Uuid of the rack to which this sled agent belongs. - pub rack_id: Uuid, - - /// The external NTP servers to use - pub ntp_servers: Vec, - - /// The external DNS servers to use - pub dns_servers: Vec, - - /// Use trust quorum for key generation - pub use_trust_quorum: bool, - - // Note: The order of these fields is load bearing, because we serialize - // `SledAgentRequest`s as toml. 
`subnet` serializes as a TOML table, so it - // must come after non-table fields. - /// Portion of the IP space to be managed by the Sled Agent. - pub subnet: Ipv6Subnet, -} - -/// Configuration information for launching a Sled Agent. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequest { - /// The current generation number of data as stored in CRDB. - /// - /// The initial generation is set during RSS time and then only mutated - /// by Nexus. For now, we don't actually anticipate mutating this data, - /// but we leave open the possiblity. - pub generation: u64, - - // Which version of the data structure do we have. This is to help with - // deserialization and conversion in future updates. - pub schema_version: u32, - - // The actual configuration details - pub body: StartSledAgentRequestBody, -} - -/// This is the actual app level data of `StartSledAgentRequest` -/// -/// We nest it below the "header" of `generation` and `schema_version` so that -/// we can perform partial deserialization of `EarlyNetworkConfig` to only read -/// the header and defer deserialization of the body once we know the schema -/// version. This is possible via the use of [`serde_json::value::RawValue`] in -/// future (post-v1) deserialization paths. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] -pub struct StartSledAgentRequestBody { - /// Uuid of the Sled Agent to be created. - pub id: Uuid, - - /// Uuid of the rack to which this sled agent belongs. - pub rack_id: Uuid, - - /// Use trust quorum for key generation - pub use_trust_quorum: bool, - - /// Is this node an LRTQ learner node? - /// - /// We only put the node into learner mode if `use_trust_quorum` is also - /// true. - pub is_lrtq_learner: bool, - - /// Portion of the IP space to be managed by the Sled Agent. 
- pub subnet: Ipv6Subnet, -} - -impl StartSledAgentRequest { - pub fn sled_address(&self) -> SocketAddrV6 { - address::get_sled_address(self.body.subnet) - } - - pub fn switch_zone_ip(&self) -> Ipv6Addr { - address::get_switch_zone_address(self.body.subnet) - } - - /// Compute the sha3_256 digest of `self.rack_id` to use as a `salt` - /// for disk encryption. We don't want to include other values that are - /// consistent across sleds as it would prevent us from moving drives - /// between sleds. - pub fn hash_rack_id(&self) -> [u8; 32] { - // We know the unwrap succeeds as a Sha3_256 digest is 32 bytes - Sha3_256::digest(self.body.rack_id.as_bytes()) - .as_slice() - .try_into() - .unwrap() - } -} - -impl From for StartSledAgentRequest { - fn from(v0: StartSledAgentRequestV0) -> Self { - StartSledAgentRequest { - generation: 0, - schema_version: 1, - body: StartSledAgentRequestBody { - id: v0.id, - rack_id: v0.rack_id, - use_trust_quorum: v0.use_trust_quorum, - is_lrtq_learner: false, - subnet: v0.subnet, - }, - } - } -} - -#[async_trait] -impl Ledgerable for StartSledAgentRequest { - fn is_newer_than(&self, other: &Self) -> bool { - self.generation > other.generation - } - - fn generation_bump(&mut self) { - // DO NOTHING! - // - // Generation bumps must only ever come from nexus and will be encoded - // in the struct itself - } - - // Attempt to deserialize the v1 or v0 version and return - // the v1 version. - fn deserialize( - s: &str, - ) -> Result { - // Try to deserialize the latest version of the data structure (v1). If - // that succeeds we are done. - if let Ok(val) = serde_json::from_str::(s) { - return Ok(val); - } - - // We don't have the latest version. Try to deserialize v0 and then - // convert it to the latest version. 
- let v0 = serde_json::from_str::(s)?.request; - Ok(v0.into()) - } -} #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum Request<'a> { @@ -200,6 +28,10 @@ pub(super) mod version { mod tests { use std::net::Ipv6Addr; + use omicron_common::address::Ipv6Subnet; + use sled_agent_types::sled::StartSledAgentRequestBody; + use uuid::Uuid; + use super::*; #[test] @@ -227,34 +59,4 @@ mod tests { assert!(envelope == deserialized, "serialization round trip failed"); } - - #[test] - fn serialize_start_sled_agent_v0_deserialize_v1() { - let v0 = PersistentSledAgentRequest { - request: StartSledAgentRequestV0 { - id: Uuid::new_v4(), - rack_id: Uuid::new_v4(), - ntp_servers: vec![String::from("test.pool.example.com")], - dns_servers: vec!["1.1.1.1".parse().unwrap()], - use_trust_quorum: false, - subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST), - }, - }; - let serialized = serde_json::to_string(&v0).unwrap(); - let expected = StartSledAgentRequest { - generation: 0, - schema_version: 1, - body: StartSledAgentRequestBody { - id: v0.request.id, - rack_id: v0.request.rack_id, - use_trust_quorum: v0.request.use_trust_quorum, - is_lrtq_learner: false, - subnet: v0.request.subnet, - }, - }; - - let actual: StartSledAgentRequest = - Ledgerable::deserialize(&serialized).unwrap(); - assert_eq!(expected, actual); - } } diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index 73f7537853..eee7eed085 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -5,7 +5,6 @@ //! 
sled-agent's handle to the Rack Setup Service it spawns use super::client as bootstrap_agent_client; -use super::params::StartSledAgentRequest; use crate::rack_setup::service::RackSetupService; use crate::rack_setup::service::SetupServiceError; use ::bootstrap_agent_client::Client as BootstrapAgentClient; @@ -16,6 +15,7 @@ use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; use omicron_common::backoff::BackoffError; use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::sled::StartSledAgentRequest; use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::Ipv6Addr; diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index fa1d781a96..6681f396b4 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -6,7 +6,6 @@ use super::config::BOOTSTRAP_AGENT_HTTP_PORT; use super::http_entrypoints; -use super::params::StartSledAgentRequest; use super::views::SledAgentResponse; use super::BootstrapError; use super::RssAccessError; @@ -41,6 +40,7 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::RackInitUuid; use sled_agent_types::rack_init::RackInitializeRequest; +use sled_agent_types::sled::StartSledAgentRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -714,11 +714,10 @@ impl Inner { #[cfg(test)] mod tests { - use crate::bootstrap::params::StartSledAgentRequestBody; - use super::*; use omicron_common::address::Ipv6Subnet; use omicron_test_utils::dev::test_setup_log; + use sled_agent_types::sled::StartSledAgentRequestBody; use std::net::Ipv6Addr; use uuid::Uuid; diff --git a/sled-agent/src/bootstrap/sprockets_server.rs b/sled-agent/src/bootstrap/sprockets_server.rs index 796883b578..8d92970d54 100644 --- a/sled-agent/src/bootstrap/sprockets_server.rs +++ 
b/sled-agent/src/bootstrap/sprockets_server.rs @@ -7,10 +7,10 @@ use crate::bootstrap::params::version; use crate::bootstrap::params::Request; use crate::bootstrap::params::RequestEnvelope; -use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::views::Response; use crate::bootstrap::views::ResponseEnvelope; use crate::bootstrap::views::SledAgentResponse; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::io; use std::net::SocketAddrV6; diff --git a/sled-agent/src/common/disk.rs b/sled-agent/src/common/disk.rs index 54c56825eb..7bef28ac7c 100644 --- a/sled-agent/src/common/disk.rs +++ b/sled-agent/src/common/disk.rs @@ -4,12 +4,12 @@ //! Describes the states of network-attached storage. -use crate::params::DiskStateRequested; use chrono::Utc; use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use propolis_client::types::DiskAttachmentState as PropolisDiskState; +use sled_agent_types::disk::DiskStateRequested; use uuid::Uuid; /// Action to be taken on behalf of state transition. diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 11c998bf9e..e8285874da 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,26 +5,17 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::sled_agent::SledAgent; -use crate::bootstrap::params::AddSledRequest; -use crate::params::{ - BootstoreStatus, CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, - InstanceExternalIpBody, InstancePutStateBody, InstancePutStateResponse, - InstanceUnregisterResponse, TimeSync, VpcFirewallRulesEnsureBody, - ZoneBundleId, ZoneBundleMetadata, Zpool, -}; use crate::sled_agent::Error as SledAgentError; -use crate::zone_bundle; +use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; use display_error_chain::DisplayErrorChain; use dropshot::{ - endpoint, ApiDescription, ApiDescriptionRegisterError, FreeformBody, - HttpError, HttpResponseCreated, HttpResponseDeleted, HttpResponseHeaders, - HttpResponseOk, HttpResponseUpdatedNoContent, Path, Query, RequestContext, - StreamingBody, TypedBody, + ApiDescription, FreeformBody, HttpError, HttpResponseCreated, + HttpResponseDeleted, HttpResponseHeaders, HttpResponseOk, + HttpResponseUpdatedNoContent, Path, Query, RequestContext, StreamingBody, + TypedBody, }; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; -use installinator_common::M2Slot; use nexus_sled_agent_shared::inventory::{ Inventory, OmicronZonesConfig, SledRole, }; @@ -34,1049 +25,714 @@ use omicron_common::api::internal::nexus::{ }; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, + VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DatasetsConfig, DiskVariant, OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskVariant, + DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::{ + BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, 
+ BootDiskWriteStartQueryParams, +}; +use sled_agent_types::bootstore::BootstoreStatus; +use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; -use sled_storage::resources::DatasetsManagementResult; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; +use sled_agent_types::instance::{ + InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, +}; +use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, + StorageLimit, ZoneBundleId, ZoneBundleMetadata, +}; use std::collections::BTreeMap; -use uuid::Uuid; type SledApiDescription = ApiDescription; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(disk_put)?; - api.register(cockroachdb_init)?; - api.register(instance_issue_disk_snapshot_request)?; - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(zones_list)?; - api.register(datasets_get)?; - api.register(datasets_put)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(zone_bundle_list)?; - api.register(zone_bundle_list_all)?; - api.register(zone_bundle_create)?; - api.register(zone_bundle_get)?; - api.register(zone_bundle_delete)?; - api.register(zone_bundle_utilization)?; - api.register(zone_bundle_cleanup_context)?; - api.register(zone_bundle_cleanup_context_update)?; - 
api.register(zone_bundle_cleanup)?; - api.register(sled_role_get)?; - api.register(list_v2p)?; - api.register(set_v2p)?; - api.register(del_v2p)?; - api.register(timesync_get)?; - api.register(update_artifact)?; - api.register(vpc_firewall_rules_put)?; - api.register(zpools_get)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config_cache)?; - api.register(write_network_bootstore_config)?; - api.register(sled_add)?; - api.register(host_os_write_start)?; - api.register(host_os_write_status_get)?; - api.register(host_os_write_status_delete)?; - api.register(inventory)?; - api.register(sled_identifiers)?; - api.register(bootstore_status)?; - api.register(list_vpc_routes)?; - api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api + sled_agent_api_mod::api_description::() + .expect("registered entrypoints") } -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -struct ZonePathParam { - /// The name of the zone. - zone_name: String, -} +enum SledAgentImpl {} -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -struct ZoneBundleFilter { - /// An optional substring used to filter zone bundles. - filter: Option, -} - -/// List all zone bundles that exist, even for now-deleted zones. -#[endpoint { - method = GET, - path = "/zones/bundles", -}] -async fn zone_bundle_list_all( - rqctx: RequestContext, - query: Query, -) -> Result>, HttpError> { - let sa = rqctx.context(); - let filter = query.into_inner().filter; - sa.list_all_zone_bundles(filter.as_deref()) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} +impl SledAgentApi for SledAgentImpl { + type Context = SledAgent; -/// List the zone bundles that are available for a running zone. 
-#[endpoint { - method = GET, - path = "/zones/bundles/{zone_name}", -}] -async fn zone_bundle_list( - rqctx: RequestContext, - params: Path, -) -> Result>, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let sa = rqctx.context(); - sa.list_zone_bundles(&zone_name) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} + async fn zone_bundle_list_all( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + let filter = query.into_inner().filter; + sa.list_all_zone_bundles(filter.as_deref()) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Ask the sled agent to create a zone bundle. -#[endpoint { - method = POST, - path = "/zones/bundles/{zone_name}", -}] -async fn zone_bundle_create( - rqctx: RequestContext, - params: Path, -) -> Result, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let sa = rqctx.context(); - sa.create_zone_bundle(&zone_name) - .await - .map(HttpResponseCreated) - .map_err(HttpError::from) -} + async fn zone_bundle_list( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError> { + let params = params.into_inner(); + let zone_name = params.zone_name; + let sa = rqctx.context(); + sa.list_zone_bundles(&zone_name) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Fetch the binary content of a single zone bundle. -#[endpoint { - method = GET, - path = "/zones/bundles/{zone_name}/{bundle_id}", -}] -async fn zone_bundle_get( - rqctx: RequestContext, - params: Path, -) -> Result>, HttpError> { - let params = params.into_inner(); - let zone_name = params.zone_name; - let bundle_id = params.bundle_id; - let sa = rqctx.context(); - let Some(path) = sa - .get_zone_bundle_paths(&zone_name, &bundle_id) - .await - .map_err(HttpError::from)? 
- .into_iter() - .next() - else { - return Err(HttpError::for_not_found( - None, - format!( - "No zone bundle for zone '{}' with ID '{}'", - zone_name, bundle_id - ), - )); - }; - let f = tokio::fs::File::open(&path).await.map_err(|e| { - HttpError::for_internal_error(format!( - "failed to open zone bundle file at {}: {:?}", - path, e, - )) - })?; - let stream = hyper_staticfile::FileBytesStream::new(f); - let body = FreeformBody(stream.into_body()); - let mut response = HttpResponseHeaders::new_unnamed(HttpResponseOk(body)); - response.headers_mut().append( - http::header::CONTENT_TYPE, - "application/gzip".try_into().unwrap(), - ); - Ok(response) -} + async fn zone_bundle_create( + rqctx: RequestContext, + params: Path, + ) -> Result, HttpError> { + let params = params.into_inner(); + let zone_name = params.zone_name; + let sa = rqctx.context(); + sa.create_zone_bundle(&zone_name) + .await + .map(HttpResponseCreated) + .map_err(HttpError::from) + } -/// Delete a zone bundle. -#[endpoint { - method = DELETE, - path = "/zones/bundles/{zone_name}/{bundle_id}", -}] -async fn zone_bundle_delete( - rqctx: RequestContext, - params: Path, -) -> Result { - let params = params.into_inner(); - let zone_name = params.zone_name; - let bundle_id = params.bundle_id; - let sa = rqctx.context(); - let paths = sa - .get_zone_bundle_paths(&zone_name, &bundle_id) - .await - .map_err(HttpError::from)?; - if paths.is_empty() { - return Err(HttpError::for_not_found( - None, - format!( - "No zone bundle for zone '{}' with ID '{}'", - zone_name, bundle_id - ), - )); - }; - for path in paths.into_iter() { - tokio::fs::remove_file(&path).await.map_err(|e| { + async fn zone_bundle_get( + rqctx: RequestContext, + params: Path, + ) -> Result>, HttpError> + { + let params = params.into_inner(); + let zone_name = params.zone_name; + let bundle_id = params.bundle_id; + let sa = rqctx.context(); + let Some(path) = sa + .get_zone_bundle_paths(&zone_name, &bundle_id) + .await + 
.map_err(HttpError::from)? + .into_iter() + .next() + else { + return Err(HttpError::for_not_found( + None, + format!( + "No zone bundle for zone '{}' with ID '{}'", + zone_name, bundle_id + ), + )); + }; + let f = tokio::fs::File::open(&path).await.map_err(|e| { HttpError::for_internal_error(format!( - "Failed to delete zone bundle: {e}" + "failed to open zone bundle file at {}: {:?}", + path, e, )) })?; + let stream = hyper_staticfile::FileBytesStream::new(f); + let body = FreeformBody(stream.into_body()); + let mut response = + HttpResponseHeaders::new_unnamed(HttpResponseOk(body)); + response.headers_mut().append( + http::header::CONTENT_TYPE, + "application/gzip".try_into().unwrap(), + ); + Ok(response) } - Ok(HttpResponseDeleted()) -} -/// Return utilization information about all zone bundles. -#[endpoint { - method = GET, - path = "/zones/bundle-cleanup/utilization", -}] -async fn zone_bundle_utilization( - rqctx: RequestContext, -) -> Result< - HttpResponseOk>, - HttpError, -> { - let sa = rqctx.context(); - sa.zone_bundle_utilization() - .await - .map(HttpResponseOk) - .map_err(HttpError::from) -} + async fn zone_bundle_delete( + rqctx: RequestContext, + params: Path, + ) -> Result { + let params = params.into_inner(); + let zone_name = params.zone_name; + let bundle_id = params.bundle_id; + let sa = rqctx.context(); + let paths = sa + .get_zone_bundle_paths(&zone_name, &bundle_id) + .await + .map_err(HttpError::from)?; + if paths.is_empty() { + return Err(HttpError::for_not_found( + None, + format!( + "No zone bundle for zone '{}' with ID '{}'", + zone_name, bundle_id + ), + )); + }; + for path in paths.into_iter() { + tokio::fs::remove_file(&path).await.map_err(|e| { + HttpError::for_internal_error(format!( + "Failed to delete zone bundle: {e}" + )) + })?; + } + Ok(HttpResponseDeleted()) + } -/// Return context used by the zone-bundle cleanup task. 
-#[endpoint { - method = GET, - path = "/zones/bundle-cleanup/context", -}] -async fn zone_bundle_cleanup_context( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zone_bundle_cleanup_context().await)) -} + async fn zone_bundle_utilization( + rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + let sa = rqctx.context(); + sa.zone_bundle_utilization() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Update context used by the zone-bundle cleanup task. -#[endpoint { - method = PUT, - path = "/zones/bundle-cleanup/context", -}] -async fn zone_bundle_cleanup_context_update( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let params = body.into_inner(); - let new_period = params - .period - .map(zone_bundle::CleanupPeriod::new) - .transpose() - .map_err(|e| HttpError::from(SledAgentError::from(e)))?; - let new_priority = params.priority; - let new_limit = params - .storage_limit - .map(zone_bundle::StorageLimit::new) - .transpose() - .map_err(|e| HttpError::from(SledAgentError::from(e)))?; - sa.update_zone_bundle_cleanup_context(new_period, new_limit, new_priority) + async fn zone_bundle_cleanup_context( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zone_bundle_cleanup_context().await)) + } + + async fn zone_bundle_cleanup_context_update( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let params = body.into_inner(); + let new_period = + params.period.map(CleanupPeriod::new).transpose().map_err(|e| { + HttpError::from(SledAgentError::from(BundleError::from(e))) + })?; + let new_priority = params.priority; + let new_limit = + params.storage_limit.map(StorageLimit::new).transpose().map_err( + |e| HttpError::from(SledAgentError::from(BundleError::from(e))), + )?; + sa.update_zone_bundle_cleanup_context( + new_period, + new_limit, + 
new_priority, + ) .await .map(|_| HttpResponseUpdatedNoContent()) .map_err(HttpError::from) -} + } -/// Trigger a zone bundle cleanup. -#[endpoint { - method = POST, - path = "/zones/bundle-cleanup", -}] -async fn zone_bundle_cleanup( - rqctx: RequestContext, -) -> Result< - HttpResponseOk>, - HttpError, -> { - let sa = rqctx.context(); - sa.zone_bundle_cleanup().await.map(HttpResponseOk).map_err(HttpError::from) -} + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -/// List the zones that are currently managed by the sled agent. -#[endpoint { - method = GET, - path = "/zones", -}] -async fn zones_list( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); - sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) -} + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await?)) -} + async fn zone_bundle_cleanup( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); + sa.zone_bundle_cleanup() + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + } -/// Configures datasets to be used on this sled -#[endpoint { - method = PUT, - path = "/datasets", -}] -async fn datasets_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.datasets_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn zones_list( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = 
rqctx.context(); + sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) + } -/// Lists the datasets that this sled is configured to use -#[endpoint { - method = GET, - path = "/datasets", -}] -async fn datasets_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list().await?)) -} + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await?)) + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) -} + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } -#[endpoint { - method = PUT, - path = "/omicron-zones", -}] -async fn omicron_zones_put( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + 
let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = GET, - path = "/zpools", -}] -async fn zpools_get( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await)) -} + async fn zpools_get( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zpools_get().await)) + } -#[endpoint { - method = GET, - path = "/sled-role", -}] -async fn sled_role_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.get_role())) -} + async fn sled_role_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.get_role())) + } -/// Initializes a CockroachDB cluster -#[endpoint { - method = POST, - path = "/cockroachdb", -}] -async fn cockroachdb_init( - rqctx: RequestContext, -) -> Result { - let sa = rqctx.context(); - sa.cockroachdb_initialize().await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn cockroachdb_init( + rqctx: RequestContext, + ) -> Result { + let sa = rqctx.context(); + sa.cockroachdb_initialize().await?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_registered( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.propolis_addr, + body_args.metadata, + ) + .await?, + )) + } -#[endpoint { - method = PUT, - path = 
"/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_registered( - instance_id, - body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.propolis_addr, - body_args.metadata, - ) - .await?, - )) -} + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) + } -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) -} + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_state(instance_id, body_args.state).await?, + )) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let 
sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + } -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; - 
Ok(HttpResponseUpdatedNoContent()) -} + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await + .map_err(|e| Error::from(e))?, + )) + } -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.update_artifact(artifact.into_inner()).await.map_err(Error::from)?; + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + > { + let sa = rqctx.context(); + let path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.disk_id, + body.snapshot_id, ) - .await - .map_err(|e| Error::from(e))?, - )) -} - -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.update_artifact(artifact.into_inner()).await.map_err(Error::from)?; - Ok(HttpResponseUpdatedNoContent()) -} + .await?; 
-#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - disk_id: Uuid, -} + Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let body_args = body.into_inner(); -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + sa.firewall_rules_ensure(body_args.vni, &body_args.rules[..]) + .await + .map_err(Error::from)?; -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - ) - .await?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + Ok(HttpResponseUpdatedNoContent()) + } -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: Uuid, -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = 
rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let body_args = body.into_inner(); - - sa.firewall_rules_ensure(body_args.vni, &body_args.rules[..]) - .await - .map_err(Error::from)?; + sa.set_virtual_nic_host(&body_args).await.map_err(Error::from)?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -/// Create a mapping from a virtual NIC to a physical host -// Keep interface_id to maintain parity with the simulated sled agent, which -// requires interface_id on the path. -#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - - sa.set_virtual_nic_host(&body_args).await.map_err(Error::from)?; - - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -/// Delete a mapping from a virtual NIC to a physical host -// Keep interface_id to maintain parity with the simulated sled agent, which -// requires interface_id on the path. 
-#[endpoint { - method = DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); - - sa.unset_virtual_nic_host(&body_args).await.map_err(Error::from)?; - - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args).await.map_err(Error::from)?; -/// List v2p mappings present on sled -// Used by nexus background task -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext, -) -> Result>, HttpError> { - let sa = rqctx.context(); + Ok(HttpResponseUpdatedNoContent()) + } - let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); - Ok(HttpResponseOk(vnics)) -} + let vnics = sa.list_virtual_nics().await.map_err(Error::from)?; -#[endpoint { - method = GET, - path = "/timesync", -}] -async fn timesync_get( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.timesync_get().await.map_err(|e| Error::from(e))?)) -} + Ok(HttpResponseOk(vnics)) + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.ensure_scrimlet_host_ports(body.into_inner().uplinks).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn timesync_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.timesync_get().await.map_err(|e| Error::from(e))?)) + } -/// This API endpoint is only reading the local sled agent's view of the -/// bootstore. The boostore is a distributed data store that is eventually -/// consistent. Reads from individual nodes may not represent the latest state. 
-#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config_cache( - rqctx: RequestContext, -) -> Result, HttpError> { - let sa = rqctx.context(); - let bs = sa.bootstore(); - - let config = bs.get_network_config().await.map_err(|e| { - HttpError::for_internal_error(format!("failed to get bootstore: {e}")) - })?; - - let config = match config { - Some(config) => EarlyNetworkConfig::deserialize_bootstore_config( - &rqctx.log, &config, - ) - .map_err(|e| { - HttpError::for_internal_error(format!( - "deserialize early network config: {e}" - )) - })?, - None => { - return Err(HttpError::for_unavail( - None, - "early network config does not exist yet".into(), - )); - } - }; + async fn uplink_ensure( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.ensure_scrimlet_host_ports(body.into_inner().uplinks).await?; + Ok(HttpResponseUpdatedNoContent()) + } - Ok(HttpResponseOk(config)) -} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let bs = sa.bootstore(); -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let bs = sa.bootstore(); - let config = body.into_inner(); - - bs.update_network_config(NetworkConfig::from(config)).await.map_err( - |e| { + let config = bs.get_network_config().await.map_err(|e| { HttpError::for_internal_error(format!( - "failed to write updated config to boot store: {e}" + "failed to get bootstore: {e}" )) - }, - )?; - - Ok(HttpResponseUpdatedNoContent()) -} + })?; -/// Add a sled to a rack that was already initialized via RSS -#[endpoint { - method = PUT, - path = "/sleds" -}] -async fn sled_add( - rqctx: RequestContext, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let request = body.into_inner(); - - // 
Perform some minimal validation - if request.start_request.body.use_trust_quorum - && !request.start_request.body.is_lrtq_learner - { - return Err(HttpError::for_bad_request( - None, - "New sleds must be LRTQ learners if trust quorum is in use" - .to_string(), - )); + let config = match config { + Some(config) => EarlyNetworkConfig::deserialize_bootstore_config( + &rqctx.log, &config, + ) + .map_err(|e| { + HttpError::for_internal_error(format!( + "deserialize early network config: {e}" + )) + })?, + None => { + return Err(HttpError::for_unavail( + None, + "early network config does not exist yet".into(), + )); + } + }; + + Ok(HttpResponseOk(config)) } - crate::sled_agent::sled_add( - sa.logger().clone(), - request.sled_id, - request.start_request, - ) - .await - .map_err(|e| { - let message = format!("Failed to add sled to rack cluster: {e}"); - HttpError { - status_code: http::StatusCode::INTERNAL_SERVER_ERROR, - error_code: None, - external_message: message.clone(), - internal_message: message, - } - })?; - Ok(HttpResponseUpdatedNoContent()) -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskPathParams { - pub boot_disk: M2Slot, -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskUpdatePathParams { - pub boot_disk: M2Slot, - pub update_id: Uuid, -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootDiskWriteStartQueryParams { - pub update_id: Uuid, - // TODO do we already have sha2-256 hashes of the OS images, and if so - // should we use that instead? Another option is to use the external API - // `Digest` type, although it predates `serde_human_bytes` so just stores - // the hash as a `String`. 
- #[serde(with = "serde_human_bytes::hex_array")] - #[schemars(schema_with = "omicron_common::hex_schema::<32>")] - pub sha3_256_digest: [u8; 32], -} - -/// Write a new host OS image to the specified boot disk -#[endpoint { - method = POST, - path = "/boot-disk/{boot_disk}/os/write", -}] -async fn host_os_write_start( - request_context: RequestContext, - path_params: Path, - query_params: Query, - body: StreamingBody, -) -> Result { - let sa = request_context.context(); - let boot_disk = path_params.into_inner().boot_disk; - - // Find our corresponding disk. - let maybe_disk_path = - sa.storage().get_latest_disks().await.iter_managed().find_map( - |(_identity, disk)| { - // Synthetic disks panic if asked for their `slot()`, so filter - // them out first; additionally, filter out any non-M2 disks. - if disk.is_synthetic() || disk.variant() != DiskVariant::M2 { - return None; - } - - // Convert this M2 disk's slot to an M2Slot, and skip any that - // don't match the requested boot_disk. - let Ok(slot) = M2Slot::try_from(disk.slot()) else { - return None; - }; - if slot != boot_disk { - return None; - } - - let raw_devs_path = true; - Some(disk.boot_image_devfs_path(raw_devs_path)) + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let bs = sa.bootstore(); + let config = body.into_inner(); + + bs.update_network_config(NetworkConfig::from(config)).await.map_err( + |e| { + HttpError::for_internal_error(format!( + "failed to write updated config to boot store: {e}" + )) }, - ); + )?; - let disk_path = match maybe_disk_path { - Some(Ok(path)) => path, - Some(Err(err)) => { - let message = format!( - "failed to find devfs path for {boot_disk:?}: {}", - DisplayErrorChain::new(&err) - ); - return Err(HttpError { - status_code: http::StatusCode::SERVICE_UNAVAILABLE, - error_code: None, - external_message: message.clone(), - internal_message: message, - }); + Ok(HttpResponseUpdatedNoContent()) + 
} + + async fn sled_add( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let request = body.into_inner(); + + // Perform some minimal validation + if request.start_request.body.use_trust_quorum + && !request.start_request.body.is_lrtq_learner + { + return Err(HttpError::for_bad_request( + None, + "New sleds must be LRTQ learners if trust quorum is in use" + .to_string(), + )); } - None => { - let message = format!("no disk found for slot {boot_disk:?}",); - return Err(HttpError { - status_code: http::StatusCode::SERVICE_UNAVAILABLE, + + crate::sled_agent::sled_add( + sa.logger().clone(), + request.sled_id, + request.start_request, + ) + .await + .map_err(|e| { + let message = format!("Failed to add sled to rack cluster: {e}"); + HttpError { + status_code: http::StatusCode::INTERNAL_SERVER_ERROR, error_code: None, external_message: message.clone(), internal_message: message, - }); - } - }; - - let BootDiskWriteStartQueryParams { update_id, sha3_256_digest } = - query_params.into_inner(); - sa.boot_disk_os_writer() - .start_update( - boot_disk, - disk_path, - update_id, - sha3_256_digest, - body.into_stream(), - ) - .await - .map_err(|err| HttpError::from(&*err))?; - Ok(HttpResponseUpdatedNoContent()) -} + } + })?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Current progress of an OS image being written to disk. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema, Serialize, -)] -#[serde(tag = "state", rename_all = "snake_case")] -pub enum BootDiskOsWriteProgress { - /// The image is still being uploaded. - ReceivingUploadedImage { bytes_received: usize }, - /// The image is being written to disk. - WritingImageToDisk { bytes_written: usize }, - /// The image is being read back from disk for validation. 
- ValidatingWrittenImage { bytes_read: usize }, -} + async fn host_os_write_start( + request_context: RequestContext, + path_params: Path, + query_params: Query, + body: StreamingBody, + ) -> Result { + let sa = request_context.context(); + let boot_disk = path_params.into_inner().boot_disk; + + // Find our corresponding disk. + let maybe_disk_path = + sa.storage().get_latest_disks().await.iter_managed().find_map( + |(_identity, disk)| { + // Synthetic disks panic if asked for their `slot()`, so filter + // them out first; additionally, filter out any non-M2 disks. + if disk.is_synthetic() || disk.variant() != DiskVariant::M2 + { + return None; + } + + // Convert this M2 disk's slot to an M2Slot, and skip any that + // don't match the requested boot_disk. + let Ok(slot) = M2Slot::try_from(disk.slot()) else { + return None; + }; + if slot != boot_disk { + return None; + } + + let raw_devs_path = true; + Some(disk.boot_image_devfs_path(raw_devs_path)) + }, + ); -/// Status of an update to a boot disk OS. -#[derive(Debug, Clone, Deserialize, JsonSchema, Serialize)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum BootDiskOsWriteStatus { - /// No update has been started for this disk, or any previously-started - /// update has completed and had its status cleared. - NoUpdateStarted, - /// An update is currently running. - InProgress { update_id: Uuid, progress: BootDiskOsWriteProgress }, - /// The most recent update completed successfully. - Complete { update_id: Uuid }, - /// The most recent update failed. 
- Failed { update_id: Uuid, message: String }, -} + let disk_path = match maybe_disk_path { + Some(Ok(path)) => path, + Some(Err(err)) => { + let message = format!( + "failed to find devfs path for {boot_disk:?}: {}", + DisplayErrorChain::new(&err) + ); + return Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }); + } + None => { + let message = format!("no disk found for slot {boot_disk:?}",); + return Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }); + } + }; + + let BootDiskWriteStartQueryParams { update_id, sha3_256_digest } = + query_params.into_inner(); + sa.boot_disk_os_writer() + .start_update( + boot_disk, + disk_path, + update_id, + sha3_256_digest, + body.into_stream(), + ) + .await + .map_err(|err| HttpError::from(&*err))?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Get the status of writing a new host OS -#[endpoint { - method = GET, - path = "/boot-disk/{boot_disk}/os/write/status", -}] -async fn host_os_write_status_get( - request_context: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let sa = request_context.context(); - let boot_disk = path_params.into_inner().boot_disk; - let status = sa.boot_disk_os_writer().status(boot_disk); - Ok(HttpResponseOk(status)) -} + async fn host_os_write_status_get( + request_context: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = request_context.context(); + let boot_disk = path_params.into_inner().boot_disk; + let status = sa.boot_disk_os_writer().status(boot_disk); + Ok(HttpResponseOk(status)) + } -/// Clear the status of a completed write of a new host OS -#[endpoint { - method = DELETE, - path = "/boot-disk/{boot_disk}/os/write/status/{update_id}", -}] -async fn host_os_write_status_delete( - request_context: RequestContext, - path_params: Path, 
-) -> Result { - let sa = request_context.context(); - let BootDiskUpdatePathParams { boot_disk, update_id } = - path_params.into_inner(); - sa.boot_disk_os_writer() - .clear_terminal_status(boot_disk, update_id) - .map_err(|err| HttpError::from(&err))?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn host_os_write_status_delete( + request_context: RequestContext, + path_params: Path, + ) -> Result { + let sa = request_context.context(); + let BootDiskUpdatePathParams { boot_disk, update_id } = + path_params.into_inner(); + sa.boot_disk_os_writer() + .clear_terminal_status(boot_disk, update_id) + .map_err(|err| HttpError::from(&err))?; + Ok(HttpResponseUpdatedNoContent()) + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = "/inventory", -}] -async fn inventory( - request_context: RequestContext, -) -> Result, HttpError> { - let sa = request_context.context(); - Ok(HttpResponseOk(sa.inventory().await?)) -} + async fn inventory( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.inventory().await?)) + } -/// Fetch sled identifiers -#[endpoint { - method = GET, - path = "/sled-identifiers", -}] -async fn sled_identifiers( - request_context: RequestContext, -) -> Result, HttpError> { - Ok(HttpResponseOk(request_context.context().sled_identifiers())) -} + async fn sled_identifiers( + request_context: RequestContext, + ) -> Result, HttpError> { + Ok(HttpResponseOk(request_context.context().sled_identifiers())) + } -/// Get the internal state of the local bootstore node -#[endpoint { - method = GET, - path = "/bootstore/status", -}] -async fn bootstore_status( - request_context: RequestContext, -) -> Result, HttpError> { - let sa = request_context.context(); - let bootstore = sa.bootstore(); - let status = bootstore - .get_status() - .await - .map_err(|e| { - HttpError::from(omicron_common::api::external::Error::from(e)) - })? 
- .into(); - Ok(HttpResponseOk(status)) -} + async fn bootstore_status( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = request_context.context(); + let bootstore = sa.bootstore(); + let status = bootstore + .get_status() + .await + .map_err(|e| { + HttpError::from(omicron_common::api::external::Error::from(e)) + })? + .into(); + Ok(HttpResponseOk(status)) + } -/// Get the current versions of VPC routing rules. -#[endpoint { - method = GET, - path = "/vpc-routes", -}] -async fn list_vpc_routes( - request_context: RequestContext, -) -> Result>, HttpError> { - let sa = request_context.context(); - Ok(HttpResponseOk(sa.list_vpc_routes())) -} + async fn list_vpc_routes( + request_context: RequestContext, + ) -> Result>, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.list_vpc_routes())) + } -/// Update VPC routing rules. -#[endpoint { - method = PUT, - path = "/vpc-routes", -}] -async fn set_vpc_routes( - request_context: RequestContext, - body: TypedBody>, -) -> Result { - let sa = request_context.context(); - sa.set_vpc_routes(body.into_inner())?; - Ok(HttpResponseUpdatedNoContent()) + async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = request_context.context(); + sa.set_vpc_routes(body.into_inner())?; + Ok(HttpResponseUpdatedNoContent()) + } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 631f2b83f6..0bcbc97fd2 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -12,14 +12,7 @@ use crate::instance_manager::{ Error as ManagerError, InstanceManagerServices, InstanceTicket, }; use crate::metrics::MetricsRequestQueue; -use crate::nexus::NexusClientWithResolver; -use crate::params::ZoneBundleMetadata; -use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; -use crate::params::{ - InstanceHardware, InstanceMetadata, InstanceMigrationTargetParams, - InstancePutStateResponse, InstanceStateRequested, - 
InstanceUnregisterResponse, VpcFirewallRule, -}; +use crate::nexus::NexusClient; use crate::profile::*; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -36,7 +29,7 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - NetworkInterface, SledIdentifiers, SourceNatConfig, + NetworkInterface, ResolvedVpcFirewallRule, SledIdentifiers, SourceNatConfig, }; use omicron_common::backoff; use omicron_common::zpool_name::ZpoolName; @@ -44,6 +37,8 @@ use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid}; use propolis_client::Client as PropolisClient; use rand::prelude::IteratorRandom; use rand::SeedableRng; +use sled_agent_types::instance::*; +use sled_agent_types::zone_bundle::{ZoneBundleCause, ZoneBundleMetadata}; use sled_storage::dataset::ZONE_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -225,7 +220,7 @@ enum InstanceRequest { tx: oneshot::Sender, }, PutState { - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, tx: oneshot::Sender>, }, Terminate { @@ -337,7 +332,7 @@ struct InstanceRunner { source_nat: SourceNatConfig, ephemeral_ip: Option, floating_ips: Vec, - firewall_rules: Vec, + firewall_rules: Vec, dhcp_config: DhcpCfg, // Disk related properties @@ -349,7 +344,7 @@ struct InstanceRunner { running_state: Option, // Connection to Nexus - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, // Storage resources storage: StorageHandle, @@ -528,7 +523,6 @@ impl InstanceRunner { ); self.nexus_client - .client() .cpapi_instances_put( &self.id().into_untyped_uuid(), &state.into(), @@ -1159,7 +1153,7 @@ impl Instance { pub async fn put_state( &self, tx: oneshot::Sender>, - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, ) -> Result<(), Error> { self.tx .send(InstanceRequest::PutState { state, tx }) @@ -1306,7 +1300,7 @@ impl InstanceRunner { async fn 
put_state( &mut self, - state: crate::params::InstanceStateRequested, + state: InstanceStateRequested, ) -> Result { use propolis_client::types::InstanceStateRequested as PropolisRequest; let (propolis_state, next_published) = match state { @@ -1568,15 +1562,14 @@ mod tests { use super::*; use crate::fakes::nexus::{FakeNexusServer, ServerContext}; use crate::metrics; + use crate::nexus::make_nexus_client_with_port; use crate::vmm_reservoir::VmmReservoirManagerHandle; - use crate::zone_bundle::CleanupContext; use camino_tempfile::Utf8TempDir; use dns_server::TransientServer; use dropshot::HttpServer; use illumos_utils::dladm::MockDladm; use illumos_utils::dladm::__mock_MockDladm::__create_vnic::Context as MockDladmCreateVnicContext; use illumos_utils::dladm::__mock_MockDladm::__delete_vnic::Context as MockDladmDeleteVnicContext; - use illumos_utils::opte::params::DhcpConfig; use illumos_utils::svc::__wait_for_service::Context as MockWaitForServiceContext; use illumos_utils::zone::MockZones; use illumos_utils::zone::__mock_MockZones::__boot::Context as MockZonesBootContext; @@ -1588,8 +1581,9 @@ mod tests { use omicron_common::api::internal::nexus::{ InstanceProperties, InstanceRuntimeState, VmmState, }; - use omicron_common::api::internal::shared::SledIdentifiers; + use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; use omicron_common::FileKv; + use sled_agent_types::zone_bundle::CleanupContext; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::Ipv6Addr; use std::net::SocketAddrV6; @@ -1634,7 +1628,7 @@ mod tests { } struct FakeNexusParts { - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, _nexus_server: HttpServer, state_rx: Receiver, _dns_server: TransientServer, @@ -1662,12 +1656,11 @@ mod tests { .unwrap(), ); - let nexus_client = - NexusClientWithResolver::new_from_resolver_with_port( - &log, - resolver, - _nexus_server.local_addr().port(), - ); + let nexus_client = 
make_nexus_client_with_port( + &log, + resolver, + _nexus_server.local_addr().port(), + ); Self { nexus_client, _nexus_server, state_rx, _dns_server } } @@ -1760,7 +1753,7 @@ mod tests { async fn instance_struct( log: &Logger, propolis_addr: SocketAddr, - nexus_client_with_resolver: NexusClientWithResolver, + nexus_client: NexusClient, storage_handle: StorageHandle, temp_dir: &String, ) -> (Instance, MetricsRx) { @@ -1774,7 +1767,7 @@ mod tests { let (services, rx) = fake_instance_manager_services( log, storage_handle, - nexus_client_with_resolver, + nexus_client, temp_dir, ); @@ -1850,7 +1843,7 @@ mod tests { fn fake_instance_manager_services( log: &Logger, storage_handle: StorageHandle, - nexus_client_with_resolver: NexusClientWithResolver, + nexus_client: NexusClient, temp_dir: &String, ) -> (InstanceManagerServices, MetricsRx) { let vnic_allocator = @@ -1869,7 +1862,7 @@ mod tests { let (metrics_queue, rx) = MetricsRequestQueue::for_test(); let services = InstanceManagerServices { - nexus_client: nexus_client_with_resolver, + nexus_client, vnic_allocator, port_manager, storage: storage_handle, diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 1b2fb204d0..63164ed290 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -7,14 +7,7 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; use crate::metrics::MetricsRequestQueue; -use crate::nexus::NexusClientWithResolver; -use crate::params::InstanceExternalIpBody; -use crate::params::InstanceMetadata; -use crate::params::ZoneBundleMetadata; -use crate::params::{ - InstanceHardware, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, -}; +use crate::nexus::NexusClient; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -32,6 +25,8 @@ use omicron_common::api::internal::nexus::VmmRuntimeState; use 
omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_types::instance::*; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; use sled_storage::manager::StorageHandle; use sled_storage::resources::AllDisks; use slog::Logger; @@ -74,7 +69,7 @@ pub enum Error { } pub(crate) struct InstanceManagerServices { - pub nexus_client: NexusClientWithResolver, + pub nexus_client: NexusClient, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, pub storage: StorageHandle, @@ -103,7 +98,7 @@ impl InstanceManager { #[allow(clippy::too_many_arguments)] pub fn new( log: Logger, - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, etherstub: Etherstub, port_manager: PortManager, storage: StorageHandle, @@ -422,7 +417,7 @@ struct InstanceManagerRunner { terminate_tx: mpsc::UnboundedSender, terminate_rx: mpsc::UnboundedReceiver, - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, // TODO: If we held an object representing an enum of "Created OR Running" // instance, we could avoid the methods within "instance.rs" that panic diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index e920ffc3fc..68389ccf43 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -21,9 +21,10 @@ use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; -use crate::zone_bundle::{CleanupContext, ZoneBundler}; +use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_types::zone_bundle::CleanupContext; use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; diff --git 
a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 9565e34b1f..26b0e3de59 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -2,12 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -pub use nexus_client::Client as NexusClient; use omicron_common::api::external::Generation; use omicron_common::disk::DiskVariant; use crate::vmm_reservoir::VmmReservoirManagerHandle; -use internal_dns::resolver::{ResolveError, Resolver}; +use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::SledAgentInfo; use omicron_common::address::NEXUS_INTERNAL_PORT; @@ -19,62 +18,33 @@ use tokio::sync::{broadcast, mpsc, oneshot, Notify}; use tokio::time::{interval, Duration, MissedTickBehavior}; use uuid::Uuid; -/// A thin wrapper over a progenitor-generated NexusClient. -/// -/// Also attaches the "DNS resolver" for historical reasons. -#[derive(Clone)] -pub struct NexusClientWithResolver { - client: NexusClient, +// Re-export the nexus_client::Client crate. (Use a type alias to be more +// rust-analyzer friendly.) 
+pub(crate) type NexusClient = nexus_client::Client; + +pub(crate) fn make_nexus_client( + log: &Logger, resolver: Arc, +) -> NexusClient { + make_nexus_client_with_port(log, resolver, NEXUS_INTERNAL_PORT) } -impl NexusClientWithResolver { - pub fn new( - log: &Logger, - resolver: Arc, - ) -> Result { - Ok(Self::new_from_resolver_with_port( - log, - resolver, - NEXUS_INTERNAL_PORT, - )) - } - - pub fn new_from_resolver_with_port( - log: &Logger, - resolver: Arc, - port: u16, - ) -> Self { - let client = reqwest::ClientBuilder::new() - .dns_resolver(resolver.clone()) - .build() - .expect("Failed to build client"); - - let dns_name = ServiceName::Nexus.srv_name(); - Self { - client: NexusClient::new_with_client( - &format!("http://{dns_name}:{port}"), - client, - log.new(o!("component" => "NexusClient")), - ), - resolver, - } - } - - /// Access the progenitor-based Nexus Client. - pub fn client(&self) -> &NexusClient { - &self.client - } - - /// Access the DNS resolver used by the Nexus Client. - /// - /// WARNING: If you're using this resolver to access an IP address of - /// another service, be aware that it might change if that service moves - /// around! Be cautious when accessing and persisting IP addresses of other - /// services. - pub fn resolver(&self) -> &Arc { - &self.resolver - } +pub(crate) fn make_nexus_client_with_port( + log: &Logger, + resolver: Arc, + port: u16, +) -> NexusClient { + let client = reqwest::ClientBuilder::new() + .dns_resolver(resolver) + .build() + .expect("Failed to build client"); + + let dns_name = ServiceName::Nexus.srv_name(); + NexusClient::new_with_client( + &format!("http://{dns_name}:{port}"), + client, + log.new(o!("component" => "NexusClient")), + ) } pub fn d2n_params( diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 8bf38bde0e..de0b086752 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -2,232 +2,10 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::zone_bundle::PriorityOrder; -pub use crate::zone_bundle::ZoneBundleCause; -pub use crate::zone_bundle::ZoneBundleId; -pub use crate::zone_bundle::ZoneBundleMetadata; -pub use illumos_utils::opte::params::DhcpConfig; -pub use illumos_utils::opte::params::VpcFirewallRule; -pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; -use omicron_common::api::internal::nexus::{ - DiskRuntimeState, InstanceProperties, InstanceRuntimeState, - SledInstanceState, VmmRuntimeState, -}; -use omicron_common::api::internal::shared::{ - NetworkInterface, SourceNatConfig, -}; -use omicron_common::disk::{DatasetKind, DatasetName, DiskVariant}; -use omicron_uuid_kinds::PropolisUuid; -use omicron_uuid_kinds::ZpoolUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use omicron_common::disk::{DatasetKind, DatasetName}; pub use sled_hardware::DendriteAsic; -use sled_hardware_types::Baseboard; -use std::collections::BTreeSet; -use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::{IpAddr, SocketAddr, SocketAddrV6}; -use std::time::Duration; -use uuid::Uuid; - -/// Used to request a Disk state change -#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] -#[serde(rename_all = "lowercase", tag = "state", content = "instance")] -pub enum DiskStateRequested { - Detached, - Attached(Uuid), - Destroyed, - Faulted, -} - -impl DiskStateRequested { - /// Returns whether the requested state is attached to an Instance or not. 
- pub fn is_attached(&self) -> bool { - match self { - DiskStateRequested::Detached => false, - DiskStateRequested::Destroyed => false, - DiskStateRequested::Faulted => false, - - DiskStateRequested::Attached(_) => true, - } - } -} - -/// Sent from to a sled agent to establish the runtime state of a Disk -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct DiskEnsureBody { - /// Last runtime state of the Disk known to Nexus (used if the agent has - /// never seen this Disk before). - pub initial_runtime: DiskRuntimeState, - /// requested runtime state of the Disk - pub target: DiskStateRequested, -} - -/// Describes the instance hardware. -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstanceHardware { - pub properties: InstanceProperties, - pub nics: Vec, - pub source_nat: SourceNatConfig, - /// Zero or more external IP addresses (either floating or ephemeral), - /// provided to an instance to allow inbound connectivity. - pub ephemeral_ip: Option, - pub floating_ips: Vec, - pub firewall_rules: Vec, - pub dhcp_config: DhcpConfig, - // TODO: replace `propolis_client::*` with locally-modeled request type - pub disks: Vec, - pub cloud_init_bytes: Option, -} - -/// Metadata used to track statistics about an instance. -/// -// NOTE: The instance ID is not here, since it's already provided in other -// pieces of the instance-related requests. It is pulled from there when -// publishing metrics for the instance. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct InstanceMetadata { - pub silo_id: Uuid, - pub project_id: Uuid, -} - -/// The body of a request to ensure that a instance and VMM are known to a sled -/// agent. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstanceEnsureBody { - /// A description of the instance's virtual hardware and the initial runtime - /// state this sled agent should store for this incarnation of the instance. 
- pub hardware: InstanceHardware, - - /// The instance runtime state for the instance being registered. - pub instance_runtime: InstanceRuntimeState, - - /// The initial VMM runtime state for the VMM being registered. - pub vmm_runtime: VmmRuntimeState, - - /// The ID of the VMM being registered. This may not be the active VMM ID in - /// the instance runtime state (e.g. if the new VMM is going to be a - /// migration target). - pub propolis_id: PropolisUuid, - - /// The address at which this VMM should serve a Propolis server API. - pub propolis_addr: SocketAddr, - - /// Metadata used to track instance statistics. - pub metadata: InstanceMetadata, -} - -/// The body of a request to move a previously-ensured instance into a specific -/// runtime state. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateBody { - /// The state into which the instance should be driven. - pub state: InstanceStateRequested, -} - -/// The response sent from a request to move an instance into a specific runtime -/// state. -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateResponse { - /// The current runtime state of the instance after handling the request to - /// change its state. If the instance's state did not change, this field is - /// `None`. - pub updated_runtime: Option, -} - -/// The response sent from a request to unregister an instance. -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstanceUnregisterResponse { - /// The current state of the instance after handling the request to - /// unregister it. If the instance's state did not change, this field is - /// `None`. - pub updated_runtime: Option, -} - -/// Parameters used when directing Propolis to initialize itself via live -/// migration. -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrationTargetParams { - /// The Propolis ID of the migration source. 
- pub src_propolis_id: Uuid, - - /// The address of the Propolis server that will serve as the migration - /// source. - pub src_propolis_addr: SocketAddr, -} - -/// Requestable running state of an Instance. -/// -/// A subset of [`omicron_common::api::external::InstanceState`]. -#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceStateRequested { - /// Run this instance by migrating in from a previous running incarnation of - /// the instance. - MigrationTarget(InstanceMigrationTargetParams), - /// Start the instance if it is not already running. - Running, - /// Stop the instance. - Stopped, - /// Immediately reset the instance, as though it had stopped and immediately - /// began to run again. - Reboot, -} - -impl Display for InstanceStateRequested { - fn fmt(&self, f: &mut Formatter) -> FormatResult { - write!(f, "{}", self.label()) - } -} - -impl InstanceStateRequested { - fn label(&self) -> &str { - match self { - InstanceStateRequested::MigrationTarget(_) => "migrating in", - InstanceStateRequested::Running => "running", - InstanceStateRequested::Stopped => "stopped", - InstanceStateRequested::Reboot => "reboot", - } - } - - /// Returns true if the state represents a stopped Instance. - pub fn is_stopped(&self) -> bool { - match self { - InstanceStateRequested::MigrationTarget(_) => false, - InstanceStateRequested::Running => false, - InstanceStateRequested::Stopped => true, - InstanceStateRequested::Reboot => false, - } - } -} - -/// Instance runtime state to update for a migration. 
-#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrationSourceParams { - pub migration_id: Uuid, - pub dst_propolis_id: PropolisUuid, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub enum DiskType { - U2, - M2, -} - -impl From for DiskType { - fn from(v: DiskVariant) -> Self { - match v { - DiskVariant::U2 => Self::U2, - DiskVariant::M2 => Self::M2, - } - } -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct Zpool { - pub id: ZpoolUuid, - pub disk_type: DiskType, -} +use std::net::SocketAddrV6; /// Extension trait for `OmicronZoneConfig`. /// @@ -275,6 +53,9 @@ pub(crate) trait OmicronZoneTypeExt { OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => { Some((dataset, DatasetKind::ClickhouseKeeper, address)) } + OmicronZoneType::ClickhouseServer { dataset, address, .. } => { + Some((dataset, DatasetKind::ClickhouseServer, address)) + } OmicronZoneType::CockroachDb { dataset, address, .. } => { Some((dataset, DatasetKind::Cockroach, address)) } @@ -307,88 +88,3 @@ impl OmicronZoneTypeExt for OmicronZoneConfig { &self.zone_type } } - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct TimeSync { - /// The synchronization state of the sled, true when the system clock - /// and the NTP clock are in sync (to within a small window). - pub sync: bool, - /// The NTP reference ID. - pub ref_id: u32, - /// The NTP reference IP address. - pub ip_addr: IpAddr, - /// The NTP stratum (our upstream's stratum plus one). - pub stratum: u8, - /// The NTP reference time (i.e. what chrony thinks the current time is, not - /// necessarily the current system time). - pub ref_time: f64, - // This could be f32, but there is a problem with progenitor/typify - // where, although the f32 correctly becomes "float" (and not "double") in - // the API spec, that "float" gets converted back to f64 when generating - // the client. 
- /// The current offset between the NTP clock and system clock. - pub correction: f64, -} - -/// Parameters used to update the zone bundle cleanup context. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct CleanupContextUpdate { - /// The new period on which automatic cleanups are run. - pub period: Option, - /// The priority ordering for preserving old zone bundles. - pub priority: Option, - /// The new limit on the underlying dataset quota allowed for bundles. - pub storage_limit: Option, -} - -/// Used to dynamically update external IPs attached to an instance. -#[derive( - Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, -)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceExternalIpBody { - Ephemeral(IpAddr), - Floating(IpAddr), -} - -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct EstablishedConnection { - baseboard: Baseboard, - addr: SocketAddrV6, -} - -impl From<(Baseboard, SocketAddrV6)> for EstablishedConnection { - fn from(value: (Baseboard, SocketAddrV6)) -> Self { - EstablishedConnection { baseboard: value.0, addr: value.1 } - } -} - -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BootstoreStatus { - pub fsm_ledger_generation: u64, - pub network_config_ledger_generation: Option, - pub fsm_state: String, - pub peers: BTreeSet, - pub established_connections: Vec, - pub accepted_connections: BTreeSet, - pub negotiating_connections: BTreeSet, -} - -impl From for BootstoreStatus { - fn from(value: bootstore::schemes::v0::Status) -> Self { - BootstoreStatus { - fsm_ledger_generation: value.fsm_ledger_generation, - network_config_ledger_generation: value - .network_config_ledger_generation, - fsm_state: value.fsm_state.to_string(), - peers: value.peers, - established_connections: value - .connections - .into_iter() - .map(EstablishedConnection::from) - .collect(), - accepted_connections: value.accepted_connections, - 
negotiating_connections: value.negotiating_connections, - } - } -} diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index fa2e9dfa3d..42186f66e9 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -1,9 +1,8 @@ use crate::metrics::MetricsRequestQueue; -use crate::nexus::NexusClientWithResolver; +use crate::nexus::NexusClient; use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::params::VpcFirewallRule; use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::Zones; @@ -14,7 +13,9 @@ use omicron_common::api::external::{ Generation, VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, VpcFirewallRuleStatus, }; -use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::api::internal::shared::{ + NetworkInterface, ResolvedVpcFirewallRule, +}; use rand::prelude::IteratorRandom; use rand::SeedableRng; use sled_storage::dataset::ZONE_DATASET; @@ -54,7 +55,7 @@ struct RunningProbes { pub(crate) struct ProbeManagerInner { join_handle: Mutex>>, - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, log: Logger, sled_id: Uuid, vnic_allocator: VnicAllocator, @@ -67,7 +68,7 @@ pub(crate) struct ProbeManagerInner { impl ProbeManager { pub(crate) fn new( sled_id: Uuid, - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, etherstub: Etherstub, storage: StorageHandle, port_manager: PortManager, @@ -248,7 +249,6 @@ impl ProbeManagerInner { if n_added > 0 { if let Err(e) = self .nexus_client - .client() .bgtask_activate(&BackgroundTasksActivateRequest { bgtask_names: vec!["vpc_route_manager".into()], }) @@ -309,7 +309,7 @@ impl ProbeManagerInner { source_nat: None, ephemeral_ip: Some(eip.ip), floating_ips: &[], - firewall_rules: &[VpcFirewallRule { + firewall_rules: 
&[ResolvedVpcFirewallRule { status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, targets: vec![nic.clone()], @@ -439,7 +439,6 @@ impl ProbeManagerInner { async fn target_state(self: &Arc) -> Result> { Ok(self .nexus_client - .client() .probes_get( &self.sled_id, None, //limit diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 24a18d5561..60a7690569 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -4,7 +4,6 @@ //! Plan generation for "where should services be initialized". -use crate::bootstrap::params::StartSledAgentRequest; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; @@ -15,10 +14,8 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, - BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DENDRITE_PORT, - DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, - MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, - RSS_RESERVED_ADDRESSES, SLED_PREFIX, + DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, MGD_PORT, MGS_PORT, NTP_PORT, + NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ @@ -33,6 +30,10 @@ use omicron_common::disk::{ OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; +use omicron_common::policy::{ + BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DNS_REDUNDANCY, + MAX_DNS_REDUNDANCY, NEXUS_REDUNDANCY, +}; use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; use rand::prelude::SliceRandom; use schemars::JsonSchema; @@ -41,6 +42,7 @@ use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; 
use sled_agent_types::rack_init::RackInitializeRequest as Config; +use sled_agent_types::sled::StartSledAgentRequest; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -55,12 +57,25 @@ use uuid::Uuid; const OXIMETER_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse. -// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 2 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 0 when testing replicated ClickHouse. const CLICKHOUSE_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse keeper. -// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 3 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_KEEPER_REDUNDANCY once we enable +// replicated ClickHouse +// Set to 3 when testing replicated ClickHouse. const CLICKHOUSE_KEEPER_COUNT: usize = 0; +// TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove +// when Nexus provisions Clickhouse server. +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 2 when testing replicated ClickHouse +const CLICKHOUSE_SERVER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. const MINIMUM_U2_COUNT: usize = 3; @@ -626,6 +641,47 @@ impl Plan { }); } + // Provision Clickhouse server zones, continuing to stripe across sleds. 
+ // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove + // Temporary linter rule until replicated Clickhouse is enabled + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..CLICKHOUSE_SERVER_COUNT { + let sled = { + let which_sled = + sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; + &mut sled_info[which_sled] + }; + let id = OmicronZoneUuid::new_v4(); + let ip = sled.addr_alloc.next().expect("Not enough addrs"); + // TODO: This may need to be a different port if/when to have single node + // and replicated running side by side as per stage 1 of RFD 468. + let port = omicron_common::address::CLICKHOUSE_PORT; + let address = SocketAddrV6::new(ip, port, 0, 0); + dns_builder + .host_zone_with_one_backend( + id, + ip, + ServiceName::ClickhouseServer, + port, + ) + .unwrap(); + let dataset_name = + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseServer)?; + let filesystem_pool = Some(dataset_name.pool().clone()); + sled.request.zones.push(OmicronZoneConfig { + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), + underlay_address: ip, + zone_type: OmicronZoneType::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, + filesystem_pool, + }); + } + // Provision Clickhouse Keeper zones, continuing to stripe across sleds. // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // Temporary linter rule until replicated Clickhouse is enabled diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index 3d5b90a22d..32906d0195 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -4,16 +4,15 @@ //! Plan generation for "how should sleds be initialized". 
-use crate::bootstrap::params::StartSledAgentRequestBody; -use crate::bootstrap::{ - config::BOOTSTRAP_AGENT_RACK_INIT_PORT, params::StartSledAgentRequest, -}; +use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use camino::Utf8PathBuf; use omicron_common::ledger::{self, Ledger, Ledgerable}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_types::rack_init::back_compat::RackInitializeRequestV1 as ConfigV1; use sled_agent_types::rack_init::RackInitializeRequest as Config; +use sled_agent_types::sled::StartSledAgentRequest; +use sled_agent_types::sled::StartSledAgentRequestBody; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; @@ -173,7 +172,7 @@ impl Plan { let mut ledger = Ledger::::new_with(log, paths, plan.clone()); ledger.commit().await?; - info!(log, "Sled plan written to storage"); + info!(log, "Sled plan written to storage: {plan:#?}"); Ok(plan) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index fdfdf7d282..6a8d2eebca 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -69,10 +69,9 @@ use crate::bootstrap::config::BOOTSTRAP_AGENT_HTTP_PORT; use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, }; -use crate::bootstrap::params::StartSledAgentRequest; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; -use crate::params::{OmicronZoneTypeExt, TimeSync}; +use crate::params::OmicronZoneTypeExt; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, }; @@ -102,6 +101,7 @@ use omicron_common::address::get_sled_address; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::backoff::{ retry_notify, 
retry_policy_internal_service_aggressive, BackoffError, }; @@ -122,6 +122,8 @@ use sled_agent_types::early_networking::{ use sled_agent_types::rack_init::{ BootstrapAddressDiscovery, RackInitializeRequest as Config, }; +use sled_agent_types::sled::StartSledAgentRequest; +use sled_agent_types::time_sync::TimeSync; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; @@ -751,23 +753,24 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::PortConfigV2 { port: config.port.clone(), - routes: config + routes: config .routes .iter() .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, + }) + .collect(), + addresses: config + .addresses + .iter() + .map(|a| NexusTypes::UplinkAddressConfig { + address: a.address, + vlan_id: a.vlan_id, }) .collect(), - addresses: config - .addresses - .iter() - .map(|a| NexusTypes::UplinkAddressConfig { - address: a.address, - vlan_id: a.vlan_id - }) - .collect(), switch: config.switch.into(), uplink_port_speed: config.uplink_port_speed.into(), uplink_port_fec: config.uplink_port_fec.into(), @@ -787,7 +790,8 @@ impl ServiceInner { remote_asn: b.remote_asn, min_ttl: b.min_ttl, md5_auth_key: b.md5_auth_key.clone(), - multi_exit_discriminator: b.multi_exit_discriminator, + multi_exit_discriminator: b + .multi_exit_discriminator, local_pref: b.local_pref, enforce_first_as: b.enforce_first_as, communities: b.communities.clone(), @@ -796,6 +800,32 @@ impl ServiceInner { vlan_id: b.vlan_id, }) .collect(), + lldp: config.lldp.as_ref().map(|lp| { + NexusTypes::LldpPortConfig { + status: match lp.status { + LldpAdminStatus::Enabled => { + NexusTypes::LldpAdminStatus::Enabled + } + LldpAdminStatus::Disabled => { + NexusTypes::LldpAdminStatus::Disabled + } + LldpAdminStatus::TxOnly => { + NexusTypes::LldpAdminStatus::TxOnly + } + LldpAdminStatus::RxOnly => { + 
NexusTypes::LldpAdminStatus::RxOnly + } + }, + chassis_id: lp.chassis_id.clone(), + port_id: lp.port_id.clone(), + system_name: lp.system_name.clone(), + system_description: lp + .system_description + .clone(), + port_description: lp.port_description.clone(), + management_addrs: lp.management_addrs.clone(), + } + }), }) .collect(), bgp: config @@ -803,7 +833,12 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::BgpConfig { asn: config.asn, - originate: config.originate.iter().cloned().map(Into::into).collect(), + originate: config + .originate + .iter() + .cloned() + .map(Into::into) + .collect(), shaper: config.shaper.clone(), checker: config.checker.clone(), }) @@ -811,25 +846,26 @@ impl ServiceInner { bfd: config .bfd .iter() - .map(|spec| NexusTypes::BfdPeerConfig { - detection_threshold: spec.detection_threshold, - local: spec.local, - mode: match spec.mode { - omicron_common::api::external::BfdMode::SingleHop => { - nexus_client::types::BfdMode::SingleHop - } - omicron_common::api::external::BfdMode::MultiHop => { - nexus_client::types::BfdMode::MultiHop - } - }, - remote: spec.remote, - required_rx: spec.required_rx, - switch: spec.switch.into(), + .map(|spec| { + NexusTypes::BfdPeerConfig { + detection_threshold: spec.detection_threshold, + local: spec.local, + mode: match spec.mode { + omicron_common::api::external::BfdMode::SingleHop => { + nexus_client::types::BfdMode::SingleHop + } + omicron_common::api::external::BfdMode::MultiHop => { + nexus_client::types::BfdMode::MultiHop + } + }, + remote: spec.remote, + required_rx: spec.required_rx, + switch: spec.switch.into(), + } }) .collect(), } }; - info!(self.log, "rack_network_config: {:#?}", rack_network_config); let physical_disks: Vec<_> = sled_configs_by_id diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index ec86066096..c4f3e1008f 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -7,11 +7,11 @@ use super::config::Config; use super::http_entrypoints::api 
as http_api; use super::sled_agent::SledAgent; -use crate::bootstrap::params::StartSledAgentRequest; use crate::long_running_tasks::LongRunningTaskHandles; -use crate::nexus::NexusClientWithResolver; +use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns::resolver::Resolver; +use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; @@ -52,8 +52,7 @@ impl Server { .map_err(|e| e.to_string())?, ); - let nexus_client = NexusClientWithResolver::new(&log, resolver) - .map_err(|e| e.to_string())?; + let nexus_client = make_nexus_client(&log, resolver); let sled_agent = SledAgent::new( &config, @@ -100,14 +99,3 @@ impl Server { self.http_server.close().await } } - -/// Runs the OpenAPI generator, emitting the spec to stdout. -pub fn run_openapi() -> Result<(), String> { - http_api() - .openapi("Oxide Sled Agent API", "0.0.1") - .description("API for interacting with individual sleds") - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index bb8701771f..adc06f64f7 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -31,10 +31,7 @@ use crate::bootstrap::early_networking::{ use crate::bootstrap::BootstrapNetworking; use crate::config::SidecarRevision; use crate::metrics::MetricsRequestQueue; -use crate::params::{ - DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt, TimeSync, - ZoneBundleCause, ZoneBundleMetadata, -}; +use crate::params::{DendriteAsic, OmicronZoneConfigExt, OmicronZoneTypeExt}; use crate::profile::*; use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; @@ -68,6 +65,7 @@ use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, ZoneKind, }; +use 
omicron_common::address::CLICKHOUSE_ADMIN_PORT; use omicron_common::address::CLICKHOUSE_KEEPER_PORT; use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::address::COCKROACH_PORT; @@ -97,6 +95,10 @@ use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use once_cell::sync::OnceCell; use rand::prelude::SliceRandom; +use sled_agent_types::{ + time_sync::TimeSync, + zone_bundle::{ZoneBundleCause, ZoneBundleMetadata}, +}; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::SledMode; @@ -1571,12 +1573,37 @@ impl ServiceManager { .add_property_group(config), ); + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -1586,6 +1613,88 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseServer { .. }, + underlay_address, + .. + }, + .. 
+ }) => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + let listen_addr = *underlay_address; + let listen_port = CLICKHOUSE_PORT.to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + Some(&info.underlay_address), + &installed_zone, + &[listen_addr], + )?; + + let dns_service = Self::dns_install(info, None, &None).await?; + + let config = PropertyGroupBuilder::new("config") + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + let clickhouse_server_service = + ServiceBuilder::new("oxide/clickhouse_server") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ); + + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(clickhouse_server_service) + .add_service(dns_service) + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io( + "Failed to setup clickhouse server profile", + err, + ) + })?; + RunningZone::boot(installed_zone).await? 
+ } + ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: OmicronZoneConfig { @@ -1624,12 +1733,38 @@ impl ServiceManager { ServiceInstanceBuilder::new("default") .add_property_group(config), ); + + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_keeper_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -3859,6 +3994,19 @@ impl ServiceManager { &self, our_ports: Vec, ) -> Result<(), Error> { + // Helper function to add a property-value pair + // if the config actually has a value set. + fn apv( + smfh: &SmfHelper, + prop: &str, + val: &Option, + ) -> Result<(), Error> { + if let Some(v) = val { + smfh.addpropvalue_type(prop, v, "astring")? + } + Ok(()) + } + // We expect the switch zone to be running, as we're called immediately // after `ensure_zone()` above and we just successfully configured // uplinks via DPD running in our switch zone. 
If somehow we're in any @@ -3881,26 +4029,76 @@ impl ServiceManager { } }; - info!(self.inner.log, "Setting up uplinkd service"); - let smfh = SmfHelper::new(&zone, &SwitchService::Uplink); + info!(self.inner.log, "ensuring scrimlet uplinks"); + let usmfh = SmfHelper::new(&zone, &SwitchService::Uplink); + let lsmfh = SmfHelper::new( + &zone, + &SwitchService::Lldpd { baseboard: Baseboard::Unknown }, + ); // We want to delete all the properties in the `uplinks` group, but we // don't know their names, so instead we'll delete and recreate the // group, then add all our properties. - smfh.delpropgroup("uplinks")?; - smfh.addpropgroup("uplinks", "application")?; + let _ = usmfh.delpropgroup("uplinks"); + usmfh.addpropgroup("uplinks", "application")?; for port_config in &our_ports { for addr in &port_config.addrs { - info!(self.inner.log, "configuring port: {port_config:?}"); - smfh.addpropvalue_type( + usmfh.addpropvalue_type( &format!("uplinks/{}_0", port_config.port,), &addr.to_string(), "astring", )?; } + + if let Some(lldp_config) = &port_config.lldp { + let group_name = format!("port_{}", port_config.port); + info!(self.inner.log, "setting up {group_name}"); + let _ = lsmfh.delpropgroup(&group_name); + lsmfh.addpropgroup(&group_name, "application")?; + apv( + &lsmfh, + &format!("{group_name}/status"), + &Some(lldp_config.status.to_string()), + )?; + apv( + &lsmfh, + &format!("{group_name}/chassis_id"), + &lldp_config.chassis_id, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_name"), + &lldp_config.system_name, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_description"), + &lldp_config.system_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_description"), + &lldp_config.port_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_id"), + &lldp_config.port_id, + )?; + if let Some(a) = &lldp_config.management_addrs { + for address in a { + apv( + &lsmfh, + &format!("{group_name}/management_addrs"), + 
&Some(address.to_string()), + )?; + } + } + } } - smfh.refresh()?; + usmfh.refresh()?; + lsmfh.refresh()?; Ok(()) } diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index ffb7327ce7..6057d03f70 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -410,7 +410,6 @@ impl SimCollection { #[cfg(test)] mod test { - use crate::params::{DiskStateRequested, InstanceStateRequested}; use crate::sim::collection::SimObject; use crate::sim::disk::SimDisk; use crate::sim::instance::SimInstance; @@ -427,6 +426,8 @@ mod test { use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::PropolisUuid; + use sled_agent_types::disk::DiskStateRequested; + use sled_agent_types::instance::InstanceStateRequested; fn make_instance( logctx: &LogContext, diff --git a/sled-agent/src/sim/disk.rs b/sled-agent/src/sim/disk.rs index 284e424ebf..9661b1949b 100644 --- a/sled-agent/src/sim/disk.rs +++ b/sled-agent/src/sim/disk.rs @@ -5,7 +5,6 @@ //! Simulated sled agent implementation use crate::nexus::NexusClient; -use crate::params::DiskStateRequested; use crate::sim::simulatable::Simulatable; use async_trait::async_trait; use dropshot::ConfigLogging; @@ -20,6 +19,7 @@ use omicron_common::api::internal::nexus::ProducerKind; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; use propolis_client::types::DiskAttachmentState as PropolisDiskState; +use sled_agent_types::disk::DiskStateRequested; use std::net::{Ipv6Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 95c986bf92..ac68f5e42e 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,38 +5,59 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; -use crate::bootstrap::params::AddSledRequest; -use crate::params::{ - DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, - InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - VpcFirewallRulesEnsureBody, -}; +use camino::Utf8PathBuf; +use dropshot::endpoint; use dropshot::ApiDescription; +use dropshot::FreeformBody; use dropshot::HttpError; +use dropshot::HttpResponseCreated; +use dropshot::HttpResponseDeleted; +use dropshot::HttpResponseHeaders; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; +use dropshot::Query; use dropshot::RequestContext; +use dropshot::StreamingBody; use dropshot::TypedBody; -use dropshot::{endpoint, ApiDescriptionRegisterError}; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; +use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::api::internal::shared::SledIdentifiers; +use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; +use sled_agent_types::boot_disk::BootDiskPathParams; +use sled_agent_types::boot_disk::BootDiskUpdatePathParams; +use 
sled_agent_types::boot_disk::BootDiskWriteStartQueryParams; +use sled_agent_types::bootstore::BootstoreStatus; +use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; -use sled_storage::resources::DatasetsManagementResult; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; +use sled_agent_types::instance::InstanceEnsureBody; +use sled_agent_types::instance::InstanceExternalIpBody; +use sled_agent_types::instance::InstancePutStateBody; +use sled_agent_types::instance::InstancePutStateResponse; +use sled_agent_types::instance::InstanceUnregisterResponse; +use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::BundleUtilization; +use sled_agent_types::zone_bundle::CleanupContext; +use sled_agent_types::zone_bundle::CleanupCount; +use sled_agent_types::zone_bundle::ZoneBundleId; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; +use std::collections::BTreeMap; use std::sync::Arc; -use uuid::Uuid; use super::sled_agent::SledAgent; @@ -44,537 +65,523 @@ type SledApiDescription = ApiDescription>; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; + fn register_endpoints() -> Result { + let mut api = sled_agent_api::sled_agent_api_mod::api_description::< + SledAgentSimImpl, + >()?; api.register(instance_poke_post)?; api.register(instance_poke_single_step_post)?; api.register(instance_post_sim_migration_source)?; - api.register(disk_put)?; api.register(disk_poke_post)?; - api.register(update_artifact)?; - 
api.register(instance_issue_disk_snapshot_request)?; - api.register(vpc_firewall_rules_put)?; - api.register(set_v2p)?; - api.register(del_v2p)?; - api.register(list_v2p)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config)?; - api.register(write_network_bootstore_config)?; - api.register(inventory)?; - api.register(datasets_get)?; - api.register(datasets_put)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(sled_add)?; - api.register(list_vpc_routes)?; - api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api -} + Ok(api) + } -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + register_endpoints().expect("failed to register entrypoints") +} + +enum SledAgentSimImpl {} + +impl SledAgentApi for SledAgentSimImpl { + type Context = Arc; + + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_register( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.metadata, + ) + .await?, + )) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_register( - instance_id, - 
body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.metadata, + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) + } + + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_state(instance_id, body_args.state).await?, + )) + } + + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + } + + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + 
body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await?, + )) + } + + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.updates() + .download_artifact( + artifact.into_inner(), + rqctx.context().nexus_client.as_ref(), + ) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + > { + let sa = rqctx.context(); + let path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.disk_id, + body.snapshot_id, ) - .await?, - )) -} + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) -} + Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let _sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let 
_body_args = body.into_inner(); -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.set_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke", -}] -async fn instance_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::Drain).await; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke-single-step", 
-}] -async fn instance_poke_single_step_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::SingleStep).await; - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = POST, - path = "/instances/{instance_id}/sim-migration-source", -}] -async fn instance_post_sim_migration_source( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_simulate_migration_source(instance_id, body.into_inner()) - .await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), - ) - .await?, - )) -} + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); -#[endpoint { - method = POST, - path = "/disks/{disk_id}/poke", -}] -async fn disk_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - sa.disk_poke(disk_id).await; - Ok(HttpResponseUpdatedNoContent()) -} + 
let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext>, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.updates() - .download_artifact( - artifact.into_inner(), - rqctx.context().nexus_client.as_ref(), - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseOk(vnics)) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - disk_id: Uuid, -} + async fn uplink_ensure( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, -} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let config = + rqctx.context().bootstore_network_config.lock().await.clone(); + Ok(HttpResponseOk(config)) + } -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let mut config = rqctx.context().bootstore_network_config.lock().await; + *config = body.into_inner(); + Ok(HttpResponseUpdatedNoContent()) + } -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - 
) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + /// Fetch basic information about this sled + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.inventory(rqctx.server.local_addr).await.map_err(|e| { + HttpError::for_internal_error(format!("{:#}", e)) + })?, + )) + } -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: Uuid, -} + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let _sa = rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let _body_args = body.into_inner(); + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -/// Create a mapping from a virtual NIC to a physical host -#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + 
let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } - sa.set_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await; + Ok(HttpResponseUpdatedNoContent()) + } -/// Delete a mapping from a virtual NIC to a physical host -#[endpoint { - method = DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn sled_add( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } - sa.unset_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_vpc_routes( + rqctx: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) + } -/// List v2p mappings present on sled -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext>, -) -> Result>, HttpError> { - let sa = rqctx.context(); + // --- Unimplemented endpoints --- - let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + async fn zone_bundle_list_all( + _rqctx: RequestContext, + _query: Query, + ) -> Result>, HttpError> { + method_unimplemented() + } - 
Ok(HttpResponseOk(vnics)) -} + async fn zone_bundle_list( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> { + method_unimplemented() + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - _rqctx: RequestContext>, - _body: TypedBody, -) -> Result { - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_create( + _rqctx: RequestContext, + _params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config( - rqctx: RequestContext>, -) -> Result, HttpError> { - let config = rqctx.context().bootstore_network_config.lock().await.clone(); - Ok(HttpResponseOk(config)) -} + async fn zone_bundle_get( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let mut config = rqctx.context().bootstore_network_config.lock().await; - *config = body.into_inner(); - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_delete( + _rqctx: RequestContext, + _params: Path, + ) -> Result { + method_unimplemented() + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = "/inventory", -}] -async fn inventory( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.inventory(rqctx.server.local_addr) - .await - .map_err(|e| HttpError::for_internal_error(format!("{:#}", e)))?, - )) -} + async fn zone_bundle_utilization( + _rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/datasets", -}] -async fn datasets_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - 
let body_args = body.into_inner(); - let result = sa.datasets_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn zone_bundle_cleanup_context( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/datasets", -}] -async fn datasets_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_list().await?)) -} + async fn zone_bundle_cleanup_context_update( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn zone_bundle_cleanup( + _rqctx: RequestContext, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + async fn zones_list( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn zpools_get( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn sled_role_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn cockroachdb_init( + _rqctx: RequestContext, + ) -> Result { + method_unimplemented() + } + + async fn timesync_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_start( + _rqctx: RequestContext, + _path_params: Path, + _query_params: Query, + _body: StreamingBody, + ) -> Result { + 
method_unimplemented() + } + + async fn host_os_write_status_get( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_status_delete( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result { + method_unimplemented() + } + + async fn sled_identifiers( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn bootstore_status( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) +fn method_unimplemented() -> Result { + Err(HttpError { + // Use a client error here (405 Method Not Allowed vs 501 Not + // Implemented) even though it isn't strictly accurate here, so tests + // get to see the error message. + status_code: http::StatusCode::METHOD_NOT_ALLOWED, + error_code: None, + external_message: "Method not implemented in sled-agent-sim" + .to_string(), + internal_message: "Method not implemented in sled-agent-sim" + .to_string(), + }) } +// --- Extra endpoints only available in the sim implementation --- + #[endpoint { - method = PUT, - path = "/omicron-zones", + method = POST, + path = "/instances/{instance_id}/poke", }] -async fn omicron_zones_put( +async fn instance_poke_post( rqctx: RequestContext>, - body: TypedBody, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await; + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::Drain).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/sleds" + method = POST, + path = "/instances/{instance_id}/poke-single-step", }] -async fn sled_add( - _rqctx: RequestContext>, - _body: TypedBody, 
+async fn instance_poke_single_step_post( + rqctx: RequestContext>, + path_params: Path, ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = GET, - path = "/vpc-routes", + method = POST, + path = "/instances/{instance_id}/sim-migration-source", }] -async fn list_vpc_routes( +async fn instance_post_sim_migration_source( rqctx: RequestContext>, -) -> Result>, HttpError> { + path_params: Path, + body: TypedBody, +) -> Result { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.list_vpc_routes().await)) + let instance_id = path_params.into_inner().instance_id; + sa.instance_simulate_migration_source(instance_id, body.into_inner()) + .await?; + Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/vpc-routes", + method = POST, + path = "/disks/{disk_id}/poke", }] -async fn set_vpc_routes( +async fn disk_poke_post( rqctx: RequestContext>, - body: TypedBody>, + path_params: Path, ) -> Result { let sa = rqctx.context(); - sa.set_vpc_routes(body.into_inner()).await; + let disk_id = path_params.into_inner().disk_id; + sa.disk_poke(disk_id).await; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 8ee0130262..33bc1c40c1 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -8,7 +8,6 @@ use super::simulatable::Simulatable; use crate::common::instance::{ObservedPropolisState, PublishedVmmState}; use crate::nexus::NexusClient; -use crate::params::InstanceStateRequested; use async_trait::async_trait; use chrono::Utc; use nexus_client; @@ -21,6 +20,7 @@ use propolis_client::types::{ InstanceMigrationStatus as PropolisMigrationStatus, InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, }; +use sled_agent_types::instance::InstanceStateRequested; use std::collections::VecDeque; use 
std::sync::Arc; use std::sync::Mutex; diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index b4016b0404..d9fbadff05 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -524,7 +524,7 @@ pub async fn run_standalone_server( SledUuid::from_untyped_uuid(config.id), SledConfig { disks: server.sled_agent.omicron_physical_disks_list().await?, - datasets: server.sled_agent.datasets_list().await?, + datasets: server.sled_agent.datasets_config_list().await?, zones, }, ); diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 2c1b810ab2..bc1de2e6b5 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -11,18 +11,12 @@ use super::instance::{self, SimInstance}; use super::storage::CrucibleData; use super::storage::Storage; use crate::nexus::NexusClient; -use crate::params::{ - DiskStateRequested, InstanceExternalIpBody, InstanceHardware, - InstanceMetadata, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, -}; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; use anyhow::bail; use anyhow::Context; use dropshot::{HttpError, HttpServer}; use futures::lock::Mutex; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use nexus_sled_agent_shared::inventory::{ Inventory, InventoryDisk, InventoryZpool, OmicronZonesConfig, SledRole, }; @@ -38,9 +32,11 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::shared::{ RackNetworkConfig, ResolvedVpcRoute, ResolvedVpcRouteSet, ResolvedVpcRouteState, RouterId, RouterKind, RouterVersion, + VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DatasetsConfig, DiskIdentity, DiskVariant, OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskIdentity, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use 
oxnet::Ipv6Net; @@ -48,11 +44,15 @@ use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; -use sled_storage::resources::DatasetsManagementResult; -use sled_storage::resources::DisksManagementResult; +use sled_agent_types::instance::{ + InstanceExternalIpBody, InstanceHardware, InstanceMetadata, + InstancePutStateResponse, InstanceStateRequested, + InstanceUnregisterResponse, +}; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; @@ -914,8 +914,10 @@ impl SledAgent { self.storage.lock().await.datasets_ensure(config).await } - pub async fn datasets_list(&self) -> Result { - self.storage.lock().await.datasets_list().await + pub async fn datasets_config_list( + &self, + ) -> Result { + self.storage.lock().await.datasets_config_list().await } pub async fn omicron_physical_disks_list( diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index db5ab92d55..fb31a4c16c 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -18,19 +18,19 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; +use omicron_common::disk::DatasetManagementStatus; use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DiskIdentity; +use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; +use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; -use 
sled_storage::resources::DatasetManagementStatus; -use sled_storage::resources::DatasetsManagementResult; -use sled_storage::resources::DiskManagementStatus; -use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; @@ -586,7 +586,9 @@ impl Storage { &self.physical_disks } - pub async fn datasets_list(&self) -> Result { + pub async fn datasets_config_list( + &self, + ) -> Result { let Some(config) = self.dataset_config.as_ref() else { return Err(HttpError::for_not_found( None, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 3eb3805784..9732f9099c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -7,21 +7,14 @@ use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::early_networking::EarlyNetworkSetupError; -use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest}; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{ - NexusClientWithResolver, NexusNotifierHandle, NexusNotifierInput, - NexusNotifierTask, -}; -use crate::params::{ - DiskStateRequested, InstanceExternalIpBody, InstanceHardware, - InstanceMetadata, InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, OmicronZoneTypeExt, TimeSync, VpcFirewallRule, - ZoneBundleMetadata, Zpool, + NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; +use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; @@ -35,7 +28,6 @@ use derive_more::From; use dropshot::HttpError; use futures::stream::FuturesUnordered; use futures::StreamExt; -use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use 
illumos_utils::opte::PortManager; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use illumos_utils::zone::ZONE_PREFIX; @@ -51,8 +43,9 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - HostPortConfig, RackNetworkConfig, ResolvedVpcRouteSet, - ResolvedVpcRouteState, SledIdentifiers, + HostPortConfig, RackNetworkConfig, ResolvedVpcFirewallRule, + ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, + VirtualNetworkInterfaceHost, }; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -61,17 +54,31 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::{DatasetsConfig, OmicronPhysicalDisksConfig}; +use omicron_common::disk::{ + DatasetsConfig, DatasetsManagementResult, DisksManagementResult, + OmicronPhysicalDisksConfig, +}; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; +use sled_agent_api::Zpool; +use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; +use sled_agent_types::instance::{ + InstanceExternalIpBody, InstanceHardware, InstanceMetadata, + InstancePutStateResponse, InstanceStateRequested, + InstanceUnregisterResponse, +}; +use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::{ + BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, + PriorityOrder, StorageLimit, ZoneBundleMetadata, +}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; use sled_storage::dataset::{CRYPT_DATASET, ZONE_DATASET}; use sled_storage::manager::StorageHandle; -use sled_storage::resources::DatasetsManagementResult; -use 
sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -237,8 +244,9 @@ impl From for dropshot::HttpError { BundleError::NoSuchZone { .. } => { HttpError::for_not_found(None, inner.to_string()) } - BundleError::InvalidStorageLimit - | BundleError::InvalidCleanupPeriod => { + BundleError::StorageLimitCreate(_) + | BundleError::CleanupPeriodCreate(_) + | BundleError::PriorityOrderCreate(_) => { HttpError::for_bad_request(None, inner.to_string()) } BundleError::InstanceTerminating => { @@ -323,7 +331,7 @@ struct SledAgentInner { services: ServiceManager, // Connection to Nexus. - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, // A mechanism for notifiying nexus about sled-agent updates nexus_notifier: NexusNotifierHandle, @@ -368,7 +376,7 @@ impl SledAgent { pub async fn new( config: &Config, log: Logger, - nexus_client: NexusClientWithResolver, + nexus_client: NexusClient, request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, @@ -555,7 +563,7 @@ impl SledAgent { let nexus_notifier_input = NexusNotifierInput { sled_id: request.body.id, sled_address: get_sled_address(request.body.subnet), - nexus_client: nexus_client.client().clone(), + nexus_client: nexus_client.clone(), hardware: long_running_task_handles.hardware_manager.clone(), vmm_reservoir_manager: vmm_reservoir_manager.clone(), }; @@ -691,7 +699,6 @@ impl SledAgent { self.inner .nexus_client - .client() .sled_firewall_rules_request(&sled_id) .await .map_err(|err| Error::FirewallRequest(err))?; @@ -777,18 +784,16 @@ impl SledAgent { } /// Fetch the zone bundle cleanup context. - pub async fn zone_bundle_cleanup_context( - &self, - ) -> zone_bundle::CleanupContext { + pub async fn zone_bundle_cleanup_context(&self) -> CleanupContext { self.inner.zone_bundler.cleanup_context().await } /// Update the zone bundle cleanup context. 
pub async fn update_zone_bundle_cleanup_context( &self, - period: Option, - storage_limit: Option, - priority: Option, + period: Option, + storage_limit: Option, + priority: Option, ) -> Result<(), Error> { self.inner .zone_bundler @@ -800,15 +805,14 @@ impl SledAgent { /// Fetch the current utilization of the relevant datasets for zone bundles. pub async fn zone_bundle_utilization( &self, - ) -> Result, Error> - { + ) -> Result, Error> { self.inner.zone_bundler.utilization().await.map_err(Error::from) } /// Trigger an explicit request to cleanup old zone bundles. pub async fn zone_bundle_cleanup( &self, - ) -> Result, Error> { + ) -> Result, Error> { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } @@ -1100,7 +1104,7 @@ impl SledAgent { ) -> Result<(), Error> { self.inner .updates - .download_artifact(artifact, &self.inner.nexus_client.client()) + .download_artifact(artifact, &self.inner.nexus_client) .await?; Ok(()) } @@ -1126,7 +1130,7 @@ impl SledAgent { pub async fn firewall_rules_ensure( &self, vpc_vni: Vni, - rules: &[VpcFirewallRule], + rules: &[ResolvedVpcFirewallRule], ) -> Result<(), Error> { self.inner .port_manager diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 9193a855b0..a928abe9b3 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -252,8 +252,8 @@ impl UpdateManager { mod test { use super::*; use crate::fakes::nexus::FakeNexusServer; + use crate::nexus::NexusClient; use flate2::write::GzEncoder; - use nexus_client::Client as NexusClient; use omicron_common::api::external::{Error, SemverVersion}; use omicron_common::api::internal::nexus::UpdateArtifactId; use omicron_test_utils::dev::test_setup_log; diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 7436266c93..4ba7a39145 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -11,8 +11,6 @@ use anyhow::Context; use camino::FromPathBufError; use camino::Utf8Path; use 
camino::Utf8PathBuf; -use chrono::DateTime; -use chrono::Utc; use flate2::bufread::GzDecoder; use illumos_utils::running_zone::is_oxide_smf_log_file; use illumos_utils::running_zone::RunningZone; @@ -29,18 +27,12 @@ use illumos_utils::zfs::Snapshot; use illumos_utils::zfs::Zfs; use illumos_utils::zfs::ZFS; use illumos_utils::zone::AdmError; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; +use sled_agent_types::zone_bundle::*; use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; -use std::cmp::Ord; -use std::cmp::Ordering; -use std::cmp::PartialOrd; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::collections::HashSet; use std::io::Cursor; use std::sync::Arc; use std::time::Duration; @@ -55,104 +47,6 @@ use tokio::time::sleep; use tokio::time::Instant; use uuid::Uuid; -/// An identifier for a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleId { - /// The name of the zone this bundle is derived from. - pub zone_name: String, - /// The ID for this bundle itself. - pub bundle_id: Uuid, -} - -/// The reason or cause for a zone bundle, i.e., why it was created. -// -// NOTE: The ordering of the enum variants is important, and should not be -// changed without careful consideration. -// -// The ordering is used when deciding which bundles to remove automatically. In -// addition to time, the cause is used to sort bundles, so changing the variant -// order will change that priority. -#[derive( - Clone, - Copy, - Debug, - Default, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -#[serde(rename_all = "snake_case")] -#[non_exhaustive] -pub enum ZoneBundleCause { - /// Some other, unspecified reason. - #[default] - Other, - /// A zone bundle taken when a sled agent finds a zone that it does not - /// expect to be running. 
- UnexpectedZone, - /// An instance zone was terminated. - TerminatedInstance, - /// Generated in response to an explicit request to the sled agent. - ExplicitRequest, -} - -/// Metadata about a zone bundle. -#[derive( - Clone, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Ord, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct ZoneBundleMetadata { - /// Identifier for this zone bundle - pub id: ZoneBundleId, - /// The time at which this zone bundle was created. - pub time_created: DateTime, - /// A version number for this zone bundle. - pub version: u8, - /// The reason or cause a bundle was created. - pub cause: ZoneBundleCause, -} - -impl ZoneBundleMetadata { - const VERSION: u8 = 0; - - /// Create a new set of metadata for the provided zone. - pub(crate) fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { - Self { - id: ZoneBundleId { - zone_name: zone_name.to_string(), - bundle_id: Uuid::new_v4(), - }, - time_created: Utc::now(), - version: Self::VERSION, - cause, - } - } -} - // The name of the snapshot created from the zone root filesystem. const ZONE_ROOT_SNAPSHOT_NAME: &'static str = "zone-root"; @@ -650,20 +544,14 @@ pub enum BundleError { #[error("Zone '{name}' cannot currently be bundled")] Unavailable { name: String }, - #[error("Storage limit must be expressed as a percentage in (0, 100]")] - InvalidStorageLimit, + #[error(transparent)] + StorageLimitCreate(#[from] StorageLimitCreateError), - #[error( - "Cleanup period must be between {min:?} and {max:?}, inclusive", - min = CleanupPeriod::MIN, - max = CleanupPeriod::MAX, - )] - InvalidCleanupPeriod, + #[error(transparent)] + CleanupPeriodCreate(#[from] CleanupPeriodCreateError), - #[error( - "Invalid priority ordering. Each element must appear exactly once." 
- )] - InvalidPriorityOrder, + #[error(transparent)] + PriorityOrderCreate(#[from] PriorityOrderCreateError), #[error("Cleanup failed")] Cleanup(#[source] anyhow::Error), @@ -1484,29 +1372,6 @@ async fn get_zone_bundle_paths( Ok(out) } -/// The portion of a debug dataset used for zone bundles. -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct BundleUtilization { - /// The total dataset quota, in bytes. - pub dataset_quota: u64, - /// The total number of bytes available for zone bundles. - /// - /// This is `dataset_quota` multiplied by the context's storage limit. - pub bytes_available: u64, - /// Total bundle usage, in bytes. - pub bytes_used: u64, -} - -#[derive(Clone, Debug, PartialEq)] -struct ZoneBundleInfo { - // The raw metadata for the bundle - metadata: ZoneBundleMetadata, - // The full path to the bundle - path: Utf8PathBuf, - // The number of bytes consumed on disk by the bundle - bytes: u64, -} - // Enumerate all zone bundles under the provided directory. async fn enumerate_zone_bundles( log: &Logger, @@ -1577,15 +1442,6 @@ async fn enumerate_zone_bundles( Ok(out) } -/// The count of bundles / bytes removed during a cleanup operation. -#[derive(Clone, Copy, Debug, Default, Deserialize, JsonSchema, Serialize)] -pub struct CleanupCount { - /// The number of bundles removed. - bundles: u64, - /// The number of bytes removed. - bytes: u64, -} - // Run a cleanup, removing old bundles according to the strategy. // // Return the number of bundles removed and the new usage. @@ -1687,19 +1543,6 @@ async fn compute_bundle_utilization( Ok(out) } -/// Context provided for the zone bundle cleanup task. -#[derive( - Clone, Copy, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize, -)] -pub struct CleanupContext { - /// The period on which automatic checks and cleanup is performed. - pub period: CleanupPeriod, - /// The limit on the dataset quota available for zone bundles. 
- pub storage_limit: StorageLimit, - /// The priority ordering for keeping old bundles. - pub priority: PriorityOrder, -} - // Return the number of bytes occupied by the provided directory. // // This returns an error if: @@ -1814,258 +1657,10 @@ async fn zfs_quota(path: &Utf8PathBuf) -> Result { } } -/// The limit on space allowed for zone bundles, as a percentage of the overall -/// dataset's quota. -#[derive( - Clone, - Copy, - Debug, - Deserialize, - JsonSchema, - PartialEq, - PartialOrd, - Serialize, -)] -pub struct StorageLimit(u8); - -impl std::fmt::Display for StorageLimit { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}%", self.as_u8()) - } -} - -impl Default for StorageLimit { - fn default() -> Self { - StorageLimit(25) - } -} - -impl StorageLimit { - /// Minimum percentage of dataset quota supported. - pub const MIN: Self = Self(0); - - /// Maximum percentage of dataset quota supported. - pub const MAX: Self = Self(50); - - /// Construct a new limit allowed for zone bundles. - /// - /// This should be expressed as a percentage, in the range (Self::MIN, - /// Self::MAX]. - pub const fn new(percentage: u8) -> Result { - if percentage > Self::MIN.0 && percentage <= Self::MAX.0 { - Ok(Self(percentage)) - } else { - Err(BundleError::InvalidStorageLimit) - } - } - - /// Return the contained quota percentage. - pub const fn as_u8(&self) -> u8 { - self.0 - } - - // Compute the number of bytes available from a dataset quota, in bytes. - const fn bytes_available(&self, dataset_quota: u64) -> u64 { - (dataset_quota * self.as_u8() as u64) / 100 - } -} - -/// A dimension along with bundles can be sorted, to determine priority. -#[derive( - Clone, - Copy, - Debug, - Deserialize, - Eq, - Hash, - JsonSchema, - Serialize, - Ord, - PartialEq, - PartialOrd, -)] -#[serde(rename_all = "snake_case")] -pub enum PriorityDimension { - /// Sorting by time, with older bundles with lower priority. 
- Time, - /// Sorting by the cause for creating the bundle. - Cause, - // TODO-completeness: Support zone or zone type (e.g., service vs instance)? -} - -/// The priority order for bundles during cleanup. -/// -/// Bundles are sorted along the dimensions in [`PriorityDimension`], with each -/// dimension appearing exactly once. During cleanup, lesser-priority bundles -/// are pruned first, to maintain the dataset quota. Note that bundles are -/// sorted by each dimension in the order in which they appear, with each -/// dimension having higher priority than the next. -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] -pub struct PriorityOrder([PriorityDimension; PriorityOrder::EXPECTED_SIZE]); - -impl std::ops::Deref for PriorityOrder { - type Target = [PriorityDimension; PriorityOrder::EXPECTED_SIZE]; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Default for PriorityOrder { - fn default() -> Self { - Self::DEFAULT - } -} - -impl PriorityOrder { - // NOTE: Must match the number of variants in `PriorityDimension`. - const EXPECTED_SIZE: usize = 2; - const DEFAULT: Self = - Self([PriorityDimension::Cause, PriorityDimension::Time]); - - /// Construct a new priority order. - /// - /// This requires that each dimension appear exactly once. - pub fn new(dims: &[PriorityDimension]) -> Result { - if dims.len() != Self::EXPECTED_SIZE { - return Err(BundleError::InvalidPriorityOrder); - } - let mut seen = HashSet::new(); - for dim in dims.iter() { - if !seen.insert(dim) { - return Err(BundleError::InvalidPriorityOrder); - } - } - Ok(Self(dims.try_into().unwrap())) - } - - // Order zone bundle info according to the contained priority. - // - // We sort the info by each dimension, in the order in which it appears. - // That means earlier dimensions have higher priority than later ones. 
- fn compare_bundles( - &self, - lhs: &ZoneBundleInfo, - rhs: &ZoneBundleInfo, - ) -> Ordering { - for dim in self.0.iter() { - let ord = match dim { - PriorityDimension::Cause => { - lhs.metadata.cause.cmp(&rhs.metadata.cause) - } - PriorityDimension::Time => { - lhs.metadata.time_created.cmp(&rhs.metadata.time_created) - } - }; - if matches!(ord, Ordering::Equal) { - continue; - } - return ord; - } - Ordering::Equal - } -} - -/// A period on which bundles are automatically cleaned up. -#[derive( - Clone, Copy, Deserialize, JsonSchema, PartialEq, PartialOrd, Serialize, -)] -pub struct CleanupPeriod(Duration); - -impl Default for CleanupPeriod { - fn default() -> Self { - Self(Duration::from_secs(600)) - } -} - -impl CleanupPeriod { - /// The minimum supported cleanup period. - pub const MIN: Self = Self(Duration::from_secs(60)); - - /// The maximum supported cleanup period. - pub const MAX: Self = Self(Duration::from_secs(60 * 60 * 24)); - - /// Construct a new cleanup period, checking that it's valid. - pub fn new(duration: Duration) -> Result { - if duration >= Self::MIN.as_duration() - && duration <= Self::MAX.as_duration() - { - Ok(Self(duration)) - } else { - Err(BundleError::InvalidCleanupPeriod) - } - } - - /// Return the period as a duration. 
- pub const fn as_duration(&self) -> Duration { - self.0 - } -} - -impl TryFrom for CleanupPeriod { - type Error = BundleError; - - fn try_from(duration: Duration) -> Result { - Self::new(duration) - } -} - -impl std::fmt::Debug for CleanupPeriod { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - self.0.fmt(f) - } -} - #[cfg(test)] mod tests { use super::disk_usage; - use super::PriorityDimension; - use super::PriorityOrder; - use super::StorageLimit; use super::Utf8PathBuf; - use super::ZoneBundleCause; - use super::ZoneBundleId; - use super::ZoneBundleInfo; - use super::ZoneBundleMetadata; - use chrono::TimeZone; - use chrono::Utc; - - #[test] - fn test_sort_zone_bundle_cause() { - use ZoneBundleCause::*; - let mut original = - [ExplicitRequest, Other, TerminatedInstance, UnexpectedZone]; - let expected = - [Other, UnexpectedZone, TerminatedInstance, ExplicitRequest]; - original.sort(); - assert_eq!(original, expected); - } - - #[test] - fn test_priority_dimension() { - assert!(PriorityOrder::new(&[]).is_err()); - assert!(PriorityOrder::new(&[PriorityDimension::Cause]).is_err()); - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Cause - ]) - .is_err()); - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Cause, - PriorityDimension::Time - ]) - .is_err()); - - assert!(PriorityOrder::new(&[ - PriorityDimension::Cause, - PriorityDimension::Time - ]) - .is_ok()); - assert_eq!( - PriorityOrder::new(&PriorityOrder::default().0).unwrap(), - PriorityOrder::default() - ); - } #[tokio::test] async fn test_disk_usage() { @@ -2081,95 +1676,6 @@ mod tests { let path = Utf8PathBuf::from("/some/nonexistent/path"); assert!(disk_usage(&path).await.is_err()); } - - #[test] - fn test_storage_limit_bytes_available() { - let pct = StorageLimit(1); - assert_eq!(pct.bytes_available(100), 1); - assert_eq!(pct.bytes_available(1000), 10); - - let pct = StorageLimit(100); - assert_eq!(pct.bytes_available(100), 
100); - assert_eq!(pct.bytes_available(1000), 1000); - - let pct = StorageLimit(100); - assert_eq!(pct.bytes_available(99), 99); - - let pct = StorageLimit(99); - assert_eq!(pct.bytes_available(1), 0); - - // Test non-power of 10. - let pct = StorageLimit(25); - assert_eq!(pct.bytes_available(32768), 8192); - } - - #[test] - fn test_compare_bundles() { - use PriorityDimension::*; - let time_first = PriorityOrder([Time, Cause]); - let cause_first = PriorityOrder([Cause, Time]); - - fn make_info( - year: i32, - month: u32, - day: u32, - cause: ZoneBundleCause, - ) -> ZoneBundleInfo { - ZoneBundleInfo { - metadata: ZoneBundleMetadata { - id: ZoneBundleId { - zone_name: String::from("oxz_whatever"), - bundle_id: uuid::Uuid::new_v4(), - }, - time_created: Utc - .with_ymd_and_hms(year, month, day, 0, 0, 0) - .single() - .unwrap(), - cause, - version: 0, - }, - path: Utf8PathBuf::from("/some/path"), - bytes: 0, - } - } - - let info = [ - make_info(2020, 1, 2, ZoneBundleCause::TerminatedInstance), - make_info(2020, 1, 2, ZoneBundleCause::ExplicitRequest), - make_info(2020, 1, 1, ZoneBundleCause::TerminatedInstance), - make_info(2020, 1, 1, ZoneBundleCause::ExplicitRequest), - ]; - - let mut sorted = info.clone(); - sorted.sort_by(|lhs, rhs| time_first.compare_bundles(lhs, rhs)); - // Low -> high priority - // [old/terminated, old/explicit, new/terminated, new/explicit] - let expected = [ - info[2].clone(), - info[3].clone(), - info[0].clone(), - info[1].clone(), - ]; - assert_eq!( - sorted, expected, - "sorting zone bundles by time-then-cause failed" - ); - - let mut sorted = info.clone(); - sorted.sort_by(|lhs, rhs| cause_first.compare_bundles(lhs, rhs)); - // Low -> high priority - // [old/terminated, new/terminated, old/explicit, new/explicit] - let expected = [ - info[2].clone(), - info[0].clone(), - info[3].clone(), - info[1].clone(), - ]; - assert_eq!( - sorted, expected, - "sorting zone bundles by cause-then-time failed" - ); - } } #[cfg(all(target_os = "illumos", 
test))] @@ -2347,7 +1853,10 @@ mod illumos_tests { let new_context = CleanupContext { period: CleanupPeriod::new(ctx.context.period.as_duration() / 2) .unwrap(), - storage_limit: StorageLimit(ctx.context.storage_limit.as_u8() / 2), + storage_limit: StorageLimit::new( + ctx.context.storage_limit.as_u8() / 2, + ) + .unwrap(), priority: PriorityOrder::new( &ctx.context.priority.iter().copied().rev().collect::>(), ) @@ -2525,7 +2034,11 @@ mod illumos_tests { // First, reduce the storage limit, so that we only need to add a few // bundles. ctx.bundler - .update_cleanup_context(None, Some(StorageLimit(2)), None) + .update_cleanup_context( + None, + Some(StorageLimit::new(2).unwrap()), + None, + ) .await .context("failed to update cleanup context")?; diff --git a/sled-agent/tests/integration_tests/commands.rs b/sled-agent/tests/integration_tests/commands.rs index 26c82e488e..8a5b355770 100644 --- a/sled-agent/tests/integration_tests/commands.rs +++ b/sled-agent/tests/integration_tests/commands.rs @@ -13,9 +13,7 @@ use expectorate::assert_contents; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; -use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use omicron_test_utils::dev::test_cmds::EXIT_USAGE; -use openapiv3::OpenAPI; use subprocess::Exec; /// name of the "sled-agent-sim" executable @@ -56,26 +54,3 @@ fn test_sled_agent_no_args() { assert_contents("tests/output/cmd-sled-agent-noargs-stdout", &stdout_text); assert_contents("tests/output/cmd-sled-agent-noargs-stderr", &stderr_text); } - -#[test] -fn test_sled_agent_openapi_sled() { - let exec = Exec::cmd(path_to_sled_agent()).arg("openapi").arg("sled"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - assert_contents( - "tests/output/cmd-sled-agent-openapi-sled-stderr", - &stderr_text, - ); - - let spec: OpenAPI = 
serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - - // Check for lint errors. - let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. - assert_contents("../openapi/sled-agent.json", &stdout_text); -} diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 6fa91e0e4a..9b69975054 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -126,6 +126,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { destination: "10.1.9.32/16".parse().unwrap(), nexthop: "10.1.9.32".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["2001:db8::/96".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -153,6 +154,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { vlan_id: None, }], autoneg: true, + lldp: None, }], bgp: vec![BgpConfig { asn: 20000, diff --git a/sled-agent/tests/output/cmd-sled-agent-noargs-stderr b/sled-agent/tests/output/cmd-sled-agent-noargs-stderr index ee397c0ef7..409d1ec0d8 100644 --- a/sled-agent/tests/output/cmd-sled-agent-noargs-stderr +++ b/sled-agent/tests/output/cmd-sled-agent-noargs-stderr @@ -3,9 +3,8 @@ See README.adoc for more information Usage: sled-agent Commands: - openapi Generates the OpenAPI specification - run Runs the Sled Agent server - help Print this message or the help of the given subcommand(s) + run Runs the Sled Agent server + help Print this message or the help of the given subcommand(s) Options: -h, --help Print help diff --git a/sled-agent/tests/output/cmd-sled-agent-openapi-bootstrap-stderr b/sled-agent/tests/output/cmd-sled-agent-openapi-bootstrap-stderr deleted 
file mode 100644 index e69de29bb2..0000000000 diff --git a/sled-agent/tests/output/cmd-sled-agent-openapi-sled-stderr b/sled-agent/tests/output/cmd-sled-agent-openapi-sled-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json index efd1a3c167..2da814042d 100644 --- a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -128,7 +128,8 @@ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -142,14 +143,16 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null }, { "routes": [ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -163,7 +166,8 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null } ], "bgp": [], diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml index a9ed8fcb22..e01d40db28 100644 --- a/sled-agent/types/Cargo.toml +++ b/sled-agent/types/Cargo.toml @@ -9,8 +9,10 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true bootstore.workspace = true camino.workspace = true +chrono.workspace = true nexus-sled-agent-shared.workspace = true # Note: we're trying to avoid a dependency from sled-agent-types to nexus-types # because the correct direction of dependency is unclear. 
If there are types @@ -19,11 +21,13 @@ omicron-common.workspace = true omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true oxnet.workspace = true +propolis-client.workspace = true schemars.workspace = true serde.workspace = true +serde_human_bytes.workspace = true serde_json.workspace = true +sha3.workspace = true sled-hardware-types.workspace = true -sled-storage.workspace = true slog.workspace = true thiserror.workspace = true toml.workspace = true diff --git a/sled-agent/types/src/boot_disk.rs b/sled-agent/types/src/boot_disk.rs new file mode 100644 index 0000000000..30129d6c7e --- /dev/null +++ b/sled-agent/types/src/boot_disk.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common types related to boot disks. + +use omicron_common::disk::M2Slot; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskPathParams { + pub boot_disk: M2Slot, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskUpdatePathParams { + pub boot_disk: M2Slot, + pub update_id: Uuid, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootDiskWriteStartQueryParams { + pub update_id: Uuid, + // TODO do we already have sha2-256 hashes of the OS images, and if so + // should we use that instead? Another option is to use the external API + // `Digest` type, although it predates `serde_human_bytes` so just stores + // the hash as a `String`. + #[serde(with = "serde_human_bytes::hex_array")] + #[schemars(schema_with = "omicron_common::hex_schema::<32>")] + pub sha3_256_digest: [u8; 32], +} + +/// Current progress of an OS image being written to disk. 
+#[derive( + Debug, Clone, Copy, PartialEq, Eq, Deserialize, JsonSchema, Serialize, +)] +#[serde(tag = "state", rename_all = "snake_case")] +pub enum BootDiskOsWriteProgress { + /// The image is still being uploaded. + ReceivingUploadedImage { bytes_received: usize }, + /// The image is being written to disk. + WritingImageToDisk { bytes_written: usize }, + /// The image is being read back from disk for validation. + ValidatingWrittenImage { bytes_read: usize }, +} + +/// Status of an update to a boot disk OS. +#[derive(Debug, Clone, Deserialize, JsonSchema, Serialize)] +#[serde(tag = "status", rename_all = "snake_case")] +pub enum BootDiskOsWriteStatus { + /// No update has been started for this disk, or any previously-started + /// update has completed and had its status cleared. + NoUpdateStarted, + /// An update is currently running. + InProgress { update_id: Uuid, progress: BootDiskOsWriteProgress }, + /// The most recent update completed successfully. + Complete { update_id: Uuid }, + /// The most recent update failed. + Failed { update_id: Uuid, message: String }, +} diff --git a/sled-agent/types/src/bootstore.rs b/sled-agent/types/src/bootstore.rs new file mode 100644 index 0000000000..9c9e8257a4 --- /dev/null +++ b/sled-agent/types/src/bootstore.rs @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::{collections::BTreeSet, net::SocketAddrV6}; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sled_hardware_types::Baseboard; + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BootstoreStatus { + pub fsm_ledger_generation: u64, + pub network_config_ledger_generation: Option, + pub fsm_state: String, + pub peers: BTreeSet, + pub established_connections: Vec, + pub accepted_connections: BTreeSet, + pub negotiating_connections: BTreeSet, +} + +impl From for BootstoreStatus { + fn from(value: bootstore::schemes::v0::Status) -> Self { + BootstoreStatus { + fsm_ledger_generation: value.fsm_ledger_generation, + network_config_ledger_generation: value + .network_config_ledger_generation, + fsm_state: value.fsm_state.to_string(), + peers: value.peers, + established_connections: value + .connections + .into_iter() + .map(EstablishedConnection::from) + .collect(), + accepted_connections: value.accepted_connections, + negotiating_connections: value.negotiating_connections, + } + } +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct EstablishedConnection { + pub baseboard: Baseboard, + pub addr: SocketAddrV6, +} + +impl From<(Baseboard, SocketAddrV6)> for EstablishedConnection { + fn from(value: (Baseboard, SocketAddrV6)) -> Self { + EstablishedConnection { baseboard: value.0, addr: value.1 } + } +} diff --git a/sled-agent/types/src/disk.rs b/sled-agent/types/src/disk.rs new file mode 100644 index 0000000000..332f1a0c5c --- /dev/null +++ b/sled-agent/types/src/disk.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_common::api::internal::nexus::DiskRuntimeState; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Sent from to a sled agent to establish the runtime state of a Disk +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct DiskEnsureBody { + /// Last runtime state of the Disk known to Nexus (used if the agent has + /// never seen this Disk before). + pub initial_runtime: DiskRuntimeState, + /// requested runtime state of the Disk + pub target: DiskStateRequested, +} + +/// Used to request a Disk state change +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] +#[serde(rename_all = "lowercase", tag = "state", content = "instance")] +pub enum DiskStateRequested { + Detached, + Attached(Uuid), + Destroyed, + Faulted, +} + +impl DiskStateRequested { + /// Returns whether the requested state is attached to an Instance or not. + pub fn is_attached(&self) -> bool { + match self { + DiskStateRequested::Detached => false, + DiskStateRequested::Destroyed => false, + DiskStateRequested::Faulted => false, + + DiskStateRequested::Attached(_) => true, + } + } +} diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs index dc93aa1300..755033dc23 100644 --- a/sled-agent/types/src/early_networking.rs +++ b/sled-agent/types/src/early_networking.rs @@ -299,6 +299,7 @@ pub mod back_compat { uplink_port_fec: v1.uplink_port_fec, bgp_peers: v1.bgp_peers.clone(), autoneg: v1.autoneg, + lldp: None, } } } @@ -322,6 +323,8 @@ pub mod back_compat { pub uplink_cidr: Ipv4Net, /// VLAN id to use for uplink pub uplink_vid: Option, + /// Local preference + pub local_pref: Option, } impl From for PortConfigV2 { @@ -331,6 +334,7 @@ pub mod back_compat { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), vlan_id: value.uplink_vid, + local_pref: value.local_pref, }], addresses: vec![UplinkAddressConfig { address: value.uplink_cidr.into(), @@ -342,6 +346,7 
@@ pub mod back_compat { uplink_port_fec: value.uplink_port_fec, bgp_peers: vec![], autoneg: false, + lldp: None, } } } @@ -472,6 +477,7 @@ mod tests { uplink_port_fec: PortFec::None, uplink_cidr: "192.168.0.1/16".parse().unwrap(), uplink_vid: None, + local_pref: None, }], }), }; @@ -501,6 +507,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), vlan_id: None, + local_pref: None, }], addresses: vec![UplinkAddressConfig { address: uplink.uplink_cidr.into(), @@ -512,6 +519,7 @@ mod tests { uplink_port_fec: uplink.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], @@ -545,6 +553,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "192.168.0.2".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["192.168.0.1/16".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -592,6 +601,7 @@ mod tests { uplink_port_fec: port.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], diff --git a/sled-agent/types/src/firewall_rules.rs b/sled-agent/types/src/firewall_rules.rs new file mode 100644 index 0000000000..d7cb22f976 --- /dev/null +++ b/sled-agent/types/src/firewall_rules.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_common::api::{ + external, internal::shared::ResolvedVpcFirewallRule, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Update firewall rules for a VPC +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct VpcFirewallRulesEnsureBody { + pub vni: external::Vni, + pub rules: Vec, +} diff --git a/sled-agent/types/src/instance.rs b/sled-agent/types/src/instance.rs new file mode 100644 index 0000000000..0753e273dc --- /dev/null +++ b/sled-agent/types/src/instance.rs @@ -0,0 +1,172 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common instance-related types. + +use std::{ + fmt, + net::{IpAddr, SocketAddr}, +}; + +use omicron_common::api::internal::{ + nexus::{ + InstanceProperties, InstanceRuntimeState, SledInstanceState, + VmmRuntimeState, + }, + shared::{ + DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, SourceNatConfig, + }, +}; +use omicron_uuid_kinds::PropolisUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// The body of a request to ensure that a instance and VMM are known to a sled +/// agent. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstanceEnsureBody { + /// A description of the instance's virtual hardware and the initial runtime + /// state this sled agent should store for this incarnation of the instance. + pub hardware: InstanceHardware, + + /// The instance runtime state for the instance being registered. + pub instance_runtime: InstanceRuntimeState, + + /// The initial VMM runtime state for the VMM being registered. + pub vmm_runtime: VmmRuntimeState, + + /// The ID of the VMM being registered. This may not be the active VMM ID in + /// the instance runtime state (e.g. if the new VMM is going to be a + /// migration target). 
+ pub propolis_id: PropolisUuid, + + /// The address at which this VMM should serve a Propolis server API. + pub propolis_addr: SocketAddr, + + /// Metadata used to track instance statistics. + pub metadata: InstanceMetadata, +} + +/// Describes the instance hardware. +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct InstanceHardware { + pub properties: InstanceProperties, + pub nics: Vec, + pub source_nat: SourceNatConfig, + /// Zero or more external IP addresses (either floating or ephemeral), + /// provided to an instance to allow inbound connectivity. + pub ephemeral_ip: Option, + pub floating_ips: Vec, + pub firewall_rules: Vec, + pub dhcp_config: DhcpConfig, + // TODO: replace `propolis_client::*` with locally-modeled request type + pub disks: Vec, + pub cloud_init_bytes: Option, +} + +/// Metadata used to track statistics about an instance. +/// +// NOTE: The instance ID is not here, since it's already provided in other +// pieces of the instance-related requests. It is pulled from there when +// publishing metrics for the instance. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct InstanceMetadata { + pub silo_id: Uuid, + pub project_id: Uuid, +} + +/// The body of a request to move a previously-ensured instance into a specific +/// runtime state. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstancePutStateBody { + /// The state into which the instance should be driven. + pub state: InstanceStateRequested, +} + +/// The response sent from a request to move an instance into a specific runtime +/// state. +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct InstancePutStateResponse { + /// The current runtime state of the instance after handling the request to + /// change its state. If the instance's state did not change, this field is + /// `None`. + pub updated_runtime: Option, +} + +/// Requestable running state of an Instance. 
+/// +/// A subset of [`omicron_common::api::external::InstanceState`]. +#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceStateRequested { + /// Run this instance by migrating in from a previous running incarnation of + /// the instance. + MigrationTarget(InstanceMigrationTargetParams), + /// Start the instance if it is not already running. + Running, + /// Stop the instance. + Stopped, + /// Immediately reset the instance, as though it had stopped and immediately + /// began to run again. + Reboot, +} + +impl fmt::Display for InstanceStateRequested { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.label()) + } +} + +impl InstanceStateRequested { + fn label(&self) -> &str { + match self { + InstanceStateRequested::MigrationTarget(_) => "migrating in", + InstanceStateRequested::Running => "running", + InstanceStateRequested::Stopped => "stopped", + InstanceStateRequested::Reboot => "reboot", + } + } + + /// Returns true if the state represents a stopped Instance. + pub fn is_stopped(&self) -> bool { + match self { + InstanceStateRequested::MigrationTarget(_) => false, + InstanceStateRequested::Running => false, + InstanceStateRequested::Stopped => true, + InstanceStateRequested::Reboot => false, + } + } +} + +/// The response sent from a request to unregister an instance. +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct InstanceUnregisterResponse { + /// The current state of the instance after handling the request to + /// unregister it. If the instance's state did not change, this field is + /// `None`. + pub updated_runtime: Option, +} + +/// Parameters used when directing Propolis to initialize itself via live +/// migration. +#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceMigrationTargetParams { + /// The Propolis ID of the migration source. 
+ pub src_propolis_id: Uuid, + + /// The address of the Propolis server that will serve as the migration + /// source. + pub src_propolis_addr: SocketAddr, +} + +/// Used to dynamically update external IPs attached to an instance. +#[derive( + Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, +)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceExternalIpBody { + Ephemeral(IpAddr), + Floating(IpAddr), +} diff --git a/sled-agent/types/src/lib.rs b/sled-agent/types/src/lib.rs index 12e8f049f9..47e1535ade 100644 --- a/sled-agent/types/src/lib.rs +++ b/sled-agent/types/src/lib.rs @@ -4,6 +4,14 @@ //! Common types for sled-agent. +pub mod boot_disk; +pub mod bootstore; +pub mod disk; pub mod early_networking; +pub mod firewall_rules; +pub mod instance; pub mod rack_init; pub mod rack_ops; +pub mod sled; +pub mod time_sync; +pub mod zone_bundle; diff --git a/sled-agent/types/src/sled.rs b/sled-agent/types/src/sled.rs new file mode 100644 index 0000000000..37a064bdc9 --- /dev/null +++ b/sled-agent/types/src/sled.rs @@ -0,0 +1,219 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types related to operating on sleds. + +use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; + +use async_trait::async_trait; +use omicron_common::{ + address::{self, Ipv6Subnet, SLED_PREFIX}, + ledger::Ledgerable, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sha3::{Digest, Sha3_256}; +use uuid::Uuid; + +/// A representation of a Baseboard ID as used in the inventory subsystem +/// This type is essentially the same as a `Baseboard` except it doesn't have a +/// revision or HW type (Gimlet, PC, Unknown). 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct BaseboardId { + /// Oxide Part Number + pub part_number: String, + /// Serial number (unique for a given part number) + pub serial_number: String, +} + +/// A request to Add a given sled after rack initialization has occurred +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct AddSledRequest { + pub sled_id: BaseboardId, + pub start_request: StartSledAgentRequest, +} + +/// Configuration information for launching a Sled Agent. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequest { + /// The current generation number of data as stored in CRDB. + /// + /// The initial generation is set during RSS time and then only mutated + /// by Nexus. For now, we don't actually anticipate mutating this data, + /// but we leave open the possiblity. + pub generation: u64, + + // Which version of the data structure do we have. This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: StartSledAgentRequestBody, +} + +impl StartSledAgentRequest { + pub fn sled_address(&self) -> SocketAddrV6 { + address::get_sled_address(self.body.subnet) + } + + pub fn switch_zone_ip(&self) -> Ipv6Addr { + address::get_switch_zone_address(self.body.subnet) + } + + /// Compute the sha3_256 digest of `self.rack_id` to use as a `salt` + /// for disk encryption. We don't want to include other values that are + /// consistent across sleds as it would prevent us from moving drives + /// between sleds. 
+ pub fn hash_rack_id(&self) -> [u8; 32] { + // We know the unwrap succeeds as a Sha3_256 digest is 32 bytes + Sha3_256::digest(self.body.rack_id.as_bytes()) + .as_slice() + .try_into() + .unwrap() + } +} + +#[async_trait] +impl Ledgerable for StartSledAgentRequest { + fn is_newer_than(&self, other: &Self) -> bool { + self.generation > other.generation + } + + fn generation_bump(&mut self) { + // DO NOTHING! + // + // Generation bumps must only ever come from nexus and will be encoded + // in the struct itself + } + + // Attempt to deserialize the v1 or v0 version and return + // the v1 version. + fn deserialize( + s: &str, + ) -> Result { + // Try to deserialize the latest version of the data structure (v1). If + // that succeeds we are done. + if let Ok(val) = serde_json::from_str::(s) { + return Ok(val); + } + + // We don't have the latest version. Try to deserialize v0 and then + // convert it to the latest version. + let v0 = serde_json::from_str::(s)?.request; + Ok(v0.into()) + } +} + +/// This is the actual app level data of `StartSledAgentRequest` +/// +/// We nest it below the "header" of `generation` and `schema_version` so that +/// we can perform partial deserialization of `EarlyNetworkConfig` to only read +/// the header and defer deserialization of the body once we know the schema +/// version. This is possible via the use of [`serde_json::value::RawValue`] in +/// future (post-v1) deserialization paths. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequestBody { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + + /// Use trust quorum for key generation + pub use_trust_quorum: bool, + + /// Is this node an LRTQ learner node? + /// + /// We only put the node into learner mode if `use_trust_quorum` is also + /// true. + pub is_lrtq_learner: bool, + + /// Portion of the IP space to be managed by the Sled Agent. 
+ pub subnet: Ipv6Subnet, +} + +/// The version of `StartSledAgentRequest` we originally shipped with. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +pub struct StartSledAgentRequestV0 { + /// Uuid of the Sled Agent to be created. + pub id: Uuid, + + /// Uuid of the rack to which this sled agent belongs. + pub rack_id: Uuid, + + /// The external NTP servers to use + pub ntp_servers: Vec, + + /// The external DNS servers to use + pub dns_servers: Vec, + + /// Use trust quorum for key generation + pub use_trust_quorum: bool, + + // Note: The order of these fields is load bearing, because we serialize + // `SledAgentRequest`s as toml. `subnet` serializes as a TOML table, so it + // must come after non-table fields. + /// Portion of the IP space to be managed by the Sled Agent. + pub subnet: Ipv6Subnet, +} + +impl From for StartSledAgentRequest { + fn from(v0: StartSledAgentRequestV0) -> Self { + StartSledAgentRequest { + generation: 0, + schema_version: 1, + body: StartSledAgentRequestBody { + id: v0.id, + rack_id: v0.rack_id, + use_trust_quorum: v0.use_trust_quorum, + is_lrtq_learner: false, + subnet: v0.subnet, + }, + } + } +} + +// A wrapper around StartSledAgentRequestV0 that was used +// for the ledger format. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)] +struct PersistentSledAgentRequest { + request: StartSledAgentRequestV0, +} + +#[cfg(test)] +mod tests { + use std::net::Ipv6Addr; + + use super::*; + + #[test] + fn serialize_start_sled_agent_v0_deserialize_v1() { + let v0 = PersistentSledAgentRequest { + request: StartSledAgentRequestV0 { + id: Uuid::new_v4(), + rack_id: Uuid::new_v4(), + ntp_servers: vec![String::from("test.pool.example.com")], + dns_servers: vec!["1.1.1.1".parse().unwrap()], + use_trust_quorum: false, + subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST), + }, + }; + let serialized = serde_json::to_string(&v0).unwrap(); + let expected = StartSledAgentRequest { + generation: 0, + schema_version: 1, + body: StartSledAgentRequestBody { + id: v0.request.id, + rack_id: v0.request.rack_id, + use_trust_quorum: v0.request.use_trust_quorum, + is_lrtq_learner: false, + subnet: v0.request.subnet, + }, + }; + + let actual: StartSledAgentRequest = + Ledgerable::deserialize(&serialized).unwrap(); + assert_eq!(expected, actual); + } +} diff --git a/sled-agent/types/src/time_sync.rs b/sled-agent/types/src/time_sync.rs new file mode 100644 index 0000000000..7ac9ded636 --- /dev/null +++ b/sled-agent/types/src/time_sync.rs @@ -0,0 +1,30 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::IpAddr; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct TimeSync { + /// The synchronization state of the sled, true when the system clock + /// and the NTP clock are in sync (to within a small window). + pub sync: bool, + /// The NTP reference ID. + pub ref_id: u32, + /// The NTP reference IP address. + pub ip_addr: IpAddr, + /// The NTP stratum (our upstream's stratum plus one). 
+ pub stratum: u8, + /// The NTP reference time (i.e. what chrony thinks the current time is, not + /// necessarily the current system time). + pub ref_time: f64, + // This could be f32, but there is a problem with progenitor/typify + // where, although the f32 correctly becomes "float" (and not "double") in + // the API spec, that "float" gets converted back to f64 when generating + // the client. + /// The current offset between the NTP clock and system clock. + pub correction: f64, +} diff --git a/sled-agent/types/src/zone_bundle.rs b/sled-agent/types/src/zone_bundle.rs new file mode 100644 index 0000000000..f7a388771d --- /dev/null +++ b/sled-agent/types/src/zone_bundle.rs @@ -0,0 +1,529 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types related to zone bundles. + +use std::{cmp::Ordering, collections::HashSet, time::Duration}; + +use camino::Utf8PathBuf; +use chrono::{DateTime, Utc}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use uuid::Uuid; + +/// An identifier for a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleId { + /// The name of the zone this bundle is derived from. + pub zone_name: String, + /// The ID for this bundle itself. + pub bundle_id: Uuid, +} + +/// The reason or cause for a zone bundle, i.e., why it was created. +// +// NOTE: The ordering of the enum variants is important, and should not be +// changed without careful consideration. +// +// The ordering is used when deciding which bundles to remove automatically. In +// addition to time, the cause is used to sort bundles, so changing the variant +// order will change that priority. 
+#[derive( + Clone, + Copy, + Debug, + Default, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +#[serde(rename_all = "snake_case")] +#[non_exhaustive] +pub enum ZoneBundleCause { + /// Some other, unspecified reason. + #[default] + Other, + /// A zone bundle taken when a sled agent finds a zone that it does not + /// expect to be running. + UnexpectedZone, + /// An instance zone was terminated. + TerminatedInstance, + /// Generated in response to an explicit request to the sled agent. + ExplicitRequest, +} + +/// Metadata about a zone bundle. +#[derive( + Clone, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct ZoneBundleMetadata { + /// Identifier for this zone bundle + pub id: ZoneBundleId, + /// The time at which this zone bundle was created. + pub time_created: DateTime, + /// A version number for this zone bundle. + pub version: u8, + /// The reason or cause a bundle was created. + pub cause: ZoneBundleCause, +} + +impl ZoneBundleMetadata { + pub const VERSION: u8 = 0; + + /// Create a new set of metadata for the provided zone. + pub fn new(zone_name: &str, cause: ZoneBundleCause) -> Self { + Self { + id: ZoneBundleId { + zone_name: zone_name.to_string(), + bundle_id: Uuid::new_v4(), + }, + time_created: Utc::now(), + version: Self::VERSION, + cause, + } + } +} + +/// A dimension along with bundles can be sorted, to determine priority. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + Eq, + Hash, + JsonSchema, + Serialize, + Ord, + PartialEq, + PartialOrd, +)] +#[serde(rename_all = "snake_case")] +pub enum PriorityDimension { + /// Sorting by time, with older bundles with lower priority. + Time, + /// Sorting by the cause for creating the bundle. + Cause, + // TODO-completeness: Support zone or zone type (e.g., service vs instance)? +} + +/// The priority order for bundles during cleanup. 
+/// +/// Bundles are sorted along the dimensions in [`PriorityDimension`], with each +/// dimension appearing exactly once. During cleanup, lesser-priority bundles +/// are pruned first, to maintain the dataset quota. Note that bundles are +/// sorted by each dimension in the order in which they appear, with each +/// dimension having higher priority than the next. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct PriorityOrder([PriorityDimension; PriorityOrder::EXPECTED_SIZE]); + +impl std::ops::Deref for PriorityOrder { + type Target = [PriorityDimension; PriorityOrder::EXPECTED_SIZE]; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Default for PriorityOrder { + fn default() -> Self { + Self::DEFAULT + } +} + +impl PriorityOrder { + // NOTE: Must match the number of variants in `PriorityDimension`. + const EXPECTED_SIZE: usize = 2; + const DEFAULT: Self = + Self([PriorityDimension::Cause, PriorityDimension::Time]); + + /// Construct a new priority order. + /// + /// This requires that each dimension appear exactly once. + pub fn new( + dims: &[PriorityDimension], + ) -> Result { + if dims.len() != Self::EXPECTED_SIZE { + return Err(PriorityOrderCreateError::WrongDimensionCount( + dims.len(), + )); + } + let mut seen = HashSet::new(); + for dim in dims.iter() { + if !seen.insert(dim) { + return Err(PriorityOrderCreateError::DuplicateFound(*dim)); + } + } + Ok(Self(dims.try_into().unwrap())) + } + + /// Get the priority order as a slice. + pub fn as_slice(&self) -> &[PriorityDimension] { + &self.0 + } + + /// Order zone bundle info according to the contained priority. + /// + /// We sort the info by each dimension, in the order in which it appears. + /// That means earlier dimensions have higher priority than later ones. 
+ pub fn compare_bundles( + &self, + lhs: &ZoneBundleInfo, + rhs: &ZoneBundleInfo, + ) -> Ordering { + for dim in self.0.iter() { + let ord = match dim { + PriorityDimension::Cause => { + lhs.metadata.cause.cmp(&rhs.metadata.cause) + } + PriorityDimension::Time => { + lhs.metadata.time_created.cmp(&rhs.metadata.time_created) + } + }; + if matches!(ord, Ordering::Equal) { + continue; + } + return ord; + } + Ordering::Equal + } +} + +/// A period on which bundles are automatically cleaned up. +#[derive( + Clone, Copy, Deserialize, JsonSchema, PartialEq, PartialOrd, Serialize, +)] +pub struct CleanupPeriod(Duration); + +impl Default for CleanupPeriod { + fn default() -> Self { + Self(Duration::from_secs(600)) + } +} + +impl CleanupPeriod { + /// The minimum supported cleanup period. + pub const MIN: Self = Self(Duration::from_secs(60)); + + /// The maximum supported cleanup period. + pub const MAX: Self = Self(Duration::from_secs(60 * 60 * 24)); + + /// Construct a new cleanup period, checking that it's valid. + pub fn new(duration: Duration) -> Result { + if duration >= Self::MIN.as_duration() + && duration <= Self::MAX.as_duration() + { + Ok(Self(duration)) + } else { + Err(CleanupPeriodCreateError::OutOfBounds(duration)) + } + } + + /// Return the period as a duration. + pub const fn as_duration(&self) -> Duration { + self.0 + } +} + +impl TryFrom for CleanupPeriod { + type Error = CleanupPeriodCreateError; + + fn try_from(duration: Duration) -> Result { + Self::new(duration) + } +} + +impl std::fmt::Debug for CleanupPeriod { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct ZoneBundleInfo { + /// The raw metadata for the bundle + pub metadata: ZoneBundleMetadata, + /// The full path to the bundle + pub path: Utf8PathBuf, + /// The number of bytes consumed on disk by the bundle + pub bytes: u64, +} + +/// The portion of a debug dataset used for zone bundles. 
+#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct BundleUtilization { + /// The total dataset quota, in bytes. + pub dataset_quota: u64, + /// The total number of bytes available for zone bundles. + /// + /// This is `dataset_quota` multiplied by the context's storage limit. + pub bytes_available: u64, + /// Total bundle usage, in bytes. + pub bytes_used: u64, +} + +/// Context provided for the zone bundle cleanup task. +#[derive( + Clone, Copy, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize, +)] +pub struct CleanupContext { + /// The period on which automatic checks and cleanup is performed. + pub period: CleanupPeriod, + /// The limit on the dataset quota available for zone bundles. + pub storage_limit: StorageLimit, + /// The priority ordering for keeping old bundles. + pub priority: PriorityOrder, +} + +/// The count of bundles / bytes removed during a cleanup operation. +#[derive(Clone, Copy, Debug, Default, Deserialize, JsonSchema, Serialize)] +pub struct CleanupCount { + /// The number of bundles removed. + pub bundles: u64, + /// The number of bytes removed. + pub bytes: u64, +} + +/// The limit on space allowed for zone bundles, as a percentage of the overall +/// dataset's quota. +#[derive( + Clone, + Copy, + Debug, + Deserialize, + JsonSchema, + PartialEq, + PartialOrd, + Serialize, +)] +pub struct StorageLimit(u8); + +impl std::fmt::Display for StorageLimit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}%", self.as_u8()) + } +} + +impl Default for StorageLimit { + fn default() -> Self { + StorageLimit(25) + } +} + +impl StorageLimit { + /// Minimum percentage of dataset quota supported. + pub const MIN: Self = Self(0); + + /// Maximum percentage of dataset quota supported. + pub const MAX: Self = Self(50); + + /// Construct a new limit allowed for zone bundles. + /// + /// This should be expressed as a percentage, in the range (Self::MIN, + /// Self::MAX]. 
+ pub const fn new(percentage: u8) -> Result { + if percentage > Self::MIN.0 && percentage <= Self::MAX.0 { + Ok(Self(percentage)) + } else { + Err(StorageLimitCreateError::OutOfBounds(percentage)) + } + } + + /// Return the contained quota percentage. + pub const fn as_u8(&self) -> u8 { + self.0 + } + + // Compute the number of bytes available from a dataset quota, in bytes. + pub const fn bytes_available(&self, dataset_quota: u64) -> u64 { + (dataset_quota * self.as_u8() as u64) / 100 + } +} + +#[derive(Debug, Error)] +pub enum PriorityOrderCreateError { + #[error("expected exactly {n} dimensions, found {0}", n = PriorityOrder::EXPECTED_SIZE)] + WrongDimensionCount(usize), + #[error("duplicate element found in priority ordering: {0:?}")] + DuplicateFound(PriorityDimension), +} + +#[derive(Debug, Error)] +pub enum CleanupPeriodCreateError { + #[error( + "invalid cleanup period ({0:?}): must be \ + between {min:?} and {max:?}, inclusive", + min = CleanupPeriod::MIN, + max = CleanupPeriod::MAX, + )] + OutOfBounds(Duration), +} + +#[derive(Debug, Error)] +pub enum StorageLimitCreateError { + #[error("invalid storage limit ({0}): must be expressed as a percentage in ({min}, {max}]", + min = StorageLimit::MIN.0, + max = StorageLimit::MAX.0, + )] + OutOfBounds(u8), +} + +#[cfg(test)] +mod tests { + use chrono::TimeZone; + + use super::*; + + #[test] + fn test_sort_zone_bundle_cause() { + use ZoneBundleCause::*; + let mut original = + [ExplicitRequest, Other, TerminatedInstance, UnexpectedZone]; + let expected = + [Other, UnexpectedZone, TerminatedInstance, ExplicitRequest]; + original.sort(); + assert_eq!(original, expected); + } + + #[test] + fn test_priority_dimension() { + assert!(PriorityOrder::new(&[]).is_err()); + assert!(PriorityOrder::new(&[PriorityDimension::Cause]).is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Cause + ]) + .is_err()); + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + 
PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_err()); + + assert!(PriorityOrder::new(&[ + PriorityDimension::Cause, + PriorityDimension::Time + ]) + .is_ok()); + assert_eq!( + PriorityOrder::new(PriorityOrder::default().as_slice()).unwrap(), + PriorityOrder::default() + ); + } + + #[test] + fn test_storage_limit_bytes_available() { + let pct = StorageLimit(1); + assert_eq!(pct.bytes_available(100), 1); + assert_eq!(pct.bytes_available(1000), 10); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(100), 100); + assert_eq!(pct.bytes_available(1000), 1000); + + let pct = StorageLimit(100); + assert_eq!(pct.bytes_available(99), 99); + + let pct = StorageLimit(99); + assert_eq!(pct.bytes_available(1), 0); + + // Test non-power of 10. + let pct = StorageLimit(25); + assert_eq!(pct.bytes_available(32768), 8192); + } + + #[test] + fn test_compare_bundles() { + use PriorityDimension::*; + let time_first = PriorityOrder([Time, Cause]); + let cause_first = PriorityOrder([Cause, Time]); + + fn make_info( + year: i32, + month: u32, + day: u32, + cause: ZoneBundleCause, + ) -> ZoneBundleInfo { + ZoneBundleInfo { + metadata: ZoneBundleMetadata { + id: ZoneBundleId { + zone_name: String::from("oxz_whatever"), + bundle_id: uuid::Uuid::new_v4(), + }, + time_created: Utc + .with_ymd_and_hms(year, month, day, 0, 0, 0) + .single() + .unwrap(), + cause, + version: 0, + }, + path: Utf8PathBuf::from("/some/path"), + bytes: 0, + } + } + + let info = [ + make_info(2020, 1, 2, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 2, ZoneBundleCause::ExplicitRequest), + make_info(2020, 1, 1, ZoneBundleCause::TerminatedInstance), + make_info(2020, 1, 1, ZoneBundleCause::ExplicitRequest), + ]; + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| time_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, old/explicit, new/terminated, new/explicit] + let expected = [ + info[2].clone(), + info[3].clone(), + info[0].clone(), 
+ info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by time-then-cause failed" + ); + + let mut sorted = info.clone(); + sorted.sort_by(|lhs, rhs| cause_first.compare_bundles(lhs, rhs)); + // Low -> high priority + // [old/terminated, new/terminated, old/explicit, new/explicit] + let expected = [ + info[2].clone(), + info[0].clone(), + info[3].clone(), + info[1].clone(), + ]; + assert_eq!( + sorted, expected, + "sorting zone bundles by cause-then-time failed" + ); + } +} diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index bc0cac2014..747ac77823 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -10,10 +10,7 @@ use crate::config::MountConfig; use crate::dataset::CONFIG_DATASET; use crate::disk::RawDisk; use crate::error::Error; -use crate::resources::{ - AllDisks, DatasetManagementStatus, DatasetsManagementResult, - DisksManagementResult, StorageResources, -}; +use crate::resources::{AllDisks, StorageResources}; use camino::Utf8PathBuf; use debug_ignore::DebugIgnore; use futures::future::FutureExt; @@ -21,7 +18,8 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DatasetConfig, DatasetName, DatasetsConfig, DiskIdentity, DiskVariant, + DatasetConfig, DatasetManagementStatus, DatasetName, DatasetsConfig, + DatasetsManagementResult, DiskIdentity, DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; @@ -1120,12 +1118,12 @@ impl StorageManager { mod tests { use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; - use crate::resources::DiskManagementError; use super::*; use camino_tempfile::tempdir_in; use omicron_common::api::external::Generation; use omicron_common::disk::DatasetKind; + use omicron_common::disk::DiskManagementError; use omicron_common::ledger; use 
omicron_test_utils::dev::test_setup_log; use sled_hardware::DiskFirmware; diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 31c4ddcae8..425aafb12d 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -14,12 +14,10 @@ use illumos_utils::zpool::{PathInPool, ZpoolName}; use key_manager::StorageKeyRequester; use omicron_common::api::external::Generation; use omicron_common::disk::{ - DatasetName, DiskIdentity, DiskVariant, OmicronPhysicalDiskConfig, + DiskIdentity, DiskManagementError, DiskManagementStatus, DiskVariant, + DisksManagementResult, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, }; -use omicron_uuid_kinds::ZpoolUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use sled_hardware::DiskFirmware; use slog::{info, o, warn, Logger}; use std::collections::BTreeMap; @@ -32,104 +30,6 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum DiskManagementError { - #[error("Disk requested by control plane, but not found on device")] - NotFound, - - #[error("Expected zpool UUID of {expected}, but saw {observed}")] - ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, - - #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] - KeyManager(String), - - #[error("Other error starting disk management: {0}")] - Other(String), -} - -impl DiskManagementError { - fn retryable(&self) -> bool { - match self { - DiskManagementError::KeyManager(_) => true, - _ => false, - } - } -} - -/// Identifies how a single dataset management operation may have succeeded or -/// failed. 
-#[derive(Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub struct DatasetManagementStatus { - pub dataset_name: DatasetName, - pub err: Option, -} - -/// The result from attempting to manage datasets. -#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] -pub struct DatasetsManagementResult { - pub status: Vec, -} - -impl DatasetsManagementResult { - pub fn has_error(&self) -> bool { - for status in &self.status { - if status.err.is_some() { - return true; - } - } - false - } -} - -/// Identifies how a single disk management operation may have succeeded or -/// failed. -#[derive(Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub struct DiskManagementStatus { - pub identity: DiskIdentity, - pub err: Option, -} - -/// The result from attempting to manage underlying disks. -/// -/// This is more complex than a simple "Error" type because it's possible -/// for some disks to be initialized correctly, while others can fail. -/// -/// This structure provides a mechanism for callers to learn about partial -/// failures, and handle them appropriately on a per-disk basis. 
-#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] -pub struct DisksManagementResult { - pub status: Vec, -} - -impl DisksManagementResult { - pub fn has_error(&self) -> bool { - for status in &self.status { - if status.err.is_some() { - return true; - } - } - false - } - - pub fn has_retryable_error(&self) -> bool { - for status in &self.status { - if let Some(err) = &status.err { - if err.retryable() { - return true; - } - } - } - false - } -} - // The Sled Agent is responsible for both observing disks and managing them at // the request of the broader control plane. This enum encompasses that duality, // by representing all disks that can exist, managed or not. diff --git a/smf/clickhouse-admin/config.toml b/smf/clickhouse-admin/config.toml new file mode 100644 index 0000000000..86ee2c5d4b --- /dev/null +++ b/smf/clickhouse-admin/config.toml @@ -0,0 +1,10 @@ +[dropshot] +# 1 MiB; we don't expect any requests of more than nominal size. 
+request_body_max_bytes = 1048576 + +[log] +# Show log messages of this level and more severe +level = "info" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/clickhouse-admin/manifest.xml b/smf/clickhouse-admin/manifest.xml new file mode 100644 index 0000000000..435f8a86ac --- /dev/null +++ b/smf/clickhouse-admin/manifest.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse/method_script.sh b/smf/clickhouse/method_script.sh index 224d759cf3..bb5dd960a1 100755 --- a/smf/clickhouse/method_script.sh +++ b/smf/clickhouse/method_script.sh @@ -10,136 +10,13 @@ LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" -# TEMPORARY: Racks will be set up with single node ClickHouse until -# Nexus provisions services so there is no divergence between racks -# https://github.com/oxidecomputer/omicron/issues/732 -single_node=true +args=( +"--log-file" "/var/tmp/clickhouse-server.log" +"--errorlog-file" "/var/tmp/clickhouse-server.errlog" +"--" +"--path" "${DATASTORE}" +"--listen_host" "$LISTEN_ADDR" +"--http_port" "$LISTEN_PORT" +) -command=() -# TODO((https://github.com/oxidecomputer/omicron/issues/4000)): Remove single node mode once all racks are running in replicated mode -if $single_node -then - command+=( - "/opt/oxide/clickhouse/clickhouse" "server" - "--log-file" "/var/tmp/clickhouse-server.log" - "--errorlog-file" "/var/tmp/clickhouse-server.errlog" - "--" - "--path" "${DATASTORE}" - "--listen_host" "$LISTEN_ADDR" - "--http_port" "$LISTEN_PORT" - ) -else - # Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. 
- CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse -H)" - - if [[ -z "$CH_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a nodes=($CH_ADDRS) - - for i in "${nodes[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - # Assign hostnames to replicas - REPLICA_HOST_01="${nodes[0]}" - REPLICA_HOST_02="${nodes[1]}" - - # Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. - K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" - - if [[ -z "$K_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a keepers=($K_ADDRS) - - for i in "${keepers[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - if [[ "${#keepers[@]}" != 3 ]] - then - printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Identify the node type this is as this will influence how the config is constructed - # TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much - # better ways to do this service discovery, but this works for now. - # The services contain the same IDs as the hostnames. 
- CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" - REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" - REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" - if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 1" - REPLICA_NUMBER="01" - elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 2" - REPLICA_NUMBER="02" - else - printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Setting environment variables this way is best practice, but has the downside of - # obscuring the field values to anyone ssh-ing into the zone. To mitigate this, - # we will be saving them to ${DATASTORE}/config_env_vars - export CH_LOG="${DATASTORE}/clickhouse-server.log" - export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" - export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} - export CH_LISTEN_ADDR=${LISTEN_ADDR} - export CH_LISTEN_PORT=${LISTEN_PORT} - export CH_DATASTORE=${DATASTORE} - export CH_TMP_PATH="${DATASTORE}/tmp/" - export CH_USER_FILES_PATH="${DATASTORE}/user_files/" - export CH_USER_LOCAL_DIR="${DATASTORE}/access/" - export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" - export CH_REPLICA_NUMBER=${REPLICA_NUMBER} - export CH_REPLICA_HOST_01=${REPLICA_HOST_01} - export CH_REPLICA_HOST_02=${REPLICA_HOST_02} - export CH_KEEPER_HOST_01="${keepers[0]}" - export CH_KEEPER_HOST_02="${keepers[1]}" - export CH_KEEPER_HOST_03="${keepers[2]}" - - content="CH_LOG="${CH_LOG}"\n\ - CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ - CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ - CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ - CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ - CH_DATASTORE="${CH_DATASTORE}"\n\ - CH_TMP_PATH="${CH_TMP_PATH}"\n\ - CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ - CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ - 
CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ - CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ - CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ - CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ - CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ - CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ - CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" - - echo $content >> "${DATASTORE}/config_env_vars" - - - # The clickhouse binary must be run from within the directory that contains it. - # Otherwise, it does not automatically detect the configuration files, nor does - # it append them when necessary - cd /opt/oxide/clickhouse/ - command+=("./clickhouse" "server") -fi - -exec "${command[@]}" & \ No newline at end of file +exec /opt/oxide/clickhouse/clickhouse server "${args[@]}" & \ No newline at end of file diff --git a/smf/clickhouse/config_replica.xml b/smf/clickhouse_server/config_replica.xml similarity index 100% rename from smf/clickhouse/config_replica.xml rename to smf/clickhouse_server/config_replica.xml diff --git a/smf/clickhouse_server/manifest.xml b/smf/clickhouse_server/manifest.xml new file mode 100644 index 0000000000..8ab4f78bcb --- /dev/null +++ b/smf/clickhouse_server/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse_server/method_script.sh b/smf/clickhouse_server/method_script.sh new file mode 100755 index 0000000000..a0d61072ac --- /dev/null +++ b/smf/clickhouse_server/method_script.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -x +set -o errexit +set -o pipefail + +. /lib/svc/share/smf_include.sh + +LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" +LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" +DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" + +# Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. 
+CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-server -H)" + +if [[ -z "$CH_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse server nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a nodes=($CH_ADDRS) + +for i in "${nodes[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +# Assign hostnames to replicas +REPLICA_HOST_01="${nodes[0]}" +REPLICA_HOST_02="${nodes[1]}" + +# Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. +K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" + +if [[ -z "$K_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a keepers=($K_ADDRS) + +for i in "${keepers[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +if [[ "${#keepers[@]}" != 3 ]] +then + printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Identify the node type this is as this will influence how the config is constructed +# TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much +# better ways to do this service discovery, but this works for now. +# The services contain the same IDs as the hostnames. 
+CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" +REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" +REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" +if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 1" + REPLICA_NUMBER="01" +elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 2" + REPLICA_NUMBER="02" +else + printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Setting environment variables this way is best practice, but has the downside of +# obscuring the field values to anyone ssh-ing into the zone. To mitigate this, +# we will be saving them to ${DATASTORE}/config_env_vars +export CH_LOG="${DATASTORE}/clickhouse-server.log" +export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" +export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} +export CH_LISTEN_ADDR=${LISTEN_ADDR} +export CH_LISTEN_PORT=${LISTEN_PORT} +export CH_DATASTORE=${DATASTORE} +export CH_TMP_PATH="${DATASTORE}/tmp/" +export CH_USER_FILES_PATH="${DATASTORE}/user_files/" +export CH_USER_LOCAL_DIR="${DATASTORE}/access/" +export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" +export CH_REPLICA_NUMBER=${REPLICA_NUMBER} +export CH_REPLICA_HOST_01=${REPLICA_HOST_01} +export CH_REPLICA_HOST_02=${REPLICA_HOST_02} +export CH_KEEPER_HOST_01="${keepers[0]}" +export CH_KEEPER_HOST_02="${keepers[1]}" +export CH_KEEPER_HOST_03="${keepers[2]}" + +content="CH_LOG="${CH_LOG}"\n\ +CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ +CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ +CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ +CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ +CH_DATASTORE="${CH_DATASTORE}"\n\ +CH_TMP_PATH="${CH_TMP_PATH}"\n\ +CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ +CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ +CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ 
+CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ +CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ +CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ +CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ +CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ +CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" + +echo $content >> "${DATASTORE}/config_env_vars" + + +# The clickhouse binary must be run from within the directory that contains it. +# Otherwise, it does not automatically detect the configuration files, nor does +# it append them when necessary +cd /opt/oxide/clickhouse_server/ + +exec ./clickhouse server & \ No newline at end of file diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index c502c20b1b..2e3a8fe578 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -65,6 +65,8 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 30a0243122..dbd61e953d 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -65,6 +65,8 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. 
diff --git a/smf/oximeter/config.toml b/smf/oximeter/replicated-cluster/config.toml similarity index 91% rename from smf/oximeter/config.toml rename to smf/oximeter/replicated-cluster/config.toml index ca14fe6ec8..f7958e5eb1 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/replicated-cluster/config.toml @@ -3,6 +3,7 @@ [db] batch_size = 1000 batch_interval = 5 # In seconds +replicated = true [log] level = "debug" diff --git a/smf/oximeter/single-node/config.toml b/smf/oximeter/single-node/config.toml new file mode 100644 index 0000000000..bc0418159c --- /dev/null +++ b/smf/oximeter/single-node/config.toml @@ -0,0 +1,12 @@ +# Example configuration file for running an oximeter collector server + +[db] +batch_size = 1000 +batch_interval = 5 # In seconds +replicated = false + +[log] +level = "debug" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 90f5339e84..a61ac81d91 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -118,6 +118,22 @@ switch = "switch0" # Neighbors we expect to peer with over BGP on this port. bgp_peers = [] +# LLDP settings for this port +#[rack_network_config.switch0.qsfp0.lldp] +#status = "Enabled" +# Optional Port ID, overriding default of qsfpX/0 +#port_id = "" +## Optional port description +#port_description = "uplink 0" +# Optional chassid ID, overriding the switch-level setting +#chassis_id = "" +# Optional system name, overriding the switch-level setting +#system_name = "" +# Optional system description, overriding the switch-level setting +#system_description = "" +# Optional management addresses to advertise, overriding switch-level setting +#management_addrs = [] + # An allowlist of source IPs that can make requests to user-facing services can # be specified here. It can be written as the string "any" ... 
[allowed_source_ips] diff --git a/sp-sim/examples/config.toml b/sp-sim/examples/config.toml index cf338ecf2e..f53ea7cfd8 100644 --- a/sp-sim/examples/config.toml +++ b/sp-sim/examples/config.toml @@ -24,6 +24,16 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33312" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "ff15:0:1de::2" bind_addrs = ["[::]:33320", "[::]:33321"] @@ -39,6 +49,17 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33322" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + + [log] # Show log messages of this level and more severe level = "debug" diff --git a/sp-sim/src/config.rs b/sp-sim/src/config.rs index b64953e5ed..d45e956dee 100644 --- a/sp-sim/src/config.rs +++ b/sp-sim/src/config.rs @@ -5,6 +5,7 @@ //! Interfaces for parsing configuration files and working with a simulated SP //! configuration +use crate::sensors; use dropshot::ConfigLogging; use gateway_messages::DeviceCapabilities; use gateway_messages::DevicePresence; @@ -59,6 +60,9 @@ pub struct SpComponentConfig { /// /// Only supported for components inside a [`GimletConfig`]. pub serial_console: Option, + + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub sensors: Vec, } /// Configuration of a simulated sidecar SP @@ -93,6 +97,16 @@ pub struct Config { pub log: ConfigLogging, } +/// Configuration for a component's sensor readings. 
+#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct SensorConfig { + #[serde(flatten)] + pub def: sensors::SensorDef, + + #[serde(flatten)] + pub state: sensors::SensorState, +} + impl Config { /// Load a `Config` from the given TOML file /// diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index e980a4b67d..70c2e72fcb 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -6,6 +6,7 @@ use crate::config::GimletConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -630,6 +631,7 @@ struct Handler { startup_options: StartupOptions, update_state: SimSpUpdate, reset_pending: Option, + sensors: Sensors, last_request_handled: Option, @@ -665,9 +667,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -1206,13 +1211,16 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { + let num_details = + self.sensors.num_component_details(&component).unwrap_or(0); debug!( - &self.log, "asked for component details (returning 0 details)"; + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_details ); - Ok(0) + Ok(num_details) } fn component_details( @@ -1220,9 +1228,20 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). 
- unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + unreachable!( + "this is a gimlet, so it should have no port status details" + ); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1445,9 +1464,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 0f340ed642..15f2034aa8 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; mod gimlet; mod helpers; +mod sensors; mod server; mod sidecar; mod update; diff --git a/sp-sim/src/sensors.rs b/sp-sim/src/sensors.rs new file mode 100644 index 0000000000..fc684af01b --- /dev/null +++ b/sp-sim/src/sensors.rs @@ -0,0 +1,218 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::config::SpComponentConfig; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::sp_impl::BoundsChecked; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_messages::Measurement; +use gateway_messages::SensorDataMissing; +use gateway_messages::SensorError; +use gateway_messages::SensorReading; +use gateway_messages::SensorRequest; +use gateway_messages::SensorRequestKind; +use gateway_messages::SensorResponse; +use gateway_messages::SpComponent; + +use std::collections::HashMap; + +pub(crate) struct Sensors { + by_component: HashMap>, + sensors: Vec, +} + +#[derive(Debug)] +struct Sensor { + def: SensorDef, + state: SensorState, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, PartialEq)] +pub struct SensorDef { + pub name: String, + pub kind: MeasurementKind, +} + +// TODO(eliza): note that currently, we just hardcode these in +// `MeasurementConfig`. Eventually, it would be neat to allow the sensor to be +// changed dynamically as part of a simulation. 
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] +pub struct SensorState { + #[serde(default)] + pub last_error: Option, + + #[serde(default)] + pub last_data: Option, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastError { + pub timestamp: u64, + pub value: SensorDataMissing, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastData { + pub timestamp: u64, + pub value: f32, +} + +impl SensorState { + fn last_reading(&self) -> SensorReading { + match self { + Self { last_data: Some(data), last_error: Some(error) } => { + if data.timestamp >= error.timestamp { + SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + } + } else { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + } + Self { last_data: Some(data), last_error: None } => SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + }, + Self { last_data: None, last_error: Some(error) } => { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + Self { last_data: None, last_error: None } => SensorReading { + value: Err(SensorDataMissing::DeviceNotPresent), + timestamp: 0, // TODO(eliza): what do? 
+ }, + } + } +} + +impl Sensors { + pub(crate) fn from_component_configs<'a>( + cfgs: impl IntoIterator, + ) -> Self { + let mut sensors = Vec::new(); + let mut by_component = HashMap::new(); + for cfg in cfgs { + if cfg.sensors.is_empty() { + continue; + } + if !cfg + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + panic!( + "invalid component config: a device with sensors should \ + have the `HAS_MEASUREMENT_CHANNELS` capability:{cfg:#?}" + ); + } + + let mut ids = Vec::with_capacity(cfg.sensors.len()); + for sensor in &cfg.sensors { + let sensor_id = sensors.len() as u32; + sensors.push(Sensor { + def: sensor.def.clone(), + state: sensor.state.clone(), + }); + ids.push(sensor_id) + } + + let component = SpComponent::try_from(cfg.id.as_str()).unwrap(); + let prev = by_component.insert(component, ids); + assert!(prev.is_none(), "component ID {component} already exists!"); + } + Self { sensors, by_component } + } + + fn sensor_for_component<'sensors>( + &'sensors self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option<&'sensors Sensor> { + let &id = self.by_component.get(component)?.get(index.0 as usize)?; + self.sensors.get(id as usize) + } + + pub(crate) fn num_component_details( + &self, + component: &SpComponent, + ) -> Option { + let len = self + .by_component + .get(component)? + .len() + .try_into() + .expect("why would you have more than `u32::MAX` sensors?"); + Some(len) + } + + /// This method returns an `Option` because the component's details might + /// be a port status rather than a measurement, if we eventually decide to + /// implement port statuses in the simulated sidecar... 
+ pub(crate) fn component_details( + &self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option { + let sensor = self.sensor_for_component(component, index)?; + let value = + sensor.state.last_reading().value.map_err(|err| match err { + SensorDataMissing::DeviceError => MeasurementError::DeviceError, + SensorDataMissing::DeviceNotPresent => { + MeasurementError::NotPresent + } + SensorDataMissing::DeviceOff => MeasurementError::DeviceOff, + SensorDataMissing::DeviceTimeout => { + MeasurementError::DeviceTimeout + } + SensorDataMissing::DeviceUnavailable => { + MeasurementError::DeviceUnavailable + } + }); + Some(ComponentDetails::Measurement(Measurement { + name: sensor.def.name.clone(), + kind: sensor.def.kind, + value, + })) + } + + pub(crate) fn read_sensor( + &self, + SensorRequest { id, kind }: SensorRequest, + ) -> Result { + let sensor = + self.sensors.get(id as usize).ok_or(SensorError::InvalidSensor)?; + match kind { + SensorRequestKind::LastReading => { + Ok(SensorResponse::LastReading(sensor.state.last_reading())) + } + SensorRequestKind::ErrorCount => { + let count = + // TODO(eliza): simulate more than one error... 
+ if sensor.state.last_error.is_some() { 1 } else { 0 }; + Ok(SensorResponse::ErrorCount(count)) + } + SensorRequestKind::LastData => { + let LastData { timestamp, value } = + sensor.state.last_data.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastData { value, timestamp }) + } + SensorRequestKind::LastError => { + let LastError { timestamp, value } = + sensor.state.last_error.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastError { value, timestamp }) + } + } + } +} diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index c2fb2467d8..bef1d26c78 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -8,6 +8,7 @@ use crate::config::SimulatedSpsConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -377,6 +378,7 @@ struct Handler { // our life as a simulator. leaked_component_device_strings: Vec<&'static str>, leaked_component_description_strings: Vec<&'static str>, + sensors: Sensors, serial_number: String, ignition: FakeIgnition, @@ -417,9 +419,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -929,13 +934,18 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { - warn!( - &self.log, "asked for component details (returning 0 details)"; + let num_sensor_details = + self.sensors.num_component_details(&component).unwrap_or(0); + // TODO: here is where we might also handle port statuses, if we decide + // to simulate that later... 
+ debug!( + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_sensor_details ); - Ok(0) + Ok(num_sensor_details) } fn component_details( @@ -943,9 +953,18 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). - unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + todo!("simulate port status details..."); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1163,9 +1182,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { diff --git a/tools/console_version b/tools/console_version index 4f67064733..ef59f6e40c 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="17ae890c68a5277fbefe773694e790a8f1b178b4" -SHA2="273a31ba14546305bfafeb9aedb2d9a7530328a0359cda363380c9ca3240b948" +COMMIT="8dcddcef62b8d10dfcd3adb470439212b23b3d5e" +SHA2="30a5ecc4d7b82dfc8bbd5ea59d5d92b8414d0362425c1ce1011da8c722a8ec4c" diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index b7d34debd1..2d0f4d4887 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="9811438cc91c6ec4e8a8ca12479c920bb25fec81" -SHA2="12dc61e7c62b2e1ee1cf3c2bf7cdda6bee6ec96925d2fc1c021c6c1a8fdd56cd" 
+COMMIT="21b16567f28e103f145cd18d53fac6958429c4ff" +SHA2="3a54305ab4b1270c9a5fb0603f481fce199f3767c174a03559ff642f7f44687e" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 5d5b60ff57..e3d16d779c 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="4b09ea6d89af353fd4240a3cfde8655c555f6f42e05c6fc4a4e32724f86bb749" -CIDL_SHA256_LINUX_DPD="fb597785b6fd94b0840a80ff82bc596426aa6b815dd64793075f05d2ba5db38d" -CIDL_SHA256_LINUX_SWADM="9be30b688301debe4103057730ff9a426c96b45d571a6287268f381d8a11dbc1" +CIDL_SHA256_ILLUMOS="3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" +CIDL_SHA256_LINUX_DPD="6aa070ab0590aca7458f2555012acc5571e61b3b1523de862d4bbb04b9d34135" +CIDL_SHA256_LINUX_SWADM="e1e35784538a4fdd76dc257cc636ac3f43f7ef2842dabfe981f17f8ce6b8e1a2" diff --git a/tools/generate-nexus-api.sh b/tools/generate-nexus-api.sh index a0c7d13165..9e3f8d63f6 100755 --- a/tools/generate-nexus-api.sh +++ b/tools/generate-nexus-api.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash ./target/debug/nexus nexus/examples/config.toml -O > openapi/nexus.json -./target/debug/nexus nexus/examples/config.toml -I > openapi/nexus-internal.json diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 40d39b3dd0..0c223c85a8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="220dd026e83142b83bd93123f465a64dd4600201" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 172c5c6f3d..0db6a3b63d 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="220dd026e83142b83bd93123f465a64dd4600201" -SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" 
+COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" +SHA2="5b327f213f8f341cf9072d428980f53757b2c6383f684ac80bbccfb1984ffe5f" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 5479623d30..2e180a83db 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="f1103de5dda4830eb653f4d555995d08c31253116448387399a77392c08dfb54" -MGD_LINUX_SHA256="b4469b8ec3b2193f3eff2886fe1c7ac17dc135b8d7572e1a6c765811738402bd" \ No newline at end of file +CIDL_SHA256="e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" +MGD_LINUX_SHA256="1c3d93bbfbe4ce97af7cb81c13e42a2eea464e18de6827794a55d5bfd971b66c" \ No newline at end of file diff --git a/tools/permslip_production b/tools/permslip_production index 5e9b76f980..4f82e4d6ed 100644 --- a/tools/permslip_production +++ b/tools/permslip_production @@ -1,2 +1,2 @@ -905d38cb8298c72ecac5cf31f792919fbcd69a4ad656c40e53b3ce2d80140111 manifest-oxide-rot-1-v1.0.12.toml +55336a274d0f100d5ef51cb653ec285b651eaba139c35a533e300e6d7d46032c manifest-oxide-rot-1-v1.0.13.toml 74e754e68705cf6fed4152a92bc1ee9667d1d98a21fc12993a2232dbe34bfccb manifest-bootleby-v1.3.0.toml diff --git a/tools/permslip_staging b/tools/permslip_staging index 6f5f925eb0..d2ddc45f20 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,5 +1,5 @@ -c28eaa13638f55100a42916727227242ee02d18cebecb1412d6af5c8aa945b99 manifest-gimlet-v1.0.22.toml -201ff5580bb4b0b01419d7c5e580af9926103e2b6d3024e6b49cee6fab415519 manifest-oxide-rot-1-v1.0.12.toml -6d53bfbfdd6baa3fc150153a003abfac6d4b46c34f61fa7a8ec2af8af19a7d5a manifest-psc-v1.0.21.toml -26b6096a377edb3d7da50b1b499af104e6195bc7c7c6eb1b2751b32434d7ac9e manifest-sidecar-v1.0.23.toml +6ea87b554882860f1a9b1cf97b2f4a9c61fadf3d69e6ea1bdcd781d306d6ca9c manifest-gimlet-v1.0.24.toml +85553dd164933a9b9e4f22409abd1190b1d632d192b5f7527129acaa778a671a manifest-oxide-rot-1-v1.0.13.toml 
+11bc0684155119f494a6e21810e4dc97b9efadb8154d570f67143dae98a45060 manifest-psc-v1.0.23.toml +60205852109f1584d29e2b086eae5a72d7f61b2e1f64d958e6326312ed2b0d66 manifest-sidecar-v1.0.24.toml c0fecaefac7674138337f3bd4ce4ce5b884053dead5ec27b575701471631ea2f manifest-bootleby-v1.3.0.toml diff --git a/tools/update_lldp.sh b/tools/update_lldp.sh index bf7f19eb02..2a9d1d6bae 100755 --- a/tools/update_lldp.sh +++ b/tools/update_lldp.sh @@ -47,7 +47,9 @@ function main { esac done - TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_COMMIT") + if [[ -z "$TARGET_COMMIT" ]]; then + TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_BRANCH") + fi install_toml2json do_update_packages "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${PACKAGES[@]}" do_update_crates "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${CRATES[@]}" diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 8f695d2399..ba586c03a5 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -51,6 +51,7 @@ macro_rules! impl_typed_uuid_kind { impl_typed_uuid_kind! 
{ Collection => "collection", Dataset => "dataset", + DemoSaga => "demo_saga", Downstairs => "downstairs", DownstairsRegion => "downstairs_region", ExternalIp => "external_ip", diff --git a/wicket-common/src/example.rs b/wicket-common/src/example.rs index bb70273b45..34af11e906 100644 --- a/wicket-common/src/example.rs +++ b/wicket-common/src/example.rs @@ -12,7 +12,8 @@ use omicron_common::{ api::{ external::AllowedSourceIps, internal::shared::{ - BgpConfig, BgpPeerConfig, PortFec, PortSpeed, RouteConfig, + BgpConfig, BgpPeerConfig, LldpAdminStatus, LldpPortConfig, PortFec, + PortSpeed, RouteConfig, }, }, }; @@ -166,23 +167,45 @@ impl ExampleRackSetupData { vlan_id: None, }]; + let switch0_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: None, + }); + + let switch1_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: Some(vec!["172.32.0.4".parse().unwrap()]), + }); + let rack_network_config = UserSpecifiedRackNetworkConfig { infra_ip_first: "172.30.0.1".parse().unwrap(), infra_ip_last: "172.30.0.10".parse().unwrap(), switch0: btreemap! 
{ "port0".to_owned() => UserSpecifiedPortConfig { - addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![RouteConfig { + addresses: vec!["172.30.0.1/24".parse().unwrap()], + routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.30.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch0_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch0_port0_lldp, autoneg: true, }, - }, + }, switch1: btreemap! { // Use the same port name as in switch0 to test that it doesn't // collide. @@ -192,10 +215,12 @@ impl ExampleRackSetupData { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.33.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch1_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch1_port0_lldp, autoneg: true, }, }, diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs index 7fd83e522a..cb6b13422b 100644 --- a/wicket-common/src/rack_setup.rs +++ b/wicket-common/src/rack_setup.rs @@ -11,6 +11,7 @@ use omicron_common::api::external::SwitchLocation; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; use omicron_common::api::internal::shared::BgpPeerConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; use omicron_common::api::internal::shared::RouteConfig; @@ -185,6 +186,8 @@ pub struct UserSpecifiedPortConfig { pub autoneg: bool, #[serde(default)] pub bgp_peers: Vec, + #[serde(default)] + pub lldp: Option, } /// User-specified version of [`BgpPeerConfig`]. 
diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 68485815a8..17b31e7730 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -8,6 +8,7 @@ use omicron_common::address::IpRange; use omicron_common::api::external::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use omicron_common::api::internal::shared::UplinkAddressConfig; use serde::Serialize; @@ -320,6 +321,7 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink_port_fec, autoneg, bgp_peers, + lldp, } = cfg; let mut uplink = Table::new(); @@ -327,13 +329,16 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { // routes = [] let mut routes_out = Array::new(); for r in routes { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = r; let mut route = InlineTable::new(); route.insert("nexthop", string_value(nexthop)); route.insert("destination", string_value(destination)); if let Some(vlan_id) = vlan_id { route.insert("vlan_id", i64_value(i64::from(*vlan_id))); } + if let Some(local_pref) = local_pref { + route.insert("local_pref", i64_value(i64::from(*local_pref))); + } routes_out.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes_out))); @@ -488,6 +493,46 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink.insert("bgp_peers", Item::ArrayOfTables(peers)); + if let Some(l) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = l; + let mut lldp = Table::new(); + lldp.insert("status", string_item(status)); + if let Some(x) = chassis_id { + lldp.insert("chassis_id", string_item(x)); + } + if let Some(x) = port_id { + 
lldp.insert("port_id", string_item(x)); + } + if let Some(x) = system_name { + lldp.insert("system_name", string_item(x)); + } + if let Some(x) = system_description { + lldp.insert("system_description", string_item(x)); + } + if let Some(x) = port_description { + lldp.insert("port_description", string_item(x)); + } + if let Some(addrs) = management_addrs { + let mut addresses_out = Array::new(); + for a in addrs { + addresses_out.push(string_value(a)); + } + lldp.insert( + "management_addrs", + Item::Value(Value::Array(addresses_out)), + ); + } + uplink.insert("lldp", Item::Table(lldp)); + } + uplink } diff --git a/wicket/src/runner.rs b/wicket/src/runner.rs index 3af68ccbec..0e201478a8 100644 --- a/wicket/src/runner.rs +++ b/wicket/src/runner.rs @@ -75,7 +75,7 @@ impl RunnerCore { /// Resize and draw the initial screen before handling `Event`s pub fn init_screen(&mut self) -> anyhow::Result<()> { // Size the initial screen - let rect = self.terminal.get_frame().size(); + let rect = self.terminal.get_frame().area(); self.screen.resize(&mut self.state, rect.width, rect.height); // Draw the initial screen diff --git a/wicket/src/ui/main.rs b/wicket/src/ui/main.rs index 379cbd03af..ae6924071a 100644 --- a/wicket/src/ui/main.rs +++ b/wicket/src/ui/main.rs @@ -64,7 +64,7 @@ impl MainScreen { terminal: &mut Term, ) -> anyhow::Result<()> { terminal.draw(|frame| { - let mut rect = frame.size(); + let mut rect = frame.area(); rect.height -= 1; let statusbar_rect = Rect { @@ -85,7 +85,7 @@ impl MainScreen { // Draw all the components, starting with the background let background = Block::default().style(style::background()); - frame.render_widget(background, frame.size()); + frame.render_widget(background, frame.area()); self.sidebar.draw(state, frame, chunks[0], self.sidebar.active); self.draw_pane(state, frame, chunks[1]); self.draw_statusbar(state, frame, statusbar_rect); diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 
7bb63b6b1b..f23bc3c816 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -21,6 +21,7 @@ use itertools::Itertools; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use ratatui::layout::Constraint; use ratatui::layout::Direction; @@ -740,6 +741,7 @@ fn rss_config_text<'a>( uplink_port_fec, autoneg, bgp_peers, + lldp, } = uplink; let mut items = vec![ @@ -771,7 +773,8 @@ fn rss_config_text<'a>( ]; let routes = routes.iter().map(|r| { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = + r; let mut items = vec![ Span::styled(" • Route : ", label_style), @@ -787,6 +790,13 @@ fn rss_config_text<'a>( Span::styled(")", label_style), ]); } + if let Some(local_pref) = local_pref { + items.extend([ + Span::styled(" (local_pref=", label_style), + Span::styled(local_pref.to_string(), ok_style), + Span::styled(")", label_style), + ]); + } items }); @@ -1027,6 +1037,68 @@ fn rss_config_text<'a>( items.extend(addresses); items.extend(peers); + if let Some(lp) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = lp; + + let mut lldp = vec![ + vec![Span::styled(" • LLDP port settings: ", label_style)], + vec![ + Span::styled(" • Admin status : ", label_style), + Span::styled(status.to_string(), ok_style), + ], + ]; + + if let Some(c) = chassis_id { + lldp.push(vec![ + Span::styled(" • Chassis ID : ", label_style), + Span::styled(c.to_string(), ok_style), + ]) + } + if let Some(s) = system_name { + lldp.push(vec![ + Span::styled(" • System name : ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(s) = system_description { + 
lldp.push(vec![ + Span::styled(" • System description: ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(p) = port_id { + lldp.push(vec![ + Span::styled(" • Port ID : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(p) = port_description { + lldp.push(vec![ + Span::styled(" • Port description : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(addrs) = management_addrs { + let mut label = " • Management addrs : "; + for a in addrs { + lldp.push(vec![ + Span::styled(label, label_style), + Span::styled(a.to_string(), ok_style), + ]); + label = " : "; + } + } + items.extend(lldp); + } + append_list( &mut spans, Cow::from(format!("Uplink {}: ", i + 1)), diff --git a/wicket/src/ui/splash.rs b/wicket/src/ui/splash.rs index 9da9fa8648..18d7b37f08 100644 --- a/wicket/src/ui/splash.rs +++ b/wicket/src/ui/splash.rs @@ -33,7 +33,7 @@ impl SplashScreen { fn draw_background(&self, f: &mut Frame) { let block = Block::default().style(style::background()); - f.render_widget(block, f.size()); + f.render_widget(block, f.area()); } // Sweep left to right, painting the banner white, with @@ -41,7 +41,7 @@ impl SplashScreen { fn animate_logo(&self, f: &mut Frame) { // Center the banner let rect = f - .size() + .area() .center_horizontally(LOGO_WIDTH) .center_vertically(LOGO_HEIGHT); diff --git a/wicket/src/ui/widgets/animated_logo.rs b/wicket/src/ui/widgets/animated_logo.rs index cae6487fa8..f8919bad1d 100644 --- a/wicket/src/ui/widgets/animated_logo.rs +++ b/wicket/src/ui/widgets/animated_logo.rs @@ -67,7 +67,7 @@ impl<'a> Widget for Logo<'a> { for (x, c) in line.chars().enumerate() { if c == '#' { let cell = buf - .get_mut(x as u16 + area.left(), y as u16 + area.top()) + [(x as u16 + area.left(), y as u16 + area.top())] .set_symbol(" "); if x < paint_point { // The cell is highlighted diff --git a/wicket/src/ui/widgets/box_connector.rs b/wicket/src/ui/widgets/box_connector.rs index 
af4630773c..71af4d383e 100644 --- a/wicket/src/ui/widgets/box_connector.rs +++ b/wicket/src/ui/widgets/box_connector.rs @@ -31,15 +31,15 @@ impl Widget for BoxConnector { if self.kind == BoxConnectorKind::Top || self.kind == BoxConnectorKind::Both { - buf.get_mut(rect.x, rect.y - 1).set_symbol("├"); - buf.get_mut(rect.x + rect.width - 1, rect.y - 1).set_symbol("┤"); + buf[(rect.x, rect.y - 1)].set_symbol("├"); + buf[(rect.x + rect.width - 1, rect.y - 1)].set_symbol("┤"); } if self.kind == BoxConnectorKind::Bottom || self.kind == BoxConnectorKind::Both { - buf.get_mut(rect.x, rect.y + rect.height).set_symbol("├"); - buf.get_mut(rect.x + rect.width - 1, rect.y + rect.height) + buf[(rect.x, rect.y + rect.height)].set_symbol("├"); + buf[(rect.x + rect.width - 1, rect.y + rect.height)] .set_symbol("┤"); } } diff --git a/wicket/src/ui/widgets/rack.rs b/wicket/src/ui/widgets/rack.rs index 42ebf39d02..b393b63528 100644 --- a/wicket/src/ui/widgets/rack.rs +++ b/wicket/src/ui/widgets/rack.rs @@ -65,7 +65,7 @@ impl<'a> Rack<'a> { // TODO: Draw 10 only? 
- That may not scale down as well for x in inner.left()..inner.right() { for y in inner.top()..inner.bottom() { - let cell = buf.get_mut(x, y).set_symbol("▕"); + let cell = buf[(x, y)].set_symbol("▕"); if self.state.selected == component_id { if let Some(KnightRiderMode { count }) = self.state.knight_rider_mode @@ -118,7 +118,7 @@ impl<'a> Rack<'a> { if presence == ComponentPresence::Present { for x in inner.left()..inner.right() { for y in inner.top()..inner.bottom() { - buf.get_mut(x, y).set_symbol("❒"); + buf[(x, y)].set_symbol("❒"); } } } @@ -156,7 +156,7 @@ impl<'a> Rack<'a> { for x in inner.left() + border..inner.right() - border { for y in inner.top()..inner.bottom() { if x % step != 0 { - buf.get_mut(x, y).set_symbol("█"); + buf[(x, y)].set_symbol("█"); } } } diff --git a/wicket/tests/output/example_non_empty.toml b/wicket/tests/output/example_non_empty.toml index 717e940ca5..fafb31048d 100644 --- a/wicket/tests/output/example_non_empty.toml +++ b/wicket/tests/output/example_non_empty.toml @@ -111,6 +111,14 @@ allowed_export = [] local_pref = 80 enforce_first_as = true +[rack_network_config.switch0.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name override" +system_description = "system description override" +port_description = "port description override" + [rack_network_config.switch1.port0] routes = [{ nexthop = "172.33.0.10", destination = "0.0.0.0/0", vlan_id = 1 }] addresses = [{ address = "172.32.0.1/24" }] @@ -131,6 +139,15 @@ auth_key_id = "bgp-key-1" allowed_import = ["224.0.0.0/4"] enforce_first_as = false +[rack_network_config.switch1.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name override" +system_description = "system description override" +port_description = "port description override" +management_addrs = ["172.32.0.4"] + [[rack_network_config.bgp]] asn = 47 originate = ["10.0.0.0/16"] diff 
--git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 324ae01b42..6e2c27a97e 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -25,6 +25,7 @@ flume.workspace = true futures.workspace = true gateway-messages.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hubtools.workspace = true hyper.workspace = true @@ -46,7 +47,6 @@ tokio-stream.workspace = true tokio-util.workspace = true toml.workspace = true tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true bootstrap-agent-client.workspace = true diff --git a/wicketd/src/installinator_progress.rs b/wicketd/src/installinator_progress.rs index 7d076e7b0e..8f8465652e 100644 --- a/wicketd/src/installinator_progress.rs +++ b/wicketd/src/installinator_progress.rs @@ -295,10 +295,10 @@ mod tests { use installinator_common::{ InstallinatorCompletionMetadata, InstallinatorComponent, - InstallinatorSpec, InstallinatorStepId, M2Slot, StepEvent, - StepEventKind, StepInfo, StepInfoWithMetadata, StepOutcome, - WriteOutput, + InstallinatorSpec, InstallinatorStepId, StepEvent, StepEventKind, + StepInfo, StepInfoWithMetadata, StepOutcome, WriteOutput, }; + use omicron_common::disk::M2Slot; use omicron_test_utils::dev::test_setup_log; use schemars::JsonSchema; use update_engine::ExecutionId; diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 36a4f61779..fb0914e836 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -14,6 +14,11 @@ use dpd_client::types::PortSpeed as DpdPortSpeed; use dpd_client::Client as DpdClient; use dpd_client::ClientState as DpdClientState; use either::Either; +use hickory_resolver::config::NameServerConfigGroup; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; use illumos_utils::zone::SVCCFG; use 
illumos_utils::PFEXEC; use omicron_common::address::DENDRITE_PORT; @@ -35,12 +40,6 @@ use std::time::Duration; use std::time::Instant; use tokio::process::Command; use tokio::sync::mpsc; -use trust_dns_resolver::config::NameServerConfigGroup; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::error::ResolveError; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; use wicket_common::preflight_check::EventBuffer; use wicket_common::preflight_check::StepContext; use wicket_common::preflight_check::StepProgress; @@ -930,16 +929,7 @@ impl DnsLookupStep { }; 'dns_servers: for &dns_ip in dns_servers { - let resolver = match self.build_resolver(dns_ip) { - Ok(resolver) => resolver, - Err(err) => { - self.warnings.push(format!( - "failed to create resolver for {dns_ip}: {}", - DisplayErrorChain::new(&err) - )); - continue; - } - }; + let resolver = self.build_resolver(dns_ip); // Attempt to resolve any NTP servers that aren't IP addresses. for &ntp_name in &ntp_names_to_resolve { @@ -1052,14 +1042,18 @@ impl DnsLookupStep { ( "A", resolver.ipv4_lookup(name).await.map(|records| { - Either::Left(records.into_iter().map(IpAddr::V4)) + Either::Left( + records.into_iter().map(|x| IpAddr::V4(x.into())), + ) }), ) } else { ( "AAAA", resolver.ipv6_lookup(name).await.map(|records| { - Either::Right(records.into_iter().map(IpAddr::V6)) + Either::Right( + records.into_iter().map(|x| IpAddr::V6(x.into())), + ) }), ) }; @@ -1175,12 +1169,12 @@ impl DnsLookupStep { /// /// If building it fails, we'll append to our internal `warnings` and return /// `None`. 
- fn build_resolver( - &mut self, - dns_ip: IpAddr, - ) -> Result { + fn build_resolver(&mut self, dns_ip: IpAddr) -> TokioAsyncResolver { let mut options = ResolverOpts::default(); + // Enable edns for potentially larger records + options.edns0 = true; + // We will retry ourselves; we don't want the resolver // retrying internally too. options.attempts = 1; diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index c6f2dd5892..56e83fcd41 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -686,11 +686,14 @@ fn build_port_config( bgp_auth_keys: &BTreeMap>, ) -> BaPortConfigV2 { use bootstrap_agent_client::types::BgpPeerConfig as BaBgpPeerConfig; + use bootstrap_agent_client::types::LldpAdminStatus as BaLldpAdminStatus; + use bootstrap_agent_client::types::LldpPortConfig as BaLldpPortConfig; use bootstrap_agent_client::types::PortFec as BaPortFec; use bootstrap_agent_client::types::PortSpeed as BaPortSpeed; use bootstrap_agent_client::types::RouteConfig as BaRouteConfig; use bootstrap_agent_client::types::SwitchLocation as BaSwitchLocation; use bootstrap_agent_client::types::UplinkAddressConfig as BaUplinkAddressConfig; + use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; @@ -703,6 +706,7 @@ fn build_port_config( destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, }) .collect(), addresses: config @@ -779,6 +783,20 @@ fn build_port_config( PortFec::Rs => BaPortFec::Rs, }, autoneg: config.autoneg, + lldp: config.lldp.as_ref().map(|c| BaLldpPortConfig { + status: match c.status { + LldpAdminStatus::Enabled => BaLldpAdminStatus::Enabled, + LldpAdminStatus::Disabled => BaLldpAdminStatus::Disabled, + LldpAdminStatus::TxOnly => BaLldpAdminStatus::TxOnly, + LldpAdminStatus::RxOnly => BaLldpAdminStatus::RxOnly, + }, + chassis_id: c.chassis_id.clone(), + port_id: c.port_id.clone(), + 
system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + port_description: c.port_description.clone(), + management_addrs: c.management_addrs.clone(), + }), } } diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index dee22f70c0..9980359253 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -35,9 +35,9 @@ use gateway_messages::ROT_PAGE_SIZE; use hubtools::RawHubrisArchive; use installinator_common::InstallinatorCompletionMetadata; use installinator_common::InstallinatorSpec; -use installinator_common::M2Slot; use installinator_common::WriteOutput; use omicron_common::api::external::SemverVersion; +use omicron_common::disk::M2Slot; use omicron_common::update::ArtifactHash; use slog::error; use slog::info; diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5f34c76db9..a39daa5735 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -22,23 +22,22 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { version = "1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.13", features = ["cargo", "derive", 
"env", "wrap_help"] } -clap_builder = { version = "4.5.13", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } +clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { version = "0.8.19" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.9", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } @@ -60,20 +59,21 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features 
= ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits = { version = "0.2.19", features = ["i128", "libm"] } @@ -81,42 +81,46 @@ openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } -regex-syntax = { version = "0.8.3" } +regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.205", features = ["alloc", "derive", "rc"] } -serde_json = { version = 
"1.0.122", features = ["raw_value", "unbounded_depth"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.72", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.38.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, 
features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -126,23 +130,23 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { version = "1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } +cc = { version = "1.0.97", default-features = false, features = ["parallel"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.13", features = ["cargo", "derive", "env", "wrap_help"] } -clap_builder = { version = "4.5.13", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } +clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { 
version = "0.8.19" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.9", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } @@ -164,20 +168,21 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits 
= { version = "0.2.19", features = ["i128", "libm"] } @@ -185,28 +190,33 @@ openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } -regex-syntax = { version = "0.8.3" } +regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.205", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.122", features = ["raw_value", "unbounded_depth"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", 
"release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.72", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.38.1", features = ["full", "test-util"] } @@ -214,16 +224,15 @@ tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } unicode-xid = { version = "0.2.4" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -231,50 
+240,64 @@ zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-unknown-linux-gnu.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.dependencies] mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] 
} [target.x86_64-apple-darwin.build-dependencies] mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.dependencies] mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.build-dependencies] mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-unknown-illumos.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } -toml_datetime = { version = "0.6.8", default-features = false, features 
= ["serde"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.11", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } -rustix = { version = "0.38.34", features = ["fs", "system", "termios"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } +rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } +signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } ### END HAKARI SECTION