diff --git a/.config/nextest.toml b/.config/nextest.toml
index 95d4c20102a..eb62e1c9afc 100644
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -3,14 +3,16 @@
 #
 # The required version should be bumped up if we need new features, performance
 # improvements or bugfixes that are present in newer versions of nextest.
-nextest-version = { required = "0.9.64", recommended = "0.9.70" }
+nextest-version = { required = "0.9.77", recommended = "0.9.77" }
 
 experimental = ["setup-scripts"]
 
 [[profile.default.scripts]]
 # Exclude omicron-dev tests from crdb-seed as we explicitly want to simulate an
 # environment where the seed file doesn't exist.
-filter = 'rdeps(nexus-test-utils) - package(omicron-dev)'
+# Exclude omicron-live-tests because those don't need this and also don't have
+# it available in the environment in which they run.
+filter = 'rdeps(nexus-test-utils) - package(omicron-dev) - package(omicron-live-tests)'
 setup = 'crdb-seed'
 
 [profile.ci]
@@ -21,13 +23,20 @@ fail-fast = false
 # invocations of nextest happen.
 command = 'cargo run -p crdb-seed --profile test'
 
+[test-groups]
 # The ClickHouse cluster tests currently rely on a hard-coded set of ports for
 # the nodes in the cluster. We would like to relax this in the future, at which
 # point this test-group configuration can be removed or at least loosened to
 # support testing in parallel. For now, enforce strict serialization for all
 # tests with `replicated` in the name.
-[test-groups]
 clickhouse-cluster = { max-threads = 1 }
+# While most Omicron tests operate with their own simulated control plane, the
+# live-tests operate on a more realistic, shared control plane and test
+# behaviors that conflict with each other. They need to be run serially.
+live-tests = { max-threads = 1 }
+
+[profile.default]
+default-filter = 'all() - package(omicron-live-tests) - package(end-to-end-tests)'
 
 [[profile.default.overrides]]
 filter = 'package(oximeter-db) and test(replicated)'
@@ -43,3 +52,10 @@ filter = 'binary_id(omicron-nexus::test_all)'
 # As of 2023-01-08, the slowest test in test_all takes 196s on a Ryzen 7950X.
 # 900s is a good upper limit that adds a comfortable buffer.
 slow-timeout = { period = '60s', terminate-after = 15 }
+
+[profile.live-tests]
+default-filter = 'package(omicron-live-tests)'
+
+[[profile.live-tests.overrides]]
+filter = 'package(omicron-live-tests)'
+test-group = 'live-tests'
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000000..ddfd1b04b76
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# Whitespace-only changes
+d01ba56c2127789d85723793380a7378394583f1
diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md
new file mode 100644
index 00000000000..eb1ac2c6e9f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md
@@ -0,0 +1,65 @@
+---
+name: Test flake from buildomat
+about: Report a test failure from a CI run either on "main" or on a PR where you don't
+  think the PR changes caused the failure
+title: 'test failed in CI: NAME_OF_TEST'
+labels: Test Flake
+assignees: ''
+
+---
+
+This test failed on a CI run on **"main" (or pull request XXX)**:
+
+ Link here to the GitHub page showing the test failure.
+ If it's from a PR, this might look like:
+ https://github.com/oxidecomputer/omicron/pull/4588/checks?check_run_id=19198066410
+ It could also be a link to a failure on "main", which would look like:
+ https://github.com/oxidecomputer/omicron/runs/20589829185
+ This is useful because it shows which commit failed and all the surrounding context.
+
+Log showing the specific test failure:
+
+ Link here to the specific line of output from the buildomat log showing the failure:
+ https://buildomat.eng.oxide.computer/wg/0/details/01HGH32FQYKZJNX9J62HNABKPA/31C5jyox8tyHUIuDDevKkXlDZCyNw143z4nOq8wLl3xtjKzT/01HGH32V3P0HH6B56S46AJAT63#S4455
+ This is useful because it shows all the details about the test failure.
+
+Excerpt from the log showing the failure:
+
+```
+Paste here an excerpt from the log.
+This is redundant with the log above but helps people searching for the error message
+or test name. It also works if the link above becomes unavailable.
+Here's an example:
+
+------
+
+failures:
+ integration_tests::updates::test_update_races
+
+test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s
+
+
+--- STDERR: wicketd::mod integration_tests::updates::test_update_races ---
+log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log
+note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log"
+hint: Generated a random key:
+hint:
+hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554
+hint:
+hint: To modify this repository, you will need this key. Use the -k/--key
+hint: command line flag or the TUFACEOUS_KEY environment variable:
+hint:
+hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554
+hint:
+hint: To prevent this default behavior, use --no-generate-key.
+thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41:
+at least one event
+stack backtrace:
+...
+```
diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md
new file mode 100644
index 00000000000..e963c839265
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md
@@ -0,0 +1,42 @@
+---
+name: Test flake from local failure
+about: Report a test failure that happened locally (not CI) that you believe is not
+  related to local changes
+title: 'test failure: TEST_NAME'
+labels: Test Flake
+assignees: ''
+
+---
+
+On branch **BRANCH** commit **COMMIT**, I saw this test failure:
+
+```
+Include the trimmed, relevant output from `cargo nextest`. Here's an example:
+
+-------
+failures:
+ integration_tests::updates::test_update_races
+
+test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s
+
+
+--- STDERR: wicketd::mod integration_tests::updates::test_update_races ---
+log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log
+note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log"
+hint: Generated a random key:
+hint:
+hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554
+hint:
+hint: To modify this repository, you will need this key. Use the -k/--key
+hint: command line flag or the TUFACEOUS_KEY environment variable:
+hint:
+hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554
+hint:
+hint: To prevent this default behavior, use --no-generate-key.
+thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41:
+at least one event
+stack backtrace:
+...
+```
+
+**NOTE: Consider attaching any log files produced by the test.**
diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh
index 1e4b655cb9d..19800126641 100755
--- a/.github/buildomat/build-and-test.sh
+++ b/.github/buildomat/build-and-test.sh
@@ -9,7 +9,7 @@ target_os=$1
 # NOTE: This version should be in sync with the recommended version in
 # .config/nextest.toml. (Maybe build an automated way to pull the recommended
 # version in the future.)
-NEXTEST_VERSION='0.9.70'
+NEXTEST_VERSION='0.9.77'
 
 cargo --version
 rustc --version
@@ -89,6 +89,12 @@ ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose
 banner doctest
 ptime -m timeout 1h cargo test --doc --locked --verbose --no-fail-fast
 
+# Build the live-tests. This is only supported on illumos.
+# We also can't actually run them here. See the README for more details.
+if [[ $target_os == "illumos" ]]; then
+    ptime -m cargo xtask live-tests
+fi
+
 # We expect the seed CRDB to be placed here, so we explicitly remove it so the
 # rmdir check below doesn't get triggered. Nextest doesn't have support for
 # teardown scripts so this is the best we've got.
diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index e310c011e72..11aa638ace1 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@7f737c1056bae14d45b3daec1a2d26ad480e50f7 # v2
+        uses: taiki-e/install-action@11053896c3ed8d313b47efa789def6474abd1e6b # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date
diff --git a/Cargo.lock b/Cargo.lock
index ee3d2a49f1f..fa32990d8c1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -59,7 +59,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
  "cfg-if",
- "getrandom 0.2.14",
+ "getrandom",
  "once_cell",
  "version_check",
  "zerocopy 0.7.34",
 ]
@@ -256,12 +256,14 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
 
 [[package]]
 name = "async-bb8-diesel"
-version = "0.1.0"
-source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=ed7ab5ef0513ba303d33efd41d3e9e381169d59b#ed7ab5ef0513ba303d33efd41d3e9e381169d59b"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc03a2806f66f36513d65e0a7f34200382230250cadcf8a8397cfbe3f26b795"
 dependencies = [
  "async-trait",
  "bb8",
  "diesel",
+ "futures",
  "thiserror",
  "tokio",
 ]
@@ -376,10 +378,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
 dependencies = [
  "futures-core",
- "getrandom 0.2.14",
+ "getrandom",
  "instant",
  "pin-project-lite",
- "rand 0.8.5",
+ "rand",
  "tokio",
 ]
@@ -683,7 +685,7 @@ dependencies = [
  "omicron-workspace-hack",
  "pq-sys",
  "proptest",
- "rand 0.8.5",
+ "rand",
  "secrecy",
  "serde",
  "serde_with",
@@ -703,7 +705,7 @@
 name = "bootstrap-agent-api"
 version = "0.1.0"
dependencies = [ - "dropshot", + "dropshot 0.10.2-dev", "nexus-client", "omicron-common", "omicron-uuid-kinds", @@ -815,9 +817,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.7" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] @@ -913,6 +915,11 @@ name = "cc" version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] [[package]] name = "cert-dev" @@ -968,7 +975,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot", + "dropshot 0.10.2-dev", "futures", "libc", "omicron-rpaths", @@ -1069,9 +1076,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.15" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -1108,6 +1115,34 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "clickhouse-admin-api" +version = "0.1.0" +dependencies = [ + "dropshot 0.10.2-dev", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", +] + +[[package]] +name = "clickhouse-admin-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "camino-tempfile", + "derive_more", + "expectorate", + "omicron-common", + "omicron-workspace-hack", + "schemars", + "serde", + "serde_json", +] + [[package]] name = "clickward" version = "0.1.0" @@ -1143,7 +1178,7 @@ name = "cockroach-admin-api" version = "0.1.0" dependencies = [ "cockroach-admin-types", - "dropshot", + "dropshot 0.10.2-dev", "omicron-common", "omicron-uuid-kinds", "omicron-workspace-hack", @@ -1370,7 +1405,7 @@ name = "crdb-seed" version = "0.1.0" dependencies = [ "anyhow", - "dropshot", + "dropshot 0.10.2-dev", "omicron-test-utils", "omicron-workspace-hack", "slog", @@ -1514,6 +1549,36 @@ dependencies = [ "serde_json", ] +[[package]] +name = "crucible-common" +version = "0.0.1" +source = "git+https://github.com/oxidecomputer/crucible?rev=e58ca3693cb9ce0438947beba10e97ee38a0966b#e58ca3693cb9ce0438947beba10e97ee38a0966b" +dependencies = [ + "anyhow", + "atty", + "crucible-workspace-hack", + "dropshot 0.10.2-dev", + "nix 0.28.0", + "rusqlite", + "rustls-pemfile 1.0.4", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + "slog-bunyan", + "slog-dtrace", + "slog-term", + "tempfile", + "thiserror", + "tokio", + "tokio-rustls 0.24.1", + "toml 0.8.19", + "twox-hash", + "uuid", + "vergen", +] + [[package]] name = "crucible-pantry-client" version = "0.0.1" @@ -1562,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" dependencies = [ "generic-array", - "rand_core 0.6.4", + "rand_core", "subtle", "zeroize", ] @@ -1574,7 +1639,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", - "rand_core 
0.6.4", + "rand_core", "typenum", ] @@ -1629,7 +1694,7 @@ dependencies = [ "curve25519-dalek-derive", "digest", "fiat-crypto", - "rand_core 0.6.4", + "rand_core", "rustc_version 0.4.0", "subtle", "zeroize", @@ -1739,7 +1804,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "oxnet", "percent-encoding", @@ -1878,6 +1943,15 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "des" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdd80ce8ce993de27e9f063a444a4d53ce8e8db4c1f00cc03af5ad5a9867a1e" +dependencies = [ + "cipher", +] + [[package]] name = "dhcproto" version = "0.12.0" @@ -1887,7 +1961,7 @@ dependencies = [ "dhcproto-macros", "hex", "ipnet", - "rand 0.8.5", + "rand", "thiserror", "trust-dns-proto", "url", @@ -1901,9 +1975,9 @@ checksum = "a7993efb860416547839c115490d4951c6d0f8ec04a3594d9dd99d50ed7ec170" [[package]] name = "diesel" -version = "2.2.2" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf97ee7261bb708fa3402fa9c17a54b70e90e3cb98afb3dc8999d5512cb03f94" +checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71" dependencies = [ "bitflags 2.6.0", "byteorder", @@ -1976,15 +2050,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -1995,18 +2060,6 @@ dependencies = [ "dirs-sys-next", ] -[[package]] -name = "dirs-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" -dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys 0.48.0", -] - [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -2020,9 +2073,9 @@ dependencies = [ [[package]] name = "display-error-chain" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f77af9e75578c1ab34f5f04545a8b05be0c36fbd7a9bb3cf2d2a971e435fdbb9" +checksum = "7d305e5a3904ee14166439a70feef04853c1234226dbb27ede127b88dc5a4a9d" [[package]] name = "dladm" @@ -2057,8 +2110,12 @@ dependencies = [ "clap", "dns-server-api", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", + "hickory-client", + "hickory-proto", + "hickory-resolver", + "hickory-server", "http 0.2.12", "omicron-test-utils", "omicron-workspace-hack", @@ -2078,10 +2135,6 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.19", - "trust-dns-client", - "trust-dns-proto", - "trust-dns-resolver", - "trust-dns-server", "uuid", ] @@ -2090,7 +2143,7 @@ name = "dns-server-api" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.10.2-dev", "omicron-workspace-hack", "schemars", "serde", @@ -2151,7 +2204,7 @@ dependencies = [ "progenitor", "progenitor-client", "quote", - "rand 0.8.5", + "rand", "regress", "reqwest", "rustfmt-wrapper", @@ -2163,10 +2216,56 @@ dependencies = [ "uuid", ] +[[package]] +name = "dropshot" +version = "0.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a391eeedf8a75a188eb670327c704b7ab10eb2bb890e2ec0880dd21d609fb6e8" +dependencies = [ + "async-stream", + "async-trait", + "base64 0.22.1", + "bytes", + "camino", + "chrono", + "debug-ignore", + "dropshot_endpoint 0.10.1", + "form_urlencoded", + "futures", + "hostname 0.4.0", + "http 0.2.12", + "hyper 0.14.30", + "indexmap 2.4.0", + "multer", + "openapiv3", + "paste", + "percent-encoding", + "rustls 0.22.4", + "rustls-pemfile 2.1.3", + "schemars", + "scopeguard", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sha1", + "slog", + "slog-async", + "slog-bunyan", + "slog-json", + "slog-term", + "tokio", + "tokio-rustls 0.25.0", + "toml 0.8.19", + "uuid", + "version_check", + "waitgroup", +] + [[package]] name = "dropshot" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "async-stream", "async-trait", @@ -2175,13 +2274,13 @@ dependencies = [ "camino", "chrono", "debug-ignore", - "dropshot_endpoint", + "dropshot_endpoint 0.10.2-dev", "form_urlencoded", "futures", "hostname 0.4.0", "http 0.2.12", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "multer", "openapiv3", "paste", @@ -2209,10 +2308,23 @@ dependencies = [ "waitgroup", ] +[[package]] +name = "dropshot_endpoint" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9058c9c7e4a6b378cd12e71dc155bb15d0d4f8e1e6039ce2cf0a7c0c81043e33" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_tokenstream", + "syn 2.0.74", +] + [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2285,7 +2397,7 @@ checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "subtle", @@ -2313,7 +2425,7 @@ dependencies = [ "hkdf", "pem-rfc7468", "pkcs8", - "rand_core 0.6.4", + "rand_core", "sec1", "subtle", "zeroize", @@ -2361,6 +2473,7 @@ dependencies = [ "clap", "colored", "dhcproto", + "hickory-resolver", "http 0.2.12", "humantime", "hyper 0.14.30", @@ -2371,7 +2484,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oxide-client", - "rand 0.8.5", + "rand", "reqwest", "russh", "russh-keys", @@ -2381,7 +2494,6 @@ dependencies = [ "socket2 0.5.7", "tokio", "toml 0.8.19", - "trust-dns-resolver", "uuid", ] @@ -2403,6 +2515,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -2484,6 +2608,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fancy-regex" version = "0.13.0" @@ -2530,7 +2666,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ded41244b729663b1e574f1b4fb731469f69f79c17667b5d776b16cda0479449" dependencies = [ - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -2542,9 +2678,9 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" dependencies = [ "cfg-if", "libc", @@ -2804,7 +2940,7 @@ dependencies = [ name = "gateway-api" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.10.2-dev", "gateway-types", "omicron-common", "omicron-uuid-kinds", @@ -2846,7 +2982,7 @@ dependencies = [ "gateway-messages", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -2908,7 +3044,7 @@ name = "gateway-test-utils" version = "0.1.0" dependencies = [ "camino", - "dropshot", + "dropshot 0.10.2-dev", "gateway-messages", "gateway-types", "omicron-gateway", @@ -2964,17 +3100,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - [[package]] name = "getrandom" version = "0.2.14" @@ -2984,7 +3109,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] @@ -3004,6 +3129,19 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "git2" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724" +dependencies = [ + "bitflags 2.6.0", + "libc", + "libgit2-sys", + "log", + "url", +] + [[package]] name = "glob" version = "0.3.1" @@ -3041,7 +3179,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ "ff", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -3058,7 +3196,7 @@ dependencies = [ "debug-ignore", "fixedbitset", "guppy-workspace-hack", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "nested", "once_cell", @@ -3090,7 +3228,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -3150,6 +3288,15 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = 
"headers" version = "0.3.9" @@ -3248,6 +3395,90 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "hickory-client" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab9683b08d8f8957a857b0236455d80e1886eaa8c6178af556aa7871fb61b55" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + "futures-util", + "hickory-proto", + "once_cell", + "radix_trie", + "rand", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-proto" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07698b8420e2f0d6447a436ba999ec85d8fbf2a398bbd737b82cac4a2e96e512" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner 0.6.0", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand", + "thiserror", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28757f23aa75c98f254cf0405e6d8c25b831b32921b050a66692427679b1f243" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.2", + "rand", + "resolv-conf", + "smallvec 1.13.2", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-server" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be0e43c556b9b3fdb6c7c71a9a32153a2275d02419e3de809e520bfcfe40c37" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner 0.6.0", + "futures-util", + "hickory-proto", + "serde", + "thiserror", + "time", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "highway" version = "1.2.0" @@ -3522,7 +3753,7 @@ dependencies = [ "hyper 0.14.30", "mime_guess", "percent-encoding", - "rand 0.8.5", + "rand", "tokio", "url", "winapi", @@ -3611,6 +3842,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -3634,7 +3875,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" [[package]] name = "illumos-utils" @@ -3704,9 +3945,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -3762,6 +4003,7 @@ dependencies = [ "buf-list", "bytes", "camino", + "camino-tempfile", "cancel-safe-futures", "clap", "display-error-chain", @@ -3791,7 +4033,6 @@ dependencies = [ "slog-envlogger", "slog-term", "smf", - "tempfile", "test-strategy", "thiserror", "tokio", @@ -3806,7 +4047,7 
@@ name = "installinator-api" version = "0.1.0" dependencies = [ "anyhow", - "dropshot", + "dropshot 0.10.2-dev", "hyper 0.14.30", "installinator-common", "omicron-common", @@ -3874,9 +4115,10 @@ dependencies = [ "chrono", "dns-server", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", + "hickory-resolver", "hyper 0.14.30", "omicron-common", "omicron-test-utils", @@ -3891,7 +4133,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -3901,13 +4142,13 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot", + "dropshot 0.10.2-dev", + "hickory-resolver", "internal-dns", "omicron-common", "omicron-workspace-hack", "slog", "tokio", - "trust-dns-resolver", ] [[package]] @@ -3920,9 +4161,8 @@ checksum = "fc6d6206008e25125b1f97fbe5d309eb7b85141cf9199d52dbd3729a1584dd16" name = "ipcc" version = "0.1.0" dependencies = [ - "cfg-if", "ciborium", - "libc", + "libipcc", "omicron-common", "omicron-workspace-hack", "proptest", @@ -4024,6 +4264,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -4061,7 +4310,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "quote", "syn 2.0.74", @@ -4122,9 +4371,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libdlpi-sys" @@ -4164,7 +4413,7 @@ dependencies = [ "portpicker", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af)", "propolis-server-config", - "rand 0.8.5", + "rand", "regex", "reqwest", "ron 0.7.1", @@ -4183,6 +4432,28 @@ dependencies = [ "zone 0.1.8", ] +[[package]] +name = "libgit2-sys" +version = "0.17.0+1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10472326a8a6477c3c20a64547b0059e4b0d086869eee31e6d7da728a8eb7224" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + +[[package]] +name = "libipcc" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/libipcc?rev=fdffa212373a8f92473ea5f411088912bf458d5f#fdffa212373a8f92473ea5f411088912bf458d5f" +dependencies = [ + "cfg-if", + "libc", + "thiserror", +] + [[package]] name = "libloading" version = "0.8.3" @@ -4266,6 +4537,16 @@ dependencies = [ "redox_syscall 0.5.1", ] +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "libsw" 
version = "3.3.1" @@ -4298,6 +4579,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libz-sys" +version = "1.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linear-map" version = "1.2.0" @@ -4316,6 +4609,15 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +[[package]] +name = "live-tests-macros" +version = "0.1.0" +dependencies = [ + "omicron-workspace-hack", + "quote", + "syn 2.0.74", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -4478,7 +4780,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "anyhow", "chrono", @@ -4501,6 +4803,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "libc", + "omicron-gateway", "omicron-workspace-hack", "signal-hook-tokio", "tokio", @@ -4545,7 +4848,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.48.0", ] @@ -4558,7 +4861,7 @@ dependencies = [ "hermit-abi 0.3.9", "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.52.0", ] @@ -4620,7 +4923,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -4692,7 +4995,7 @@ dependencies = [ "base64 0.22.1", "chrono", "cookie 0.18.1", - "dropshot", + "dropshot 0.10.2-dev", "futures", "headers", "http 0.2.12", @@ -4749,7 +5052,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "libc", "omicron-common", @@ -4808,7 +5111,7 @@ dependencies = [ "oxnet", "parse-display", "pq-sys", - "rand 0.8.5", + "rand", "ref-cast", "schemars", "semver 1.0.23", @@ -4832,7 +5135,6 @@ dependencies = [ "assert_matches", "async-bb8-diesel", "async-trait", - "bb8", "camino", "camino-tempfile", "chrono", @@ -4840,7 +5142,7 @@ dependencies = [ "db-macros", "diesel", "diesel-dtrace", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "gateway-client", @@ -4877,7 +5179,8 @@ dependencies = [ "pq-sys", "predicates", "pretty_assertions", - "rand 0.8.5", + "qorb", + "rand", "rcgen", "ref-cast", "regex", @@ -4898,6 +5201,7 @@ dependencies = [ "term", "thiserror", "tokio", + "url", "usdt", "uuid", ] @@ -4911,15 +5215,33 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "oxnet", - "rand 0.8.5", + "rand", "serde_json", ] +[[package]] +name = "nexus-external-api" +version = "0.1.0" +dependencies = [ + "anyhow", + "dropshot 0.10.2-dev", + "http 0.2.12", + "hyper 0.14.30", + "ipnetwork", + "nexus-types", + "omicron-common", + "omicron-workspace-hack", + "openapi-manager-types", + "openapiv3", + "oximeter-types", + "oxql-types", +] + [[package]] name = "nexus-internal-api" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.10.2-dev", "nexus-types", "omicron-common", 
"omicron-uuid-kinds", @@ -5026,6 +5348,7 @@ dependencies = [ "httptest", "internal-dns", "ipnet", + "newtype-uuid", "nexus-config", "nexus-db-model", "nexus-db-queries", @@ -5062,7 +5385,7 @@ dependencies = [ "debug-ignore", "expectorate", "gateway-client", - "indexmap 2.3.0", + "indexmap 2.4.0", "internal-dns", "ipnet", "maplit", @@ -5076,7 +5399,7 @@ dependencies = [ "omicron-workspace-hack", "oxnet", "proptest", - "rand 0.8.5", + "rand", "sled-agent-client", "slog", "static_assertions", @@ -5174,15 +5497,17 @@ dependencies = [ "crucible-agent-client", "dns-server", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "futures", "gateway-messages", "gateway-test-utils", "headers", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "illumos-utils", "internal-dns", + "nexus-client", "nexus-config", "nexus-db-queries", "nexus-sled-agent-shared", @@ -5205,7 +5530,6 @@ dependencies = [ "slog", "tokio", "tokio-util", - "trust-dns-resolver", "uuid", ] @@ -5224,17 +5548,22 @@ version = "0.1.0" dependencies = [ "anyhow", "api_identity", + "async-trait", "base64 0.22.1", "chrono", "clap", + "cookie 0.18.1", "derive-where", "derive_more", "dns-service-client", + "dropshot 0.10.2-dev", "futures", "gateway-client", + "http 0.2.12", "humantime", "ipnetwork", "newtype-uuid", + "newtype_derive", "nexus-sled-agent-shared", "omicron-common", "omicron-passwords", @@ -5242,6 +5571,7 @@ dependencies = [ "omicron-workspace-hack", "openssl", "oxnet", + "oxql-types", "parse-display", "proptest", "schemars", @@ -5342,7 +5672,7 @@ checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" dependencies = [ "num-integer", "num-traits", - "rand 0.8.5", + "rand", ] [[package]] @@ -5357,7 +5687,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand 0.8.5", + "rand", "serde", "smallvec 1.13.2", "zeroize", @@ -5454,7 +5784,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "syn 1.0.109", @@ -5542,6 +5872,42 @@ dependencies = [ "thiserror", ] +[[package]] +name = "omicron-clickhouse-admin" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-api", + "clickhouse-admin-types", + "dropshot 0.10.2-dev", + "expectorate", + "http 0.2.12", + "illumos-utils", + "nexus-test-utils", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "openapi-lint", + "openapiv3", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "subprocess", + "thiserror", + "tokio", + "tokio-postgres", + "toml 0.8.19", + "url", +] + [[package]] name = "omicron-cockroach-admin" version = "0.1.0" @@ -5553,7 +5919,7 @@ dependencies = [ "cockroach-admin-api", "cockroach-admin-types", "csv", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "http 0.2.12", "illumos-utils", @@ -5595,7 +5961,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "hex", @@ -5612,7 +5978,7 @@ dependencies = [ "progenitor", "progenitor-client", "proptest", - "rand 0.8.5", + "rand", "regress", "reqwest", "schemars", @@ -5663,7 +6029,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "libc", @@ -5701,8 +6067,9 @@ dependencies = [ "anyhow", "base64 
0.22.1", "camino", + "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "gateway-api", @@ -5719,6 +6086,9 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "once_cell", + "oximeter", + "oximeter-instruments", + "oximeter-producer", "schemars", "serde", "serde_json", @@ -5737,6 +6107,38 @@ dependencies = [ "uuid", ] +[[package]] +name = "omicron-live-tests" +version = "0.1.0" +dependencies = [ + "anyhow", + "assert_matches", + "dropshot 0.10.2-dev", + "futures", + "internal-dns", + "live-tests-macros", + "nexus-client", + "nexus-config", + "nexus-db-model", + "nexus-db-queries", + "nexus-reconfigurator-planning", + "nexus-reconfigurator-preparation", + "nexus-sled-agent-shared", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "pq-sys", + "reqwest", + "serde", + "slog", + "slog-error-chain", + "textwrap", + "tokio", + "uuid", +] + [[package]] name = "omicron-nexus" version = "0.1.0" @@ -5756,13 +6158,14 @@ dependencies = [ "cockroach-admin-client", "criterion", "crucible-agent-client", + "crucible-common", "crucible-pantry-client", "diesel", "display-error-chain", "dns-server", "dns-service-client", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "fatfs", "futures", @@ -5771,6 +6174,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", + "hickory-resolver", "http 0.2.12", "httptest", "hubtools", @@ -5788,6 +6192,7 @@ dependencies = [ "nexus-db-model", "nexus-db-queries", "nexus-defaults", + "nexus-external-api", "nexus-internal-api", "nexus-inventory", "nexus-metrics-producer-gc", @@ -5821,6 +6226,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "oxnet", + "oxql-types", "parse-display", "paste", "pem", @@ -5829,7 +6235,7 @@ dependencies = [ "pretty_assertions", "progenitor-client", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "rcgen", "ref-cast", "regex", @@ -5863,7 +6269,6 @@ dependencies = [ "tokio-postgres", "tokio-util", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", @@ -5884,17 +6289,19 @@ dependencies = [ "crucible-agent-client", "csv", "diesel", - "dropshot", + "dropshot 0.10.2-dev", "dyn-clone", "expectorate", "futures", "gateway-client", "gateway-messages", "gateway-test-utils", + "http 0.2.12", "humantime", "indicatif", "internal-dns", "ipnetwork", + "itertools 0.13.0", "multimap", "nexus-client", "nexus-config", @@ -5975,7 +6382,7 @@ dependencies = [ "clap", "criterion", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "rust-argon2", "schemars", "serde", @@ -6045,7 +6452,7 @@ dependencies = [ "dns-server", "dns-service-client", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "flate2", "flume", @@ -6084,7 +6491,7 @@ dependencies = [ "pretty_assertions", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", "propolis-mock-server", - "rand 0.8.5", + "rand", "rcgen", "reqwest", "schemars", @@ -6130,7 +6537,7 @@ dependencies = [ "atomicwrites", "camino", "camino-tempfile", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "filetime", "gethostname", @@ -6178,14 +6585,15 @@ dependencies = [ "anyhow", "base16ct", "base64 0.22.1", + "base64ct", "bit-set", "bit-vec", "bitflags 1.3.2", "bitflags 2.6.0", - "bstr 0.2.17", "bstr 1.9.1", "byteorder", "bytes", + "cc", "chrono", "cipher", "clap", @@ -6212,13 +6620,14 @@ 
dependencies = [ "futures-util", "gateway-messages", "generic-array", - "getrandom 0.2.14", + "getrandom", "group", "hashbrown 0.14.5", "hex", + "hickory-proto", "hmac", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "inout", "itertools 0.10.5", "itertools 0.12.1", @@ -6229,9 +6638,10 @@ dependencies = [ "log", "managed", "memchr", - "mio 0.8.11", + "mio 1.0.2", + "nix 0.28.0", "nom", - "num-bigint", + "num-bigint-dig", "num-integer", "num-iter", "num-traits", @@ -6240,20 +6650,24 @@ dependencies = [ "peg-runtime", "pem-rfc7468", "petgraph", + "pkcs8", "postgres-types", "predicates", "proc-macro2", + "quote", "regex", "regex-automata 0.4.6", "regex-syntax 0.8.4", "reqwest", "ring 0.17.8", + "rsa", "rustix", "schemars", "scopeguard", "semver 1.0.23", "serde", "serde_json", + "sha1", "sha2", "signal-hook-mio", "similar", @@ -6275,14 +6689,12 @@ dependencies = [ "toml_edit 0.19.15", "toml_edit 0.22.20", "tracing", - "trust-dns-proto", "unicode-bidi", "unicode-normalization", "unicode-xid", "usdt", "usdt-impl", "uuid", - "yasna", "zerocopy 0.7.34", "zeroize", ] @@ -6343,7 +6755,7 @@ version = "0.4.0" source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", - "indexmap 2.3.0", + "indexmap 2.4.0", "lazy_static", "openapiv3", "regex", @@ -6358,16 +6770,19 @@ dependencies = [ "bootstrap-agent-api", "camino", "clap", + "clickhouse-admin-api", "cockroach-admin-api", "dns-server-api", - "dropshot", + "dropshot 0.10.2-dev", "fs-err", "gateway-api", "indent_write", "installinator-api", + "nexus-external-api", "nexus-internal-api", "omicron-workspace-hack", "openapi-lint", + "openapi-manager-types", "openapiv3", "owo-colors", "oximeter-api", @@ -6378,13 +6793,22 @@ dependencies = [ "wicketd-api", ] +[[package]] +name = "openapi-manager-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "omicron-workspace-hack", +] + [[package]] name = "openapiv3" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_json", ] @@ -6436,7 +6860,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "dyn-clone", @@ -6453,7 +6877,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6465,7 +6889,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6476,12 +6900,6 @@ dependencies = [ 
"thiserror", ] -[[package]] -name = "option-ext" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" - [[package]] name = "oso" version = "0.27.3" @@ -6521,25 +6939,25 @@ dependencies = [ "base64 0.22.1", "chrono", "futures", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "regress", "reqwest", "serde", "serde_json", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "illumos-sys-hdrs", @@ -6559,9 +6977,10 @@ dependencies = [ "chrono", "clap", "omicron-workspace-hack", - "oximeter-impl", "oximeter-macro-impl", + "oximeter-schema", "oximeter-timeseries-macro", + "oximeter-types", "prettyplease", "syn 2.0.74", "toml 0.8.19", @@ -6573,7 +6992,7 @@ name = "oximeter-api" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.10.2-dev", "omicron-common", "omicron-workspace-hack", "schemars", @@ -6604,7 +7023,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "hyper 0.14.30", @@ -6620,7 +7039,7 @@ dependencies = [ "oximeter-api", "oximeter-client", "oximeter-db", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -6652,17 +7071,19 @@ dependencies = [ "clap", "clickward", "crossterm 0.28.1", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "futures", "highway", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "num", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "oximeter-test-utils", + "oxql-types", "peg", "reedline", "regex", @@ -6686,46 +7107,13 @@ dependencies = [ "uuid", ] -[[package]] -name = "oximeter-impl" -version = "0.1.0" -dependencies = [ - "approx", - "bytes", - "chrono", - "criterion", - "float-ord", - "heck 0.5.0", - "num", - "omicron-common", - "omicron-workspace-hack", - "oximeter-macro-impl", - "prettyplease", - "proc-macro2", - "quote", - "rand 0.8.5", - "rand_distr", - "regex", - "rstest", - "schemars", - "serde", - "serde_json", - "slog-error-chain", - "strum", - "syn 2.0.74", - "thiserror", - "toml 0.8.19", - "trybuild", - "uuid", -] - [[package]] name = "oximeter-instruments" version = "0.1.0" dependencies = [ "cfg-if", "chrono", - "dropshot", + "dropshot 0.10.2-dev", "futures", "http 0.2.12", "hyper 0.14.30", @@ -6733,7 +7121,7 @@ dependencies = [ "libc", "omicron-workspace-hack", "oximeter", - "rand 0.8.5", + "rand", "schemars", "serde", "slog", @@ -6761,7 +7149,7 @@ dependencies = [ "anyhow", "chrono", "clap", - "dropshot", + "dropshot 0.10.2-dev", "internal-dns", "nexus-client", "omicron-common", @@ -6779,17 +7167,75 @@ dependencies = [ "uuid", ] +[[package]] +name = "oximeter-schema" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "heck 0.5.0", + "omicron-workspace-hack", + "oximeter-types", + "prettyplease", + "proc-macro2", + "quote", + "schemars", + "serde", + "slog-error-chain", + "syn 2.0.74", + "toml 0.8.19", +] + +[[package]] +name = "oximeter-test-utils" +version = "0.1.0" +dependencies = [ + "chrono", + "omicron-workspace-hack", + "oximeter-macro-impl", + 
"oximeter-types", + "uuid", +] + [[package]] name = "oximeter-timeseries-macro" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "oximeter-impl", + "oximeter-schema", + "oximeter-types", "proc-macro2", "quote", "syn 2.0.74", ] +[[package]] +name = "oximeter-types" +version = "0.1.0" +dependencies = [ + "approx", + "bytes", + "chrono", + "criterion", + "float-ord", + "num", + "omicron-common", + "omicron-workspace-hack", + "oximeter-macro-impl", + "rand", + "rand_distr", + "regex", + "rstest", + "schemars", + "serde", + "serde_json", + "strum", + "thiserror", + "trybuild", + "uuid", +] + [[package]] name = "oxlog" version = "0.1.0" @@ -6814,6 +7260,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "oxql-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "highway", + "num", + "omicron-workspace-hack", + "oximeter-types", + "schemars", + "serde", +] + [[package]] name = "p256" version = "0.13.2" @@ -6826,6 +7286,18 @@ dependencies = [ "sha2", ] +[[package]] +name = "p384" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70786f51bcc69f6a4c0360e063a4cac5419ef7c5cd5b3c99ad70f3be5ba79209" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2", +] + [[package]] name = "p521" version = "0.13.3" @@ -6836,7 +7308,7 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "rand_core 0.6.4", + "rand_core", "sha2", ] @@ -6971,7 +7443,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -6982,7 +7454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -7026,6 +7498,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ "digest", + "hmac", ] [[package]] @@ -7132,7 +7605,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", ] @@ -7207,6 +7680,21 @@ dependencies = [ "spki", ] +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2 0.12.2", + "scrypt", + "sha2", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -7214,6 +7702,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", + "pkcs5", + "rand_core", "spki", ] @@ -7313,7 +7803,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" dependencies = [ - "rand 0.8.5", + "rand", ] [[package]] @@ -7336,11 +7826,11 @@ dependencies = [ "base64 0.22.1", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "hmac", "md-5", "memchr", - "rand 0.8.5", + "rand", "sha2", "stringprep", ] @@ -7353,7 +7843,7 @@ checksum = 
"02048d9e032fb3cc3413bbf7b83a15d84a5d419778e2628751896d856498eee9" dependencies = [ "bytes", "chrono", - "fallible-iterator", + "fallible-iterator 0.2.0", "postgres-protocol", "serde", "serde_json", @@ -7467,6 +7957,15 @@ dependencies = [ "toml_edit 0.19.15", ] +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -7533,7 +8032,7 @@ dependencies = [ "getopts", "heck 0.5.0", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "openapiv3", "proc-macro2", "quote", @@ -7603,7 +8102,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7624,7 +8123,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7645,12 +8144,12 @@ dependencies = [ "atty", "base64 0.21.7", "clap", - "dropshot", + "dropshot 0.10.2-dev", "futures", "hyper 0.14.30", "progenitor", "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7707,8 +8206,8 @@ dependencies = [ "bitflags 2.6.0", "lazy_static", "num-traits", - "rand 0.8.5", - "rand_chacha 0.3.1", + "rand", + "rand_chacha", "rand_xorshift", "regex-syntax 0.8.4", "rusty-fork", @@ -7732,6 +8231,29 @@ dependencies = [ "psl-types", ] +[[package]] +name = "qorb" +version = "0.0.1" +source = "git+https://github.com/oxidecomputer/qorb?branch=master#163a77838a3cfe8f7741d32e443f76d995b89df3" +dependencies = [ + "anyhow", + "async-trait", + "debug-ignore", + "derive-where", + "dropshot 0.10.1", + "futures", + "hickory-resolver", + "rand", + "schemars", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-stream", + "tokio-tungstenite 0.23.1", + "tracing", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -7784,19 +8306,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - [[package]] name = "rand" version = "0.8.5" @@ -7804,18 +8313,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", + "rand_chacha", + "rand_core", ] [[package]] @@ -7825,16 +8324,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", + "rand_core", ] [[package]] @@ -7843,7 +8333,7 @@ version = "0.6.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -7853,16 +8343,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" dependencies = [ "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", + "rand", ] [[package]] @@ -7871,7 +8352,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9febe641d2842ffc76ee962668a17578767c4e01735e4802b21ed9a24b2e4e" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -7880,7 +8361,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -7946,10 +8427,10 @@ dependencies = [ "camino-tempfile", "clap", "dns-service-client", - "dropshot", + "dropshot 0.10.2-dev", "expectorate", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "nexus-client", "nexus-db-queries", "nexus-reconfigurator-execution", @@ -8011,7 +8492,7 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ - "getrandom 0.2.14", + "getrandom", "libredox", "thiserror", ] @@ -8219,7 +8700,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.14", + "getrandom", "libc", "spin 0.9.8", "untrusted 0.9.0", @@ -8273,7 +8754,7 @@ dependencies = [ "num-traits", "pkcs1", "pkcs8", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "signature", @@ -8284,9 +8765,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" +checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" dependencies = [ "futures", "futures-timer", @@ -8296,12 +8777,13 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" +checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" dependencies = [ "cfg-if", "glob", + "proc-macro-crate 3.1.0", "proc-macro2", "quote", "regex", @@ -8321,21 +8803,38 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags 2.6.0", + "fallible-iterator 0.3.0", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec 1.13.2", +] + [[package]] name = "russh" -version = "0.43.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c9534703dc13be1eefc5708618f4c346da8e4f04f260218613f351ed5e94259" +checksum = 
"0a229f2a03daea3f62cee897b40329ce548600cca615906d98d58b8db3029b19" dependencies = [ "aes", "aes-gcm", "async-trait", "bitflags 2.6.0", "byteorder", + "cbc", "chacha20", "ctr", "curve25519-dalek", + "des", "digest", + "elliptic-curve", "flate2", "futures", "generic-array", @@ -8344,16 +8843,21 @@ dependencies = [ "log", "num-bigint", "once_cell", + "p256", + "p384", + "p521", "poly1305", - "rand 0.8.5", + "rand", + "rand_core", "russh-cryptovec", "russh-keys", "sha1", "sha2", + "ssh-encoding", + "ssh-key", "subtle", "thiserror", "tokio", - "tokio-util", ] [[package]] @@ -8368,41 +8872,53 @@ dependencies = [ [[package]] name = "russh-keys" -version = "0.43.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4a5afa2fab6fd49d0c470a3b75c3c70a4f363c38db32df5ae3b44a3abf5ab9" +checksum = "89757474f7c9ee30121d8cc7fe293a954ba10b204a82ccf5850a5352a532ebc7" dependencies = [ "aes", "async-trait", "bcrypt-pbkdf", - "bit-vec", "block-padding", "byteorder", "cbc", "ctr", "data-encoding", - "dirs", + "der", + "digest", + "ecdsa", "ed25519-dalek", + "elliptic-curve", "futures", "hmac", + "home", "inout", "log", "md5", - "num-bigint", "num-integer", "p256", + "p384", "p521", "pbkdf2 0.11.0", - "rand 0.7.3", - "rand_core 0.6.4", + "pkcs1", + "pkcs5", + "pkcs8", + "rand", + "rand_core", + "rsa", "russh-cryptovec", + "sec1", "serde", "sha1", "sha2", + "spki", + "ssh-encoding", + "ssh-key", "thiserror", "tokio", "tokio-stream", - "yasna", + "typenum", + "zeroize", ] [[package]] @@ -8621,6 +9137,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "samael" version = "0.0.15" @@ -8641,7 +9166,7 @@ dependencies = [ "openssl-sys", "pkg-config", "quick-xml", - "rand 0.8.5", + "rand", "serde", "thiserror", "url", @@ -8728,6 +9253,17 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2 0.12.2", + "salsa20", + "sha2", +] + [[package]] name = "sct" version = "0.7.1" @@ -8801,9 +9337,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] @@ -8839,9 +9375,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -8870,9 +9406,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = 
"83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", "memchr", @@ -8921,9 +9457,9 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", @@ -8945,15 +9481,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73139bc5ec2d45e6c5fd85be5a46949c1c39a4c18e56915f5eb4c12f975e377" +checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", "serde_json", @@ -8963,9 +9499,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b80d3d6b56b64335c0180e5ffde23b3c5e08c14c585b51a15bd0e95393f46703" +checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" dependencies = [ "darling", "proc-macro2", @@ -8979,7 +9515,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "itoa", "ryu", "serde", @@ -9080,7 +9616,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -9094,11 +9630,11 @@ dependencies = [ [[package]] name = "similar" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" dependencies = [ - "bstr 0.2.17", + "bstr 1.9.1", "unicode-segmentation", ] @@ -9148,7 +9684,7 @@ name = "sled-agent-api" version = "0.1.0" dependencies = [ "camino", - "dropshot", + "dropshot 0.10.2-dev", "nexus-sled-agent-shared", "omicron-common", "omicron-uuid-kinds", @@ -9231,7 +9767,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "sled-hardware-types", @@ -9275,7 +9811,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "serde_json", @@ -9518,7 +10054,7 @@ dependencies = [ "anyhow", "async-trait", "clap", - "dropshot", + "dropshot 0.10.2-dev", "futures", "gateway-messages", "gateway-types", @@ -9589,6 +10125,57 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "ssh-cipher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caac132742f0d33c3af65bfcde7f6aa8f62f0e991d80db99149eb9d44708784f" +dependencies = [ + "aes", + "aes-gcm", + "cbc", + "chacha20", + "cipher", + "ctr", + "poly1305", + "ssh-encoding", + "subtle", +] + +[[package]] +name = "ssh-encoding" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eb9242b9ef4108a78e8cd1a2c98e193ef372437f8c22be363075233321dd4a15" +dependencies = [ + "base64ct", + "pem-rfc7468", + "sha2", +] + +[[package]] +name = "ssh-key" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca9b366a80cf18bb6406f4cf4d10aebfb46140a8c0c33f666a144c5c76ecbafc" +dependencies = [ + "bcrypt-pbkdf", + "ed25519-dalek", + "num-bigint-dig", + "p256", + "p384", + "p521", + "rand_core", + "rsa", + "sec1", + "sha2", + "signature", + "ssh-cipher", + "ssh-encoding", + "subtle", + "zeroize", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -10191,28 +10778,27 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", "libc", - "mio 0.8.11", - "num_cpus", + "mio 1.0.2", "parking_lot 0.12.2", "pin-project-lite", "signal-hook-registry", "socket2 0.5.7", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", @@ -10238,7 +10824,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "fallible-iterator", + "fallible-iterator 0.2.0", "futures-channel", "futures-util", "log", @@ -10248,7 +10834,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.8.5", + "rand", "socket2 0.5.7", "tokio", "tokio-util", @@ -10285,6 +10871,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -10311,6 +10898,18 @@ dependencies = [ "tungstenite 0.21.0", ] +[[package]] +name = "tokio-tungstenite" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6989540ced10490aaf14e6bad2e3d33728a2813310a0c71d1574304c49631cd" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.23.0", +] + [[package]] name = "tokio-util" version = "0.7.11" @@ -10324,15 +10923,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.8" @@ -10372,20 +10962,31 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_spanned", "toml_datetime", "winnow 0.5.40", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.4.0", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", 
"serde_spanned", "toml_datetime", @@ -10505,26 +11106,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "trust-dns-client" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c408c32e6a9dbb38037cece35740f2cf23c875d8ca134d33631cec83f74d3fe" -dependencies = [ - "cfg-if", - "data-encoding", - "futures-channel", - "futures-util", - "lazy_static", - "radix_trie", - "rand 0.8.5", - "thiserror", - "time", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = "trust-dns-proto" version = "0.22.0" @@ -10534,64 +11115,21 @@ dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.5.1", "futures-channel", "futures-io", "futures-util", "idna 0.2.3", "ipnet", "lazy_static", - "rand 0.8.5", + "rand", "smallvec 1.13.2", "thiserror", "tinyvec", - "tokio", "tracing", "url", ] -[[package]] -name = "trust-dns-resolver" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aff21aa4dcefb0a1afbfac26deb0adc93888c7d295fb63ab273ef276ba2b7cfe" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lazy_static", - "lru-cache", - "parking_lot 0.12.2", - "resolv-conf", - "smallvec 1.13.2", - "thiserror", - "tokio", - "tracing", - "trust-dns-proto", -] - -[[package]] -name = "trust-dns-server" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99022f9befa6daec2a860be68ac28b1f0d9d7ccf441d8c5a695e35a58d88840d" -dependencies = [ - "async-trait", - "bytes", - "cfg-if", - "enum-as-inner", - "futures-executor", - "futures-util", - "serde", - "thiserror", - "time", - "tokio", - "toml 0.5.11", - "tracing", - "trust-dns-client", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -10660,7 +11198,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "parse-size", - "rand 0.8.5", + "rand", "ring 0.17.8", "serde", "serde_json", @@ -10697,7 +11235,7 @@ dependencies = [ "http 0.2.12", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", @@ -10716,13 +11254,42 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e2ce1e47ed2994fd43b04c8f618008d4cabdd5ee34027cf14f9d918edd9c8" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.1.0", + "httparse", + "log", + "rand", + "sha1", + "thiserror", + "utf-8", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "rand", + "static_assertions", +] + [[package]] name = "typed-path" version = "0.7.1" @@ -10735,8 +11302,8 @@ version = "0.1.0" dependencies = [ "newtype-uuid", "omicron-workspace-hack", - "rand 0.8.5", - "rand_core 0.6.4", + "rand", + "rand_core", "rand_seeder", "uuid", ] @@ -10913,14 +11480,14 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dropshot", + "dropshot 0.10.2-dev", "futures", "hex", "hubtools", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "sha2", "slog", "thiserror", @@ -10946,7 +11513,7 @@ dependencies = [ "derive-where", "either", "futures", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "libsw", "linear-map", @@ -11060,7 +11627,7 @@ 
version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ - "getrandom 0.2.14", + "getrandom", "serde", ] @@ -11080,6 +11647,22 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vergen" +version = "8.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" +dependencies = [ + "anyhow", + "cargo_metadata", + "cfg-if", + "git2", + "regex", + "rustc_version 0.4.0", + "rustversion", + "time", +] + [[package]] name = "version_check" version = "0.9.5" @@ -11112,9 +11695,9 @@ dependencies = [ "curve25519-dalek", "elliptic-curve", "hex", - "rand 0.8.5", - "rand_chacha 0.3.1", - "rand_core 0.6.4", + "rand", + "rand_chacha", + "rand_core", "serde", "subtle", "thiserror-no-std", @@ -11178,12 +11761,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -11328,7 +11905,7 @@ dependencies = [ "expectorate", "futures", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "itertools 0.13.0", "maplit", @@ -11370,7 +11947,7 @@ version = "0.1.0" dependencies = [ "anyhow", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "gateway-client", "maplit", "omicron-common", @@ -11426,7 +12003,7 @@ dependencies = [ "debug-ignore", "display-error-chain", "dpd-client", - "dropshot", + "dropshot 0.10.2-dev", "either", "expectorate", "flate2", @@ -11437,6 +12014,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", + "hickory-resolver", "http 0.2.12", "hubtools", "hyper 0.14.30", @@ -11459,7 +12037,7 @@ dependencies = [ "openapi-lint", "openapiv3", "oxnet", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -11476,7 +12054,6 @@ dependencies = [ "tokio-util", "toml 0.8.19", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", @@ -11493,7 +12070,7 @@ name = "wicketd-api" version = "0.1.0" dependencies = [ "bootstrap-agent-client", - "dropshot", + "dropshot 0.10.2-dev", "gateway-client", "omicron-common", "omicron-passwords", @@ -11798,6 +12375,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", + "camino-tempfile", "cargo_metadata", "cargo_toml", "clap", @@ -11806,6 +12384,7 @@ dependencies = [ "serde", "swrite", "tabled", + "textwrap", "toml 0.8.19", "usdt", ] @@ -11842,8 +12421,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ - "bit-vec", - "num-bigint", "time", ] @@ -11936,7 +12513,8 @@ dependencies = [ [[package]] name = "zone" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/zone?branch=state-derive-eq-hash#f1920d5636c69ea8179f8ec659702dcdef43268c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62a428a79ea2224ce8ab05d6d8a21bdd7b4b68a8dbc1230511677a56e72ef22" dependencies = [ "itertools 0.10.5", "thiserror", @@ -11951,7 +12529,7 @@ dependencies = [ "anyhow", "camino", "clap", - "dropshot", + "dropshot 0.10.2-dev", "illumos-utils", "omicron-common", 
"omicron-sled-agent", @@ -11981,7 +12559,8 @@ dependencies = [ [[package]] name = "zone_cfg_derive" version = "0.3.0" -source = "git+https://github.com/oxidecomputer/zone?branch=state-derive-eq-hash#f1920d5636c69ea8179f8ec659702dcdef43268c" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5c4f01d3785e222d5aca11c9813e9c46b69abfe258756c99c9b628683626cc8" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index 962bfb82de0..a870c52fe95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,8 @@ members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -29,6 +31,7 @@ members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -49,6 +52,8 @@ members = [ "internal-dns", "ipcc", "key-manager", + "live-tests", + "live-tests/macros", "nexus", "nexus-config", "nexus-sled-agent-shared", @@ -59,6 +64,7 @@ members = [ "nexus/db-model", "nexus/db-queries", "nexus/defaults", + "nexus/external-api", "nexus/internal-api", "nexus/inventory", "nexus/macros-common", @@ -75,12 +81,15 @@ members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -112,6 +121,9 @@ default-members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", + "clickhouse-admin/types", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -138,6 +150,7 @@ default-members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -146,8 +159,7 @@ default-members = [ # See omicron#4392. "dns-server", "dns-server-api", - # Do not include end-to-end-tests in the list of default members, as its - # tests only work on a deployed control plane. 
+ "end-to-end-tests", "gateway", "gateway-api", "gateway-cli", @@ -161,6 +173,8 @@ default-members = [ "internal-dns", "ipcc", "key-manager", + "live-tests", + "live-tests/macros", "nexus", "nexus-config", "nexus-sled-agent-shared", @@ -171,6 +185,7 @@ default-members = [ "nexus/db-model", "nexus/db-queries", "nexus/defaults", + "nexus/external-api", "nexus/internal-api", "nexus/inventory", "nexus/macros-common", @@ -187,12 +202,15 @@ default-members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -270,13 +288,12 @@ api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.16" -async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } +async-bb8-diesel = "0.2" async-trait = "0.1.81" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.22.1" -bb8 = "0.8.5" bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } @@ -294,6 +311,8 @@ cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickhouse-admin-api = { path = "clickhouse-admin/api" } +clickhouse-admin-types = { path = "clickhouse-admin/types" } clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" } cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } @@ -307,17 +326,18 @@ crossterm = { version = "0.28.1", features = ["event-stream"] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } +crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.9" -display-error-chain = "0.2.0" +display-error-chain = "0.2.1" omicron-ddm-admin-client = { path = "clients/ddm-admin-client" } db-macros = { path = "nexus/db-macros" } debug-ignore = "1.0.5" derive_more = "0.99.18" derive-where = "1.2.7" # Having the i-implement-... 
feature here makes diesel go away from the workspace-hack -diesel = { version = "2.2.2", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } +diesel = { version = "2.2.3", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } @@ -328,7 +348,7 @@ dyn-clone = "1.0.17" either = "1.13.0" expectorate = "1.1.0" fatfs = "0.3.6" -filetime = "0.2.24" +filetime = "0.2.25" flate2 = "1.0.31" float-ord = "0.3.2" flume = "0.11.0" @@ -356,6 +376,10 @@ headers = "0.3.9" heck = "0.5" hex = "0.4.3" hex-literal = "0.4.1" +hickory-client = "0.24.1" +hickory-proto = "0.24.1" +hickory-resolver = "0.24.1" +hickory-server = "0.24.1" highway = "1.2.0" hkdf = "0.12.4" http = "0.2.12" @@ -367,7 +391,7 @@ hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indent_write = "2.2.0" -indexmap = "2.3.0" +indexmap = "2.4.0" indicatif = { version = "0.17.8", features = ["rayon"] } installinator = { path = "installinator" } installinator-api = { path = "installinator-api" } @@ -382,16 +406,18 @@ ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } kstat-rs = "0.2.4" -libc = "0.2.155" +libc = "0.2.158" +libipcc = { git = "https://github.com/oxidecomputer/libipcc", rev = "fdffa212373a8f92473ea5f411088912bf458d5f" } libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" } linear-map = "1.2.0" +live-tests-macros = { path = "live-tests/macros" } macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } @@ -400,6 +426,7 @@ nexus-db-fixed-data = { path = "nexus/db-fixed-data" } nexus-db-model = { path = "nexus/db-model" } nexus-db-queries = { path = "nexus/db-queries" } nexus-defaults = { path = "nexus/defaults" } +nexus-external-api = { path = "nexus/external-api" } nexus-inventory = { path = "nexus/inventory" } nexus-internal-api = { path = "nexus/internal-api" } nexus-macros-common = { path = "nexus/macros-common" } @@ -416,6 +443,7 @@ nexus-test-utils = { path = "nexus/test-utils" } nexus-types = { path = "nexus/types" } num-integer = "0.1.46" num = { version = "0.4.3", default-features = false, features = [ "libm" ] } +omicron-clickhouse-admin = { path = "clickhouse-admin" } omicron-certificates = { path = "certificates" } omicron-cockroach-admin = { path = "cockroach-admin" } omicron-common = { path = 
"common" } @@ -431,16 +459,17 @@ omicron-test-utils = { path = "test-utils" } omicron-workspace-hack = "0.1.0" omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653", features = [ "api", "std" ] } oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +openapi-manager-types = { path = "dev-tools/openapi-manager/types" } openapiv3 = "2.0.0" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } @@ -448,11 +477,14 @@ oximeter-api = { path = "oximeter/api" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/", default-features = false } oximeter-collector = { path = "oximeter/collector" } -oximeter-impl = { path = "oximeter/impl" } oximeter-instruments = { path = "oximeter/instruments" } oximeter-macro-impl = { path = "oximeter/oximeter-macro-impl" } oximeter-producer = { path = "oximeter/producer" } +oximeter-schema = { path = "oximeter/schema" } +oximeter-test-utils = { path = "oximeter/test-utils" } oximeter-timeseries-macro = { path = "oximeter/timeseries-macro" } +oximeter-types = { path = "oximeter/types" } +oxql-types = { path = "oximeter/oxql-types" } p256 = "0.13" parse-display = "0.10.0" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } @@ -474,6 +506,7 @@ bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c76b6a63961ecef76acb1516b6e66c5c9" } propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "24a74d0c76b6a63961ecef76acb1516b6e66c5c9" } proptest = "1.5.0" +qorb = { git = "https://github.com/oxidecomputer/qorb", branch = "master" } quote = "1.0" rand = "0.8.5" rand_core = "0.6.4" @@ -489,7 +522,7 @@ regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" -rstest = "0.19.0" +rstest = "0.22.0" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.3" @@ -500,18 +533,18 @@ secrecy = "0.8.0" semver = { version = "1.0.23", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } serde_human_bytes = { git = "https://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.124" +serde_json = "1.0.125" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.8.3" +serde_with = "3.9.0" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } sigpipe = "0.1.3" -similar = { version = "2.5.0", features = ["bytes"] } +similar = { version = "2.6.0", features = ["bytes"] } similar-asserts = "1.5.0" # Don't change sled's version on accident; sled's on-disk format is not yet # stable and 
requires manual migrations. In the limit this won't matter because
@@ -557,7 +590,7 @@ textwrap = "0.16.1"
 test-strategy = "0.3.1"
 thiserror = "1.0"
 tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" }
-tokio = "1.38.1"
+tokio = "1.39.3"
 tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] }
 tokio-stream = "0.1.15"
 tokio-tungstenite = "0.20"
@@ -565,10 +598,6 @@ tokio-util = { version = "0.7.11", features = ["io", "io-util"] }
 toml = "0.8.19"
 toml_edit = "0.22.20"
 tough = { version = "0.17.1", features = [ "http" ] }
-trust-dns-client = "0.22"
-trust-dns-proto = "0.22"
-trust-dns-resolver = "0.22"
-trust-dns-server = "0.22"
 trybuild = "1.0.99"
 tufaceous = { path = "tufaceous" }
 tufaceous-lib = { path = "tufaceous-lib" }
@@ -727,8 +756,6 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.rand_core]
 opt-level = 3
-[profile.dev.package.rand_hc]
-opt-level = 3
 [profile.dev.package.rand_xorshift]
 opt-level = 3
 [profile.dev.package.rsa]
@@ -811,7 +838,3 @@ path = "workspace-hack"
 [patch."https://github.com/oxidecomputer/omicron"]
 omicron-uuid-kinds = { path = "uuid-kinds" }
 omicron-common = { path = "common" }
-
-[patch.crates-io.zone]
-git = 'https://github.com/oxidecomputer/zone'
-branch = 'state-derive-eq-hash'
diff --git a/README.adoc b/README.adoc
index d48a5c97367..80753e030f3 100644
--- a/README.adoc
+++ b/README.adoc
@@ -62,6 +62,8 @@ Nextest https://github.com/nextest-rs/nextest/issues/16[does not support doctest
 Similarly, you can run tests inside a https://github.com/oxidecomputer/falcon[Falcon] based VM. This is described in the `test-utils` https://github.com/oxidecomputer/omicron/tree/main/test-utils[README].
 
+There's also a xref:./live-tests/README.adoc[`live-tests`] test suite that can be run by hand in a _deployed_ Omicron system.
+
 === rustfmt and clippy
 
 You can **format the code** using `cargo fmt`. Make sure to run this before pushing changes. The CI checks that the code is correctly formatted.
@@ -206,12 +208,14 @@ We also use these OpenAPI documents as the source for the clients we generate
 using https://github.com/oxidecomputer/progenitor[Progenitor]. Clients are
 automatically updated when the corresponding OpenAPI document is modified.
 
-There are currently two kinds of services based on how their corresponding documents are generated: *managed* and *unmanaged*. Eventually, all services within Omicron will transition to being managed.
+OpenAPI documents are tracked by the `cargo xtask openapi` command.
 
-* A *managed* service is tracked by the `cargo xtask openapi` command, using Dropshot's relatively new API trait functionality.
-* An *unmanaged* service is defined the traditional way, by gluing together a set of implementation functions, and is tracked by an independent test.
+* To regenerate all OpenAPI documents, run `cargo xtask openapi generate`.
+* To check whether all OpenAPI documents are up-to-date, run `cargo xtask
+  openapi check`.
 
-To check whether your document is managed, run `cargo xtask openapi list`: it will list out all managed OpenAPI documents. If your document is not on the list, it is unmanaged.
+For more information, see the documentation in
+link:./dev-tools/openapi-manager[`dev-tools/openapi-manager`].
 
 Note that Omicron contains a nominally circular dependency:
 
@@ -223,33 +227,6 @@ Note that Omicron contains a nominally circular dependency:
 We effectively "break" this circular dependency by virtue of the OpenAPI
 documents being checked in.
-==== Updating or Creating New Managed Services
-
-See the documentation in link:./dev-tools/openapi-manager[`dev-tools/openapi-manager`] for more information.
-
-==== Updating Unmanaged Services
-
-In general, changes to unmanaged service APs **require the following set of build steps**:
-
-. Make changes to the service API.
-. Update the OpenAPI document by running the relevant test with overwrite set:
-  `EXPECTORATE=overwrite cargo nextest run -p -- test_nexus_openapi_internal`
-  (changing the package name and test name as necessary). It's important to do
-  this _before_ the next step.
-. This will cause the generated client to be updated which may break the build
-  for dependent consumers.
-. Modify any dependent services to fix calls to the generated client.
-
-Note that if you make changes to both Nexus and Sled Agent simultaneously, you
-may end up in a spot where neither can build and therefore neither OpenAPI
-document can be generated. In this case, revert or comment out changes in one
-so that the OpenAPI document can be generated.
-
-This is a particular problem if you find yourself resolving merge conflicts in the generated files. You have basically two options for this:
-
-* Resolve the merge conflicts by hand. This is usually not too bad in practice.
-* Take the upstream copy of the file, back out your client side changes (`git stash` and its `-p` option can be helpful for this), follow the steps above to regenerate the file using the automated test, and finally re-apply your changes to the client side. This is essentially getting yourself back to step 1 above and then following the procedure above.
-
 === Resolving merge conflicts in Cargo.lock
 
 When pulling in new changes from upstream "main", you may find conflicts in Cargo.lock. The easiest way to deal with these is usually to take the upstream changes as-is, then trigger any Cargo operation that updates the lockfile. `cargo metadata` is a quick one.
 Here's an example:
diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml
new file mode 100644
index 00000000000..270f779d7e8
--- /dev/null
+++ b/clickhouse-admin/Cargo.toml
@@ -0,0 +1,43 @@
+[package]
+name = "omicron-clickhouse-admin"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[dependencies]
+anyhow.workspace = true
+camino.workspace = true
+chrono.workspace = true
+clap.workspace = true
+clickhouse-admin-api.workspace = true
+clickhouse-admin-types.workspace = true
+dropshot.workspace = true
+http.workspace = true
+illumos-utils.workspace = true
+omicron-common.workspace = true
+omicron-uuid-kinds.workspace = true
+schemars.workspace = true
+slog.workspace = true
+slog-async.workspace = true
+slog-dtrace.workspace = true
+slog-error-chain.workspace = true
+serde.workspace = true
+thiserror.workspace = true
+tokio.workspace = true
+tokio-postgres.workspace = true
+toml.workspace = true
+
+omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+expectorate.workspace = true
+nexus-test-utils.workspace = true
+omicron-test-utils.workspace = true
+openapi-lint.workspace = true
+openapiv3.workspace = true
+serde_json.workspace = true
+subprocess.workspace = true
+url.workspace = true
+
+[lints]
+workspace = true
diff --git a/clickhouse-admin/api/Cargo.toml b/clickhouse-admin/api/Cargo.toml
new file mode 100644
index 00000000000..ceec09f6c83
--- /dev/null
+++ b/clickhouse-admin/api/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "clickhouse-admin-api"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+dropshot.workspace = true
+omicron-common.workspace = true
+omicron-uuid-kinds.workspace = true
+omicron-workspace-hack.workspace = true
+schemars.workspace = true
+serde.workspace = true
diff --git a/clickhouse-admin/api/src/lib.rs b/clickhouse-admin/api/src/lib.rs
new file mode 100644
index 00000000000..9a011d4387e
--- /dev/null
+++ b/clickhouse-admin/api/src/lib.rs
@@ -0,0 +1,28 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use dropshot::{HttpError, HttpResponseOk, RequestContext};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::net::SocketAddrV6;
+
+#[dropshot::api_description]
+pub trait ClickhouseAdminApi {
+    type Context;
+
+    /// Retrieve the address the ClickHouse server or keeper node is listening on
+    #[endpoint {
+        method = GET,
+        path = "/node/address",
+    }]
+    async fn clickhouse_address(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<ClickhouseAddress>, HttpError>;
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct ClickhouseAddress {
+    pub clickhouse_address: SocketAddrV6,
+}
diff --git a/clickhouse-admin/src/bin/clickhouse-admin.rs b/clickhouse-admin/src/bin/clickhouse-admin.rs
new file mode 100644
index 00000000000..6f28a82804e
--- /dev/null
+++ b/clickhouse-admin/src/bin/clickhouse-admin.rs
@@ -0,0 +1,68 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Executable program to run the Omicron ClickHouse admin interface
+
+use anyhow::anyhow;
+use camino::Utf8PathBuf;
+use clap::Parser;
+use omicron_clickhouse_admin::{Clickward, Config};
+use omicron_common::cmd::fatal;
+use omicron_common::cmd::CmdError;
+use std::net::{SocketAddr, SocketAddrV6};
+
+#[derive(Debug, Parser)]
+#[clap(
+    name = "clickhouse-admin",
+    about = "Omicron ClickHouse cluster admin server"
+)]
+enum Args {
+    /// Start the ClickHouse admin server
+    Run {
+        // TODO: This address is solely for testing now. We should remove it
+        // once we have more endpoints up and running.
+        /// Socket address for a running clickhouse server or keeper instance
+        #[clap(long, short = 'a', action)]
+        clickhouse_address: SocketAddrV6,
+
+        /// Address on which this server should run
+        #[clap(long, short = 'H', action)]
+        http_address: SocketAddrV6,
+
+        /// Path to the server configuration file
+        #[clap(long, short, action)]
+        config: Utf8PathBuf,
+    },
+}
+
+#[tokio::main]
+async fn main() {
+    if let Err(err) = main_impl().await {
+        fatal(err);
+    }
+}
+
+async fn main_impl() -> Result<(), CmdError> {
+    let args = Args::parse();
+
+    match args {
+        Args::Run { clickhouse_address, http_address, config } => {
+            let mut config = Config::from_file(&config)
+                .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+            config.dropshot.bind_address = SocketAddr::V6(http_address);
+
+            let clickward = Clickward::new(clickhouse_address);
+
+            let server =
+                omicron_clickhouse_admin::start_server(clickward, config)
+                    .await
+                    .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+            server.await.map_err(|err| {
+                CmdError::Failure(anyhow!(
+                    "server failed after starting: {err}"
+                ))
+            })
+        }
+    }
+}
diff --git a/clickhouse-admin/src/clickward.rs b/clickhouse-admin/src/clickward.rs
new file mode 100644
index 00000000000..114201e44bb
--- /dev/null
+++ b/clickhouse-admin/src/clickward.rs
@@ -0,0 +1,51 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use clickhouse_admin_api::ClickhouseAddress;
+use dropshot::HttpError;
+use slog_error_chain::{InlineErrorChain, SlogInlineError};
+use std::io;
+use std::net::SocketAddrV6;
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum ClickwardError {
+    #[error("clickward failure")]
+    Failure {
+        #[source]
+        err: io::Error,
+    },
+}
+
+impl From<ClickwardError> for HttpError {
+    fn from(err: ClickwardError) -> Self {
+        match err {
+            ClickwardError::Failure { .. } => {
+                let message = InlineErrorChain::new(&err).to_string();
+                HttpError {
+                    status_code: http::StatusCode::INTERNAL_SERVER_ERROR,
+                    error_code: Some(String::from("Internal")),
+                    external_message: message.clone(),
+                    internal_message: message,
+                }
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Clickward {
+    clickhouse_address: SocketAddrV6,
+}
+
+impl Clickward {
+    pub fn new(clickhouse_address: SocketAddrV6) -> Self {
+        Self { clickhouse_address }
+    }
+
+    pub fn clickhouse_address(
+        &self,
+    ) -> Result<ClickhouseAddress, ClickwardError> {
+        Ok(ClickhouseAddress { clickhouse_address: self.clickhouse_address })
+    }
+}
diff --git a/clickhouse-admin/src/config.rs b/clickhouse-admin/src/config.rs
new file mode 100644
index 00000000000..77a624835c5
--- /dev/null
+++ b/clickhouse-admin/src/config.rs
@@ -0,0 +1,43 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
+use dropshot::ConfigDropshot;
+use dropshot::ConfigLogging;
+use serde::Deserialize;
+use serde::Serialize;
+use slog_error_chain::SlogInlineError;
+use std::io;
+
+#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
+pub struct Config {
+    pub dropshot: ConfigDropshot,
+    pub log: ConfigLogging,
+}
+impl Config {
+    /// Load a `Config` from the given TOML file
+    pub fn from_file(path: &Utf8Path) -> Result<Self, LoadError> {
+        let contents = std::fs::read_to_string(path)
+            .map_err(|err| LoadError::Read { path: path.to_owned(), err })?;
+        toml::de::from_str(&contents)
+            .map_err(|err| LoadError::Parse { path: path.to_owned(), err })
+    }
+}
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum LoadError {
+    #[error("failed to read {path}")]
+    Read {
+        path: Utf8PathBuf,
+        #[source]
+        err: io::Error,
+    },
+    #[error("failed to parse {path} as TOML")]
+    Parse {
+        path: Utf8PathBuf,
+        #[source]
+        err: toml::de::Error,
+    },
+}
diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs
new file mode 100644
index 00000000000..cab875fe1d7
--- /dev/null
+++ b/clickhouse-admin/src/context.rs
@@ -0,0 +1,21 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::Clickward;
+use slog::Logger;
+
+pub struct ServerContext {
+    clickward: Clickward,
+    _log: Logger,
+}
+
+impl ServerContext {
+    pub fn new(clickward: Clickward, _log: Logger) -> Self {
+        Self { clickward, _log }
+    }
+
+    pub fn clickward(&self) -> &Clickward {
+        &self.clickward
+    }
+}
diff --git a/clickhouse-admin/src/http_entrypoints.rs b/clickhouse-admin/src/http_entrypoints.rs
new file mode 100644
index 00000000000..05988a73b0d
--- /dev/null
+++ b/clickhouse-admin/src/http_entrypoints.rs
@@ -0,0 +1,31 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::context::ServerContext;
+use clickhouse_admin_api::*;
+use dropshot::HttpError;
+use dropshot::HttpResponseOk;
+use dropshot::RequestContext;
+use std::sync::Arc;
+
+type ClickhouseApiDescription = dropshot::ApiDescription<Arc<ServerContext>>;
+
+pub fn api() -> ClickhouseApiDescription {
+    clickhouse_admin_api_mod::api_description::<ClickhouseAdminImpl>()
+        .expect("registered entrypoints")
+}
+
+enum ClickhouseAdminImpl {}
+
+impl ClickhouseAdminApi for ClickhouseAdminImpl {
+    type Context = Arc<ServerContext>;
+
+    async fn clickhouse_address(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<ClickhouseAddress>, HttpError> {
+        let ctx = rqctx.context();
+        let output = ctx.clickward().clickhouse_address()?;
+        Ok(HttpResponseOk(output))
+    }
+}
diff --git a/clickhouse-admin/src/lib.rs b/clickhouse-admin/src/lib.rs
new file mode 100644
index 00000000000..a48588c5442
--- /dev/null
+++ b/clickhouse-admin/src/lib.rs
@@ -0,0 +1,70 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
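+
+//! Library for the Omicron ClickHouse admin server.
+//!
+//! A minimal usage sketch (illustrative only: the socket address and config
+//! path below are made-up values, and the TOML file is assumed to match
+//! [`Config`]):
+//!
+//! ```ignore
+//! use camino::Utf8Path;
+//! use omicron_clickhouse_admin::{Clickward, Config};
+//! use std::net::SocketAddrV6;
+//!
+//! async fn run() -> anyhow::Result<()> {
+//!     // Address of a running ClickHouse server or keeper node (made up).
+//!     let clickhouse_address: SocketAddrV6 = "[::1]:8123".parse()?;
+//!     let clickward = Clickward::new(clickhouse_address);
+//!     // Dropshot and logging configuration, loaded from a TOML file.
+//!     let config = Config::from_file(Utf8Path::new("clickhouse-admin.toml"))?;
+//!     let server =
+//!         omicron_clickhouse_admin::start_server(clickward, config).await?;
+//!     server.await.map_err(|err| anyhow::anyhow!(err))?;
+//!     Ok(())
+//! }
+//! ```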
+
+use context::ServerContext;
+use omicron_common::FileKv;
+use slog::{debug, error, Drain};
+use slog_dtrace::ProbeRegistration;
+use slog_error_chain::SlogInlineError;
+use std::error::Error;
+use std::io;
+use std::sync::Arc;
+
+mod clickward;
+mod config;
+mod context;
+mod http_entrypoints;
+
+pub use clickward::Clickward;
+pub use config::Config;
+
+#[derive(Debug, thiserror::Error, SlogInlineError)]
+pub enum StartError {
+    #[error("failed to initialize logger")]
+    InitializeLogger(#[source] io::Error),
+    #[error("failed to register dtrace probes: {0}")]
+    RegisterDtraceProbes(String),
+    #[error("failed to initialize HTTP server")]
+    InitializeHttpServer(#[source] Box<dyn Error + Send + Sync>),
+}
+
+pub type Server = dropshot::HttpServer<Arc<ServerContext>>;
+
+/// Start the dropshot server
+pub async fn start_server(
+    clickward: Clickward,
+    server_config: Config,
+) -> Result<Server, StartError> {
+    let (drain, registration) = slog_dtrace::with_drain(
+        server_config
+            .log
+            .to_logger("clickhouse-admin")
+            .map_err(StartError::InitializeLogger)?,
+    );
+    let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv));
+    match registration {
+        ProbeRegistration::Success => {
+            debug!(log, "registered DTrace probes");
+        }
+        ProbeRegistration::Failed(err) => {
+            let err = StartError::RegisterDtraceProbes(err);
+            error!(log, "failed to register DTrace probes"; &err);
+            return Err(err);
+        }
+    }
+
+    let context = ServerContext::new(
+        clickward,
+        log.new(slog::o!("component" => "ServerContext")),
+    );
+    let http_server_starter = dropshot::HttpServerStarter::new(
+        &server_config.dropshot,
+        http_entrypoints::api(),
+        Arc::new(context),
+        &log.new(slog::o!("component" => "dropshot")),
+    )
+    .map_err(StartError::InitializeHttpServer)?;
+
+    Ok(http_server_starter.start())
+}
diff --git a/clickhouse-admin/types/Cargo.toml b/clickhouse-admin/types/Cargo.toml
new file mode 100644
index 00000000000..a90a7c2e395
--- /dev/null
+++ b/clickhouse-admin/types/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "clickhouse-admin-types"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow.workspace = true
+camino.workspace = true
+camino-tempfile.workspace = true
+derive_more.workspace = true
+omicron-common.workspace = true
+omicron-workspace-hack.workspace = true
+schemars.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+expectorate.workspace = true
diff --git a/clickhouse-admin/types/src/config.rs b/clickhouse-admin/types/src/config.rs
new file mode 100644
index 00000000000..3337d733a99
--- /dev/null
+++ b/clickhouse-admin/types/src/config.rs
@@ -0,0 +1,515 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
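+
+//! Types describing a ClickHouse replica server or keeper configuration
+//! file, plus helpers that render each configuration to XML.
+//!
+//! A small sketch of how the pieces compose (hosts and paths below are
+//! illustrative values, not defaults):
+//!
+//! ```ignore
+//! use camino::Utf8PathBuf;
+//! use clickhouse_admin_types::config::*;
+//! use clickhouse_admin_types::ServerId;
+//! use std::net::Ipv6Addr;
+//!
+//! let path = Utf8PathBuf::from("/data");
+//! let replica = ReplicaConfig::new(
+//!     LogConfig::new(path.clone(), NodeType::Server),
+//!     Macros::new(ServerId(1)),
+//!     Ipv6Addr::LOCALHOST,
+//!     vec![ServerNodeConfig::new("ff::09".to_string())],
+//!     vec![KeeperNodeConfig::new("ff::01".to_string())],
+//!     path,
+//! );
+//! // Render the ClickHouse XML configuration for this replica.
+//! let xml = replica.to_xml();
+//! ```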
+
+use crate::{KeeperId, ServerId, OXIMETER_CLUSTER};
+use camino::Utf8PathBuf;
+use omicron_common::address::{
+    CLICKHOUSE_HTTP_PORT, CLICKHOUSE_INTERSERVER_PORT,
+    CLICKHOUSE_KEEPER_RAFT_PORT, CLICKHOUSE_KEEPER_TCP_PORT,
+    CLICKHOUSE_TCP_PORT,
+};
+use schemars::{
+    gen::SchemaGenerator,
+    schema::{Schema, SchemaObject},
+    JsonSchema,
+};
+use serde::{Deserialize, Serialize};
+use std::{fmt::Display, net::Ipv6Addr};
+
+// Used so that schemars can be used with camino paths:
+// See https://github.com/camino-rs/camino/issues/91#issuecomment-2027908513
+fn path_schema(gen: &mut SchemaGenerator) -> Schema {
+    let mut schema: SchemaObject = <String>::json_schema(gen).into();
+    schema.format = Some("Utf8PathBuf".to_owned());
+    schema.into()
+}
+
+/// Configuration for a ClickHouse replica server
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct ReplicaConfig {
+    pub logger: LogConfig,
+    pub macros: Macros,
+    pub listen_host: Ipv6Addr,
+    pub http_port: u16,
+    pub tcp_port: u16,
+    pub interserver_http_port: u16,
+    pub remote_servers: RemoteServers,
+    pub keepers: KeeperConfigsForReplica,
+    #[schemars(schema_with = "path_schema")]
+    pub data_path: Utf8PathBuf,
+}
+
+impl ReplicaConfig {
+    /// A new ClickHouse replica server configuration with default ports and directories
+    pub fn new(
+        logger: LogConfig,
+        macros: Macros,
+        listen_host: Ipv6Addr,
+        remote_servers: Vec<ServerNodeConfig>,
+        keepers: Vec<KeeperNodeConfig>,
+        path: Utf8PathBuf,
+    ) -> Self {
+        let data_path = path.join("data");
+        let remote_servers = RemoteServers::new(remote_servers);
+        let keepers = KeeperConfigsForReplica::new(keepers);
+
+        Self {
+            logger,
+            macros,
+            listen_host,
+            http_port: CLICKHOUSE_HTTP_PORT,
+            tcp_port: CLICKHOUSE_TCP_PORT,
+            interserver_http_port: CLICKHOUSE_INTERSERVER_PORT,
+            remote_servers,
+            keepers,
+            data_path,
+        }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let ReplicaConfig {
+            logger,
+            macros,
+            listen_host,
+            http_port,
+            tcp_port,
+            interserver_http_port,
+            remote_servers,
+            keepers,
+            data_path,
+        } = self;
+        let logger = logger.to_xml();
+        let cluster = macros.cluster.clone();
+        let id = macros.replica;
+        let macros = macros.to_xml();
+        let keepers = keepers.to_xml();
+        let remote_servers = remote_servers.to_xml();
+        let user_files_path = data_path.clone().join("user_files");
+        let format_schema_path = data_path.clone().join("format_schemas");
+        format!(
+            "
+
+{logger}
+ {data_path}
+
+
+
+ random
+
+
+
+
+
+
+
+
+ ::/0
+
+ default
+ default
+
+
+
+
+
+
+ 3600
+ 0
+ 0
+ 0
+ 0
+ 0
+
+
+
+
+ {user_files_path}
+ default
+ {format_schema_path}
+ {cluster}_{id}
+ {listen_host}
+ {http_port}
+ {tcp_port}
+ {interserver_http_port}
+ {listen_host}
+
+
+
+
+ 604800
+
+
+ 60
+
+
+ 1000
+
+{macros}
+{remote_servers}
+{keepers}
+
+
+"
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct Macros {
+    pub shard: u64,
+    pub replica: ServerId,
+    pub cluster: String,
+}
+
+impl Macros {
+    /// A new macros configuration block with default cluster
+    pub fn new(replica: ServerId) -> Self {
+        Self { shard: 1, replica, cluster: OXIMETER_CLUSTER.to_string() }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let Macros { shard, replica, cluster } = self;
+        format!(
+            "
+
+ {shard}
+ {replica}
+ {cluster}
+ "
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct RemoteServers {
+    pub cluster: String,
+    pub secret: String,
+    pub replicas: Vec<ServerNodeConfig>,
+}
+
+impl RemoteServers {
+    /// A new remote_servers configuration block with default cluster
+    pub fn new(replicas: Vec<ServerNodeConfig>) -> Self {
+        Self {
+            cluster: OXIMETER_CLUSTER.to_string(),
+            // TODO(https://github.com/oxidecomputer/omicron/issues/3823): secret handling TBD
+            secret: "some-unique-value".to_string(),
+            replicas,
+        }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let RemoteServers { cluster, secret, replicas } = self;
+
+        let mut s = format!(
+            "
+
+ <{cluster}>
+
+ {secret}
+
+ true"
+        );
+
+        for r in replicas {
+            let ServerNodeConfig { host, port } = r;
+            s.push_str(&format!(
+                "
+
+ {host}
+ {port}
+ "
+            ));
+        }
+
+        s.push_str(&format!(
+            "
+
+
+
+ "
+        ));
+
+        s
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperConfigsForReplica {
+    pub nodes: Vec<KeeperNodeConfig>,
+}
+
+impl KeeperConfigsForReplica {
+    pub fn new(nodes: Vec<KeeperNodeConfig>) -> Self {
+        Self { nodes }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let mut s = String::from(" ");
+        for node in &self.nodes {
+            let KeeperNodeConfig { host, port } = node;
+
+            // ClickHouse servers have a small quirk: when the keeper hosts
+            // are set to IPv6 addresses in the replica configuration file,
+            // they must be wrapped in square brackets. Otherwise, running
+            // any query fails with a "Service not found" error.
+            // https://github.com/ClickHouse/ClickHouse/blob/a011990fd75628c63c7995c4f15475f1d4125d10/src/Coordination/KeeperStateManager.cpp#L149
+            let parsed_host = match host.parse::<Ipv6Addr>() {
+                Ok(_) => format!("[{host}]"),
+                Err(_) => host.to_string(),
+            };
+
+            s.push_str(&format!(
+                "
+
+ {parsed_host}
+ {port}
+ ",
+            ));
+        }
+        s.push_str("\n ");
+        s
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperNodeConfig {
+    pub host: String,
+    pub port: u16,
+}
+
+impl KeeperNodeConfig {
+    /// A new ClickHouse keeper node configuration with default port
+    pub fn new(host: String) -> Self {
+        let port = CLICKHOUSE_KEEPER_TCP_PORT;
+        Self { host, port }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct ServerNodeConfig {
+    pub host: String,
+    pub port: u16,
+}
+
+impl ServerNodeConfig {
+    /// A new ClickHouse replica node configuration with default port
+    pub fn new(host: String) -> Self {
+        let port = CLICKHOUSE_TCP_PORT;
+        Self { host, port }
+    }
+}
+
+pub enum NodeType {
+    Server,
+    Keeper,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct LogConfig {
+    pub level: LogLevel,
+    #[schemars(schema_with = "path_schema")]
+    pub log: Utf8PathBuf,
+    #[schemars(schema_with = "path_schema")]
+    pub errorlog: Utf8PathBuf,
+    pub size: u16,
+    pub count: usize,
+}
+
+impl LogConfig {
+    /// A new logger configuration with default directories
+    pub fn new(path: Utf8PathBuf, node_type: NodeType) -> Self {
+        let prefix = match node_type {
+            NodeType::Server => "clickhouse",
+            NodeType::Keeper => "clickhouse-keeper",
+        };
+
+        let logs: Utf8PathBuf = path.join("log");
+        let log = logs.join(format!("{prefix}.log"));
+        let errorlog = logs.join(format!("{prefix}.err.log"));
+
+        Self { level: LogLevel::default(), log, errorlog, size: 100, count: 1 }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let LogConfig { level, log, errorlog, size, count } = &self;
+        format!(
+            "
+
+ {level}
+ {log}
+ {errorlog}
+ {size}M
+ {count}
+
+"
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperCoordinationSettings {
+    pub operation_timeout_ms: u32,
+    pub session_timeout_ms: u32,
+    pub raft_logs_level: LogLevel,
+}
+
+impl KeeperCoordinationSettings {
+    pub fn default() -> Self {
+        Self {
+            operation_timeout_ms: 10000,
+            session_timeout_ms: 30000,
+            raft_logs_level: LogLevel::Trace,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct RaftServers {
+    pub servers: Vec<RaftServerConfig>,
+}
+
+impl RaftServers {
+    pub fn new(servers: Vec<RaftServerConfig>) -> Self {
+        Self { servers }
+    }
+    pub fn to_xml(&self) -> String {
+        let mut s = String::new();
+        for server in &self.servers {
+            let RaftServerConfig { id, hostname, port } = server;
+            s.push_str(&format!(
+                "
+
+ {id}
+ {hostname}
+ {port}
+
+ "
+            ));
+        }
+
+        s
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct RaftServerConfig {
+    pub id: KeeperId,
+    pub hostname: String,
+    pub port: u16,
+}
+
+impl RaftServerConfig {
+    pub fn new(id: KeeperId, hostname: String) -> Self {
+        Self { id, hostname, port: CLICKHOUSE_KEEPER_RAFT_PORT }
+    }
+}
+
+/// Configuration for a ClickHouse keeper
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub struct KeeperConfig {
+    pub logger: LogConfig,
+    pub listen_host: Ipv6Addr,
+    pub tcp_port: u16,
+    pub server_id: KeeperId,
+    #[schemars(schema_with = "path_schema")]
+    pub log_storage_path: Utf8PathBuf,
+    #[schemars(schema_with = "path_schema")]
+    pub snapshot_storage_path: Utf8PathBuf,
+    pub coordination_settings: KeeperCoordinationSettings,
+    pub raft_config: RaftServers,
+}
+
+impl KeeperConfig {
+    /// A new ClickHouse keeper node configuration with default ports and directories
+    pub fn new(
+        logger: LogConfig,
+        listen_host: Ipv6Addr,
+        server_id: KeeperId,
+        datastore_path: Utf8PathBuf,
+        raft_config: RaftServers,
+    ) -> Self {
+        let coordination_path = datastore_path.join("coordination");
+        let log_storage_path = coordination_path.join("log");
+        let snapshot_storage_path = coordination_path.join("snapshots");
+        let coordination_settings = KeeperCoordinationSettings::default();
+        Self {
+            logger,
+            listen_host,
+            tcp_port: CLICKHOUSE_KEEPER_TCP_PORT,
+            server_id,
+            log_storage_path,
+            snapshot_storage_path,
+            coordination_settings,
+            raft_config,
+        }
+    }
+
+    pub fn to_xml(&self) -> String {
+        let KeeperConfig {
+            logger,
+            listen_host,
+            tcp_port,
+            server_id,
+            log_storage_path,
+            snapshot_storage_path,
+            coordination_settings,
+            raft_config,
+        } = self;
+        let logger = logger.to_xml();
+        let KeeperCoordinationSettings {
+            operation_timeout_ms,
+            session_timeout_ms,
+            raft_logs_level,
+        } = coordination_settings;
+        let raft_servers = raft_config.to_xml();
+        format!(
+            "
+
+{logger}
+ {listen_host}
+
+ false
+ {tcp_port}
+ {server_id}
+ {log_storage_path}
+ {snapshot_storage_path}
+
+ {operation_timeout_ms}
+ {session_timeout_ms}
+ {raft_logs_level}
+
+
+{raft_servers}
+
+
+
+
+"
+        )
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
+pub enum LogLevel {
+    Trace,
+    Debug,
+}
+
+impl LogLevel {
+    fn default() -> Self {
+        LogLevel::Trace
+    }
+}
+
+impl Display for LogLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            LogLevel::Trace => "trace",
+            LogLevel::Debug => "debug",
+        };
+        write!(f, "{s}")
+    }
+}
diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs
new file mode 100644
index 00000000000..c9cc076de5e
--- /dev/null
+++ b/clickhouse-admin/types/src/lib.rs
@@ -0,0 +1,242 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
diff --git a/clickhouse-admin/types/src/lib.rs b/clickhouse-admin/types/src/lib.rs
new file mode 100644
index 00000000000..c9cc076de5e
--- /dev/null
+++ b/clickhouse-admin/types/src/lib.rs
@@ -0,0 +1,242 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use anyhow::Result;
+use camino::Utf8PathBuf;
+use camino_tempfile::NamedUtf8TempFile;
+use derive_more::{Add, AddAssign, Display, From};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::fs::rename;
+use std::io::Write;
+use std::net::Ipv6Addr;
+
+pub mod config;
+use config::*;
+
+pub const OXIMETER_CLUSTER: &str = "oximeter_cluster";
+
+/// A unique ID for a ClickHouse Keeper
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    From,
+    Add,
+    AddAssign,
+    Display,
+    JsonSchema,
+    Serialize,
+    Deserialize,
+)]
+pub struct KeeperId(pub u64);
+
+/// A unique ID for a Clickhouse Server
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    From,
+    Add,
+    AddAssign,
+    Display,
+    JsonSchema,
+    Serialize,
+    Deserialize,
+)]
+pub struct ServerId(pub u64);
+
+#[derive(Debug, Clone)]
+pub struct ClickhouseServerConfig {
+    pub config_dir: Utf8PathBuf,
+    pub id: ServerId,
+    pub datastore_path: Utf8PathBuf,
+    pub listen_addr: Ipv6Addr,
+    pub keepers: Vec<KeeperNodeConfig>,
+    pub servers: Vec<ServerNodeConfig>,
+}
+
+impl ClickhouseServerConfig {
+    pub fn new(
+        config_dir: Utf8PathBuf,
+        id: ServerId,
+        datastore_path: Utf8PathBuf,
+        listen_addr: Ipv6Addr,
+        keepers: Vec<KeeperNodeConfig>,
+        servers: Vec<ServerNodeConfig>,
+    ) -> Self {
+        Self { config_dir, id, datastore_path, listen_addr, keepers, servers }
+    }
+
+    /// Generate a configuration file for a replica server node
+    pub fn generate_xml_file(&self) -> Result<()> {
+        let logger =
+            LogConfig::new(self.datastore_path.clone(), NodeType::Server);
+        let macros = Macros::new(self.id);
+
+        let config = ReplicaConfig::new(
+            logger,
+            macros,
+            self.listen_addr,
+            self.servers.clone(),
+            self.keepers.clone(),
+            self.datastore_path.clone(),
+        );
+
+        // Writing to a temporary file and then renaming it will ensure we
+        // don't end up with a partially written file after a crash
+        let mut f = NamedUtf8TempFile::new()?;
+        f.write_all(config.to_xml().as_bytes())?;
+        f.flush()?;
+        rename(f.path(), self.config_dir.join("replica-server-config.xml"))?;
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ClickhouseKeeperConfig {
+    pub config_dir: Utf8PathBuf,
+    pub id: KeeperId,
+    pub raft_servers: Vec<RaftServerConfig>,
+    pub datastore_path: Utf8PathBuf,
+    pub listen_addr: Ipv6Addr,
+}
+
+impl ClickhouseKeeperConfig {
+    pub fn new(
+        config_dir: Utf8PathBuf,
+        id: KeeperId,
+        raft_servers: Vec<RaftServerConfig>,
+        datastore_path: Utf8PathBuf,
+        listen_addr: Ipv6Addr,
+    ) -> Self {
+        ClickhouseKeeperConfig {
+            config_dir,
+            id,
+            raft_servers,
+            datastore_path,
+            listen_addr,
+        }
+    }
+
+    /// Generate a configuration file for a keeper node
+    pub fn generate_xml_file(&self) -> Result<()> {
+        let logger =
+            LogConfig::new(self.datastore_path.clone(), NodeType::Keeper);
+        let raft_config = RaftServers::new(self.raft_servers.clone());
+        let config = KeeperConfig::new(
+            logger,
+            self.listen_addr,
+            self.id,
+            self.datastore_path.clone(),
+            raft_config,
+        );
+
+        // Writing to a temporary file and then renaming it will ensure we
+        // don't end up with a partially written file after a crash
+        let mut f = NamedUtf8TempFile::new()?;
+        f.write_all(config.to_xml().as_bytes())?;
+        f.flush()?;
+        rename(f.path(), self.config_dir.join("keeper-config.xml"))?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{net::Ipv6Addr, str::FromStr};
+
+    use camino::Utf8PathBuf;
+    use camino_tempfile::Builder;
+
+    use crate::{
+        ClickhouseKeeperConfig, ClickhouseServerConfig, KeeperId,
KeeperNodeConfig, RaftServerConfig, ServerId, ServerNodeConfig, + }; + + #[test] + fn test_generate_keeper_config() { + let config_dir = Builder::new() + .tempdir_in( + Utf8PathBuf::try_from(std::env::temp_dir()).unwrap()) + .expect("Could not create directory for ClickHouse configuration generation test" + ); + + let keepers = vec![ + RaftServerConfig::new(KeeperId(1), "ff::01".to_string()), + RaftServerConfig::new(KeeperId(2), "ff::02".to_string()), + RaftServerConfig::new(KeeperId(3), "ff::03".to_string()), + ]; + + let config = ClickhouseKeeperConfig::new( + Utf8PathBuf::from(config_dir.path()), + KeeperId(1), + keepers, + Utf8PathBuf::from_str("./").unwrap(), + Ipv6Addr::from_str("ff::08").unwrap(), + ); + + config.generate_xml_file().unwrap(); + + let expected_file = Utf8PathBuf::from_str("./testutils") + .unwrap() + .join("keeper-config.xml"); + let generated_file = + Utf8PathBuf::from(config_dir.path()).join("keeper-config.xml"); + let generated_content = std::fs::read_to_string(generated_file) + .expect("Failed to read from generated ClickHouse keeper file"); + + expectorate::assert_contents(expected_file, &generated_content); + } + + #[test] + fn test_generate_replica_config() { + let config_dir = Builder::new() + .tempdir_in( + Utf8PathBuf::try_from(std::env::temp_dir()).unwrap()) + .expect("Could not create directory for ClickHouse configuration generation test" + ); + + let keepers = vec![ + KeeperNodeConfig::new("ff::01".to_string()), + KeeperNodeConfig::new("127.0.0.1".to_string()), + KeeperNodeConfig::new("we.dont.want.brackets.com".to_string()), + ]; + + let servers = vec![ + ServerNodeConfig::new("ff::08".to_string()), + ServerNodeConfig::new("ff::09".to_string()), + ]; + + let config = ClickhouseServerConfig::new( + Utf8PathBuf::from(config_dir.path()), + ServerId(1), + Utf8PathBuf::from_str("./").unwrap(), + Ipv6Addr::from_str("ff::08").unwrap(), + keepers, + servers, + ); + + config.generate_xml_file().unwrap(); + + let expected_file = Utf8PathBuf::from_str("./testutils") + .unwrap() + .join("replica-server-config.xml"); + let generated_file = Utf8PathBuf::from(config_dir.path()) + .join("replica-server-config.xml"); + let generated_content = std::fs::read_to_string(generated_file).expect( + "Failed to read from generated ClickHouse replica server file", + ); + + expectorate::assert_contents(expected_file, &generated_content); + } +} diff --git a/clickhouse-admin/types/testutils/keeper-config.xml b/clickhouse-admin/types/testutils/keeper-config.xml new file mode 100644 index 00000000000..e05cf9d954a --- /dev/null +++ b/clickhouse-admin/types/testutils/keeper-config.xml @@ -0,0 +1,47 @@ + + + + + trace + ./log/clickhouse-keeper.log + ./log/clickhouse-keeper.err.log + 100M + 1 + + + ff::8 + + false + 9181 + 1 + ./coordination/log + ./coordination/snapshots + + 10000 + 30000 + trace + + + + + 1 + ff::01 + 9234 + + + + 2 + ff::02 + 9234 + + + + 3 + ff::03 + 9234 + + + + + + diff --git a/clickhouse-admin/types/testutils/replica-server-config.xml b/clickhouse-admin/types/testutils/replica-server-config.xml new file mode 100644 index 00000000000..056fd2cc1cd --- /dev/null +++ b/clickhouse-admin/types/testutils/replica-server-config.xml @@ -0,0 +1,106 @@ + + + + + trace + ./log/clickhouse.log + ./log/clickhouse.err.log + 100M + 1 + + + ./data + + + + random + + + + + + + + + ::/0 + + default + default + + + + + + + 3600 + 0 + 0 + 0 + 0 + 0 + + + + + ./data/user_files + default + ./data/format_schemas + oximeter_cluster_1 + ff::8 + 8123 + 9000 + 9009 + ff::8 + + + + + 604800 + 
+ + 60 + + + 1000 + + + + 1 + 1 + oximeter_cluster + + + + + + some-unique-value + + true + + ff::08 + 9000 + + + ff::09 + 9000 + + + + + + + + [ff::01] + 9181 + + + 127.0.0.1 + 9181 + + + we.dont.want.brackets.com + 9181 + + + + diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 62366c45e1b..97f6373e29c 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -131,14 +131,11 @@ impl From } } -impl From - for types::SledInstanceState +impl From + for types::SledVmmState { - fn from( - s: omicron_common::api::internal::nexus::SledInstanceState, - ) -> Self { + fn from(s: omicron_common::api::internal::nexus::SledVmmState) -> Self { Self { - propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), migration_in: s.migration_in.map(Into::into), migration_out: s.migration_out.map(Into::into), @@ -213,6 +210,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + ProducerKind::ManagementGateway => Self::ManagementGateway, ProducerKind::SledAgent => Self::SledAgent, ProducerKind::Service => Self::Service, ProducerKind::Instance => Self::Instance, @@ -390,6 +388,9 @@ impl From fn from(kind: types::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + types::ProducerKind::ManagementGateway => { + ProducerKind::ManagementGateway + } types::ProducerKind::SledAgent => ProducerKind::SledAgent, types::ProducerKind::Instance => ProducerKind::Instance, types::ProducerKind::Service => ProducerKind::Service, diff --git a/clients/oxide-client/Cargo.toml b/clients/oxide-client/Cargo.toml index f2adcacb1b3..183640946f8 100644 --- a/clients/oxide-client/Cargo.toml +++ b/clients/oxide-client/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true base64.workspace = true chrono.workspace = true futures.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true progenitor.workspace = true @@ -22,6 +23,5 @@ serde.workspace = true serde_json.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "net" ] } -trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/oxide-client/src/lib.rs b/clients/oxide-client/src/lib.rs index 07a190c38ed..249ea18146a 100644 --- a/clients/oxide-client/src/lib.rs +++ b/clients/oxide-client/src/lib.rs @@ -7,13 +7,13 @@ use anyhow::anyhow; use anyhow::Context; use futures::FutureExt; +use hickory_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::TokioAsyncResolver; use std::net::SocketAddr; use std::sync::Arc; use thiserror::Error; -use trust_dns_resolver::config::{ - NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::TokioAsyncResolver; progenitor::generate_api!( spec = "../../openapi/nexus.json", @@ -46,14 +46,15 @@ impl CustomDnsResolver { socket_addr: dns_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = Arc::new( - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("failed to create resolver")?, - ); + let resolver = + Arc::new(TokioAsyncResolver::tokio(resolver_config, resolver_opts)); 
Ok(CustomDnsResolver { dns_addr, resolver }) } diff --git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs index 74fc6968e89..c23e5177a09 100644 --- a/clients/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -26,6 +26,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus; match kind { + nexus::ProducerKind::ManagementGateway => Self::ManagementGateway, nexus::ProducerKind::Service => Self::Service, nexus::ProducerKind::SledAgent => Self::SledAgent, nexus::ProducerKind::Instance => Self::Instance, diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 4ed5aaa1cb5..be19659c69d 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -5,6 +5,7 @@ //! Interface for making API requests to a Sled Agent use async_trait::async_trait; +use omicron_uuid_kinds::PropolisUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -29,6 +30,7 @@ progenitor::generate_api!( BfdPeerConfig = { derives = [Eq, Hash] }, BgpConfig = { derives = [Eq, Hash] }, BgpPeerConfig = { derives = [Eq, Hash] }, + LldpPortConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, OmicronPhysicalDiskConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, PortConfigV2 = { derives = [Eq, Hash] }, RouteConfig = { derives = [Eq, Hash] }, @@ -41,6 +43,7 @@ progenitor::generate_api!( replace = { Baseboard = nexus_sled_agent_shared::inventory::Baseboard, ByteCount = omicron_common::api::external::ByteCount, + DatasetKind = omicron_common::api::internal::shared::DatasetKind, DiskIdentity = omicron_common::disk::DiskIdentity, DiskVariant = omicron_common::disk::DiskVariant, Generation = omicron_common::api::external::Generation, @@ -160,12 +163,11 @@ impl From } } -impl From - for omicron_common::api::internal::nexus::SledInstanceState +impl From + for omicron_common::api::internal::nexus::SledVmmState { - fn from(s: types::SledInstanceState) -> Self { + fn from(s: types::SledVmmState) -> Self { Self { - propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), migration_in: s.migration_in.map(Into::into), migration_out: s.migration_out.map(Into::into), @@ -447,11 +449,11 @@ impl From /// are bonus endpoints, not generated in the real client. 
#[async_trait] pub trait TestInterfaces { - async fn instance_single_step(&self, id: Uuid); - async fn instance_finish_transition(&self, id: Uuid); - async fn instance_simulate_migration_source( + async fn vmm_single_step(&self, id: PropolisUuid); + async fn vmm_finish_transition(&self, id: PropolisUuid); + async fn vmm_simulate_migration_source( &self, - id: Uuid, + id: PropolisUuid, params: SimulateMigrationSource, ); async fn disk_finish_transition(&self, id: Uuid); @@ -459,10 +461,10 @@ pub trait TestInterfaces { #[async_trait] impl TestInterfaces for Client { - async fn instance_single_step(&self, id: Uuid) { + async fn vmm_single_step(&self, id: PropolisUuid) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{}/instances/{}/poke-single-step", baseurl, id); + let url = format!("{}/vmms/{}/poke-single-step", baseurl, id); client .post(url) .send() @@ -470,10 +472,10 @@ impl TestInterfaces for Client { .expect("instance_single_step() failed unexpectedly"); } - async fn instance_finish_transition(&self, id: Uuid) { + async fn vmm_finish_transition(&self, id: PropolisUuid) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{}/instances/{}/poke", baseurl, id); + let url = format!("{}/vmms/{}/poke", baseurl, id); client .post(url) .send() @@ -492,14 +494,14 @@ impl TestInterfaces for Client { .expect("disk_finish_transition() failed unexpectedly"); } - async fn instance_simulate_migration_source( + async fn vmm_simulate_migration_source( &self, - id: Uuid, + id: PropolisUuid, params: SimulateMigrationSource, ) { let baseurl = self.baseurl(); let client = self.client(); - let url = format!("{baseurl}/instances/{id}/sim-migration-source"); + let url = format!("{baseurl}/vmms/{id}/sim-migration-source"); client .post(url) .json(¶ms) diff --git a/common/src/address.rs b/common/src/address.rs index 5ed5689289a..49684f0d995 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -8,6 +8,7 @@ //! and Nexus, who need to agree upon addressing schemes. use crate::api::external::{self, Error}; +use crate::policy::{INTERNAL_DNS_REDUNDANCY, MAX_INTERNAL_DNS_REDUNDANCY}; use ipnetwork::Ipv6Network; use once_cell::sync::Lazy; use oxnet::{Ipv4Net, Ipv6Net}; @@ -25,31 +26,6 @@ pub const MAX_PORT: u16 = u16::MAX; /// minimum possible value for a tcp or udp port pub const MIN_PORT: u16 = u16::MIN; -/// The amount of redundancy for boundary NTP servers. -pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; - -/// The amount of redundancy for Nexus services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new Nexus zones) -pub const NEXUS_REDUNDANCY: usize = 3; - -/// The amount of redundancy for CockroachDb services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new crdb zones) -pub const COCKROACHDB_REDUNDANCY: usize = 5; - -/// The amount of redundancy for internal DNS servers. -/// -/// Must be less than or equal to MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 3; - -/// The maximum amount of redundancy for DNS servers. -/// -/// This determines the number of addresses which are reserved for DNS servers. 
-pub const MAX_DNS_REDUNDANCY: usize = 5; - pub const DNS_PORT: u16 = 53; pub const DNS_HTTP_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; @@ -57,8 +33,12 @@ pub const SLED_AGENT_PORT: u16 = 12345; pub const COCKROACH_PORT: u16 = 32221; pub const COCKROACH_ADMIN_PORT: u16 = 32222; pub const CRUCIBLE_PORT: u16 = 32345; -pub const CLICKHOUSE_PORT: u16 = 8123; -pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181; +pub const CLICKHOUSE_HTTP_PORT: u16 = 8123; +pub const CLICKHOUSE_INTERSERVER_PORT: u16 = 9009; +pub const CLICKHOUSE_TCP_PORT: u16 = 9000; +pub const CLICKHOUSE_KEEPER_TCP_PORT: u16 = 9181; +pub const CLICKHOUSE_KEEPER_RAFT_PORT: u16 = 9234; +pub const CLICKHOUSE_ADMIN_PORT: u16 = 8888; pub const OXIMETER_PORT: u16 = 12223; pub const DENDRITE_PORT: u16 = 12224; pub const LLDP_PORT: u16 = 12230; @@ -195,7 +175,18 @@ pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF; pub const SLED_RESERVED_ADDRESSES: u16 = 32; /// Wraps an [`Ipv6Net`] with a compile-time prefix length. -#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)] +#[derive( + Debug, + Clone, + Copy, + JsonSchema, + Serialize, + Hash, + PartialEq, + Eq, + PartialOrd, + Ord, +)] #[schemars(rename = "Ipv6Subnet")] pub struct Ipv6Subnet { net: Ipv6Net, @@ -249,12 +240,33 @@ impl<'de, const N: u8> Deserialize<'de> for Ipv6Subnet { } /// Represents a subnet which may be used for contacting DNS services. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +#[derive( + Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, +)] pub struct DnsSubnet { subnet: Ipv6Subnet, } impl DnsSubnet { + pub fn new(subnet: Ipv6Subnet) -> Self { + Self { subnet } + } + + /// Makes a new DNS subnet from the high-order bits of an address. + pub fn from_addr(addr: Ipv6Addr) -> Self { + Self::new(Ipv6Subnet::new(addr)) + } + + /// Returns the DNS subnet. + pub fn subnet(&self) -> Ipv6Subnet { + self.subnet + } + + /// Returns the reserved rack subnet that contains this DNS subnet. + pub fn rack_subnet(&self) -> ReservedRackSubnet { + ReservedRackSubnet::from_subnet(self.subnet) + } + /// Returns the DNS server address within the subnet. /// /// This is the first address within the subnet. @@ -273,7 +285,7 @@ impl DnsSubnet { /// A wrapper around an IPv6 network, indicating it is a "reserved" rack /// subnet which can be used for AZ-wide services. -#[derive(Debug, Clone)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct ReservedRackSubnet(pub Ipv6Subnet); impl ReservedRackSubnet { @@ -282,17 +294,23 @@ impl ReservedRackSubnet { ReservedRackSubnet(Ipv6Subnet::::new(subnet.net().addr())) } + /// Infer the reserved rack subnet from a sled/AZ/DNS subnet. + pub fn from_subnet(subnet: Ipv6Subnet) -> Self { + Self::new(Ipv6Subnet::::new(subnet.net().addr())) + } + + /// Returns the `index`th DNS subnet from this reserved rack subnet. + pub fn get_dns_subnet(&self, index: u8) -> DnsSubnet { + DnsSubnet::new(get_64_subnet(self.0, index)) + } + /// Returns the DNS addresses from this reserved rack subnet. /// - /// These addresses will come from the first [`MAX_DNS_REDUNDANCY`] `/64s` of the - /// [`RACK_PREFIX`] subnet. + /// These addresses will come from the first [`MAX_INTERNAL_DNS_REDUNDANCY`] + /// `/64s` of the [`RACK_PREFIX`] subnet. 
pub fn get_dns_subnets(&self) -> Vec { - (0..MAX_DNS_REDUNDANCY) - .map(|idx| { - let subnet = - get_64_subnet(self.0, u8::try_from(idx + 1).unwrap()); - DnsSubnet { subnet } - }) + (0..MAX_INTERNAL_DNS_REDUNDANCY) + .map(|idx| self.get_dns_subnet(u8::try_from(idx + 1).unwrap())) .collect() } } @@ -303,7 +321,7 @@ pub fn get_internal_dns_server_addresses(addr: Ipv6Addr) -> Vec { let az_subnet = Ipv6Subnet::::new(addr); let reserved_rack_subnet = ReservedRackSubnet::new(az_subnet); let dns_subnets = - &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY]; + &reserved_rack_subnet.get_dns_subnets()[0..INTERNAL_DNS_REDUNDANCY]; dns_subnets .iter() .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address())) @@ -684,7 +702,7 @@ mod test { // Observe the first DNS subnet within this reserved rack subnet. let dns_subnets = rack_subnet.get_dns_subnets(); - assert_eq!(MAX_DNS_REDUNDANCY, dns_subnets.len()); + assert_eq!(MAX_INTERNAL_DNS_REDUNDANCY, dns_subnets.len()); // The DNS address and GZ address should be only differing by one. assert_eq!( diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index c7421aa5ee0..58cace30323 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -23,6 +23,7 @@ pub use dropshot::PaginationOrder; pub use error::*; use futures::stream::BoxStream; use oxnet::IpNet; +use oxnet::Ipv4Net; use parse_display::Display; use parse_display::FromStr; use rand::thread_rng; @@ -2228,7 +2229,7 @@ pub struct SwitchPortSettingsView { pub links: Vec, /// Link-layer discovery protocol (LLDP) settings. - pub link_lldp: Vec, + pub link_lldp: Vec, /// Layer 3 interface settings. pub interfaces: Vec, @@ -2370,7 +2371,7 @@ pub struct SwitchPortLinkConfig { /// The link-layer discovery protocol service configuration id for this /// link. - pub lldp_service_config_id: Uuid, + pub lldp_link_config_id: Option, /// The name of this link. pub link_name: String, @@ -2390,34 +2391,30 @@ pub struct SwitchPortLinkConfig { /// A link layer discovery protocol (LLDP) service configuration. #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpServiceConfig { +pub struct LldpLinkConfig { /// The id of this LLDP service instance. pub id: Uuid, - /// The link-layer discovery protocol configuration for this service. - pub lldp_config_id: Option, - /// Whether or not the LLDP service is enabled. pub enabled: bool, -} -/// A link layer discovery protocol (LLDP) base configuration. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpConfig { - #[serde(flatten)] - pub identity: IdentityMetadata, + /// The LLDP link name TLV. + pub link_name: Option, + + /// The LLDP link description TLV. + pub link_description: Option, /// The LLDP chassis identifier TLV. - pub chassis_id: String, + pub chassis_id: Option, - /// THE LLDP system name TLV. - pub system_name: String, + /// The LLDP system name TLV. + pub system_name: Option, - /// THE LLDP system description TLV. - pub system_description: String, + /// The LLDP system description TLV. + pub system_description: Option, - /// THE LLDP management IP TLV. - pub management_ip: oxnet::IpNet, + /// The LLDP management IP TLV. + pub management_ip: Option, } /// Describes the kind of an switch interface. @@ -2492,6 +2489,9 @@ pub struct SwitchPortRouteConfig { /// The VLAN identifier for the route. Use this if the gateway is reachable /// over an 802.1Q tagged L2 segment. 
pub vlan_id: Option, + + /// Local preference indicating priority within and across protocols. + pub local_pref: Option, } /* @@ -2705,6 +2705,15 @@ pub struct BgpPeerStatus { pub switch: SwitchLocation, } +/// The current status of a BGP peer. +#[derive( + Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq, Default, +)] +pub struct BgpExported { + /// Exported routes indexed by peer address. + pub exports: HashMap>, +} + /// Opaque object representing BGP message history for a given BGP peer. The /// contents of this object are not yet stable. #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 7f4eb358a4a..191716d0179 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -113,13 +113,9 @@ pub struct VmmRuntimeState { pub time_updated: DateTime, } -/// A wrapper type containing a sled's total knowledge of the state of a -/// specific VMM and the instance it incarnates. +/// A wrapper type containing a sled's total knowledge of the state of a VMM. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SledInstanceState { - /// The ID of the VMM whose state is being reported. - pub propolis_id: PropolisUuid, - +pub struct SledVmmState { /// The most recent state of the sled's VMM process. pub vmm_state: VmmRuntimeState, @@ -142,7 +138,7 @@ impl Migrations<'_> { } } -impl SledInstanceState { +impl SledVmmState { pub fn migrations(&self) -> Migrations<'_> { Migrations { migration_in: self.migration_in.as_ref(), @@ -223,6 +219,8 @@ pub enum ProducerKind { Service, /// The producer is a Propolis VMM managing a guest instance. Instance, + /// The producer is a management gateway service. + ManagementGateway, } /// Information announced by a metric server, used so that clients can contact it and collect @@ -277,31 +275,15 @@ pub struct UpdateArtifactId { // Adding a new KnownArtifactKind // =============================== // -// Adding a new update artifact kind is a tricky process. To do so: +// To add a new kind of update artifact: // // 1. Add it here. +// 2. Regenerate OpenAPI documents with `cargo xtask openapi generate` -- this +// should work without any compile errors. +// 3. Run `cargo check --all-targets` to resolve compile errors. // -// 2. Add the new kind to /clients/src/lib.rs. -// The mapping from `UpdateArtifactKind::*` to `types::UpdateArtifactKind::*` -// must be left as a `todo!()` for now; `types::UpdateArtifactKind` will not -// be updated with the new variant until step 5 below. -// -// 4. Add the new kind and the mapping to its `update_artifact_kind` to -// /nexus/db-model/src/update_artifact.rs -// -// 5. Regenerate the OpenAPI specs for nexus and sled-agent: -// -// ``` -// EXPECTORATE=overwrite cargo nextest run -p omicron-nexus -p omicron-sled-agent openapi -// ``` -// -// 6. Return to /{nexus-client,sled-agent-client}/lib.rs from step 2 -// and replace the `todo!()`s with the new `types::UpdateArtifactKind::*` -// variant. -// -// See https://github.com/oxidecomputer/omicron/pull/2300 as an example. -// -// NOTE: KnownArtifactKind has to be in snake_case due to openapi-lint requirements. +// NOTE: KnownArtifactKind has to be in snake_case due to openapi-lint +// requirements. /// Kinds of update artifacts, as used by Nexus to determine what updates are available and by /// sled-agent to determine how to apply an update when asked. 
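A reduced sketch of the borrow pattern behind `SledVmmState::migrations()` in the hunk above, with a stub standing in for the real `MigrationRuntimeState`: the wrapper hands out references to the two optional migration fields so callers can inspect both sides of a migration without cloning or consuming the state.

```rust
/// Stub; the real type carries migration IDs and runtime state.
struct MigrationRuntimeState;

struct SledVmmState {
    migration_in: Option<MigrationRuntimeState>,
    migration_out: Option<MigrationRuntimeState>,
}

struct Migrations<'a> {
    migration_in: Option<&'a MigrationRuntimeState>,
    migration_out: Option<&'a MigrationRuntimeState>,
}

impl SledVmmState {
    /// Borrow both optional migration records at once.
    fn migrations(&self) -> Migrations<'_> {
        Migrations {
            migration_in: self.migration_in.as_ref(),
            migration_out: self.migration_out.as_ref(),
        }
    }
}

fn main() {
    let state = SledVmmState {
        migration_in: Some(MigrationRuntimeState),
        migration_out: None,
    };
    let m = state.migrations();
    // The state is only borrowed, so it remains usable afterwards.
    assert!(m.migration_in.is_some() && m.migration_out.is_none());
}
```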
diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 089ff9b324d..4826292863a 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -10,13 +10,14 @@ use crate::{ }; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use std::{ collections::{HashMap, HashSet}, fmt, net::{IpAddr, Ipv4Addr, Ipv6Addr}, str::FromStr, }; +use strum::EnumCount; use uuid::Uuid; use super::nexus::HostIdentifier; @@ -305,6 +306,9 @@ pub struct RouteConfig { /// The VLAN id associated with this route. #[serde(default)] pub vlan_id: Option, + /// The local preference associated with this route. + #[serde(default)] + pub local_pref: Option, } #[derive( @@ -376,6 +380,84 @@ impl FromStr for UplinkAddressConfig { } } +#[derive( + Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +/// To what extent should this port participate in LLDP +pub enum LldpAdminStatus { + #[default] + Enabled, + Disabled, + RxOnly, + TxOnly, +} + +impl fmt::Display for LldpAdminStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LldpAdminStatus::Enabled => write!(f, "enabled"), + LldpAdminStatus::Disabled => write!(f, "disabled"), + LldpAdminStatus::RxOnly => write!(f, "rx_only"), + LldpAdminStatus::TxOnly => write!(f, "tx_only"), + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct ParseLldpAdminStatusError(String); + +impl std::fmt::Display for ParseLldpAdminStatusError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LLDP admin status error: {}", self.0) + } +} + +impl FromStr for LldpAdminStatus { + type Err = ParseLldpAdminStatusError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "enabled" => Ok(Self::Enabled), + "disabled" => Ok(Self::Disabled), + "rxonly" | "rx_only" => Ok(Self::RxOnly), + "txonly" | "tx_only" => Ok(Self::TxOnly), + _ => Err(ParseLldpAdminStatusError(format!( + "not a valid admin status: {s}" + ))), + } + } +} + +/// Per-port LLDP configuration settings. Only the "status" setting is +/// mandatory. All other fields have natural defaults or may be inherited from +/// the switch. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] +pub struct LldpPortConfig { + /// To what extent should this port participate in LLDP + pub status: LldpAdminStatus, + /// Chassis ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be + /// inherited from the switch-level settings. + pub chassis_id: Option, + /// Port ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be set to + /// the port name. e.g., qsfp0/0. + pub port_id: Option, + /// Port description to advertise. If this is not set, no + /// description will be advertised. + pub port_description: Option, + /// System name to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_name: Option, + /// System description to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_description: Option, + /// Management IP addresses to advertise. If this is not set, it will be + /// inherited from the switch-level settings. 
+ pub management_addrs: Option>, +} + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] pub struct PortConfigV2 { /// The set of routes associated with this port. @@ -395,6 +477,8 @@ pub struct PortConfigV2 { /// Whether or not to set autonegotiation #[serde(default)] pub autoneg: bool, + /// LLDP configuration for this port + pub lldp: Option, } /// A set of switch uplinks. @@ -411,11 +495,13 @@ pub struct HostPortConfig { /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport /// (must be in infra_ip pool). May also include an optional VLAN ID. pub addrs: Vec, + + pub lldp: Option, } impl From for HostPortConfig { fn from(x: PortConfigV2) -> Self { - Self { port: x.port, addrs: x.addresses } + Self { port: x.port, addrs: x.addresses, lldp: x.lldp.clone() } } } @@ -752,13 +838,11 @@ pub struct ResolvedVpcRouteSet { } /// Describes the purpose of the dataset. -#[derive( - Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq, -)] -#[serde(rename_all = "snake_case")] +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount)] pub enum DatasetKind { - Crucible, + // Durable datasets for zones Cockroach, + Crucible, /// Used for single-node clickhouse deployments Clickhouse, /// Used for replicated clickhouse deployments @@ -767,24 +851,153 @@ pub enum DatasetKind { ClickhouseServer, ExternalDns, InternalDns, + + // Zone filesystems + ZoneRoot, + Zone { + name: String, + }, + + // Other datasets + Debug, +} + +impl Serialize for DatasetKind { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +impl<'de> Deserialize<'de> for DatasetKind { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + s.parse().map_err(de::Error::custom) + } +} + +impl JsonSchema for DatasetKind { + fn schema_name() -> String { + "DatasetKind".to_string() + } + + fn json_schema( + gen: &mut schemars::gen::SchemaGenerator, + ) -> schemars::schema::Schema { + // The schema is a bit more complicated than this -- it's either one of + // the fixed values or a string starting with "zone/" -- but this is + // good enough for now. + let mut schema = ::json_schema(gen).into_object(); + schema.metadata().description = Some( + "The kind of dataset. See the `DatasetKind` enum \ + in omicron-common for possible values." + .to_owned(), + ); + schema.into() + } +} + +impl DatasetKind { + pub fn dataset_should_be_encrypted(&self) -> bool { + match self { + // We encrypt all datasets except Crucible. + // + // Crucible already performs encryption internally, and we + // avoid double-encryption. + DatasetKind::Crucible => false, + _ => true, + } + } + + /// Returns true if this dataset is delegated to a non-global zone. + pub fn zoned(&self) -> bool { + use DatasetKind::*; + match self { + Cockroach | Crucible | Clickhouse | ClickhouseKeeper + | ClickhouseServer | ExternalDns | InternalDns => true, + ZoneRoot | Zone { .. } | Debug => false, + } + } + + /// Returns the zone name, if this is a dataset for a zone filesystem. + /// + /// Otherwise, returns "None". 
+ pub fn zone_name(&self) -> Option<&str> { + if let DatasetKind::Zone { name } = self { + Some(name) + } else { + None + } + } } +// Be cautious updating this implementation: +// +// - It should align with [DatasetKind::FromStr], below +// - The strings here are used here comprise the dataset name, stored durably +// on-disk impl fmt::Display for DatasetKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use DatasetKind::*; let s = match self { Crucible => "crucible", - Cockroach => "cockroach", + Cockroach => "cockroachdb", Clickhouse => "clickhouse", ClickhouseKeeper => "clickhouse_keeper", ClickhouseServer => "clickhouse_server", ExternalDns => "external_dns", InternalDns => "internal_dns", + ZoneRoot => "zone", + Zone { name } => { + write!(f, "zone/{}", name)?; + return Ok(()); + } + Debug => "debug", }; write!(f, "{}", s) } } +#[derive(Debug, thiserror::Error)] +pub enum DatasetKindParseError { + #[error("Dataset unknown: {0}")] + UnknownDataset(String), +} + +impl FromStr for DatasetKind { + type Err = DatasetKindParseError; + + fn from_str(s: &str) -> Result { + use DatasetKind::*; + let kind = match s { + "cockroachdb" => Cockroach, + "crucible" => Crucible, + "clickhouse" => Clickhouse, + "clickhouse_keeper" => ClickhouseKeeper, + "clickhouse_server" => ClickhouseServer, + "external_dns" => ExternalDns, + "internal_dns" => InternalDns, + "zone" => ZoneRoot, + "debug" => Debug, + other => { + if let Some(name) = other.strip_prefix("zone/") { + Zone { name: name.to_string() } + } else { + return Err(DatasetKindParseError::UnknownDataset( + s.to_string(), + )); + } + } + }; + Ok(kind) + } +} + /// Identifiers for a single sled. /// /// This is intended primarily to be used in timeseries, to identify @@ -807,6 +1020,7 @@ pub struct SledIdentifiers { #[cfg(test)] mod tests { + use super::*; use crate::api::internal::shared::AllowedSourceIps; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use std::net::{Ipv4Addr, Ipv6Addr}; @@ -851,4 +1065,49 @@ mod tests { serde_json::from_str(r#"{"allow":"any"}"#).unwrap(), ); } + + #[test] + fn test_dataset_kind_serialization() { + let kinds = [ + DatasetKind::Cockroach, + DatasetKind::Crucible, + DatasetKind::Clickhouse, + DatasetKind::ClickhouseKeeper, + DatasetKind::ClickhouseServer, + DatasetKind::ExternalDns, + DatasetKind::InternalDns, + DatasetKind::ZoneRoot, + DatasetKind::Zone { name: String::from("myzone") }, + DatasetKind::Debug, + ]; + + assert_eq!(kinds.len(), DatasetKind::COUNT); + + for kind in &kinds { + // To string, from string + let as_str = kind.to_string(); + let from_str = + DatasetKind::from_str(&as_str).unwrap_or_else(|_| { + panic!("Failed to convert {kind} to and from string") + }); + assert_eq!( + *kind, from_str, + "{kind} failed to convert to/from a string" + ); + + // Serialize, deserialize + let ser = serde_json::to_string(&kind) + .unwrap_or_else(|_| panic!("Failed to serialize {kind}")); + let de: DatasetKind = serde_json::from_str(&ser) + .unwrap_or_else(|_| panic!("Failed to deserialize {kind}")); + assert_eq!(*kind, de, "{kind} failed serialization"); + + // Test that serialization is equivalent to stringifying. + assert_eq!( + format!("\"{as_str}\""), + ser, + "{kind} does not match stringification/serialization" + ); + } + } } diff --git a/common/src/disk.rs b/common/src/disk.rs index d8b4c2e0a18..ed0bf8666ee 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -4,18 +4,23 @@ //! 
Disk related types shared among crates -use std::fmt; - use anyhow::bail; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::fmt; use uuid::Uuid; use crate::{ - api::external::Generation, ledger::Ledgerable, zpool_name::ZpoolKind, + api::external::Generation, + ledger::Ledgerable, + zpool_name::{ZpoolKind, ZpoolName}, }; +pub use crate::api::internal::shared::DatasetKind; + #[derive( Clone, Debug, @@ -72,6 +77,243 @@ impl OmicronPhysicalDisksConfig { } } +#[derive( + Debug, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + Clone, + JsonSchema, + PartialOrd, + Ord, +)] +pub struct DatasetName { + // A unique identifier for the Zpool on which the dataset is stored. + pool_name: ZpoolName, + // A name for the dataset within the Zpool. + kind: DatasetKind, +} + +impl DatasetName { + pub fn new(pool_name: ZpoolName, kind: DatasetKind) -> Self { + Self { pool_name, kind } + } + + pub fn pool(&self) -> &ZpoolName { + &self.pool_name + } + + pub fn dataset(&self) -> &DatasetKind { + &self.kind + } + + /// Returns the full name of the dataset, as would be returned from + /// "zfs get" or "zfs list". + /// + /// If this dataset should be encrypted, this automatically adds the + /// "crypt" dataset component. + pub fn full_name(&self) -> String { + // Currently, we encrypt all datasets except Crucible. + // + // Crucible already performs encryption internally, and we + // avoid double-encryption. + if self.kind.dataset_should_be_encrypted() { + self.full_encrypted_name() + } else { + self.full_unencrypted_name() + } + } + + fn full_encrypted_name(&self) -> String { + format!("{}/crypt/{}", self.pool_name, self.kind) + } + + fn full_unencrypted_name(&self) -> String { + format!("{}/{}", self.pool_name, self.kind) + } +} + +#[derive( + Copy, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, +)] +pub struct GzipLevel(u8); + +// Fastest compression level +const GZIP_LEVEL_MIN: u8 = 1; + +// Best compression ratio +const GZIP_LEVEL_MAX: u8 = 9; + +impl GzipLevel { + pub const fn new() -> Self { + assert!(N >= GZIP_LEVEL_MIN, "Compression level too small"); + assert!(N <= GZIP_LEVEL_MAX, "Compression level too large"); + Self(N) + } +} + +#[derive( + Copy, + Clone, + Debug, + Default, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, +)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CompressionAlgorithm { + // Selects a default compression algorithm. This is dependent on both the + // zpool and OS version. + On, + + // Disables compression. + #[default] + Off, + + // Selects the default Gzip compression level. + // + // According to the ZFS docs, this is "gzip-6", but that's a default value, + // which may change with OS updates. 
+ Gzip, + + GzipN { + level: GzipLevel, + }, + Lz4, + Lzjb, + Zle, +} + +impl fmt::Display for CompressionAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use CompressionAlgorithm::*; + let s = match self { + On => "on", + Off => "off", + Gzip => "gzip", + GzipN { level } => { + return write!(f, "gzip-{}", level.0); + } + Lz4 => "lz4", + Lzjb => "lzjb", + Zle => "zle", + }; + write!(f, "{}", s) + } +} + +/// Configuration information necessary to request a single dataset +#[derive( + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, +)] +pub struct DatasetConfig { + /// The UUID of the dataset being requested + pub id: DatasetUuid, + + /// The dataset's name + pub name: DatasetName, + + /// The compression mode to be used by the dataset + pub compression: CompressionAlgorithm, + + /// The upper bound on the amount of storage used by this dataset + pub quota: Option, + + /// The lower bound on the amount of storage usable by this dataset + pub reservation: Option, +} + +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct DatasetsConfig { + /// generation number of this configuration + /// + /// This generation number is owned by the control plane (i.e., RSS or + /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It + /// should not be bumped within Sled Agent. + /// + /// Sled Agent rejects attempts to set the configuration to a generation + /// older than the one it's currently running. + /// + /// Note that "Generation::new()", AKA, the first generation number, + /// is reserved for "no datasets". This is the default configuration + /// for a sled before any requests have been made. + pub generation: Generation, + + pub datasets: BTreeMap, +} + +impl Default for DatasetsConfig { + fn default() -> Self { + Self { generation: Generation::new(), datasets: BTreeMap::new() } + } +} + +impl Ledgerable for DatasetsConfig { + fn is_newer_than(&self, other: &Self) -> bool { + self.generation > other.generation + } + + // No need to do this, the generation number is provided externally. + fn generation_bump(&mut self) {} +} + +/// Identifies how a single dataset management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DatasetManagementStatus { + pub dataset_name: DatasetName, + pub err: Option, +} + +/// The result from attempting to manage datasets. +#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] +pub struct DatasetsManagementResult { + pub status: Vec, +} + +impl DatasetsManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; + } + } + false + } +} + /// Uniquely identifies a disk. #[derive( Debug, diff --git a/common/src/lib.rs b/common/src/lib.rs index e4f53cbfab6..b9d6dd31724 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -26,6 +26,7 @@ pub mod backoff; pub mod cmd; pub mod disk; pub mod ledger; +pub mod policy; pub mod progenitor_operation_retry; pub mod update; pub mod vlan; @@ -117,3 +118,27 @@ where async fn never_bail() -> Result { Ok(false) } + +/// A wrapper struct that does nothing other than elide the inner value from +/// [`std::fmt::Debug`] output. 
+/// +/// We define this within Omicron instead of using one of the many available +/// crates that do the same thing because it's trivial to do so, and we want the +/// flexibility to add traits to this type without needing to wait on upstream +/// to add an optional dependency. +/// +/// If you want to use this for secrets, consider that it might not do +/// everything you expect (it does not zeroize memory on drop, nor get in the +/// way of you removing the inner value from this wrapper struct). +#[derive( + Clone, Copy, serde::Deserialize, serde::Serialize, schemars::JsonSchema, +)] +#[repr(transparent)] +#[serde(transparent)] +pub struct NoDebug(pub T); + +impl std::fmt::Debug for NoDebug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "..") + } +} diff --git a/common/src/policy.rs b/common/src/policy.rs new file mode 100644 index 00000000000..e615981a21c --- /dev/null +++ b/common/src/policy.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fleet policy related functionality used by both Reconfigurator and RSS. + +/// The amount of redundancy for boundary NTP servers. +pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; + +/// The amount of redundancy for Nexus services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new Nexus zones) +pub const NEXUS_REDUNDANCY: usize = 3; + +/// The amount of redundancy for CockroachDb services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new crdb zones) +pub const COCKROACHDB_REDUNDANCY: usize = 5; + +/// The amount of redundancy for internal DNS servers. +/// +/// Must be less than or equal to MAX_INTERNAL_DNS_REDUNDANCY. +pub const INTERNAL_DNS_REDUNDANCY: usize = 3; + +/// The maximum amount of redundancy for internal DNS servers. +/// +/// This determines the number of addresses which are reserved for internal DNS servers. 
+pub const MAX_INTERNAL_DNS_REDUNDANCY: usize = 5; + +/// The amount of redundancy for clickhouse servers +/// +/// Clickhouse servers contain lazily replicated data +pub const CLICKHOUSE_SERVER_REDUNDANCY: usize = 3; + +/// The amount of redundancy for clickhouse keepers +/// +/// Keepers maintain strongly consistent metadata about data replication +pub const CLICKHOUSE_KEEPER_REDUNDANCY: usize = 5; diff --git a/dev-tools/downloader/src/lib.rs b/dev-tools/downloader/src/lib.rs index d5b436244cc..c3d6e165ff6 100644 --- a/dev-tools/downloader/src/lib.rs +++ b/dev-tools/downloader/src/lib.rs @@ -586,7 +586,10 @@ impl<'a> Downloader<'a> { let version = version.trim(); let (url_base, suffix) = match os { - Os::Illumos => ("https://illumos.org/downloads", "tar.gz"), + Os::Illumos => ( + "https://oxide-cockroachdb-build.s3.us-west-2.amazonaws.com", + "tar.gz", + ), Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"), }; let build = match os { diff --git a/dev-tools/mgs-dev/Cargo.toml b/dev-tools/mgs-dev/Cargo.toml index d5f61f4b96d..70382c04695 100644 --- a/dev-tools/mgs-dev/Cargo.toml +++ b/dev-tools/mgs-dev/Cargo.toml @@ -14,6 +14,7 @@ futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true libc.workspace = true +omicron-gateway.workspace = true omicron-workspace-hack.workspace = true signal-hook-tokio.workspace = true tokio.workspace = true diff --git a/dev-tools/mgs-dev/src/main.rs b/dev-tools/mgs-dev/src/main.rs index 85b1313d683..77947999d97 100644 --- a/dev-tools/mgs-dev/src/main.rs +++ b/dev-tools/mgs-dev/src/main.rs @@ -8,6 +8,7 @@ use clap::{Args, Parser, Subcommand}; use futures::StreamExt; use libc::SIGINT; use signal_hook_tokio::Signals; +use std::net::SocketAddr; #[tokio::main] async fn main() -> anyhow::Result<()> { @@ -36,7 +37,12 @@ enum MgsDevCmd { } #[derive(Clone, Debug, Args)] -struct MgsRunArgs {} +struct MgsRunArgs { + /// Override the address of the Nexus instance to use when registering the + /// Oximeter producer. + #[clap(long)] + nexus_address: Option, +} impl MgsRunArgs { async fn exec(&self) -> Result<(), anyhow::Error> { @@ -46,9 +52,23 @@ impl MgsRunArgs { let mut signal_stream = signals.fuse(); println!("mgs-dev: setting up MGS ... 
"); - let gwtestctx = gateway_test_utils::setup::test_setup( + let (mut mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + if let Some(addr) = self.nexus_address { + mgs_config.metrics = + Some(gateway_test_utils::setup::MetricsConfig { + disabled: false, + dev_nexus_address: Some(addr), + dev_bind_loopback: true, + }); + } + + let gwtestctx = gateway_test_utils::setup::test_setup_with_config( "mgs-dev", gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + None, ) .await; println!("mgs-dev: MGS is running."); diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 0990fdb11c9..4cc484b9a93 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -28,6 +28,7 @@ gateway-messages.workspace = true gateway-test-utils.workspace = true humantime.workspace = true internal-dns.workspace = true +itertools.workspace = true nexus-client.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true @@ -61,13 +62,14 @@ multimap.workspace = true indicatif.workspace = true [dev-dependencies] +camino-tempfile.workspace = true expectorate.workspace = true +http.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true subprocess.workspace = true -camino-tempfile.workspace = true # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 7e53697a6da..72bc883543b 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -247,7 +247,8 @@ impl DbUrlOptions { eprintln!("note: using database URL {}", &db_url); let db_config = db::Config { url: db_url.clone() }; - let pool = Arc::new(db::Pool::new(&log.clone(), &db_config)); + let pool = + Arc::new(db::Pool::new_single_host(&log.clone(), &db_config)); // Being a dev tool, we want to try this operation even if the schema // doesn't match what we expect. 
So we use `DataStore::new_unchecked()` @@ -4225,7 +4226,7 @@ async fn cmd_db_inventory( } async fn cmd_db_inventory_baseboard_ids( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4262,7 +4263,7 @@ async fn cmd_db_inventory_baseboard_ids( } async fn cmd_db_inventory_cabooses( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4303,7 +4304,7 @@ async fn cmd_db_inventory_cabooses( } async fn cmd_db_inventory_physical_disks( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, args: InvPhysicalDisksArgs, ) -> Result<(), anyhow::Error> { @@ -4386,7 +4387,7 @@ async fn cmd_db_inventory_physical_disks( } async fn cmd_db_inventory_rot_pages( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] @@ -4421,7 +4422,7 @@ async fn cmd_db_inventory_rot_pages( } async fn cmd_db_inventory_collections_list( - conn: &DataStoreConnection<'_>, + conn: &DataStoreConnection, limit: NonZeroU32, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 9aae6b2205d..6d7152d9f7f 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -19,6 +19,7 @@ use clap::Subcommand; use clap::ValueEnum; use futures::future::try_join; use futures::TryStreamExt; +use itertools::Itertools; use nexus_client::types::ActivationReason; use nexus_client::types::BackgroundTask; use nexus_client::types::BackgroundTasksActivateRequest; @@ -33,6 +34,10 @@ use nexus_saga_recovery::LastPass; use nexus_types::deployment::Blueprint; use nexus_types::internal_api::background::LookupRegionPortStatus; use nexus_types::internal_api::background::RegionReplacementDriverStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DemoSagaUuid; @@ -45,8 +50,10 @@ use reedline::Reedline; use serde::Deserialize; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::str::FromStr; use tabled::Tabled; +use tokio::sync::OnceCell; use uuid::Uuid; /// Arguments to the "omdb nexus" subcommand @@ -92,11 +99,21 @@ enum BackgroundTasksCommands { /// Print a summary of the status of all background tasks List, /// Print human-readable summary of the status of each background task - Show, + Show(BackgroundTasksShowArgs), /// Activate one or more background tasks Activate(BackgroundTasksActivateArgs), } +#[derive(Debug, Args)] +struct BackgroundTasksShowArgs { + /// Names of background tasks to show (default: all) + /// + /// You can use any background task name here or one of the special strings + /// "all", "dns_external", or "dns_internal". 
+ #[clap(value_name = "TASK_NAME")] + tasks: Vec, +} + #[derive(Debug, Args)] struct BackgroundTasksActivateArgs { /// Name of the background tasks to activate @@ -230,6 +247,10 @@ struct BlueprintTargetSetArgs { blueprint_id: Uuid, /// whether this blueprint should be enabled enabled: BlueprintTargetSetEnabled, + /// if specified, diff against the current target and wait for confirmation + /// before proceeding + #[clap(long)] + diff: bool, } #[derive(Debug, Clone, Copy, ValueEnum)] @@ -360,8 +381,8 @@ impl NexusArgs { command: BackgroundTasksCommands::List, }) => cmd_nexus_background_tasks_list(&client).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { - command: BackgroundTasksCommands::Show, - }) => cmd_nexus_background_tasks_show(&client).await, + command: BackgroundTasksCommands::Show(args), + }) => cmd_nexus_background_tasks_show(&client, args).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { command: BackgroundTasksCommands::Activate(args), }) => { @@ -522,7 +543,9 @@ async fn cmd_nexus_background_tasks_list( ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; - let tasks = response.into_inner(); + // Convert the HashMap to a BTreeMap because we want the keys in sorted + // order. + let tasks = response.into_inner().into_iter().collect::>(); let table_rows = tasks.values().map(BackgroundTaskStatusRow::from); let table = tabled::Table::new(table_rows) .with(tabled::settings::Style::empty()) @@ -535,6 +558,7 @@ async fn cmd_nexus_background_tasks_list( /// Runs `omdb nexus background-tasks show` async fn cmd_nexus_background_tasks_show( client: &nexus_client::Client, + args: &BackgroundTasksShowArgs, ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; @@ -543,8 +567,50 @@ async fn cmd_nexus_background_tasks_show( let mut tasks = response.into_inner().into_iter().collect::>(); - // We want to pick the order that we print some tasks intentionally. Then - // we want to print anything else that we find. + // Now, pick out the tasks that the user selected. + // + // The set of user tasks may include: + // + // - nothing at all, in which case we include all tasks + // - individual task names + // - certain groups that we recognize, like "dns_external" for all the tasks + // related to external DNS propagation. "all" means "all tasks". + let selected_set: BTreeSet<_> = + args.tasks.iter().map(AsRef::as_ref).collect(); + let selected_all = selected_set.is_empty() || selected_set.contains("all"); + if !selected_all { + for s in &selected_set { + if !tasks.contains_key(*s) + && *s != "all" + && *s != "dns_external" + && *s != "dns_internal" + { + bail!( + "unknown task name: {:?} (known task names: all, \ + dns_external, dns_internal, {})", + s, + tasks.keys().join(", ") + ); + } + } + + tasks.retain(|k, _| { + selected_set.contains(k.as_str()) + || selected_set.contains("all") + || (selected_set.contains("dns_external") + && k.starts_with("dns_") + && k.ends_with("_external")) + || (selected_set.contains("dns_internal") + && k.starts_with("dns_") + && k.ends_with("_internal")) + }); + } + + // Some tasks should be grouped and printed together in a certain order, + // even though their names aren't alphabetical. Notably, the DNS tasks + // logically go from config -> servers -> propagation, so we want to print + // them in that order. So we pick these out first and then print anything + // else that we find in alphabetical order. 
for name in [ "dns_config_internal", "dns_servers_internal", @@ -558,7 +624,7 @@ async fn cmd_nexus_background_tasks_show( ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); - } else { + } else if selected_all { eprintln!("warning: expected to find background task {:?}", name); } } @@ -1394,6 +1460,183 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { } } }; + } else if name == "region_snapshot_replacement_start" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total requests created ok: {}", + status.requests_created_ok.len(), + ); + for line in &status.requests_created_ok { + println!(" > {line}"); + } + + println!( + " total start saga invoked ok: {}", + status.start_invoked_ok.len(), + ); + for line in &status.start_invoked_ok { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "region_snapshot_replacement_garbage_collection" { + match serde_json::from_value::< + RegionSnapshotReplacementGarbageCollectStatus, + >(details.clone()) + { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total garbage collections requested: {}", + status.garbage_collect_requested.len(), + ); + for line in &status.garbage_collect_requested { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "region_snapshot_replacement_step" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total step records created ok: {}", + status.step_records_created_ok.len(), + ); + for line in &status.step_records_created_ok { + println!(" > {line}"); + } + + println!( + " total step garbage collect saga invoked ok: {}", + status.step_garbage_collect_invoked_ok.len(), + ); + for line in &status.step_garbage_collect_invoked_ok { + println!(" > {line}"); + } + + println!( + " total step saga invoked ok: {}", + status.step_invoked_ok.len(), + ); + for line in &status.step_invoked_ok { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "blueprint_loader" { + #[derive(Deserialize)] + struct BlueprintLoaderStatus { + target_id: Uuid, + time_created: DateTime, + status: String, + enabled: bool, + } + + match serde_json::from_value::(details.clone()) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + Ok(status) => { + println!(" target blueprint: {}", status.target_id); + println!( + " execution: {}", + if status.enabled { "enabled" } else { "disabled" } + ); + println!( + " created at: {}", + humantime::format_rfc3339_millis( + status.time_created.into() + ) + ); + println!(" status: {}", status.status); + } + } + } else if name == "blueprint_executor" { + #[derive(Deserialize)] + struct BlueprintExecutorStatus { + target_id: Uuid, + enabled: bool, + errors: Option>, + } + + match serde_json::from_value::(details.clone()) + { + Err(error) => eprintln!( + "warning: failed to interpret task 
details: {:?}: {:?}", + error, details + ), + Ok(status) => { + println!(" target blueprint: {}", status.target_id); + println!( + " execution: {}", + if status.enabled { "enabled" } else { "disabled" } + ); + let errors = status.errors.as_deref().unwrap_or(&[]); + println!(" errors: {}", errors.len()); + for (i, e) in errors.iter().enumerate() { + println!(" error {}: {}", i, e); + } + } + } + } else if name == "region_snapshot_replacement_finish" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total records transitioned to done: {}", + status.records_set_to_done.len(), + ); + for line in &status.records_set_to_done { + println!(" > {line}"); + } + + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } } else { println!( "warning: unknown background task: {:?} \ @@ -1606,6 +1849,38 @@ async fn cmd_nexus_blueprints_target_set( args: &BlueprintTargetSetArgs, _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { + // Helper to only fetch the current target once. We may need it immediately + // if `args.diff` is true, or later if `args.enabled` is "inherit" (or + // both). + let current_target = OnceCell::new(); + let get_current_target = || async { + current_target + .get_or_try_init(|| client.blueprint_target_view()) + .await + .context("failed to fetch current target blueprint") + }; + + if args.diff { + let current_target = get_current_target().await?; + let blueprint1 = client + .blueprint_view(¤t_target.target_id) + .await + .context("failed to fetch target blueprint")? + .into_inner(); + let blueprint2 = + client.blueprint_view(&args.blueprint_id).await.with_context( + || format!("fetching blueprint {}", args.blueprint_id), + )?; + let diff = blueprint2.diff_since_blueprint(&blueprint1); + println!("{}", diff.display()); + println!( + "\nDo you want to make {} the target blueprint?", + args.blueprint_id + ); + let mut prompt = ConfirmationPrompt::new(); + prompt.read_and_validate("y/N", "y")?; + } + let enabled = match args.enabled { BlueprintTargetSetEnabled::Enabled => true, BlueprintTargetSetEnabled::Disabled => false, @@ -1618,12 +1893,11 @@ async fn cmd_nexus_blueprints_target_set( // operator. (In the case of the current target blueprint being changed // entirely, that will result in a failure to set the current target // below, because its parent will no longer be the current target.) 
     let enabled = match args.enabled {
         BlueprintTargetSetEnabled::Enabled => true,
         BlueprintTargetSetEnabled::Disabled => false,
@@ -1618,12 +1893,11 @@ async fn cmd_nexus_blueprints_target_set(
         // operator.  (In the case of the current target blueprint being changed
         // entirely, that will result in a failure to set the current target
         // below, because its parent will no longer be the current target.)
-        BlueprintTargetSetEnabled::Inherit => client
-            .blueprint_target_view()
-            .await
-            .map(|current| current.into_inner().enabled)
-            .context("failed to fetch current target blueprint")?,
+        BlueprintTargetSetEnabled::Inherit => {
+            get_current_target().await?.enabled
+        }
     };
+
     client
         .blueprint_target_set(&nexus_client::types::BlueprintTargetSet {
             target_id: args.blueprint_id,
@@ -1850,7 +2124,7 @@ impl ConfirmationPrompt {
         {
             Ok(input)
         } else {
-            bail!("expungement aborted")
+            bail!("operation aborted")
         }
     }
 
diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out
index 5755df94883..c57a9c9dce3 100644
--- a/dev-tools/omdb/tests/env.out
+++ b/dev-tools/omdb/tests/env.out
@@ -127,6 +127,23 @@ task: "region_replacement_driver"
     drive region replacements forward to completion
 
 
+task: "region_snapshot_replacement_finish"
+    complete a region snapshot replacement if all the steps are done
+
+
+task: "region_snapshot_replacement_garbage_collection"
+    clean up all region snapshot replacement step volumes
+
+
+task: "region_snapshot_replacement_start"
+    detect if region snapshots need replacement and begin the process
+
+
+task: "region_snapshot_replacement_step"
+    detect what volumes were affected by a region snapshot replacement, and run
+    the step saga for them
+
+
 task: "saga_recovery"
     recovers sagas assigned to this Nexus
 
@@ -276,6 +293,23 @@ task: "region_replacement_driver"
     drive region replacements forward to completion
 
 
+task: "region_snapshot_replacement_finish"
+    complete a region snapshot replacement if all the steps are done
+
+
+task: "region_snapshot_replacement_garbage_collection"
+    clean up all region snapshot replacement step volumes
+
+
+task: "region_snapshot_replacement_start"
+    detect if region snapshots need replacement and begin the process
+
+
+task: "region_snapshot_replacement_step"
+    detect what volumes were affected by a region snapshot replacement, and run
+    the step saga for them
+
+
 task: "saga_recovery"
     recovers sagas assigned to this Nexus
 
@@ -412,6 +446,23 @@ task: "region_replacement_driver"
     drive region replacements forward to completion
 
 
+task: "region_snapshot_replacement_finish"
+    complete a region snapshot replacement if all the steps are done
+
+
+task: "region_snapshot_replacement_garbage_collection"
+    clean up all region snapshot replacement step volumes
+
+
+task: "region_snapshot_replacement_start"
+    detect if region snapshots need replacement and begin the process
+
+
+task: "region_snapshot_replacement_step"
+    detect what volumes were affected by a region snapshot replacement, and run
+    the step saga for them
+
+
 task: "saga_recovery"
     recovers sagas assigned to this Nexus
 
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out
index 66f07cb2f08..e7720dd12ce 100644
--- a/dev-tools/omdb/tests/successes.out
+++ b/dev-tools/omdb/tests/successes.out
@@ -141,9 +141,16 @@ SP DETAILS: type "Sled" slot 0
 
     COMPONENTS
 
-        NAME          DESCRIPTION              DEVICE           PRESENCE  SERIAL
-        sp3-host-cpu  FAKE host cpu            sp3-host-cpu     Present   None
-        dev-0         FAKE temperature sensor  fake-tmp-sensor  Failed    None
+        NAME          DESCRIPTION                               DEVICE           PRESENCE  SERIAL
+        sp3-host-cpu  FAKE host cpu                             sp3-host-cpu     Present   None
+        dev-0         FAKE temperature sensor                   fake-tmp-sensor  Failed    None
+        dev-1         FAKE temperature sensor                   tmp117           Present   None
+        dev-2         FAKE Southeast temperature sensor         tmp117           Present   None
+        dev-6         FAKE U.2 Sharkfin A VPD                   at24csw080       Present   None
+        dev-7         FAKE U.2 Sharkfin A hot swap controller   max5970          Present   None
+        dev-8         FAKE U.2 A NVMe Basic Management Command  nvme_bmc         Present
None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found @@ -167,8 +174,16 @@ SP DETAILS: type "Sled" slot 1 COMPONENTS - NAME DESCRIPTION DEVICE PRESENCE SERIAL - sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor tmp117 Present None + dev-1 FAKE temperature sensor tmp117 Present None + dev-2 FAKE Southeast temperature sensor tmp117 Present None + dev-6 FAKE U.2 Sharkfin A VPD at24csw080 Present None + dev-7 FAKE U.2 Sharkfin A hot swap controller max5970 Present None + dev-8 FAKE U.2 A NVMe Basic Management Command nvme_bmc Present None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found @@ -328,6 +343,23 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_finish" + complete a region snapshot replacement if all the steps are done + + +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + +task: "region_snapshot_replacement_start" + detect if region snapshots need replacement and begin the process + + +task: "region_snapshot_replacement_step" + detect what volumes were affected by a region snapshot replacement, and run + the step saga for them + + task: "saga_recovery" recovers sagas assigned to this Nexus @@ -566,6 +598,467 @@ task: "region_replacement_driver" number of region replacement finish sagas started ok: 0 number of errors: 0 +task: "region_snapshot_replacement_finish" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total records transitioned to done: 0 + errors: 0 + +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 0 + errors: 0 + +task: "region_snapshot_replacement_start" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 + +task: "region_snapshot_replacement_step" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total step records created ok: 0 + total step garbage collect saga invoked ok: 0 + total step saga invoked ok: 0 + errors: 0 + +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +task: "service_firewall_rule_propagation" + configured period: every 5m + currently executing: no + last completed activation: , triggered by a periodic timer firing + 
started at (s ago) and ran for ms + +task: "service_zone_nat_tracker" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + +task: "switch_port_config_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) + +task: "v2p_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) + +task: "vpc_route_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "saga_recovery"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "blueprint_loader", "blueprint_executor"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "blueprint_loader" + configured period: every 1m s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_internal"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently 
executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_external"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "all"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "nat_v4_garbage_collector" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started 
at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "blueprint_loader" + configured period: every 1m s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "abandoned_vmm_reaper" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total abandoned VMMs found: 0 + VMM records deleted: 0 + VMM records already deleted by another Nexus: 0 + sled resource reservations deleted: 0 + +task: "bfd_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "crdb_node_id_collector" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "decommissioned_disk_cleaner" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "decommissioned_disk_cleaner" (don't know how to interpret details: Object {"deleted": Number(0), "error": Null, "error_count": Number(0), "found": Number(0), "not_ready_to_be_deleted": Number(0)}) + +task: "external_endpoints" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + external API endpoints: 2 ('*' below marks default) + + SILO_ID DNS_NAME + ..................... default-silo.sys.oxide-dev.test + * ..................... test-suite-silo.sys.oxide-dev.test + + warnings: 2 + warning: silo ..................... with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates + warning: silo ..................... 
with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates + + TLS certificates: 0 + +task: "instance_updater" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances in need of updates: 0 + instances with destroyed active VMMs: 0 + instances with terminated active migrations: 0 + update sagas started: 0 + update sagas completed successfully: 0 + +task: "instance_watcher" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances checked: 0 + checks completed: 0 + successful checks: 0 + update sagas queued: 0 + failed checks: 0 + checks that could not be completed: 0 + stale instance metrics pruned: 0 + +task: "inventory_collection" + configured period: every 10m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last collection id: ..................... + last collection started: + last collection done: + +task: "lookup_region_port" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total filled in ports: 0 + errors: 0 + +task: "metrics_producer_gc" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) + +task: "phantom_disks" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of phantom disks deleted: 0 + number of phantom disk delete errors: 0 + +task: "physical_disk_adoption" + configured period: every s + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + last completion reported error: task disabled + +task: "region_replacement" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacements started ok: 0 + number of region replacement start errors: 0 + +task: "region_replacement_driver" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacement drive sagas started ok: 0 + number of region replacement finish sagas started ok: 0 + number of errors: 0 + +task: "region_snapshot_replacement_finish" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total records transitioned to done: 0 + errors: 0 + +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 0 + errors: 0 + +task: "region_snapshot_replacement_start" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s 
ago) and ran for ms + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 + +task: "region_snapshot_replacement_step" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total step records created ok: 0 + total step garbage collect saga invoked ok: 0 + total step saga invoked ok: 0 + errors: 0 + task: "saga_recovery" configured period: every 10m currently executing: no @@ -702,8 +1195,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -740,8 +1233,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -781,8 +1274,8 @@ to: blueprint ............. COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged) - cluster.preserve_downgrade_option: "22.1" (unchanged) + state fingerprint::::::::::::::::: (unchanged) + cluster.preserve_downgrade_option: (unchanged) METADATA: internal DNS version: 1 (unchanged) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index d0258aeaed8..d266e59ce82 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -7,9 +7,12 @@ //! Feel free to change the tool's output. This test just makes it easy to make //! sure you're only breaking what you intend. +use dropshot::Method; use expectorate::assert_contents; +use http::StatusCode; use nexus_test_utils::{OXIMETER_UUID, PRODUCER_UUID}; use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::Blueprint; use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; @@ -56,6 +59,7 @@ fn assert_oximeter_list_producers_output( #[tokio::test] async fn test_omdb_usage_errors() { + clear_omdb_env(); let cmd_path = path_to_executable(CMD_OMDB); let mut output = String::new(); let invocations: &[&[&'static str]] = &[ @@ -80,6 +84,7 @@ async fn test_omdb_usage_errors() { &["mgs"], &["nexus"], &["nexus", "background-tasks"], + &["nexus", "background-tasks", "show", "--help"], &["nexus", "blueprints"], &["nexus", "sagas"], // Missing "--destructive" flag. 
The URL is bogus but just ensures that @@ -110,6 +115,8 @@ async fn test_omdb_usage_errors() { #[nexus_test] async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let gwtestctx = gateway_test_utils::setup::test_setup( "test_omdb_success_case", gateway_messages::SpPort::One, @@ -130,6 +137,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let tmppath = tmpdir.path().join("reconfigurator-save.out"); let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string(); + // Get the CockroachDB metadata from the blueprint so we can redact it + let initial_blueprint: Blueprint = dropshot::test_util::read_json( + &mut cptestctx + .internal_client + .make_request_no_body( + Method::GET, + &format!("/deployment/blueprints/all/{initial_blueprint_id}"), + StatusCode::OK, + ) + .await + .unwrap(), + ) + .await; + let mut output = String::new(); let invocations: &[&[&str]] = &[ @@ -144,6 +165,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + // background tasks: test picking out specific names + &["nexus", "background-tasks", "show", "saga_recovery"], + &[ + "nexus", + "background-tasks", + "show", + "blueprint_loader", + "blueprint_executor", + ], + // background tasks: test recognized group names + &["nexus", "background-tasks", "show", "dns_internal"], + &["nexus", "background-tasks", "show", "dns_external"], + &["nexus", "background-tasks", "show", "all"], &["nexus", "sagas", "list"], &["--destructive", "nexus", "sagas", "demo-create"], &["nexus", "sagas", "list"], @@ -169,6 +203,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // ControlPlaneTestContext. ]; + let mut redactions = ExtraRedactions::new(); + redactions + .variable_length("tmp_path", tmppath.as_str()) + .fixed_length("blueprint_id", &initial_blueprint_id) + .variable_length( + "cockroachdb_fingerprint", + &initial_blueprint.cockroachdb_fingerprint, + ); + let crdb_version = + initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string(); + if initial_blueprint.cockroachdb_setting_preserve_downgrade.is_set() { + redactions.variable_length("cockroachdb_version", &crdb_version); + } for args in invocations { println!("running commands with args: {:?}", args); let p = postgres_url.to_string(); @@ -187,11 +234,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { }, &cmd_path, args, - Some( - ExtraRedactions::new() - .variable_length("tmp_path", tmppath.as_str()) - .fixed_length("blueprint_id", &initial_blueprint_id), - ), + Some(&redactions), ) .await; } @@ -257,6 +300,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { /// that's covered by the success tests above. #[nexus_test] async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let cmd_path = path_to_executable(CMD_OMDB); let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = @@ -490,3 +535,22 @@ async fn do_run_extra( write!(output, "=============================================\n").unwrap(); } + +// We're testing behavior that can be affected by OMDB-related environment +// variables. Clear all of them from the current process so that all child +// processes don't have them. OMDB environment variables can affect even the +// help output provided by clap. See clap-rs/clap#5673 for an example. 
+fn clear_omdb_env() { + // Rust documents that it's not safe to manipulate the environment in a + // multi-threaded process outside of Windows because it's possible that + // other threads are reading or writing the environment and most systems do + // not support this. On illumos, the underlying interfaces are broadly + // thread-safe. Further, Omicron only supports running tests under `cargo + // nextest`, in which case there are no threads running concurrently here + // that may be reading or modifying the environment. + for (env_var, _) in std::env::vars().filter(|(k, _)| k.starts_with("OMDB_")) + { + eprintln!("removing {:?} from environment", env_var); + std::env::remove_var(env_var); + } +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 1ee07410bf9..55781136b6f 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -491,6 +491,46 @@ Connection Options: Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "--help"] +termination: Exited(0) +--------------------------------------------- +stdout: +Print human-readable summary of the status of each background task + +Usage: omdb nexus background-tasks show [OPTIONS] [TASK_NAME]... + +Arguments: + [TASK_NAME]... + Names of background tasks to show (default: all) + + You can use any background task name here or one of the special strings "all", + "dns_external", or "dns_internal". + +Options: + --log-level + log level filter + + [env: LOG_LEVEL=] + [default: warn] + + -h, --help + Print help (see a summary with '-h') + +Connection Options: + --nexus-internal-url + URL of the Nexus internal API + + [env: OMDB_NEXUS_URL=] + + --dns-server + [env: OMDB_DNS_SERVER=] + +Safety Options: + -w, --destructive + Allow potentially-destructive subcommands +--------------------------------------------- +stderr: +============================================= EXECUTING COMMAND: omdb ["nexus", "blueprints"] termination: Exited(2) --------------------------------------------- diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index 85d27aaafde..211e1340161 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true atomicwrites.workspace = true bootstrap-agent-api.workspace = true camino.workspace = true +clickhouse-admin-api.workspace = true cockroach-admin-api.workspace = true clap.workspace = true dns-server-api.workspace = true @@ -20,10 +21,12 @@ fs-err.workspace = true gateway-api.workspace = true indent_write.workspace = true installinator-api.workspace = true +nexus-external-api.workspace = true nexus-internal-api.workspace = true omicron-workspace-hack.workspace = true openapiv3.workspace = true openapi-lint.workspace = true +openapi-manager-types.workspace = true owo-colors.workspace = true oximeter-api.workspace = true serde_json.workspace = true diff --git a/dev-tools/openapi-manager/README.adoc b/dev-tools/openapi-manager/README.adoc index 1aadaa2c0cf..e6b28b44f64 100644 --- a/dev-tools/openapi-manager/README.adoc +++ b/dev-tools/openapi-manager/README.adoc @@ -4,19 +4,15 @@ This tool manages the OpenAPI documents (JSON files) checked into Omicron's `ope NOTE: For more information about API traits, see https://rfd.shared.oxide.computer/rfd/0479[RFD 479]. 
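For readers new to RFD 479's API traits: the trait carries only the endpoint signatures, and a macro-generated stub description lets tools like the OpenAPI manager produce the document without a running implementation. A rough sketch of the shape (a hypothetical `CounterApi`; consult RFD 479 and the dropshot documentation for the authoritative form):

```rust
use dropshot::{HttpError, HttpResponseOk, RequestContext};

// The macro generates a `counter_api_mod` module containing, among other
// things, `api_description()` for real servers and the `stub_api_description()`
// that the OpenAPI manager calls.
#[dropshot::api_description]
pub trait CounterApi {
    /// Server context type, chosen by each implementation.
    type Context;

    /// Fetch the current value of the counter.
    #[endpoint { method = GET, path = "/counter" }]
    async fn get_counter(
        rqctx: RequestContext<Self::Context>,
    ) -> Result<HttpResponseOk<u64>, HttpError>;
}
```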
-Currently, a subset of OpenAPI documents is managed by this tool. Eventually, all of the OpenAPI documents in Omicron will be managed by this tool; work to make that happen is ongoing. - -To check whether your document is managed, run `cargo xtask openapi list`: it will list out all managed OpenAPI documents. If your document is not on the list, it is unmanaged. - == Basic usage The OpenAPI manager is meant to be invoked via `cargo xtask openapi`. Currently, three commands are provided: -* `cargo xtask openapi list`: List information about currently-managed documents. -* `cargo xtask openapi check`: Check that all of the managed documents are up-to-date. +* `cargo xtask openapi list`: List information about OpenAPI documents. +* `cargo xtask openapi check`: Check that all of the documents are up-to-date. * `cargo xtask openapi generate`: Update and generate OpenAPI documents. -There is also a test which makes sure that all managed documents are up-to-date, and tells you to run `cargo xtask openapi generate` if they aren't. +There is also a test which makes sure that all documents are up-to-date, and tells you to run `cargo xtask openapi generate` if they aren't. === API crates [[api_crates]] @@ -49,40 +45,13 @@ In the implementation crate: . Add a dependency on the API crate. . Following the example in https://rfd.shared.oxide.computer/rfd/0479#guide_api_implementation[RFD 479's _API implementation_], provide an implementation of the trait. -Once the API crate is defined, perform the steps in <> below. - -=== Converting existing documents - -Existing, unmanaged documents are generated via *function-based servers*: a set of functions that some code combines into a Dropshot `ApiDescription`. (There is also likely an expectorate test which ensures that the document is up-to-date.) - -The first step is to convert the function-based server into an API trait. To do so, create an API crate (see <> above). - -. Add the API crate to the workspace's `Cargo.toml`: `members` and `default-members`, and a reference in `[workspace.dependencies]`. -. Follow the instructions in https://rfd.shared.oxide.computer/rfd/0479#guide_converting_functions_to_traits[RFD 479's _Converting functions to API traits_] for the API crate. - -In the implementation crate: - -. Continue following the instructions in https://rfd.shared.oxide.computer/rfd/0479#guide_converting_functions_to_traits[RFD 479's _Converting functions to API traits_] for where the endpoint functions are currently defined. -. Find the test which currently manages the document (try searching the repo for `openapi_lint::validate`). If it performs any checks on the document beyond `openapi_lint::validate` or `openapi_lint::validate_external`, see <>. - -Next, perform the steps in <> below. - -Finally, remove: - -. The test which used to manage the document. The OpenAPI manager includes a test that will automatically run in CI. -. The binary subcommand (typically called `openapi`) that generated the OpenAPI document. The test was the only practical use of this subcommand. - -=== Adding the API crate to the manager [[add_to_manager]] - Once the API crate is defined, inform the OpenAPI manager of its existence. Within this directory: . In `Cargo.toml`, add a dependency on the API crate. . In `src/spec.rs`, add the crate to the `all_apis` function. (Please keep the list sorted by filename.) -To ensure everything works well, run `cargo xtask openapi generate`. - -* Your OpenAPI document should be generated on disk and listed in the output. 
-* If you're converting an existing API, the only changes should be the ones you might have introduced as part of the refactor. If there are significant changes, something's gone wrong--maybe you missed an endpoint? +To ensure everything works well, run `cargo xtask openapi generate`. Your +OpenAPI document should be generated on disk and listed in the output. ==== Performing extra validation [[extra_validation]] @@ -90,10 +59,19 @@ By default, the OpenAPI manager does basic validation on the generated document. It's best to put extra validation next to the trait, within the API crate. -. In the API crate, add dependencies on `anyhow` and `openapiv3`. -. Define a function with signature `fn extra_validation(openapi: &openapiv3::OpenAPI) -> anyhow::Result<()>` which performs the extra validation steps. +. In the API crate, add dependencies on `openapiv3` and `openapi-manager-types`. +. Define a function with signature `fn validate_api(spec: &openapiv3::OpenAPI, mut cx: openapi_manager_types::ValidationContext<'_>) which performs the extra validation steps. . In `all_apis`, set the `extra_validation` field to this function. +Currently, the validator can do two things: + +. Via the `ValidationContext::report_error` function, report validation errors. +. Via the `ValidationContext::record_file_contents` function, assert the contents of other generated files. + +(This can be made richer as needed.) + +For an example, see `validate_api` in the `nexus-external-api` crate. + == Design notes The OpenAPI manager uses the new support for Dropshot API traits described in https://rfd.shared.oxide.computer/rfd/0479[RFD 479]. diff --git a/dev-tools/openapi-manager/src/check.rs b/dev-tools/openapi-manager/src/check.rs index 182ed9fb19f..b43e43e7e57 100644 --- a/dev-tools/openapi-manager/src/check.rs +++ b/dev-tools/openapi-manager/src/check.rs @@ -5,17 +5,16 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; use similar::TextDiff; use crate::{ output::{ - display_api_spec, display_error, display_summary, headers::*, plural, - write_diff, OutputOpts, Styles, + display_api_spec, display_api_spec_file, display_error, + display_summary, headers::*, plural, write_diff, OutputOpts, Styles, }, - spec::{all_apis, CheckStatus}, + spec::{all_apis, CheckStale, Environment}, FAILURE_EXIT_CODE, NEEDS_UPDATE_EXIT_CODE, }; @@ -37,7 +36,7 @@ impl CheckResult { } pub(crate) fn check_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -48,6 +47,7 @@ pub(crate) fn check_impl( let all_apis = all_apis(); let total = all_apis.len(); let count_width = total.to_string().len(); + let count_section_indent = count_section_indent(count_width); let continued_indent = continued_indent(count_width); eprintln!("{:>HEADER_WIDTH$}", SEPARATOR); @@ -58,57 +58,89 @@ pub(crate) fn check_impl( total.style(styles.bold), plural::documents(total), ); - let mut num_up_to_date = 0; + let mut num_fresh = 0; let mut num_stale = 0; - let mut num_missing = 0; let mut num_failed = 0; for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.check(&dir) { - Ok(status) => match status { - CheckStatus::Ok(summary) => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", - UP_TO_DATE.style(styles.success_header), - display_api_spec(spec, &styles), - display_summary(&summary, &styles), - ); + match spec.check(env) { + Ok(status) => { + let 
total_errors = status.total_errors(); + let total_errors_width = total_errors.to_string().len(); + + if total_errors == 0 { + // Success case. + let extra = if status.extra_files_len() > 0 { + format!( + ", {} extra files", + status.extra_files_len().style(styles.bold) + ) + } else { + "".to_string() + }; - num_up_to_date += 1; - } - CheckStatus::Stale { full_path, actual, expected } => { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - STALE.style(styles.warning_header), + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}{extra}", + FRESH.style(styles.success_header), display_api_spec(spec, &styles), + display_summary(&status.summary, &styles), ); - let diff = TextDiff::from_lines(&actual, &expected); - write_diff( - &diff, - &full_path, - &styles, - // Add an indent to align diff with the status message. - &mut IndentWriter::new( - &continued_indent, - std::io::stderr(), - ), - )?; - - num_stale += 1; + num_fresh += 1; + continue; } - CheckStatus::Missing => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - MISSING.style(styles.warning_header), - display_api_spec(spec, &styles), - ); - num_missing += 1; + // Out of date: print errors. + eprintln!( + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", + STALE.style(styles.warning_header), + display_api_spec(spec, &styles), + ); + num_stale += 1; + + for (error_ix, (spec_file, error)) in + status.iter_errors().enumerate() + { + let error_count = error_ix + 1; + + let display_heading = |heading: &str| { + eprintln!( + "{:>HEADER_WIDTH$}{count_section_indent}\ + ({error_count:>total_errors_width$}/{total_errors}) {}", + heading.style(styles.warning_header), + display_api_spec_file(spec, spec_file, &styles), + ); + }; + + match error { + CheckStale::Modified { + full_path, + actual, + expected, + } => { + display_heading(MODIFIED); + + let diff = + TextDiff::from_lines(&**actual, &**expected); + write_diff( + &diff, + &full_path, + &styles, + // Add an indent to align diff with the status message. 
+ &mut IndentWriter::new( + &continued_indent, + std::io::stderr(), + ), + )?; + } + CheckStale::New => { + display_heading(NEW); + } + } } - }, + } Err(error) => { eprint!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", @@ -138,13 +170,12 @@ pub(crate) fn check_impl( }; eprintln!( - "{:>HEADER_WIDTH$} {} {} checked: {} up-to-date, {} stale, {} missing, {} failed", + "{:>HEADER_WIDTH$} {} {} checked: {} fresh, {} stale, {} failed", status_header, total.style(styles.bold), plural::documents(total), - num_up_to_date.style(styles.bold), + num_fresh.style(styles.bold), num_stale.style(styles.bold), - num_missing.style(styles.bold), num_failed.style(styles.bold), ); if num_failed > 0 { @@ -170,14 +201,14 @@ pub(crate) fn check_impl( mod tests { use std::process::ExitCode; - use crate::spec::find_openapi_dir; + use crate::spec::Environment; use super::*; #[test] fn check_apis_up_to_date() -> Result { let output = OutputOpts { color: clap::ColorChoice::Auto }; - let dir = find_openapi_dir()?; + let dir = Environment::new(None)?; let result = check_impl(&dir, &output)?; Ok(result.to_exit_code()) diff --git a/dev-tools/openapi-manager/src/dispatch.rs b/dev-tools/openapi-manager/src/dispatch.rs index 937a8b485f1..ca2989396f3 100644 --- a/dev-tools/openapi-manager/src/dispatch.rs +++ b/dev-tools/openapi-manager/src/dispatch.rs @@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand}; use crate::{ check::check_impl, generate::generate_impl, list::list_impl, - output::OutputOpts, spec::openapi_dir, + output::OutputOpts, spec::Environment, }; /// Manage OpenAPI specifications. @@ -73,7 +73,7 @@ pub struct GenerateArgs { impl GenerateArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; + let dir = Environment::new(self.dir)?; Ok(generate_impl(&dir, output)?.to_exit_code()) } } @@ -87,8 +87,8 @@ pub struct CheckArgs { impl CheckArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; - Ok(check_impl(&dir, output)?.to_exit_code()) + let env = Environment::new(self.dir)?; + Ok(check_impl(&env, output)?.to_exit_code()) } } diff --git a/dev-tools/openapi-manager/src/generate.rs b/dev-tools/openapi-manager/src/generate.rs index f776ff27093..1cf9ebbb613 100644 --- a/dev-tools/openapi-manager/src/generate.rs +++ b/dev-tools/openapi-manager/src/generate.rs @@ -5,7 +5,6 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; @@ -14,7 +13,7 @@ use crate::{ display_api_spec, display_error, display_summary, headers::*, plural, OutputOpts, Styles, }, - spec::{all_apis, OverwriteStatus}, + spec::{all_apis, Environment}, FAILURE_EXIT_CODE, }; @@ -34,7 +33,7 @@ impl GenerateResult { } pub(crate) fn generate_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -62,27 +61,30 @@ pub(crate) fn generate_impl( for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.overwrite(&dir) { - Ok((status, summary)) => match status { - OverwriteStatus::Updated => { + match spec.overwrite(env) { + Ok(status) => { + let updated_count = status.updated_count(); + + if updated_count > 0 { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {} ({} {} updated)", UPDATED.style(styles.success_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + 
display_summary(&status.summary, &styles), + updated_count.style(styles.bold), + plural::files(updated_count), ); num_updated += 1; - } - OverwriteStatus::Unchanged => { + } else { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", UNCHANGED.style(styles.unchanged_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), ); num_unchanged += 1; } - }, + } Err(err) => { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", diff --git a/dev-tools/openapi-manager/src/output.rs b/dev-tools/openapi-manager/src/output.rs index 6cd578e7784..fee7f0f15cb 100644 --- a/dev-tools/openapi-manager/src/output.rs +++ b/dev-tools/openapi-manager/src/output.rs @@ -10,7 +10,7 @@ use indent_write::fmt::IndentWriter; use owo_colors::{OwoColorize, Style}; use similar::{ChangeTag, DiffableStr, TextDiff}; -use crate::spec::{ApiSpec, DocumentSummary}; +use crate::spec::{ApiSpec, ApiSpecFile, DocumentSummary}; #[derive(Debug, Args)] #[clap(next_help_heading = "Global options")] @@ -123,6 +123,21 @@ pub(crate) fn display_api_spec(spec: &ApiSpec, styles: &Styles) -> String { ) } +pub(crate) fn display_api_spec_file( + spec: &ApiSpec, + spec_file: ApiSpecFile<'_>, + styles: &Styles, +) -> String { + match spec_file { + ApiSpecFile::Openapi => { + format!("OpenAPI document {}", spec.filename.style(styles.filename)) + } + ApiSpecFile::Extra(path) => { + format!("Extra file {}", path.style(styles.filename)) + } + } +} + pub(crate) fn display_summary( summary: &DocumentSummary, styles: &Styles, @@ -201,9 +216,14 @@ pub(crate) mod headers { pub(crate) static CHECKING: &str = "Checking"; pub(crate) static GENERATING: &str = "Generating"; - pub(crate) static UP_TO_DATE: &str = "Up-to-date"; + pub(crate) static FRESH: &str = "Fresh"; + + // Stale encompasses: + // - Stale: the file on disk is different from what we generated. + // - Missing: the file on disk does not exist. pub(crate) static STALE: &str = "Stale"; - pub(crate) static MISSING: &str = "Missing"; + pub(crate) static NEW: &str = "-> New"; + pub(crate) static MODIFIED: &str = "-> Modified"; pub(crate) static UPDATED: &str = "Updated"; pub(crate) static UNCHANGED: &str = "Unchanged"; @@ -211,22 +231,38 @@ pub(crate) mod headers { pub(crate) static SUCCESS: &str = "Success"; pub(crate) static FAILURE: &str = "Failure"; - pub(crate) fn continued_indent(count_width: usize) -> String { + fn count_section_width(count_width: usize) -> usize { // Status strings are of the form: // // Generated [ 1/12] api.json: 1 path, 1 schema + // ^^^^^^^^^ // - // So the continued indent is: - // - // HEADER_WIDTH for the status string - // + (count_width * 2) for current and total counts + // So the width of the count section is: + // (count_width * 2) for current and total counts // + 3 for '[/]' // + 2 for spaces on either side. 
- " ".repeat(HEADER_WIDTH + count_width * 2 + 3 + 2) + count_width * 2 + 3 + 2 + } + + pub(crate) fn count_section_indent(count_width: usize) -> String { + " ".repeat(count_section_width(count_width)) + } + + pub(crate) fn continued_indent(count_width: usize) -> String { + // HEADER_WIDTH for the status string + count_section_width + " ".repeat(HEADER_WIDTH + count_section_width(count_width)) } } pub(crate) mod plural { + pub(crate) fn files(count: usize) -> &'static str { + if count == 1 { + "file" + } else { + "files" + } + } + pub(crate) fn documents(count: usize) -> &'static str { if count == 1 { "document" diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 37a657ee93b..03511a7945c 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -9,6 +9,7 @@ use atomicwrites::AtomicFile; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::{ApiDescription, ApiDescriptionBuildErrors, StubContext}; use fs_err as fs; +use openapi_manager_types::{ValidationBackend, ValidationContext}; use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. @@ -24,6 +25,17 @@ pub fn all_apis() -> Vec { filename: "bootstrap-agent.json", extra_validation: None, }, + ApiSpec { + title: "ClickHouse Cluster Admin API", + version: "0.0.1", + description: "API for interacting with the Oxide \ + control plane's ClickHouse cluster", + boundary: ApiBoundary::Internal, + api_description: + clickhouse_admin_api::clickhouse_admin_api_mod::stub_api_description, + filename: "clickhouse-admin.json", + extra_validation: None, + }, ApiSpec { title: "CockroachDB Cluster Admin API", version: "0.0.1", @@ -67,6 +79,16 @@ pub fn all_apis() -> Vec { filename: "installinator.json", extra_validation: None, }, + ApiSpec { + title: "Oxide Region API", + version: "20240821.0", + description: "API for interacting with the Oxide control plane", + boundary: ApiBoundary::External, + api_description: + nexus_external_api::nexus_external_api_mod::stub_api_description, + filename: "nexus.json", + extra_validation: Some(nexus_external_api::validate_api), + }, ApiSpec { title: "Nexus internal API", version: "0.0.1", @@ -132,47 +154,64 @@ pub struct ApiSpec { pub filename: &'static str, /// Extra validation to perform on the OpenAPI spec, if any. 
-    pub extra_validation: Option<fn(&OpenAPI) -> anyhow::Result<()>>,
+    pub extra_validation: Option<fn(&OpenAPI, ValidationContext<'_>)>,
 }
 
 impl ApiSpec {
     pub(crate) fn overwrite(
         &self,
-        dir: &Utf8Path,
-    ) -> Result<(OverwriteStatus, DocumentSummary)> {
+        env: &Environment,
+    ) -> Result<SpecOverwriteStatus> {
         let contents = self.to_json_bytes()?;
-        let summary = self
+        let (summary, validation_result) = self
             .validate_json(&contents)
             .context("OpenAPI document validation failed")?;
 
-        let full_path = dir.join(&self.filename);
-        let status = overwrite_file(&full_path, &contents)?;
-
-        Ok((status, summary))
+        let full_path = env.openapi_dir.join(&self.filename);
+        let openapi_doc_status = overwrite_file(&full_path, &contents)?;
+
+        let extra_files = validation_result
+            .extra_files
+            .into_iter()
+            .map(|(path, contents)| {
+                let full_path = env.workspace_root.join(&path);
+                let status = overwrite_file(&full_path, &contents)?;
+                Ok((path, status))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(SpecOverwriteStatus {
+            summary,
+            openapi_doc: openapi_doc_status,
+            extra_files,
+        })
     }
 
-    pub(crate) fn check(&self, dir: &Utf8Path) -> Result<CheckStatus> {
+    pub(crate) fn check(&self, env: &Environment) -> Result<SpecCheckStatus> {
         let contents = self.to_json_bytes()?;
-        let summary = self
+        let (summary, validation_result) = self
             .validate_json(&contents)
             .context("OpenAPI document validation failed")?;
 
-        let full_path = dir.join(&self.filename);
-        let existing_contents =
-            read_opt(&full_path).context("failed to read contents on disk")?;
-
-        match existing_contents {
-            Some(existing_contents) if existing_contents == contents => {
-                Ok(CheckStatus::Ok(summary))
-            }
-            Some(existing_contents) => Ok(CheckStatus::Stale {
-                full_path,
-                actual: existing_contents,
-                expected: contents,
-            }),
-            None => Ok(CheckStatus::Missing),
-        }
+        let full_path = env.openapi_dir.join(&self.filename);
+        let openapi_doc_status = check_file(full_path, contents)?;
+
+        let extra_files = validation_result
+            .extra_files
+            .into_iter()
+            .map(|(path, contents)| {
+                let full_path = env.workspace_root.join(&path);
+                let status = check_file(full_path, contents)?;
+                Ok((path, status))
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(SpecCheckStatus {
+            summary,
+            openapi_doc: openapi_doc_status,
+            extra_files,
+        })
     }
 
     pub(crate) fn to_openapi_doc(&self) -> Result<OpenAPI> {
@@ -205,7 +244,10 @@ impl ApiSpec {
         Ok(contents)
     }
 
-    fn validate_json(&self, contents: &[u8]) -> Result<DocumentSummary> {
+    fn validate_json(
+        &self,
+        contents: &[u8],
+    ) -> Result<(DocumentSummary, ValidationResult)> {
         let openapi_doc = contents_to_openapi(contents)
             .context("JSON returned by ApiDescription is not valid OpenAPI")?;
@@ -220,11 +262,51 @@ impl ApiSpec {
             return Err(anyhow::anyhow!("{}", errors.join("\n\n")));
         }
 
-        if let Some(extra_validation) = self.extra_validation {
-            extra_validation(&openapi_doc)?;
-        }
+        let extra_files = if let Some(extra_validation) = self.extra_validation
+        {
+            let mut validation_context =
+                ValidationContextImpl { errors: Vec::new(), files: Vec::new() };
+            extra_validation(
+                &openapi_doc,
+                ValidationContext::new(&mut validation_context),
+            );
+
+            if !validation_context.errors.is_empty() {
+                return Err(anyhow::anyhow!(
+                    "OpenAPI document extended validation failed:\n{}",
+                    validation_context
+                        .errors
+                        .iter()
+                        .map(|e| e.to_string())
+                        .collect::<Vec<_>>()
+                        .join("\n")
+                ));
+            }
+
+            validation_context.files
+        } else {
+            Vec::new()
+        };
+
+        Ok((
+            DocumentSummary::new(&openapi_doc),
+            ValidationResult { extra_files },
+        ))
+    }
+}
+
+struct ValidationContextImpl {
+    errors: Vec<anyhow::Error>,
+    files: Vec<(Utf8PathBuf, Vec<u8>)>,
+}
+
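Both `overwrite` and `check` above fold per-file failures into a single result by collecting an iterator of `Result`s: collection short-circuits at the first `Err`, which `?` then propagates. A tiny self-contained illustration of that idiom (std only; the parsing example is hypothetical):

```rust
use std::num::ParseIntError;

/// Parse every input, or return the first parse error encountered.
fn parse_all(inputs: &[&str]) -> Result<Vec<i32>, ParseIntError> {
    inputs.iter().map(|s| s.parse::<i32>()).collect::<Result<Vec<_>, _>>()
}

fn main() {
    assert_eq!(parse_all(&["1", "2"]), Ok(vec![1, 2]));
    // "x" fails to parse, so the whole collection becomes an Err.
    assert!(parse_all(&["1", "x"]).is_err());
}
```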
+impl ValidationBackend for ValidationContextImpl {
+    fn report_error(&mut self, error: anyhow::Error) {
+        self.errors.push(error);
+    }
 
-        Ok(DocumentSummary::new(&openapi_doc))
+    fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec<u8>) {
+        self.files.push((path, contents));
+    }
 }
 
@@ -249,6 +331,32 @@ impl fmt::Display for ApiBoundary {
     }
 }
 
+#[derive(Debug)]
+#[must_use]
+pub(crate) struct SpecOverwriteStatus {
+    pub(crate) summary: DocumentSummary,
+    openapi_doc: OverwriteStatus,
+    extra_files: Vec<(Utf8PathBuf, OverwriteStatus)>,
+}
+
+impl SpecOverwriteStatus {
+    pub(crate) fn updated_count(&self) -> usize {
+        self.iter()
+            .filter(|(_, status)| matches!(status, OverwriteStatus::Updated))
+            .count()
+    }
+
+    fn iter(
+        &self,
+    ) -> impl Iterator<Item = (ApiSpecFile<'_>, &OverwriteStatus)> {
+        std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)).chain(
+            self.extra_files.iter().map(|(file_name, status)| {
+                (ApiSpecFile::Extra(file_name), status)
+            }),
+        )
+    }
+}
+
 #[derive(Debug)]
 #[must_use]
 pub(crate) enum OverwriteStatus {
@@ -256,12 +364,58 @@ pub(crate) enum OverwriteStatus {
     Unchanged,
 }
 
+#[derive(Debug)]
+#[must_use]
+pub(crate) struct SpecCheckStatus {
+    pub(crate) summary: DocumentSummary,
+    pub(crate) openapi_doc: CheckStatus,
+    pub(crate) extra_files: Vec<(Utf8PathBuf, CheckStatus)>,
+}
+
+impl SpecCheckStatus {
+    pub(crate) fn total_errors(&self) -> usize {
+        self.iter_errors().count()
+    }
+
+    pub(crate) fn extra_files_len(&self) -> usize {
+        self.extra_files.len()
+    }
+
+    pub(crate) fn iter_errors(
+        &self,
+    ) -> impl Iterator<Item = (ApiSpecFile<'_>, &CheckStale)> {
+        std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc))
+            .chain(self.extra_files.iter().map(|(file_name, status)| {
+                (ApiSpecFile::Extra(file_name), status)
+            }))
+            .filter_map(|(spec_file, status)| {
+                if let CheckStatus::Stale(e) = status {
+                    Some((spec_file, e))
+                } else {
+                    None
+                }
+            })
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) enum ApiSpecFile<'a> {
+    Openapi,
+    Extra(&'a Utf8Path),
+}
+
 #[derive(Debug)]
 #[must_use]
 pub(crate) enum CheckStatus {
-    Ok(DocumentSummary),
-    Stale { full_path: Utf8PathBuf, actual: Vec<u8>, expected: Vec<u8> },
-    Missing,
+    Fresh,
+    Stale(CheckStale),
+}
+
+#[derive(Debug)]
+#[must_use]
+pub(crate) enum CheckStale {
+    Modified { full_path: Utf8PathBuf, actual: Vec<u8>, expected: Vec<u8> },
+    New,
 }
 
 #[derive(Debug)]
@@ -284,31 +438,45 @@ impl DocumentSummary {
     }
 }
 
-pub(crate) fn openapi_dir(dir: Option<Utf8PathBuf>) -> Result<Utf8PathBuf> {
-    match dir {
-        Some(dir) => Ok(dir.canonicalize_utf8().with_context(|| {
-            format!("failed to canonicalize directory: {}", dir)
-        })?),
-        None => find_openapi_dir().context("failed to find openapi directory"),
-    }
+#[derive(Debug)]
+#[must_use]
+struct ValidationResult {
+    // Extra files recorded by the validation context.
+    extra_files: Vec<(Utf8PathBuf, Vec<u8>)>,
 }
 
-pub(crate) fn find_openapi_dir() -> Result<Utf8PathBuf> {
-    let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-    // This crate is two levels down from the root of omicron, so go up twice.
-    root.pop();
-    root.pop();
+pub(crate) struct Environment {
+    pub(crate) workspace_root: Utf8PathBuf,
+    pub(crate) openapi_dir: Utf8PathBuf,
+}
 
+impl Environment {
+    pub(crate) fn new(openapi_dir: Option<Utf8PathBuf>) -> Result<Self> {
+        let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        // This crate is two levels down from the root of omicron, so go up twice.
+        root.pop();
+        root.pop();
 
-    if !root.is_dir() {
-        anyhow::bail!("openapi root is not a directory: {}", root);
-    }
+        let workspace_root = root.canonicalize_utf8().with_context(|| {
+            format!("failed to canonicalize workspace root: {}", root)
+        })?;
+
+        let openapi_dir =
+            openapi_dir.unwrap_or_else(|| workspace_root.join("openapi"));
+        let openapi_dir =
+            openapi_dir.canonicalize_utf8().with_context(|| {
+                format!(
+                    "failed to canonicalize openapi directory: {}",
+                    openapi_dir
+                )
+            })?;
+
+        if !openapi_dir.is_dir() {
+            anyhow::bail!("openapi root is not a directory: {}", openapi_dir);
+        }
 
-    Ok(root)
+        Ok(Self { workspace_root, openapi_dir })
+    }
 }
 
 /// Overwrite a file with new contents, if the contents are different.
@@ -333,6 +501,29 @@ fn overwrite_file(path: &Utf8Path, contents: &[u8]) -> Result<OverwriteStatus> {
     Ok(OverwriteStatus::Updated)
 }
 
+/// Check a file against expected contents.
+fn check_file(
+    full_path: Utf8PathBuf,
+    contents: Vec<u8>,
+) -> Result<CheckStatus> {
+    let existing_contents =
+        read_opt(&full_path).context("failed to read contents on disk")?;
+
+    match existing_contents {
+        Some(existing_contents) if existing_contents == contents => {
+            Ok(CheckStatus::Fresh)
+        }
+        Some(existing_contents) => {
+            Ok(CheckStatus::Stale(CheckStale::Modified {
+                full_path,
+                actual: existing_contents,
+                expected: contents,
+            }))
+        }
+        None => Ok(CheckStatus::Stale(CheckStale::New)),
+    }
+}
+
 fn read_opt(path: &Utf8Path) -> std::io::Result<Option<Vec<u8>>> {
     match fs::read(path) {
         Ok(contents) => Ok(Some(contents)),
diff --git a/dev-tools/openapi-manager/types/Cargo.toml b/dev-tools/openapi-manager/types/Cargo.toml
new file mode 100644
index 00000000000..262529f1a9b
--- /dev/null
+++ b/dev-tools/openapi-manager/types/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "openapi-manager-types"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow.workspace = true
+camino.workspace = true
+omicron-workspace-hack.workspace = true
diff --git a/dev-tools/openapi-manager/types/src/lib.rs b/dev-tools/openapi-manager/types/src/lib.rs
new file mode 100644
index 00000000000..b48ea03e74e
--- /dev/null
+++ b/dev-tools/openapi-manager/types/src/lib.rs
@@ -0,0 +1,12 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Shared types for the OpenAPI manager.
+//!
+//! API trait crates can depend on this crate to get access to interfaces
+//! exposed by the OpenAPI manager.
+
+mod validation;
+
+pub use validation::*;
diff --git a/dev-tools/openapi-manager/types/src/validation.rs b/dev-tools/openapi-manager/types/src/validation.rs
new file mode 100644
index 00000000000..6f22228f4d5
--- /dev/null
+++ b/dev-tools/openapi-manager/types/src/validation.rs
@@ -0,0 +1,47 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use camino::Utf8PathBuf;
+
+/// Context for validation of OpenAPI specifications.
+pub struct ValidationContext<'a> {
+    backend: &'a mut dyn ValidationBackend,
+}
+
+impl<'a> ValidationContext<'a> {
+    /// Not part of the public API -- only called by the OpenAPI manager.
+    #[doc(hidden)]
+    pub fn new(backend: &'a mut dyn ValidationBackend) -> Self {
+        Self { backend }
+    }
+
+    /// Reports a validation error.
+    pub fn report_error(&mut self, error: anyhow::Error) {
+        self.backend.report_error(error);
+    }
+
+    /// Records that the file has the given contents.
+    ///
+    /// In check mode, if the files differ, an error is logged.
+    ///
+    /// In generate mode, the file is overwritten with the given contents.
+    ///
+    /// The path is treated as relative to the root of the repository.
+    pub fn record_file_contents(
+        &mut self,
+        path: impl Into<Utf8PathBuf>,
+        contents: Vec<u8>,
+    ) {
+        self.backend.record_file_contents(path.into(), contents);
+    }
+}
+
+/// The backend for validation.
+///
+/// Not part of the public API -- only implemented by the OpenAPI manager.
+#[doc(hidden)]
+pub trait ValidationBackend {
+    fn report_error(&mut self, error: anyhow::Error);
+    fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec<u8>);
+}
diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs
index ee649e79b20..264eec25032 100644
--- a/dev-tools/releng/src/main.rs
+++ b/dev-tools/releng/src/main.rs
@@ -143,6 +143,10 @@ struct Args {
     /// Path to a pre-built omicron-package binary (skips building if set)
     #[clap(long, env = "OMICRON_PACKAGE")]
     omicron_package_bin: Option<Utf8PathBuf>,
+
+    /// Build the helios OS image from local sources.
+    #[clap(long)]
+    helios_local: bool,
 }
 
 impl Args {
@@ -286,7 +290,7 @@ async fn main() -> Result<()> {
                 logger,
                 "helios checkout at {0} is out-of-date; run \
                  `git pull -C {0}`, or run omicron-releng with \
-                 --ignore-helios-origin or --helios-path",
+                 --ignore-helios-origin or --helios-dir",
                 shell_words::quote(args.helios_dir.as_str())
             );
             preflight_ok = false;
@@ -496,39 +500,42 @@ async fn main() -> Result<()> {
             Utc::now().format("%Y-%m-%d %H:%M")
         );
 
-        // helios-build experiment-image
-        jobs.push_command(
-            format!("{}-image", target),
-            Command::new("ptime")
-                .arg("-m")
-                .arg(args.helios_dir.join("helios-build"))
-                .arg("experiment-image")
-                .arg("-o") // output directory for image
-                .arg(args.output_dir.join(format!("os-{}", target)))
+        let mut image_cmd = Command::new("ptime")
+            .arg("-m")
+            .arg(args.helios_dir.join("helios-build"))
+            .arg("experiment-image")
+            .arg("-o") // output directory for image
+            .arg(args.output_dir.join(format!("os-{}", target)))
+            .arg("-F") // pass extra image builder features
+            .arg(format!("optever={}", opte_version.trim()))
+            .arg("-P") // include all files from extra proto area
+            .arg(proto_dir.join("root"))
+            .arg("-N") // image name
+            .arg(image_name)
+            .arg("-s") // tempdir name suffix
+            .arg(target.as_str())
+            .args(target.image_build_args())
+            .current_dir(&args.helios_dir)
+            .env(
+                "IMAGE_DATASET",
+                match target {
+                    Target::Host => &args.host_dataset,
+                    Target::Recovery => &args.recovery_dataset,
+                },
+            )
+            .env_remove("CARGO")
+            .env_remove("RUSTUP_TOOLCHAIN");
+
+        if !args.helios_local {
+            image_cmd = image_cmd
                 .arg("-p") // use an external package repository
-                .arg(format!("helios-dev={}", HELIOS_REPO))
-                .arg("-F") // pass extra image builder features
-                .arg(format!("optever={}", opte_version.trim()))
-                .arg("-P") // include all files from extra proto area
-                .arg(proto_dir.join("root"))
-                .arg("-N") // image name
-                .arg(image_name)
-                .arg("-s") // tempdir name suffix
-                .arg(target.as_str())
-                .args(target.image_build_args())
-                .current_dir(&args.helios_dir)
-                .env(
-                    "IMAGE_DATASET",
-                    match target {
-                        Target::Host => &args.host_dataset,
-                        Target::Recovery => &args.recovery_dataset,
-                    },
-                )
-                .env_remove("CARGO")
-                .env_remove("RUSTUP_TOOLCHAIN"),
-        )
-        .after("helios-setup")
-        .after(format!("{}-proto", target));
.arg(format!("helios-dev={HELIOS_REPO}")) + } + + // helios-build experiment-image + jobs.push_command(format!("{}-image", target), image_cmd) + .after("helios-setup") + .after(format!("{}-proto", target)); } // Build the recovery target after we build the host target. Only one // of these will build at a time since Cargo locks its target directory; diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index ec1b7825c61..508d0c73eeb 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -24,6 +24,7 @@ workspace = true # downstream binaries do depend on it.) anyhow.workspace = true camino.workspace = true +camino-tempfile.workspace = true cargo_toml = "0.20" cargo_metadata.workspace = true clap.workspace = true @@ -32,5 +33,6 @@ macaddr.workspace = true serde.workspace = true swrite.workspace = true tabled.workspace = true +textwrap.workspace = true toml.workspace = true usdt.workspace = true diff --git a/dev-tools/xtask/src/check_workspace_deps.rs b/dev-tools/xtask/src/check_workspace_deps.rs index 73d5643ffb3..a9627569b97 100644 --- a/dev-tools/xtask/src/check_workspace_deps.rs +++ b/dev-tools/xtask/src/check_workspace_deps.rs @@ -125,9 +125,6 @@ pub fn run_cmd() -> Result<()> { // Including xtask causes hakari to not work as well and build // times to be longer (omicron#4392). "xtask", - // The tests here should not be run by default, as they require - // a running control plane. - "end-to-end-tests", ] .contains(&package.name.as_str()) .then_some(&package.id) diff --git a/dev-tools/xtask/src/clippy.rs b/dev-tools/xtask/src/clippy.rs index 7924a05574a..229d0e126ea 100644 --- a/dev-tools/xtask/src/clippy.rs +++ b/dev-tools/xtask/src/clippy.rs @@ -4,7 +4,8 @@ //! Subcommand: cargo xtask clippy -use anyhow::{bail, Context, Result}; +use crate::common::run_subcmd; +use anyhow::Result; use clap::Parser; use std::process::Command; @@ -51,25 +52,5 @@ pub fn run_cmd(args: ClippyArgs) -> Result<()> { .arg("--deny") .arg("warnings"); - eprintln!( - "running: {:?} {}", - &cargo, - command - .get_args() - .map(|arg| format!("{:?}", arg.to_str().unwrap())) - .collect::>() - .join(" ") - ); - - let exit_status = command - .spawn() - .context("failed to spawn child process")? - .wait() - .context("failed to wait for child process")?; - - if !exit_status.success() { - bail!("clippy failed: {}", exit_status); - } - - Ok(()) + run_subcmd(command) } diff --git a/dev-tools/xtask/src/common.rs b/dev-tools/xtask/src/common.rs new file mode 100644 index 00000000000..03b17a560f2 --- /dev/null +++ b/dev-tools/xtask/src/common.rs @@ -0,0 +1,34 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common xtask command helpers + +use anyhow::{bail, Context, Result}; +use std::process::Command; + +/// Runs the given command, printing some basic debug information around it, and +/// failing with an error message if the command does not exit successfully +pub fn run_subcmd(mut command: Command) -> Result<()> { + eprintln!( + "running: {} {}", + command.get_program().to_str().unwrap(), + command + .get_args() + .map(|arg| format!("{:?}", arg.to_str().unwrap())) + .collect::>() + .join(" ") + ); + + let exit_status = command + .spawn() + .context("failed to spawn child process")? 
+        .wait()
+        .context("failed to wait for child process")?;
+
+    if !exit_status.success() {
+        bail!("failed: {}", exit_status);
+    }
+
+    Ok(())
+}
diff --git a/dev-tools/xtask/src/live_tests.rs b/dev-tools/xtask/src/live_tests.rs
new file mode 100644
index 00000000000..e63881a1fd3
--- /dev/null
+++ b/dev-tools/xtask/src/live_tests.rs
@@ -0,0 +1,159 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Subcommand: cargo xtask live-tests
+
+use crate::common::run_subcmd;
+use anyhow::{bail, Context, Result};
+use clap::Parser;
+use std::process::Command;
+
+#[derive(Parser)]
+pub struct Args {}
+
+pub fn run_cmd(_args: Args) -> Result<()> {
+    const NAME: &str = "live-tests-archive";
+
+    // The live tests operate in deployed environments, which always run
+    // illumos. Bail out quickly if someone tries to run this on a system whose
+    // binaries won't be usable. (We could compile this subcommand out
+    // altogether on non-illumos systems, but it seems more confusing to be
+    // silently missing something you might expect to be there. Plus, you can
+    // still check and even build *this* code on non-illumos systems.)
+    if cfg!(not(target_os = "illumos")) {
+        bail!("live-tests archive can only be built on illumos systems");
+    }
+
+    let tmpdir_root =
+        camino_tempfile::tempdir().context("creating temporary directory")?;
+    let final_tarball = camino::Utf8PathBuf::try_from(
+        std::env::current_dir()
+            .map(|d| d.join("target"))
+            .context("getting current directory")?,
+    )
+    .context("non-UTF-8 current directory")?
+    .join(format!("{}.tgz", NAME));
+    let proto_root = tmpdir_root.path().join(NAME);
+    let nextest_archive_file = proto_root.join("omicron-live-tests.tar.zst");
+
+    eprintln!("using temporary directory: {}", tmpdir_root.path());
+    eprintln!("will create archive file: {}", nextest_archive_file);
+    eprintln!("output tarball: {}", final_tarball);
+    eprintln!();
+
+    std::fs::create_dir(&proto_root)
+        .with_context(|| format!("mkdir {:?}", &proto_root))?;
+
+    let cargo =
+        std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo"));
+    let mut command = Command::new(&cargo);
+
+    command.arg("nextest");
+    command.arg("archive");
+    command.arg("--package");
+    command.arg("omicron-live-tests");
+    command.arg("--archive-file");
+    command.arg(&nextest_archive_file);
+    run_subcmd(command)?;
+
+    // Using nextest archives requires that the source be separately transmitted
+    // to the system where the tests will be run. We're trying to automate
+    // that. So let's bundle up the source and the nextest archive into one big
+    // tarball. But which source files do we bundle? We need:
+    //
+    // - Cargo.toml (nextest expects to find this)
+    // - .config/nextest.toml (nextest's configuration, which is used while
+    //   running the tests)
+    // - live-tests (this is where the tests live, and they might expect stuff
+    //   that exists in here like expectorate files)
+    //
+    // plus the nextest archive file.
+    //
+    // To avoid creating a tarbomb, we want all the files prefixed with
+    // "live-tests-archive/". There's no great way to do this with the illumos
+    // tar(1) except to create a temporary directory called "live-tests-archive"
+    // that contains the files, and then tar up that directory.
+    //
+    // Ironically, an easy way to construct that directory is with tar(1).
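+    //
+    // The resulting proto area (and thus the final tarball) looks roughly
+    // like this -- the exact file set comes from the commands below:
+    //
+    //   live-tests-archive/Cargo.toml
+    //   live-tests-archive/.config/nextest.toml
+    //   live-tests-archive/live-tests/...
+    //   live-tests-archive/omicron-live-tests.tar.zst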
+ let mut command = Command::new("bash"); + command.arg("-c"); + command.arg(format!( + "tar cf - Cargo.toml .config/nextest.toml live-tests | \ + tar xf - -C {:?}", + &proto_root + )); + run_subcmd(command)?; + + let mut command = Command::new("tar"); + command.arg("cf"); + command.arg(&final_tarball); + command.arg("-C"); + command.arg(tmpdir_root.path()); + command.arg(NAME); + run_subcmd(command)?; + + drop(tmpdir_root); + + eprint!("created: "); + println!("{}", &final_tarball); + eprintln!("\nTo use this:\n"); + eprintln!( + "1. Copy the tarball to the switch zone in a deployed Omicron system.\n" + ); + let raw = &[ + "scp \\", + &format!("{} \\", &final_tarball), + "root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root", + ] + .join("\n"); + let text = textwrap::wrap( + &raw, + textwrap::Options::new(160) + .initial_indent(" e.g., ") + .subsequent_indent(" "), + ); + eprintln!("{}\n", text.join("\n")); + eprintln!("2. Copy the `cargo-nextest` binary to the same place.\n"); + let raw = &[ + "scp \\", + "$(which cargo-nextest) \\", + "root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root", + ] + .join("\n"); + let text = textwrap::wrap( + &raw, + textwrap::Options::new(160) + .initial_indent(" e.g., ") + .subsequent_indent(" "), + ); + eprintln!("{}\n", text.join("\n")); + eprintln!("3. On that system, unpack the tarball with:\n"); + eprintln!(" tar xzf {}\n", final_tarball.file_name().unwrap()); + eprintln!("4. On that system, run tests with:\n"); + // TMPDIR=/var/tmp puts stuff on disk, cached as needed, rather than the + // default /tmp which requires that stuff be in-memory. That can lead to + // great sadness if the tests wind up writing a lot of data. + // + // nextest configuration for these tests is specified in the "live-tests" + // profile. + let raw = &[ + "TMPDIR=/var/tmp ./cargo-nextest nextest run --profile=live-tests \\", + &format!( + "--archive-file {}/{} \\", + NAME, + nextest_archive_file.file_name().unwrap() + ), + &format!("--workspace-remap {}", NAME), + ] + .join("\n"); + let text = textwrap::wrap( + &raw, + textwrap::Options::new(160) + .initial_indent(" ") + .subsequent_indent(" "), + ); + eprintln!("{}\n", text.join("\n")); + + Ok(()) +} diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index 02fd05a198c..9880adeb67b 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -16,8 +16,10 @@ use std::process::Command; mod check_features; mod check_workspace_deps; mod clippy; +mod common; #[cfg_attr(not(target_os = "illumos"), allow(dead_code))] mod external; +mod live_tests; mod usdt; #[cfg(target_os = "illumos")] @@ -59,6 +61,9 @@ enum Cmds { /// Download binaries, OpenAPI specs, and other out-of-repo utilities. Download(external::External), + /// Create a bundle of live tests + LiveTests(live_tests::Args), + /// Utilities for working with MGS. MgsDev(external::External), /// Utilities for working with Omicron. 
@@ -127,6 +132,7 @@ fn main() -> Result<()> { external.exec_bin("xtask-downloader") } } + Cmds::LiveTests(args) => live_tests::run_cmd(args), Cmds::MgsDev(external) => external.exec_bin("mgs-dev"), Cmds::OmicronDev(external) => external.exec_bin("omicron-dev"), Cmds::Openapi(external) => external.exec_bin("openapi-manager"), diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs index d28c3d90379..29738016f54 100644 --- a/dev-tools/xtask/src/virtual_hardware.rs +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -49,6 +49,10 @@ enum Commands { #[clap(long, default_value = PXA_MAC_DEFAULT)] pxa_mac: String, + + /// Size in bytes for created vdevs + #[clap(long, default_value_t = 20 * GB)] + vdev_size: u64, }, /// Destroy virtual hardware which was initialized with "Create" Destroy, @@ -96,7 +100,6 @@ pub struct Args { static NO_INSTALL_MARKER: &'static str = "/etc/opt/oxide/NO_INSTALL"; const GB: u64 = 1 << 30; -const VDEV_SIZE: u64 = 20 * GB; const ARP: &'static str = "/usr/sbin/arp"; const DLADM: &'static str = "/usr/sbin/dladm"; @@ -163,6 +166,7 @@ pub fn run_cmd(args: Args) -> Result<()> { gateway_mac, pxa, pxa_mac, + vdev_size, } => { let physical_link = if let Some(l) = physical_link { l @@ -172,7 +176,7 @@ pub fn run_cmd(args: Args) -> Result<()> { println!("creating virtual hardware"); if matches!(args.scope, Scope::All | Scope::Disks) { - ensure_vdevs(&sled_agent_config, &args.vdev_dir)?; + ensure_vdevs(&sled_agent_config, &args.vdev_dir, vdev_size)?; } if matches!(args.scope, Scope::All | Scope::Network) && softnpu_mode == "zone" @@ -503,6 +507,7 @@ impl SledAgentConfig { fn ensure_vdevs( sled_agent_config: &Utf8Path, vdev_dir: &Utf8Path, + vdev_size: u64, ) -> Result<()> { let config = SledAgentConfig::read(sled_agent_config)?; @@ -522,7 +527,7 @@ fn ensure_vdevs( } else { println!("creating {vdev_path}"); let file = std::fs::File::create(&vdev_path)?; - file.set_len(VDEV_SIZE)?; + file.set_len(vdev_size)?; } } Ok(()) diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index d11dabaf850..b4516b8b77c 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -15,24 +15,24 @@ clap.workspace = true dns-server-api.workspace = true dns-service-client.workspace = true dropshot.workspace = true +hickory-client.workspace = true +hickory-proto.workspace = true +hickory-resolver.workspace = true +hickory-server.workspace = true http.workspace = true pretty-hex.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true sled.workspace = true -slog.workspace = true -slog-term.workspace = true slog-async.workspace = true slog-envlogger.workspace = true +slog-term.workspace = true +slog.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true -trust-dns-client.workspace = true -trust-dns-proto.workspace = true -trust-dns-resolver.workspace = true -trust-dns-server.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true @@ -44,4 +44,3 @@ openapiv3.workspace = true openapi-lint.workspace = true serde_json.workspace = true subprocess.workspace = true -trust-dns-resolver.workspace = true diff --git a/dns-server/src/bin/dns-server.rs b/dns-server/src/bin/dns-server.rs index 52a9c17c0dc..9e8d098ee2f 100644 --- a/dns-server/src/bin/dns-server.rs +++ b/dns-server/src/bin/dns-server.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! 
Executable that starts the HTTP-configurable DNS server used for both -//! internal DNS (RFD 248) and extenral DNS (RFD 357) for the Oxide system +//! internal DNS (RFD 248) and external DNS (RFD 357) for the Oxide system use anyhow::anyhow; use anyhow::Context; diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 5c761f2aa3f..4ecbe382c8f 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -13,6 +13,19 @@ use crate::storage::Store; use anyhow::anyhow; use anyhow::Context; use dns_server_api::DnsRecord; +use hickory_proto::op::Header; +use hickory_proto::op::ResponseCode; +use hickory_proto::rr::rdata::SRV; +use hickory_proto::rr::RData; +use hickory_proto::rr::Record; +use hickory_proto::rr::RecordType; +use hickory_proto::serialize::binary::BinDecodable; +use hickory_proto::serialize::binary::BinDecoder; +use hickory_proto::serialize::binary::BinEncoder; +use hickory_resolver::Name; +use hickory_server::authority::MessageRequest; +use hickory_server::authority::MessageResponse; +use hickory_server::authority::MessageResponseBuilder; use pretty_hex::*; use serde::Deserialize; use slog::{debug, error, info, o, trace, Logger}; @@ -21,17 +34,6 @@ use std::str::FromStr; use std::sync::Arc; use thiserror::Error; use tokio::net::UdpSocket; -use trust_dns_proto::op::header::Header; -use trust_dns_proto::op::response_code::ResponseCode; -use trust_dns_proto::rr::rdata::SRV; -use trust_dns_proto::rr::record_data::RData; -use trust_dns_proto::rr::record_type::RecordType; -use trust_dns_proto::rr::{Name, Record}; -use trust_dns_proto::serialize::binary::{ - BinDecodable, BinDecoder, BinEncoder, -}; -use trust_dns_server::authority::MessageResponse; -use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; use uuid::Uuid; /// Configuration related to the DNS server @@ -167,7 +169,10 @@ async fn handle_dns_packet(request: Request) { Err(error) => { let header = Header::response_from_request(mr.header()); let rb_servfail = MessageResponseBuilder::from_message_request(&mr); - error!(log, "failed to handle incoming DNS message: {:#}", error); + error!( + log, + "failed to handle incoming DNS message: {:#?} {:#}", mr, error + ); match error { RequestError::NxDomain(_) => { let rb_nxdomain = @@ -222,7 +227,7 @@ fn dns_record_to_record( let mut a = Record::new(); a.set_name(name.clone()) .set_rr_type(RecordType::A) - .set_data(Some(RData::A(addr))); + .set_data(Some(RData::A(addr.into()))); Ok(a) } @@ -230,7 +235,7 @@ fn dns_record_to_record( let mut aaaa = Record::new(); aaaa.set_name(name.clone()) .set_rr_type(RecordType::AAAA) - .set_data(Some(RData::AAAA(addr))); + .set_data(Some(RData::AAAA(addr.into()))); Ok(aaaa) } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index 424159e41d2..8abd3b945eb 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -47,13 +47,13 @@ pub mod http_server; pub mod storage; use anyhow::{anyhow, Context}; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use slog::o; use std::net::SocketAddr; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; /// Starts both the HTTP and DNS servers over a given store. 
 pub async fn start_servers(
@@ -167,12 +167,14 @@ impl TransientServer {
             socket_addr: self.dns_server.local_address(),
             protocol: Protocol::Udp,
             tls_dns_name: None,
-            trust_nx_responses: false,
+            trust_negative_responses: false,
             bind_addr: None,
         });
+        let mut resolver_opts = ResolverOpts::default();
+        // Enable edns for potentially larger records
+        resolver_opts.edns0 = true;
         let resolver =
-            TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default())
-                .context("creating DNS resolver")?;
+            TokioAsyncResolver::tokio(resolver_config, resolver_opts);
         Ok(resolver)
     }
 }
diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs
index 85b2e79b8b9..b3141f6751b 100644
--- a/dns-server/src/storage.rs
+++ b/dns-server/src/storage.rs
@@ -95,6 +95,8 @@
 use anyhow::{anyhow, Context};
 use camino::Utf8PathBuf;
 use dns_server_api::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord};
+use hickory_proto::rr::LowerName;
+use hickory_resolver::Name;
 use serde::{Deserialize, Serialize};
 use sled::transaction::ConflictableTransactionError;
 use slog::{debug, error, info, o, warn};
@@ -104,8 +106,6 @@
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use thiserror::Error;
 use tokio::sync::Mutex;
-use trust_dns_client::rr::LowerName;
-use trust_dns_client::rr::Name;
 
 const KEY_CONFIG: &'static str = "config";
 
@@ -586,7 +586,7 @@ impl Store {
     /// If the returned set would have been empty, returns `QueryError::NoName`.
     pub(crate) fn query(
         &self,
-        mr: &trust_dns_server::authority::MessageRequest,
+        mr: &hickory_server::authority::MessageRequest,
     ) -> Result<Vec<DnsRecord>, QueryError> {
         let name = mr.query().name();
         let orig_name = mr.query().original().name();
@@ -784,14 +784,14 @@ mod test {
     use dns_server_api::DnsConfigParams;
     use dns_server_api::DnsConfigZone;
     use dns_server_api::DnsRecord;
+    use hickory_proto::rr::LowerName;
+    use hickory_resolver::Name;
     use omicron_test_utils::dev::test_setup_log;
     use std::collections::BTreeSet;
     use std::collections::HashMap;
     use std::net::Ipv6Addr;
     use std::str::FromStr;
     use std::sync::Arc;
-    use trust_dns_client::rr::LowerName;
-    use trust_dns_client::rr::Name;
 
     /// As usual, `TestContext` groups the various pieces we need in a bunch of
     /// our tests and helps make sure they get cleaned up properly.
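The resolver construction in `TransientServer` above and in the basic test below follows the same pattern in this trust-dns-to-hickory migration. Distilled as a sketch (with `dns_addr` standing in for the DNS server's local UDP address):

```rust
use hickory_resolver::config::{
    NameServerConfig, Protocol, ResolverConfig, ResolverOpts,
};
use hickory_resolver::TokioAsyncResolver;
use std::net::SocketAddr;

fn make_resolver(dns_addr: SocketAddr) -> TokioAsyncResolver {
    let mut resolver_config = ResolverConfig::new();
    resolver_config.add_name_server(NameServerConfig {
        socket_addr: dns_addr,
        protocol: Protocol::Udp,
        tls_dns_name: None,
        // Renamed from `trust_nx_responses` in the trust-dns era.
        trust_negative_responses: false,
        bind_addr: None,
    });
    let mut resolver_opts = ResolverOpts::default();
    // Enable EDNS(0) so responses can exceed the classic 512-byte UDP limit.
    resolver_opts.edns0 = true;
    // Unlike the old trust-dns constructor, this one is infallible.
    TokioAsyncResolver::tokio(resolver_config, resolver_opts)
}
```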
diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index b3b7f37378a..fa5bfea4684 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -9,6 +9,12 @@ use dns_service_client::{ Client, }; use dropshot::{test_util::LogContext, HandlerTaskMode}; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; +use hickory_resolver::{ + config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, + proto::op::ResponseCode, +}; use omicron_test_utils::dev::test_setup_log; use slog::o; use std::{ @@ -16,12 +22,6 @@ use std::{ net::Ipv6Addr, net::{IpAddr, Ipv4Addr}, }; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; -use trust_dns_resolver::{ - config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, - proto::op::ResponseCode, -}; const TEST_ZONE: &'static str = "oxide.internal"; @@ -374,17 +374,19 @@ async fn init_client_server( ) .await?; - let mut rc = ResolverConfig::new(); - rc.add_name_server(NameServerConfig { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = - TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); let client = Client::new(&format!("http://{}", dropshot_server.local_addr()), log); diff --git a/docs/adding-an-endpoint.adoc b/docs/adding-an-endpoint.adoc index cebaae4c525..d9e5c559b40 100644 --- a/docs/adding-an-endpoint.adoc +++ b/docs/adding-an-endpoint.adoc @@ -12,17 +12,21 @@ NOTE: This guide is not intended to be exhaustive, or even particularly detailed. For that, refer to the documentation which exists in the codebase -- this document should act as a jumping-off point. 
-=== **HTTP** -* Add endpoints for either the internal or external API -** xref:../nexus/src/external_api/http_entrypoints.rs[The External API] is customer-facing, and provides interfaces for both developers and operators -** xref:../nexus/src/internal_api/http_entrypoints.rs[The Internal API] is internal, and provides interfaces for services on the Oxide rack (such as the Sled Agent) to call -** Register endpoints in the `register_endpoints` method (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/external_api/http_entrypoints.rs#L84[Example]) +== **HTTP** + +* Add endpoint _definitions_ for either the internal or external API +** xref:../nexus/external-api/src/lib.rs[The External API] is customer-facing, and provides interfaces for both developers and operators +** xref:../nexus/internal-api/src/lib.rs[The Internal API] is internal, and provides interfaces for services on the Oxide rack (such as the Sled Agent) to call +* Add the corresponding _implementations_ to the respective `http_entrypoints.rs` files: +** xref:../nexus/src/external_api/http_entrypoints.rs[The External API's `http_entrypoints.rs`] +** xref:../nexus/src/internal_api/http_entrypoints.rs[The Internal API's `http_entrypoints.rs`] ** These endpoints typically call into the *Application* layer, and do not access the database directly * Inputs and Outputs ** Input parameters are defined in https://github.com/oxidecomputer/omicron/blob/main/nexus/types/src/external_api/params.rs[params.rs] (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/types/src/external_api/params.rs#L587-L601[Example]) ** Output views are defined in https://github.com/oxidecomputer/omicron/blob/main/nexus/types/src/external_api/views.rs[views.rs] (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/types/src/external_api/views.rs#L270-L274[Example]) -=== **Lookup & Authorization** +== **Lookup & Authorization** + * Declare a new resource-to-be-looked-up via `lookup_resource!` in xref:../nexus/src/db/lookup.rs[lookup.rs] (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/db/lookup.rs#L557-L564[Example]) ** This defines a new struct named after your resource, with some https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/db-macros/src/lookup.rs#L521-L628[auto-generated methods], including `lookup_for` (look up the authz object), `fetch_for` (look up and return the object), and more * Add helper functions to `LookupPath` to make it possible to fetch the resource by either UUID or name (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/db/lookup.rs#L225-L237[Example]) @@ -32,12 +36,14 @@ this document should act as a jumping-off point. 
 ** If you define `polar_snippet = Custom`, you should edit the omicron.polar file to describe the authorization policy for your object (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/authz/omicron.polar#L376-L393[Example])
 * Either way, you should reference the new resource when https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/authz/oso_generic.rs#L119-L148[constructing the Oso structure]
 
-=== **Application**
+== **Application**
+
 * Add any "business logic" for the resource to xref:../nexus/src/app[the app directory]
 * This layer bridges the gap between the database and external services.
 * If your application logic involves any multi-step operations which would be interrupted by Nexus stopping mid-execution (due to reboot, crash, failure, etc), it is recommended to use a https://github.com/oxidecomputer/omicron/tree/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/src/app/sagas[saga] to define the operations durably.
 
-=== **Database**
+== **Database**
+
 * `CREATE TABLE` for the resource in xref:../schema/crdb/dbinit.sql[dbinit.sql] (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/common/src/sql/dbinit.sql#L1103-L1129[Example])
 * Add an equivalent schema for the resource in xref:../nexus/db-model/src/schema.rs[schema.rs], which allows https://docs.diesel.rs/master/diesel/index.html[Diesel] to translate raw SQL to rust queries (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/db-model/src/schema.rs#L144-L155[Example])
 * Add a Rust representation of the database object to xref:../nexus/db-model/src[the DB model] (https://github.com/oxidecomputer/omicron/blob/1dfe47c1b3122bc4f32a9c517cb31b1600581ea2/nexus/db-model/src/ip_pool.rs#L24-L40[Example])
@@ -48,22 +54,10 @@ this document should act as a jumping-off point.
 * Authorization
 ** There exists a https://github.com/oxidecomputer/omicron/blob/main/nexus/src/authz/policy_test[policy test] which compares all Oso objects against an expected policy. New resources are usually added to https://github.com/oxidecomputer/omicron/blob/main/nexus/src/authz/policy_test/resources.rs[resources.rs] to get coverage.
-* openapi
-** Nexus generates a new openapi spec from the dropshot endpoints. If you modify endpoints, you'll need to update openapi JSON files.
-*** The following commands may be used to update APIs:
-+
-[source, rust]
-----
-$ cargo run -p omicron-nexus --bin nexus -- -I nexus/examples/config.toml > openapi/nexus-internal.json
-$ cargo run -p omicron-nexus --bin nexus -- -O nexus/examples/config.toml > openapi/nexus.json
-$ cargo run -p omicron-sled-agent --bin sled-agent -- openapi > openapi/sled-agent.json
-----
-*** Alternatively, you can run:
-+
-[source, rust]
-----
-$ EXPECTORATE=overwrite cargo test_nexus_openapi test_nexus_openapi_internal test_sled_agent_openapi_sled
-----
+* OpenAPI
+** Once you've added or changed endpoint definitions in `nexus-external-api` or `nexus-internal-api`, you'll need to update the corresponding OpenAPI documents (the JSON files in `openapi/`).
+** To update all OpenAPI documents, run `cargo xtask openapi generate`.
+** This does not require you to provide an implementation, or to get either omicron-nexus or omicron-sled-agent to compile: just the definition in the API crate is sufficient.
 * Integration Tests
 ** Nexus' https://github.com/oxidecomputer/omicron/tree/main/nexus/tests/integration_tests[integration tests] are used to test across the HTTP interface. Typically, one file is used "per-resource".
 *** These tests use a simulated Sled Agent, and keep the "Nexus" object in-process, so it can still be accessed and modified for invasive testing.
diff --git a/docs/crdb-upgrades.adoc b/docs/crdb-upgrades.adoc
index eecfa9194e9..66133454653 100644
--- a/docs/crdb-upgrades.adoc
+++ b/docs/crdb-upgrades.adoc
@@ -60,13 +60,15 @@ a tick, but they must occur in that order.)
 . Add an enum variant for the new version to `CockroachDbClusterVersion`
   in `nexus/types/src/deployment/planning_input.rs`, and change the
   associated constant `NEWLY_INITIALIZED` to that value.
-. Run the test suite, which should catch any unexpected SQL
+. Regenerate the Nexus internal OpenAPI document, which contains an enum
+  of CockroachDB versions:
++
+....
+cargo xtask openapi generate
+....
+. Run the full test suite, which should catch any unexpected SQL
   compatibility issues between releases and help validate that your build
   works.
-  * You will need to run the `test_omdb_success_cases` test from
-    omicron-omdb with `EXPECTORATE=overwrite`; this file contains the
-    expected output of various omdb commands, including a fingerprint of
-    CockroachDB's cluster state.
 . Submit a PR for your changes to garbage-compactor; when merged, publish
   the final build to the `oxide-cockroachdb-build` S3 bucket.
 . Update `tools/cockroachdb_checksums`. For non-illumos checksums, use
diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml
index eb7cd68812f..044f70ef235 100644
--- a/end-to-end-tests/Cargo.toml
+++ b/end-to-end-tests/Cargo.toml
@@ -19,14 +19,14 @@ omicron-test-utils.workspace = true
 oxide-client.workspace = true
 rand.workspace = true
 reqwest = { workspace = true, features = ["cookies"] }
-russh = "0.43.0"
-russh-keys = "0.43.0"
+russh = "0.45.0"
+russh-keys = "0.45.0"
 serde.workspace = true
 serde_json.workspace = true
 sled-agent-types.workspace = true
 tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
 toml.workspace = true
-trust-dns-resolver.workspace = true
+hickory-resolver.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
 ispf.workspace = true
diff --git a/end-to-end-tests/README.adoc b/end-to-end-tests/README.adoc
index b9766db8097..3e31f2b3829 100644
--- a/end-to-end-tests/README.adoc
+++ b/end-to-end-tests/README.adoc
@@ -4,6 +4,8 @@ These tests run in Buildomat. They are built by the xref:../.github/buildomat/jo
 
 This package is not built or run by default (it is excluded from `default-members` in xref:../Cargo.toml[]).
 
+See also: xref:../live-tests/README.adoc[omicron-live-tests].
+
 == Running these tests on your machine
 
 1. xref:../docs/how-to-run.adoc[Make yourself a Gimlet].
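Stepping back to the openapi-manager changes earlier in this section: the `cargo xtask openapi generate` flow referenced above is also what drives the new `ValidationContext` hooks. A hypothetical `extra_validation` function wired into an `ApiSpec` could look like the sketch below; the function name and recorded file path are illustrative only, not part of this change:

```rust
use camino::Utf8PathBuf;
use openapi_manager_types::ValidationContext;
use openapiv3::OpenAPI;

// Matches the `fn(&OpenAPI, ValidationContext<'_>)` shape stored in
// `ApiSpec::extra_validation`.
fn validate_my_api(doc: &OpenAPI, mut cx: ValidationContext<'_>) {
    // Errors are collected and reported together by the manager.
    if doc.paths.paths.is_empty() {
        cx.report_error(anyhow::anyhow!("expected at least one path"));
    }
    // The path is relative to the workspace root. In check mode the manager
    // diffs this against what's on disk; in generate mode it overwrites it.
    cx.record_file_contents(
        Utf8PathBuf::from("openapi/my-api-path-count.txt"),
        doc.paths.paths.len().to_string().into_bytes(),
    );
}
```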
diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index d9a2d7027aa..53635575026 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,6 +1,7 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; +use hickory_resolver::error::ResolveErrorKind; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -13,7 +14,6 @@ use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; const RSS_CONFIG_STR: &str = include_str!(concat!( diff --git a/gateway-test-utils/configs/config.test.toml b/gateway-test-utils/configs/config.test.toml index 79975f4611a..4e3e9c6e6e9 100644 --- a/gateway-test-utils/configs/config.test.toml +++ b/gateway-test-utils/configs/config.test.toml @@ -88,6 +88,15 @@ addr = "[::1]:0" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. +dev_bind_loopback = true + # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". The actual path will be generated by the test suite for each diff --git a/gateway-test-utils/configs/sp_sim_config.test.toml b/gateway-test-utils/configs/sp_sim_config.test.toml index cc08eec30b7..4f370a167c8 100644 --- a/gateway-test-utils/configs/sp_sim_config.test.toml +++ b/gateway-test-utils/configs/sp_sim_config.test.toml @@ -20,6 +20,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 1" capabilities = 0x2 presence = "Present" +sensors = [ + {name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] [[simulated_sps.sidecar.components]] id = "dev-1" @@ -27,6 +30,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 2" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "South", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] [[simulated_sps.sidecar]] multicast_addr = "::1" @@ -56,6 +62,82 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "Southwest", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = 
"Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.45898438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.024414063, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.03125, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.328125, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2607.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2476.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2553.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2265.0, last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2649.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2275.0, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "::1" @@ -72,6 +154,90 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:0" + +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.3629, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.41893438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.025614603, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.02914, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.2618, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe 
Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2510.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2390.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2467.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2195.0, last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2680.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2212.0, last_data.timestamp = 1234 }, +] + + # # NOTE: for the test suite, the [log] section is ignored; sp-sim logs are rolled # into the gateway logfile. diff --git a/gateway-test-utils/src/setup.rs b/gateway-test-utils/src/setup.rs index 46bc55805aa..056bb451f73 100644 --- a/gateway-test-utils/src/setup.rs +++ b/gateway-test-utils/src/setup.rs @@ -8,6 +8,7 @@ use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use dropshot::test_util::LogContext; use gateway_messages::SpPort; +pub use omicron_gateway::metrics::MetricsConfig; use omicron_gateway::MgsArguments; use omicron_gateway::SpType; use omicron_gateway::SwitchPortConfig; @@ -33,6 +34,7 @@ pub struct GatewayTestContext { pub server: omicron_gateway::Server, pub simrack: SimRack, pub logctx: LogContext, + pub gateway_id: Uuid, } impl GatewayTestContext { @@ -48,13 +50,18 @@ pub fn load_test_config() -> (omicron_gateway::Config, sp_sim::Config) { let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR")); let server_config_file_path = manifest_dir.join("configs/config.test.toml"); let server_config = - omicron_gateway::Config::from_file(&server_config_file_path) - .expect("failed to load config.test.toml"); + match omicron_gateway::Config::from_file(&server_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load MGS config: {e}"), + }; let sp_sim_config_file_path = manifest_dir.join("configs/sp_sim_config.test.toml"); - let sp_sim_config = sp_sim::Config::from_file(&sp_sim_config_file_path) - .expect("failed to load sp_sim_config.test.toml"); + let sp_sim_config = + match sp_sim::Config::from_file(&sp_sim_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load SP simulator config: {e}"), + }; (server_config, sp_sim_config) } @@ -143,8 +150,8 @@ pub async fn test_setup_with_config( // Start gateway server let rack_id = Some(Uuid::parse_str(RACK_UUID).unwrap()); - - let args = MgsArguments { id: Uuid::new_v4(), addresses, rack_id }; + let gateway_id = Uuid::new_v4(); + let args = MgsArguments { id: gateway_id, addresses, rack_id }; let server = omicron_gateway::Server::start( server_config.clone(), args, @@ -206,5 +213,5 @@ pub async fn test_setup_with_config( log.new(o!("component" => "client test context")), ); - GatewayTestContext { client, server, simrack, logctx } + GatewayTestContext { client, server, simrack, logctx, gateway_id } } diff --git 
a/gateway/Cargo.toml b/gateway/Cargo.toml index 3cfd1d447b9..bdf4a911afb 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -11,6 +11,7 @@ workspace = true anyhow.workspace = true base64.workspace = true camino.workspace = true +chrono.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -39,6 +40,9 @@ tokio-tungstenite.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +oximeter.workspace = true +oximeter-producer.workspace = true +oximeter-instruments = { workspace = true, features = ["http-instruments"] } [dev-dependencies] expectorate.workspace = true diff --git a/gateway/examples/config.toml b/gateway/examples/config.toml index d29d9508b93..a76edcd7b5c 100644 --- a/gateway/examples/config.toml +++ b/gateway/examples/config.toml @@ -71,6 +71,15 @@ addr = "[::1]:33320" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. +dev_bind_loopback = true + [log] # Show log messages of this level and more severe level = "debug" diff --git a/gateway/src/config.rs b/gateway/src/config.rs index afdb046881d..edf895ef59c 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -6,6 +6,7 @@ //! configuration use crate::management_switch::SwitchConfig; +use crate::metrics::MetricsConfig; use camino::Utf8Path; use camino::Utf8PathBuf; use dropshot::ConfigLogging; @@ -25,6 +26,8 @@ pub struct Config { pub switch: SwitchConfig, /// Server-wide logging configuration. pub log: ConfigLogging, + /// Configuration for SP sensor metrics. 
+ pub metrics: Option, } impl Config { @@ -47,13 +50,13 @@ pub struct PartialDropshotConfig { #[derive(Debug, Error, SlogInlineError)] pub enum LoadError { - #[error("error reading \"{path}\"")] + #[error("error reading \"{path}\": {err}")] Io { path: Utf8PathBuf, #[source] err: std::io::Error, }, - #[error("error parsing \"{path}\"")] + #[error("error parsing \"{path}\": {err}")] Parse { path: Utf8PathBuf, #[source] diff --git a/gateway/src/context.rs b/gateway/src/context.rs index 939bb9b6b90..dc5717604bf 100644 --- a/gateway/src/context.rs +++ b/gateway/src/context.rs @@ -16,11 +16,13 @@ pub struct ServerContext { pub mgmt_switch: ManagementSwitch, pub host_phase2_provider: Arc, pub rack_id: OnceLock, + pub latencies: oximeter_instruments::http::LatencyTracker, pub log: Logger, } impl ServerContext { pub async fn new( + id: Uuid, host_phase2_provider: Arc, switch_config: SwitchConfig, rack_id_config: Option, @@ -37,7 +39,23 @@ impl ServerContext { OnceLock::new() }; + // Track from 1 microsecond == 1e3 nanoseconds + const LATENCY_START_POWER: u16 = 3; + // To 1000s == 1e9 * 1e3 == 1e12 nanoseconds + const LATENCY_END_POWER: u16 = 12; + let latencies = + oximeter_instruments::http::LatencyTracker::with_log_linear_bins( + oximeter_instruments::http::HttpService { + name: "management-gateway-service".into(), + id, + }, + LATENCY_START_POWER, + LATENCY_END_POWER, + ) + .expect("start and end decades are hardcoded and should be valid"); + Ok(Arc::new(ServerContext { + latencies, mgmt_switch, host_phase2_provider, rack_id, diff --git a/gateway/src/error.rs b/gateway/src/error.rs index 5933daa3404..ee148e0c98c 100644 --- a/gateway/src/error.rs +++ b/gateway/src/error.rs @@ -26,12 +26,8 @@ pub enum StartupError { #[derive(Debug, Error, SlogInlineError)] pub enum SpCommsError { - #[error("discovery process not yet complete")] - DiscoveryNotYetComplete, - #[error("location discovery failed: {reason}")] - DiscoveryFailed { reason: String }, - #[error("nonexistent SP {0:?}")] - SpDoesNotExist(SpIdentifier), + #[error(transparent)] + Discovery(#[from] SpLookupError), #[error("unknown socket address for SP {0:?}")] SpAddressUnknown(SpIdentifier), #[error( @@ -52,13 +48,22 @@ pub enum SpCommsError { }, } +/// Errors returned by attempts to look up a SP in the management switch's +/// discovery map. +#[derive(Debug, Error, SlogInlineError)] +pub enum SpLookupError { + #[error("discovery process not yet complete")] + DiscoveryNotYetComplete, + #[error("location discovery failed: {reason}")] + DiscoveryFailed { reason: String }, + #[error("nonexistent SP {0:?}")] + SpDoesNotExist(SpIdentifier), +} + impl From for HttpError { fn from(error: SpCommsError) -> Self { match error { - SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request( - Some("InvalidSp".to_string()), - InlineErrorChain::new(&error).to_string(), - ), + SpCommsError::Discovery(err) => HttpError::from(err), SpCommsError::SpCommunicationFailed { err: CommunicationError::SpError( @@ -124,21 +129,11 @@ impl From for HttpError { "UpdateInProgress", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryNotYetComplete => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryNotYetComplete", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::SpAddressUnknown(_) => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "SpAddressUnknown", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryFailed { .. 
} => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryFailed ", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::Timeout { .. } => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "Timeout ", @@ -160,6 +155,27 @@ impl From for HttpError { } } +impl From for HttpError { + fn from(error: SpLookupError) -> Self { + match error { + SpLookupError::SpDoesNotExist(_) => HttpError::for_bad_request( + Some("InvalidSp".to_string()), + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryNotYetComplete => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryNotYetComplete", + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryFailed { .. } => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryFailed ", + InlineErrorChain::new(&error).to_string(), + ), + } + } +} + // Helper function to return an `HttpError` with the same internal and external // message. MGS is an "internal" service - even when we return a 500-level // status code, we want to give our caller some information about what is going diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index 332f50ed8a7..c10e71ad61f 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -81,18 +81,22 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let state = sp.state().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let state = sp.state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + let rot_state = sp + .rot_state(gateway_messages::RotBootInfo::HIGHEST_KNOWN_VERSION) + .await; - let rot_state = sp - .rot_state(gateway_messages::RotBootInfo::HIGHEST_KNOWN_VERSION) - .await; + let final_state = sp_state_from_comms(state, rot_state); - let final_state = sp_state_from_comms(state, rot_state); - Ok(HttpResponseOk(final_state)) + Ok(HttpResponseOk(final_state)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_startup_options_get( @@ -100,15 +104,18 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let mgmt_switch = &apictx.mgmt_switch; - let sp_id = path.into_inner().sp.into(); - let sp = mgmt_switch.sp(sp_id)?; + let handler = async { + let mgmt_switch = &apictx.mgmt_switch; + let sp_id = path.into_inner().sp.into(); + let sp = mgmt_switch.sp(sp_id)?; - let options = sp.get_startup_options().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let options = sp.get_startup_options().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(options.into())) + Ok(HttpResponseOk(options.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_startup_options_set( @@ -119,13 +126,16 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let mgmt_switch = &apictx.mgmt_switch; let sp_id = path.into_inner().sp.into(); - let sp = mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = mgmt_switch.sp(sp_id)?; - sp.set_startup_options(body.into_inner().into()).await.map_err( - |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, - )?; + 
sp.set_startup_options(body.into_inner().into()).await.map_err( + |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, + )?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_sensor_read_value( @@ -135,12 +145,17 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpSensorId { sp, sensor_id } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let value = sp.read_sensor_value(sensor_id).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let value = + sp.read_sensor_value(sensor_id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk(value.into())) + }; - Ok(HttpResponseOk(value.into())) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_list( @@ -149,12 +164,15 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let inventory = sp.inventory().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let inventory = sp.inventory().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(sp_component_list_from_comms(inventory))) + Ok(HttpResponseOk(sp_component_list_from_comms(inventory))) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_get( @@ -164,16 +182,21 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - - let details = sp.component_details(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + + let details = + sp.component_details(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk( + details.entries.into_iter().map(Into::into).collect(), + )) + }; - Ok(HttpResponseOk( - details.entries.into_iter().map(Into::into).collect(), - )) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } // Implementation notes: @@ -198,66 +221,79 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let ComponentCabooseSlot { firmware_slot } = query_params.into_inner(); - let component = component_from_str(&component)?; - let from_utf8 = |key: &[u8], bytes| { - // This helper closure is only called with the ascii-printable [u8; 4] - // key constants we define above, so we can unwrap this conversion. 
- let key = str::from_utf8(key).unwrap(); - String::from_utf8(bytes).map_err(|_| { - http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "InvalidCaboose", - format!("non-utf8 data returned for caboose key {key}"), + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let ComponentCabooseSlot { firmware_slot } = + query_params.into_inner(); + let component = component_from_str(&component)?; + + let from_utf8 = |key: &[u8], bytes| { + // This helper closure is only called with the ascii-printable [u8; 4] + // key constants we define above, so we can unwrap this conversion. + let key = str::from_utf8(key).unwrap(); + String::from_utf8(bytes).map_err(|_| { + http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "InvalidCaboose", + format!("non-utf8 data returned for caboose key {key}"), + ) + }) + }; + + let git_commit = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_GIT_COMMIT, ) - }) - }; + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let board = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_BOARD, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let name = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_NAME, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + let version = + sp.read_component_caboose( + component, + firmware_slot, + CABOOSE_KEY_VERSION, + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let git_commit = - sp.read_component_caboose( - component, - firmware_slot, - CABOOSE_KEY_GIT_COMMIT, - ) - .await - .map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - let board = sp - .read_component_caboose(component, firmware_slot, CABOOSE_KEY_BOARD) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - let name = sp - .read_component_caboose(component, firmware_slot, CABOOSE_KEY_NAME) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - let version = - sp.read_component_caboose( - component, - firmware_slot, - CABOOSE_KEY_VERSION, - ) - .await - .map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; + let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; + let name = from_utf8(&CABOOSE_KEY_NAME, name)?; + let version = from_utf8(&CABOOSE_KEY_VERSION, version)?; - let git_commit = from_utf8(&CABOOSE_KEY_GIT_COMMIT, git_commit)?; - let board = from_utf8(&CABOOSE_KEY_BOARD, board)?; - let name = from_utf8(&CABOOSE_KEY_NAME, name)?; - let version = from_utf8(&CABOOSE_KEY_VERSION, version)?; + let caboose = + SpComponentCaboose { git_commit, board, name, version }; - let caboose = SpComponentCaboose { git_commit, board, name, version }; + Ok(HttpResponseOk(caboose)) + }; - Ok(HttpResponseOk(caboose)) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_clear_status( @@ -267,14 +303,18 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - 
sp.component_clear_status(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + sp.component_clear_status(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; - Ok(HttpResponseUpdatedNoContent {}) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_active_slot_get( @@ -284,15 +324,18 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let slot = - sp.component_active_slot(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let slot = + sp.component_active_slot(component).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) + Ok(HttpResponseOk(SpComponentFirmwareSlot { slot })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_active_slot_set( @@ -304,16 +347,22 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - let slot = body.into_inner().slot; - let persist = query_params.into_inner().persist; - - sp.set_component_active_slot(component, slot, persist).await.map_err( - |err| SpCommsError::SpCommunicationFailed { sp: sp_id, err }, - )?; - - Ok(HttpResponseUpdatedNoContent {}) + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + let slot = body.into_inner().slot; + let persist = query_params.into_inner().persist; + + sp.set_component_active_slot(component, slot, persist) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_serial_console_attach( @@ -321,6 +370,10 @@ impl GatewayApi for GatewayImpl { path: Path, websocket: WebsocketUpgrade, ) -> WebsocketEndpointResult { + // TODO(eliza): I'm not sure whether there's a way to make + // `oximeter_instruments`'s HTTP latency tracker work with websockets + // requests? It would be nice to get the latency and any error returned + // prior to actually returning the websocket stream... let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); @@ -356,13 +409,15 @@ impl GatewayApi for GatewayImpl { // we don't use it at all to detach. 
let PathSpComponent { sp, component: _ } = path.into_inner(); let sp_id = sp.into(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + sp.serial_console_detach().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - let sp = apictx.mgmt_switch.sp(sp_id)?; - sp.serial_console_detach().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_reset( @@ -372,20 +427,23 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - - sp.reset_component_prepare(component) - // We always want to run with the watchdog when resetting as - // disabling the watchdog should be considered a debug only feature - .and_then(|()| sp.reset_component_trigger(component, false)) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - - Ok(HttpResponseUpdatedNoContent {}) + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + + sp.reset_component_prepare(component) + // We always want to run with the watchdog when resetting as + // disabling the watchdog should be considered a debug only feature + .and_then(|()| sp.reset_component_trigger(component, false)) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update( @@ -398,19 +456,22 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; - let ComponentUpdateIdSlot { id, firmware_slot } = - query_params.into_inner(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; + let ComponentUpdateIdSlot { id, firmware_slot } = + query_params.into_inner(); - // TODO-performance: this makes a full copy of the uploaded data - let image = body.as_bytes().to_vec(); + // TODO-performance: this makes a full copy of the uploaded data + let image = body.as_bytes().to_vec(); - sp.start_update(component, id, firmware_slot, image) - .await - .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; + sp.start_update(component, id, firmware_slot, image) + .await + .map_err(|err| SpCommsError::UpdateFailed { sp: sp_id, err })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update_status( @@ -421,14 +482,17 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let status = sp.update_status(component).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let status = sp.update_status(component).await.map_err(|err| { + 
SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(status.into())) + Ok(HttpResponseOk(status.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_component_update_abort( @@ -440,15 +504,18 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let component = component_from_str(&component)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let component = component_from_str(&component)?; - let UpdateAbortBody { id } = body.into_inner(); - sp.update_abort(component, id).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let UpdateAbortBody { id } = body.into_inner(); + sp.update_abort(component, id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_rot_cmpa_get( @@ -459,24 +526,26 @@ impl GatewayApi for GatewayImpl { let PathSpComponent { sp, component } = path.into_inner(); let sp_id = sp.into(); + let handler = async { + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? != SpComponent::ROT { + return Err(HttpError::for_bad_request( + Some("RequestUnsupportedForComponent".to_string()), + "Only the RoT has a CFPA".into(), + )); + } + + let sp = apictx.mgmt_switch.sp(sp_id)?; + let data = sp.read_rot_cmpa().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - // Ensure the caller knows they're asking for the RoT - if component_from_str(&component)? != SpComponent::ROT { - return Err(HttpError::for_bad_request( - Some("RequestUnsupportedForComponent".to_string()), - "Only the RoT has a CFPA".into(), - )); - } - - let sp = apictx.mgmt_switch.sp(sp_id)?; - let data = sp.read_rot_cmpa().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; - - let base64_data = - base64::engine::general_purpose::STANDARD.encode(data); + let base64_data = + base64::engine::general_purpose::STANDARD.encode(data); - Ok(HttpResponseOk(RotCmpa { base64_data })) + Ok(HttpResponseOk(RotCmpa { base64_data })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_rot_cfpa_get( @@ -490,29 +559,32 @@ impl GatewayApi for GatewayImpl { let GetCfpaParams { slot } = params.into_inner(); let sp_id = sp.into(); - // Ensure the caller knows they're asking for the RoT - if component_from_str(&component)? != SpComponent::ROT { - return Err(HttpError::for_bad_request( - Some("RequestUnsupportedForComponent".to_string()), - "Only the RoT has a CFPA".into(), - )); - } + let handler = async { + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? 
!= SpComponent::ROT {
+                return Err(HttpError::for_bad_request(
+                    Some("RequestUnsupportedForComponent".to_string()),
+                    "Only the RoT has a CFPA".into(),
+                ));
+            }
+
+            let sp = apictx.mgmt_switch.sp(sp_id)?;
+            let data = match slot {
+                RotCfpaSlot::Active => sp.read_rot_active_cfpa().await,
+                RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await,
+                RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await,
+            }
+            .map_err(|err| {
+                SpCommsError::SpCommunicationFailed { sp: sp_id, err }
+            })?;
-        let sp = apictx.mgmt_switch.sp(sp_id)?;
-        let data = match slot {
-            RotCfpaSlot::Active => sp.read_rot_active_cfpa().await,
-            RotCfpaSlot::Inactive => sp.read_rot_inactive_cfpa().await,
-            RotCfpaSlot::Scratch => sp.read_rot_scratch_cfpa().await,
-        }
-        .map_err(|err| SpCommsError::SpCommunicationFailed {
-            sp: sp_id,
-            err,
-        })?;
+            let base64_data =
+                base64::engine::general_purpose::STANDARD.encode(data);
-        let base64_data =
-            base64::engine::general_purpose::STANDARD.encode(data);
+            Ok(HttpResponseOk(RotCfpa { base64_data, slot }))
+        };
-        Ok(HttpResponseOk(RotCfpa { base64_data, slot }))
+        apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
     }

     async fn sp_rot_boot_info(
@@ -526,20 +598,24 @@ impl GatewayApi for GatewayImpl {
         let GetRotBootInfoParams { version } = params.into_inner();
         let sp_id = sp.into();
-        // Ensure the caller knows they're asking for the RoT
-        if component_from_str(&component)? != SpComponent::ROT {
-            return Err(HttpError::for_bad_request(
-                Some("RequestUnsupportedForComponent".to_string()),
-                "rot_boot_info only makes sent for a RoT".into(),
-            ));
-        }
+        let handler = async {
+            // Ensure the caller knows they're asking for the RoT
+            if component_from_str(&component)? != SpComponent::ROT {
+                return Err(HttpError::for_bad_request(
+                    Some("RequestUnsupportedForComponent".to_string()),
+                    "rot_boot_info only makes sense for a RoT".into(),
+                ));
+            }
+
+            let sp = apictx.mgmt_switch.sp(sp_id)?;
+            let state = sp.rot_state(version).await.map_err(|err| {
+                SpCommsError::SpCommunicationFailed { sp: sp_id, err }
+            })?;
-        let sp = apictx.mgmt_switch.sp(sp_id)?;
-        let state = sp.rot_state(version).await.map_err(|err| {
-            SpCommsError::SpCommunicationFailed { sp: sp_id, err }
-        })?;
+            Ok(HttpResponseOk(state.into()))
+        };
-        Ok(HttpResponseOk(state.into()))
+        apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await
     }

     async fn ignition_list(
@@ -547,17 +623,19 @@ impl GatewayApi for GatewayImpl {
     ) -> Result<HttpResponseOk<Vec<SpIgnitionInfo>>, HttpError> {
         let apictx = rqctx.context();
         let mgmt_switch = &apictx.mgmt_switch;
-
-        let out = mgmt_switch
-            .bulk_ignition_state()
-            .await?
-            .map(|(id, state)| SpIgnitionInfo {
-                id: id.into(),
-                details: state.into(),
-            })
-            .collect();
-
-        Ok(HttpResponseOk(out))
+        let handler = async {
+            let out = mgmt_switch
+                .bulk_ignition_state()
+                .await?
+ .map(|(id, state)| SpIgnitionInfo { + id: id.into(), + details: state.into(), + }) + .collect(); + + Ok(HttpResponseOk(out)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn ignition_get( @@ -568,19 +646,23 @@ impl GatewayApi for GatewayImpl { let mgmt_switch = &apictx.mgmt_switch; let sp_id = path.into_inner().sp.into(); - let ignition_target = mgmt_switch.ignition_target(sp_id)?; - - let state = mgmt_switch - .ignition_controller() - .ignition_state(ignition_target) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; - - let info = SpIgnitionInfo { id: sp_id.into(), details: state.into() }; - Ok(HttpResponseOk(info)) + let handler = async { + let ignition_target = mgmt_switch.ignition_target(sp_id)?; + + let state = mgmt_switch + .ignition_controller() + .ignition_state(ignition_target) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + let info = + SpIgnitionInfo { id: sp_id.into(), details: state.into() }; + Ok(HttpResponseOk(info)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn ignition_command( @@ -591,18 +673,22 @@ impl GatewayApi for GatewayImpl { let mgmt_switch = &apictx.mgmt_switch; let PathSpIgnitionCommand { sp, command } = path.into_inner(); let sp_id = sp.into(); - let ignition_target = mgmt_switch.ignition_target(sp_id)?; - mgmt_switch - .ignition_controller() - .ignition_command(ignition_target, command.into()) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + let handler = async { + let ignition_target = mgmt_switch.ignition_target(sp_id)?; - Ok(HttpResponseUpdatedNoContent {}) + mgmt_switch + .ignition_controller() + .ignition_command(ignition_target, command.into()) + .await + .map_err(|err| SpCommsError::SpCommunicationFailed { + sp: sp_id, + err, + })?; + + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_power_state_get( @@ -611,13 +697,16 @@ impl GatewayApi for GatewayImpl { ) -> Result, HttpError> { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = sp.power_state().await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + let power_state = sp.power_state().await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseOk(power_state.into())) + Ok(HttpResponseOk(power_state.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_power_state_set( @@ -627,14 +716,17 @@ impl GatewayApi for GatewayImpl { ) -> Result { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; - let power_state = body.into_inner(); + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; + let power_state = body.into_inner(); - sp.set_power_state(power_state.into()).await.map_err(|err| { - SpCommsError::SpCommunicationFailed { sp: sp_id, err } - })?; + sp.set_power_state(power_state.into()).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_installinator_image_id_set( @@ -646,21 +738,23 @@ impl 
GatewayApi for GatewayImpl { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); + let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); - sp.set_ipcc_key_lookup_value( - Key::InstallinatorImageId as u8, - image_id.serialize(), - ) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + sp.set_ipcc_key_lookup_value( + Key::InstallinatorImageId as u8, + image_id.serialize(), + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_installinator_image_id_delete( @@ -671,20 +765,22 @@ impl GatewayApi for GatewayImpl { let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); - let sp = apictx.mgmt_switch.sp(sp_id)?; + let handler = async { + let sp = apictx.mgmt_switch.sp(sp_id)?; - // We clear the image ID by setting it to a 0-length vec. - sp.set_ipcc_key_lookup_value( - Key::InstallinatorImageId as u8, - Vec::new(), - ) - .await - .map_err(|err| SpCommsError::SpCommunicationFailed { - sp: sp_id, - err, - })?; + // We clear the image ID by setting it to a 0-length vec. + sp.set_ipcc_key_lookup_value( + Key::InstallinatorImageId as u8, + Vec::new(), + ) + .await + .map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_host_phase2_progress_get( @@ -692,37 +788,41 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result, HttpError> { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - - let Some(progress) = sp.most_recent_host_phase2_request().await else { - return Ok(HttpResponseOk(HostPhase2Progress::None)); - }; - - // Our `host_phase2_provider` is using an in-memory cache, so the only way - // we can fail to get the total size is if we no longer have the image that - // this SP most recently requested. We'll treat that as "no progress - // information", since it almost certainly means our progress info on this - // SP is very stale. - let Ok(total_size) = - apictx.host_phase2_provider.total_size(progress.hash).await - else { - return Ok(HttpResponseOk(HostPhase2Progress::None)); - }; - - let image_id = HostPhase2RecoveryImageId { - sha256_hash: ArtifactHash(progress.hash), + let handler = async { + let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + + let Some(progress) = sp.most_recent_host_phase2_request().await + else { + return Ok(HttpResponseOk(HostPhase2Progress::None)); + }; + + // Our `host_phase2_provider` is using an in-memory cache, so the only way + // we can fail to get the total size is if we no longer have the image that + // this SP most recently requested. We'll treat that as "no progress + // information", since it almost certainly means our progress info on this + // SP is very stale. 
+ let Ok(total_size) = + apictx.host_phase2_provider.total_size(progress.hash).await + else { + return Ok(HttpResponseOk(HostPhase2Progress::None)); + }; + + let image_id = HostPhase2RecoveryImageId { + sha256_hash: ArtifactHash(progress.hash), + }; + + // `progress` tells us the offset the SP requested and the amount of data we + // sent starting at that offset; report the end of that chunk to our caller. + let offset = progress.offset.saturating_add(progress.data_sent); + + Ok(HttpResponseOk(HostPhase2Progress::Available { + image_id, + offset, + total_size, + age: progress.received.elapsed(), + })) }; - - // `progress` tells us the offset the SP requested and the amount of data we - // sent starting at that offset; report the end of that chunk to our caller. - let offset = progress.offset.saturating_add(progress.data_sent); - - Ok(HttpResponseOk(HostPhase2Progress::Available { - image_id, - offset, - total_size, - age: progress.received.elapsed(), - })) + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_host_phase2_progress_delete( @@ -730,11 +830,14 @@ impl GatewayApi for GatewayImpl { path: Path, ) -> Result { let apictx = rqctx.context(); - let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; + let handler = async { + let sp = apictx.mgmt_switch.sp(path.into_inner().sp.into())?; - sp.clear_most_recent_host_phase2_request().await; + sp.clear_most_recent_host_phase2_request().await; - Ok(HttpResponseUpdatedNoContent {}) + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn recovery_host_phase2_upload( @@ -742,44 +845,55 @@ impl GatewayApi for GatewayImpl { body: UntypedBody, ) -> Result, HttpError> { let apictx = rqctx.context(); - - // TODO: this makes a full copy of the host image, potentially unnecessarily - // if it's malformed. - let image = body.as_bytes().to_vec(); - - let sha256_hash = - apictx.host_phase2_provider.insert(image).await.map_err(|err| { - // Any cache-insertion failure indicates a malformed image; map them - // to bad requests. - HttpError::for_bad_request( - Some("BadHostPhase2Image".to_string()), - err.to_string(), - ) - })?; - let sha256_hash = ArtifactHash(sha256_hash); - - Ok(HttpResponseOk(HostPhase2RecoveryImageId { sha256_hash })) + let handler = async { + // TODO: this makes a full copy of the host image, potentially unnecessarily + // if it's malformed. + let image = body.as_bytes().to_vec(); + + let sha256_hash = + apictx.host_phase2_provider.insert(image).await.map_err( + |err| { + // Any cache-insertion failure indicates a malformed image; map them + // to bad requests. 
+ HttpError::for_bad_request( + Some("BadHostPhase2Image".to_string()), + err.to_string(), + ) + }, + )?; + let sha256_hash = ArtifactHash(sha256_hash); + + Ok(HttpResponseOk(HostPhase2RecoveryImageId { sha256_hash })) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_local_switch_id( rqctx: RequestContext, ) -> Result, HttpError> { let apictx = rqctx.context(); + let handler = async { + let id = apictx.mgmt_switch.local_switch()?; - let id = apictx.mgmt_switch.local_switch()?; - - Ok(HttpResponseOk(id.into())) + Ok(HttpResponseOk(id.into())) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } async fn sp_all_ids( rqctx: RequestContext, ) -> Result>, HttpError> { let apictx = rqctx.context(); - - let all_ids = - apictx.mgmt_switch.all_sps()?.map(|(id, _)| id.into()).collect(); - - Ok(HttpResponseOk(all_ids)) + let handler = async { + let all_ids = apictx + .mgmt_switch + .all_sps()? + .map(|(id, _)| id.into()) + .collect(); + + Ok(HttpResponseOk(all_ids)) + }; + apictx.latencies.instrument_dropshot_handler(&rqctx, handler).await } } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index e1eed053344..e07df0cfb9a 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -6,6 +6,7 @@ mod config; mod context; mod error; mod management_switch; +pub mod metrics; mod serial_console; pub mod http_entrypoints; // TODO pub only for testing - is this right? @@ -62,6 +63,8 @@ pub struct Server { /// `http_servers` all_servers_shutdown: FuturesUnordered, request_body_max_bytes: usize, + /// handle to the SP sensor metrics subsystem + metrics: metrics::Metrics, log: Logger, } @@ -140,6 +143,7 @@ impl Server { config.host_phase2_recovery_image_cache_max_images, )); let apictx = ServerContext::new( + args.id, host_phase2_provider, config.switch, args.rack_id, @@ -151,6 +155,9 @@ impl Server { let mut http_servers = HashMap::with_capacity(args.addresses.len()); let all_servers_shutdown = FuturesUnordered::new(); + let metrics = + metrics::Metrics::new(&log, &args, config.metrics, apictx.clone()); + for addr in args.addresses { start_dropshot_server( &apictx, @@ -167,6 +174,7 @@ impl Server { http_servers, all_servers_shutdown, request_body_max_bytes: config.dropshot.request_body_max_bytes, + metrics, log, }) } @@ -275,12 +283,14 @@ impl Server { server.close().await?; } + self.metrics.update_server_addrs(addresses).await; + Ok(()) } /// The rack_id will be set on a refresh of the SMF property when the sled /// agent starts. - pub fn set_rack_id(&self, rack_id: Option) { + pub fn set_rack_id(&mut self, rack_id: Option) { if let Some(rack_id) = rack_id { let val = self.apictx.rack_id.get_or_init(|| rack_id); if *val != rack_id { @@ -291,20 +301,12 @@ impl Server { "ignored_new_rack_id" => %rack_id); } else { info!(self.apictx.log, "Set rack_id"; "rack_id" => %rack_id); + self.metrics.set_rack_id(rack_id); } } else { warn!(self.apictx.log, "SMF refresh called without a rack id"); } } - - // TODO does MGS register itself with oximeter? - // Register the Nexus server as a metric producer with `oximeter. - // pub async fn register_as_producer(&self) { - // self.apictx - // .nexus - // .register_as_producer(self.http_server_internal.local_addr()) - // .await; - // } } /// Start an instance of the [Server]. 
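An aside on the wiring above: `update_server_addrs` pushes the refreshed listen addresses to the metrics subsystem's server-manager task through a `tokio::sync::watch` channel (see `gateway/src/metrics.rs` below). A minimal sketch of that pattern, with illustrative names rather than the real MGS types:

```rust
use std::net::SocketAddrV6;
use tokio::sync::watch;

// Background task: wakes whenever the published address list changes and
// (re)binds its servers. `changed()` returns Err only once the sender has
// been dropped, which doubles as the shutdown signal.
async fn server_manager(mut addrs: watch::Receiver<Vec<SocketAddrV6>>) {
    loop {
        let current = addrs.borrow_and_update().clone();
        rebind_servers(&current).await;
        if addrs.changed().await.is_err() {
            return;
        }
    }
}

async fn rebind_servers(_addrs: &[SocketAddrV6]) {
    // Close servers bound to removed addresses; start servers on new ones.
}
```

The sender side pairs this with `send_if_modified` (see `Metrics::update_server_addrs` in the new module) so that an unchanged address list doesn't wake the task at all.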
@@ -327,6 +329,5 @@ pub async fn start_server( debug!(log, "registered DTrace probes"); } let server = Server::start(config, args, log).await?; - // server.register_as_producer().await; Ok(server) } diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs index a93c44d62cb..23dfbe01a81 100644 --- a/gateway/src/management_switch.rs +++ b/gateway/src/management_switch.rs @@ -20,6 +20,7 @@ pub use self::location_map::SwitchPortConfig; pub use self::location_map::SwitchPortDescription; use self::location_map::ValidatedLocationConfig; use crate::error::SpCommsError; +use crate::error::SpLookupError; use crate::error::StartupError; use gateway_messages::IgnitionState; use gateway_sp_comms::default_discovery_addr; @@ -316,18 +317,18 @@ impl ManagementSwitch { self.location_map.get().is_some() } - fn location_map(&self) -> Result<&LocationMap, SpCommsError> { + fn location_map(&self) -> Result<&LocationMap, SpLookupError> { let discovery_result = self .location_map .get() - .ok_or(SpCommsError::DiscoveryNotYetComplete)?; + .ok_or(SpLookupError::DiscoveryNotYetComplete)?; discovery_result .as_ref() - .map_err(|s| SpCommsError::DiscoveryFailed { reason: s.clone() }) + .map_err(|s| SpLookupError::DiscoveryFailed { reason: s.clone() }) } /// Get the identifier of our local switch. - pub fn local_switch(&self) -> Result { + pub fn local_switch(&self) -> Result { let location_map = self.location_map()?; Ok(location_map.port_to_id(self.local_ignition_controller_port)) } @@ -347,11 +348,11 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. - fn get_port(&self, id: SpIdentifier) -> Result { + fn get_port(&self, id: SpIdentifier) -> Result { let location_map = self.location_map()?; let port = location_map .id_to_port(id) - .ok_or(SpCommsError::SpDoesNotExist(id))?; + .ok_or(SpLookupError::SpDoesNotExist(id))?; Ok(port) } @@ -362,7 +363,7 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. - pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpCommsError> { + pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpLookupError> { let port = self.get_port(id)?; Ok(self.port_to_sp(port)) } @@ -377,7 +378,7 @@ impl ManagementSwitch { pub fn ignition_target( &self, id: SpIdentifier, - ) -> Result { + ) -> Result { let port = self.get_port(id)?; Ok(self.port_to_ignition_target[port.0]) } @@ -389,7 +390,7 @@ impl ManagementSwitch { /// therefore can't map our switch ports to SP identities). pub(crate) fn all_sps( &self, - ) -> Result, SpCommsError> + ) -> Result, SpLookupError> { let location_map = self.location_map()?; Ok(location_map diff --git a/gateway/src/metrics.rs b/gateway/src/metrics.rs new file mode 100644 index 00000000000..7c133f5d97f --- /dev/null +++ b/gateway/src/metrics.rs @@ -0,0 +1,1169 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
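+//! Sensor metrics subsystem for MGS.
+//!
+//! A rough map of the moving parts defined below: `Metrics::new` spawns
+//! `start_pollers`, which starts one `SpPoller` task per SP discovered on the
+//! management switch. Pollers scrape sensor readings and send them, a `Vec`
+//! of samples at a time, over a broadcast channel to the `Producer`, which
+//! Oximeter collects from via the HTTP server run by `ServerManager`. With
+//! the tuning constants below (1 Hz polls, 10 s collection interval, 36
+//! expected SPs, sloppiness factor 16), that channel buffers
+//! 36 * 10 * 16 = 5760 sample chunks, rounded up to the next power of two:
+//! 8192. Once full, the oldest chunks are overwritten first.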
+use crate::error::CommunicationError; +use crate::management_switch::SpIdentifier; +use crate::management_switch::SpType; +use crate::MgsArguments; +use crate::ServerContext; +use anyhow::Context; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_sp_comms::SingleSp; +use gateway_sp_comms::SpComponent; +use gateway_sp_comms::VersionedSpState; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; +use omicron_common::backoff; +use oximeter::types::Cumulative; +use oximeter::types::ProducerRegistry; +use oximeter::types::Sample; +use oximeter::MetricsError; +use std::borrow::Cow; +use std::collections::hash_map; +use std::collections::hash_map::HashMap; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::broadcast; +use tokio::sync::oneshot; +use tokio::sync::watch; +use tokio::task::JoinHandle; +use uuid::Uuid; + +oximeter::use_timeseries!("hardware-component.toml"); +use hardware_component as metric; + +/// Handle to the metrics tasks. +pub struct Metrics { + /// If the metrics subsystem is disabled, this is `None`. + inner: Option, +} + +struct Handles { + addrs_tx: watch::Sender>, + rack_id_tx: Option>, + server: JoinHandle>, +} + +/// Configuration for metrics. +/// +/// In order to reduce the risk of a bad config file taking down the whole +/// management network, we try to keep the metrics-specific portion of the +/// config file as minimal as possible. At present, it only includes development +/// configurations that shouldn't be present in production configs. +#[derive( + Clone, Debug, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize, +)] +#[serde(deny_unknown_fields)] +pub struct MetricsConfig { + /// Completely disable the metrics subsystem. + /// + /// If `disabled = true`, sensor data metrics will not be collected, and the + /// metrics polling tasks will not be started. + #[serde(default)] + pub disabled: bool, + + /// Override the Nexus address used to register the SP metrics Oximeter + /// producer. This is intended for use in development and testing. + /// + /// If this argument is not present, Nexus is discovered through DNS. + #[serde(default)] + pub dev_nexus_address: Option, + + /// Allow the metrics producer endpoint to bind on loopback. + /// + /// This should be disabled in production, as Nexus will not be able to + /// reach the loopback interface, but is necessary for local development and + /// test purposes. + #[serde(default)] + pub dev_bind_loopback: bool, +} + +/// Polls sensor readings from an individual SP. +struct SpPoller { + spid: SpIdentifier, + known_state: Option, + components: HashMap, + log: slog::Logger, + rack_id: Uuid, + mgs_id: Uuid, + sample_tx: broadcast::Sender>, +} + +struct ComponentMetrics { + target: metric::HardwareComponent, + /// Counts of errors reported by sensors on this component. + sensor_errors: HashMap>, + /// Counts of errors that occurred whilst polling the SP for measurements + /// from this component. + poll_errors: HashMap<&'static str, Cumulative>, +} + +#[derive(Eq, PartialEq, Hash)] +struct SensorErrorKey { + name: Cow<'static, str>, + kind: &'static str, + error: &'static str, +} + +/// Manages a metrics server and stuff. 
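+/// (More concretely: it owns the `ProducerRegistry` and watches the list of
+/// MGS listen addresses published by `Metrics::update_server_addrs`, so the
+/// Oximeter producer server can be rebound whenever that list changes.)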
+struct ServerManager { + log: slog::Logger, + addrs: watch::Receiver>, + registry: ProducerRegistry, +} + +#[derive(Debug)] +struct Producer { + /// Receiver for samples produced by SP pollers. + sample_rx: broadcast::Receiver>, + /// Logging context. + /// + /// We stick this on the producer because we would like to be able to log + /// when stale samples are dropped. + log: slog::Logger, +} + +/// The maximum Dropshot request size for the metrics server. +const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024; + +/// Poll interval for requesting sensor readings from SPs. +/// +/// Bryan wants to try polling at 1Hz, so let's do that for now. +const SP_POLL_INTERVAL: Duration = Duration::from_secs(1); + +///The interval at which we will ask Oximeter to collect our metric samples. +/// +/// Every ten seconds seems good. +const OXIMETER_COLLECTION_INTERVAL: Duration = Duration::from_secs(10); + +/// The expected number of SPs in a fully-loaded rack. +/// +/// N.B. that there *might* be more than this; we shouldn't ever panic or +/// otherwise misbehave if we see more than this number. This is just intended +/// for sizing buffers/map allocations and so forth; we can always realloc if we +/// see a bonus SP or two. That's why it's called "normal number of SPs" and not +/// "MAX_SPS" or similar. +/// +/// Additionally, note that we always determine the channel capacity based on +/// the assumption that *someday*, the rack might be fully loaded with compute +/// sleds, even if it isn't *right now*. A rack with 16 sleds could always grow +/// another 16 later! +const NORMAL_NUMBER_OF_SPS: usize = + 32 // 32 compute sleds + + 2 // two switches + + 2 // two power shelves, someday. + ; + +/// What size should we make the +const MAX_BUFFERED_SAMPLE_CHUNKS: usize = { + // Roughly how many times will we poll SPs for each metrics collection + // interval? + let polls_per_metrics_interval = { + let collection_interval_secs: usize = + OXIMETER_COLLECTION_INTERVAL.as_secs() as usize; + let poll_interval_secs: usize = SP_POLL_INTERVAL.as_secs() as usize; + + collection_interval_secs / poll_interval_secs + }; + + // How many sample collection intervals do we want to allow to elapse before + // we start putting stuff on the floor? + // + // Let's say 16. Chosen totally arbitrarily but seems reasonable-ish. + let sloppiness = 16; + let capacity = + NORMAL_NUMBER_OF_SPS * polls_per_metrics_interval * sloppiness; + // Finally, the buffer capacity will probably be allocated in a power of two + // anyway, so let's make sure our thing is a power of two so we don't waste + // the allocation we're gonna get anyway. + capacity.next_power_of_two() +}; + +impl Metrics { + pub fn new( + log: &slog::Logger, + args: &MgsArguments, + cfg: Option, + apictx: Arc, + ) -> Self { + let &MgsArguments { id, rack_id, ref addresses } = args; + + if cfg.as_ref().map(|c| c.disabled).unwrap_or(false) { + slog::warn!(&log, "metrics subsystem disabled by config"); + return Self { inner: None }; + } + + // Create a channel for the SP poller tasks to send samples to the + // Oximeter producer endpoint. + // + // A broadcast channel is used here, not because we are actually + // multi-consumer (`Producer::produce` is never called concurrently), + // but because the broadcast channel has properly ring-buffer-like + // behavior, where earlier messages are discarded, rather than exerting + // backpressure on senders (as Tokio's MPSC channel does). 
This + // is what we want, as we would prefer a full buffer to result in + // clobbering the oldest measurements, rather than leaving the newest + // ones on the floor. + let (sample_tx, sample_rx) = + broadcast::channel(MAX_BUFFERED_SAMPLE_CHUNKS); + + // Using a channel for this is, admittedly, a bit of an end-run around + // the `OnceLock` on the `ServerContext` that *also* stores the rack ID, + // but it has the nice benefit of allowing the `PollerManager` task to _await_ + // the rack ID being set...we might want to change other code to use a + // similar approach in the future. + let (rack_id_tx, rack_id_rx) = oneshot::channel(); + let rack_id_tx = if let Some(rack_id) = rack_id { + rack_id_tx.send(rack_id).expect( + "we just created the channel; it therefore will not be \ + closed", + ); + None + } else { + Some(rack_id_tx) + }; + + tokio::spawn(start_pollers( + log.new(slog::o!("component" => "sensor-poller")), + apictx.clone(), + rack_id_rx, + id, + sample_tx, + )); + + let (addrs_tx, addrs_rx) = + tokio::sync::watch::channel(addresses.clone()); + let server = { + let log = log.new(slog::o!("component" => "producer-server")); + let registry = ProducerRegistry::with_id(id); + // Register the producer for SP sensor metrics. + registry + .register_producer(Producer { sample_rx, log: log.clone() }) + // TODO(ben): when you change `register_producer` to not return + // a `Result`, delete this `expect`. thanks in advance! :) + .expect( + "`ProducerRegistry::register_producer()` will never \ + actually return an `Err`, so this shouldn't ever \ + happen...", + ); + // Also, register the producer for the HTTP API metrics. + registry + .register_producer(apictx.latencies.clone()) + // TODO(ben): do this one too pls + .expect( + "`ProducerRegistry::register_producer()` will never \ + actually return an `Err`, so this shouldn't ever \ + happen...", + ); + + tokio::spawn( + ServerManager { log, addrs: addrs_rx, registry }.run(cfg), + ) + }; + Self { inner: Some(Handles { addrs_tx, rack_id_tx, server }) } + } + + pub fn set_rack_id(&mut self, rack_id: Uuid) { + let tx = self.inner.as_mut().and_then(|i| i.rack_id_tx.take()); + if let Some(tx) = tx { + // If the task that starts sensor pollers has gone away already, + // we're probably shutting down, and shouldn't panic. + let _ = tx.send(rack_id); + } + // Ignoring duplicate attempt to set the rack ID... + } + + pub async fn update_server_addrs(&self, new_addrs: &[SocketAddrV6]) { + if let Some(ref inner) = self.inner { + inner.addrs_tx.send_if_modified(|current_addrs| { + if current_addrs.len() == new_addrs.len() + // N.B. that we could make this "faster" with a `HashSet`, + // but...the size of this Vec of addresses is probably going to + // two or three items, max, so the linear scan actually probably + // outperforms it... + && current_addrs.iter().all(|addr| new_addrs.contains(addr)) + { + return false; + } + + // Reuse existing `Vec` capacity if possible.This is almost + // certainly not performance-critical, but it makes me feel happy. + current_addrs.clear(); + current_addrs.extend_from_slice(new_addrs); + true + }); + } + } +} + +impl Drop for Metrics { + fn drop(&mut self) { + // Clean up our children on drop. + if let Some(ref mut inner) = self.inner { + inner.server.abort(); + } + } +} + +impl oximeter::Producer for Producer { + fn produce( + &mut self, + ) -> Result>, MetricsError> { + // Drain all samples currently in the queue into a `Vec`. + // + // N.B. 
it may be tempting to pursue an alternative design where we
+        // implement `Iterator` for a `broadcast::Receiver<Vec<Sample>>` and
+        // just return that using `Receiver::resubscribe`...DON'T DO THAT! The
+        // `resubscribe` function creates a receiver at the current *tail* of
+        // the ringbuffer, so it won't see any samples produced *before* now.
+        // Which is the opposite of what we want!
+        let mut samples = Vec::with_capacity(self.sample_rx.len());
+        // Because we receive the individual samples in a `Vec` of all samples
+        // produced by a poller, let's also sum the length of each of those
+        // `Vec`s here, so we can log it later.
+        let mut total_samples = 0;
+        // Also, track whether any sample chunks were dropped off the end of
+        // the ring buffer.
+        let mut dropped_chunks = 0;
+
+        use broadcast::error::TryRecvError;
+        loop {
+            match self.sample_rx.try_recv() {
+                Ok(sample_chunk) => {
+                    total_samples += sample_chunk.len();
+                    samples.push(sample_chunk)
+                }
+                // This error indicates that an old ringbuffer entry was
+                // overwritten. That's fine, just get the next one.
+                Err(TryRecvError::Lagged(dropped)) => {
+                    dropped_chunks += dropped;
+                }
+                // We've drained all currently available samples! We're done here!
+                Err(TryRecvError::Empty) => break,
+                // This should only happen when shutting down.
+                Err(TryRecvError::Closed) => {
+                    slog::debug!(&self.log, "sample producer channel closed");
+                    break;
+                }
+            }
+        }
+
+        if dropped_chunks > 0 {
+            slog::info!(
+                &self.log,
+                "produced metric samples. some old sample chunks were dropped!";
+                "samples" => total_samples,
+                "sample_chunks" => samples.len(),
+                "dropped_chunks" => dropped_chunks,
+            );
+        } else {
+            slog::debug!(
+                &self.log,
+                "produced metric samples";
+                "samples" => total_samples,
+                "sample_chunks" => samples.len(),
+            );
+        }
+
+        // There you go, that's all I've got.
+        Ok(Box::new(samples.into_iter().flatten()))
+    }
+}
+
+async fn start_pollers(
+    log: slog::Logger,
+    apictx: Arc<ServerContext>,
+    rack_id: oneshot::Receiver<Uuid>,
+    mgs_id: Uuid,
+    sample_tx: broadcast::Sender<Vec<Sample>>,
+) -> anyhow::Result<()> {
+    let switch = &apictx.mgmt_switch;
+
+    // First, wait until the rack ID is known...
+    let rack_id = rack_id
+        .await
+        .context("rack ID sender has gone away...we must be shutting down")?;
+
+    // Wait for SP discovery to complete, if it hasn't already.
+    // TODO(eliza): presently, we busy-poll here.
It would be nicer to + // replace the `OnceLock` in `ManagementSwitch` + // with a `tokio::sync::watch` + let sps = backoff::retry_notify_ext( + backoff::retry_policy_local(), + || async { switch.all_sps().map_err(backoff::BackoffError::transient) }, + |err, _, elapsed| { + let secs = elapsed.as_secs(); + if secs < 30 { + slog::debug!( + &log, + "waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ); + } else if secs < 180 { + slog::info!( + &log, + "still waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ) + } else { + slog::warn!( + &log, + "we have been waiting for SP discovery to complete \ + for a pretty long time!"; + "elapsed" => ?elapsed, + "error" => err, + ) + } + }, + ) + .await + .context("we should never return a fatal error here")?; + + slog::info!( + &log, + "starting to poll SP sensor data every {SP_POLL_INTERVAL:?}" + ); + + for (spid, _) in sps { + slog::info!( + &log, + "found a new little friend!"; + "sp_slot" => ?spid.slot, + "chassis_type" => ?spid.typ, + ); + + let poller = SpPoller { + spid, + rack_id, + mgs_id, + log: log.new(slog::o!( + "sp_slot" => spid.slot, + "chassis_type" => format!("{:?}", spid.typ), + )), + components: HashMap::new(), + known_state: None, + sample_tx: sample_tx.clone(), + }; + tokio::spawn(poller.run(apictx.clone())); + } + + Ok(()) +} + +impl SpPoller { + async fn run(mut self, apictx: Arc) { + let mut interval = tokio::time::interval(SP_POLL_INTERVAL); + let switch = &apictx.mgmt_switch; + let sp = match switch.sp(self.spid) { + Ok(sp) => sp, + Err(e) => { + // This should never happen, but it's not worth taking down the + // entire management network over that... + const MSG: &'static str = + "the `SpPoller::run` function is only called after \ + discovery completes successfully, and the `SpIdentifier` \ + used was returned by the management switch, \ + so it should be valid."; + if cfg!(debug_assertions) { + unreachable!( + "{MSG} nonetheless, we saw a {e:?} error when looking \ + up {:?}", + self.spid + ); + } else { + slog::error!( + &self.log, + "THIS SHOULDN'T HAPPEN: {MSG}"; + "error" => e, + "sp" => ?self.spid, + ); + return; + } + } + }; + loop { + interval.tick().await; + slog::trace!(&self.log, "interval elapsed, polling SP..."); + + match self.poll(sp).await { + // No sense cluttering the ringbuffer with empty vecs... + Ok(samples) if samples.is_empty() => { + slog::trace!( + &self.log, + "polled SP, no samples returned"; + "num_samples" => 0usize + ); + } + Ok(samples) => { + slog::trace!( + &self.log, + "polled SP successfully"; + "num_samples" => samples.len(), + ); + + if let Err(_) = self.sample_tx.send(samples) { + slog::debug!( + &self.log, + "all sample receiver handles have been dropped! \ + presumably we are shutting down..."; + ); + return; + } + } + // No SP is currently present for this ID. This may change in + // the future: a cubby that is not populated at present may have + // a sled added to it in the future. So, let's wait until it + // changes. + Err(CommunicationError::NoSpDiscovered) => { + slog::info!( + &self.log, + "no SP is present for this slot. waiting for a \ + little buddy to appear..."; + ); + let mut watch = sp.sp_addr_watch().clone(); + loop { + if let Some((addr, port)) = *watch.borrow_and_update() { + // Ladies and gentlemen...we got him! + slog::info!( + &self.log, + "found a SP, resuming polling."; + "sp_addr" => ?addr, + "sp_port" => ?port, + ); + break; + } + + // Wait for an address to be discovered. 
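+                        // (Note the ordering here: checking the current
+                        // value via `borrow_and_update()` above *before*
+                        // awaiting `changed()` means an address published
+                        // between the check and the await isn't lost;
+                        // `changed()` still resolves immediately for it.)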
+ slog::debug!(&self.log, "waiting for a SP to appear."); + if watch.changed().await.is_err() { + slog::debug!( + &self.log, + "SP address watch has been closed, presumably \ + we are shutting down"; + ); + return; + } + } + } + Err(error) => { + slog::warn!( + &self.log, + "failed to poll SP, will try again momentarily..."; + "error" => %error, + ); + // TODO(eliza): we should probably have a metric for failed + // SP polls. + } + } + } + } + + async fn poll( + &mut self, + sp: &SingleSp, + ) -> Result, CommunicationError> { + let mut current_state = SpUnderstanding::from(sp.state().await?); + let mut samples = Vec::new(); + // If the SP's state changes dramatically *during* a poll, it may be + // necessary to re-do the metrics scrape, thus the loop. Normally, we + // will only loop a single time, but may retry if necessary. + loop { + // Check if the SP's state has changed. If it has, we need to make sure + // we still know what all of its sensors are. + if Some(¤t_state) != self.known_state.as_ref() { + // The SP's state appears to have changed. Time to make sure our + // understanding of its devices and identity is up to date! + + let chassis_kind = match self.spid.typ { + SpType::Sled => "sled", + SpType::Switch => "switch", + SpType::Power => "power", + }; + let model = stringify_byte_string(¤t_state.model[..]); + let serial = + stringify_byte_string(¤t_state.serial_number[..]); + let hubris_archive_id = + hex::encode(¤t_state.hubris_archive_id); + + slog::debug!( + &self.log, + "our little friend seems to have changed in some kind of way"; + "current_state" => ?current_state, + "known_state" => ?self.known_state, + "new_model" => %model, + "new_serial" => %serial, + "new_hubris_archive_id" => %hubris_archive_id, + ); + + let inv_devices = sp.inventory().await?.devices; + + // Clear out any previously-known devices, and preallocate capacity + // for all the new ones. + self.components.clear(); + self.components.reserve(inv_devices.len()); + + for dev in inv_devices { + // Skip devices which have nothing interesting for us. + if !dev + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + continue; + } + let component_id = match dev.component.as_str() { + Some(c) => Cow::Owned(c.to_string()), + None => { + // These are supposed to always be strings. But, if we + // see one that's not a string, fall back to the hex + // representation rather than panicking. + let hex = hex::encode(dev.component.id); + slog::warn!( + &self.log, + "a SP component ID was not a string! this isn't \ + supposed to happen!"; + "component" => %hex, + "device" => ?dev, + ); + Cow::Owned(hex) + } + }; + + // TODO(eliza): i hate having to clone all these strings for + // every device on the SP...it would be cool if Oximeter let us + // reference count them... + let target = metric::HardwareComponent { + rack_id: self.rack_id, + gateway_id: self.mgs_id, + chassis_model: Cow::Owned(model.clone()), + chassis_revision: current_state.revision, + chassis_kind: Cow::Borrowed(chassis_kind), + chassis_serial: Cow::Owned(serial.clone()), + hubris_archive_id: Cow::Owned( + hubris_archive_id.clone(), + ), + slot: self.spid.slot as u32, + component_kind: Cow::Owned(dev.device), + component_id, + description: Cow::Owned(dev.description), + }; + match self.components.entry(dev.component) { + // Found a new device! 
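+                        // (A `Vacant` entry means we've never seen this
+                        // component before, so it starts with fresh
+                        // cumulative error counters; the `Occupied` arms
+                        // below decide whether an already-known component's
+                        // counters need to be reset.)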
+ hash_map::Entry::Vacant(entry) => { + slog::debug!( + &self.log, + "discovered a new component!"; + "component_id" => %target.component_id, + "component_kind" => %target.component_kind, + "description" => %target.component_id, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + // We previously had a known device for this thing, but + // the metrics target has changed, so we should reset + // its cumulative metrics. + hash_map::Entry::Occupied(mut entry) + if entry.get().target != target => + { + slog::trace!( + &self.log, + "target has changed, resetting cumulative metrics \ + for component"; + "component" => ?dev.component, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + + // The target for this device hasn't changed, don't reset it. + hash_map::Entry::Occupied(_) => {} + } + } + + self.known_state = Some(current_state); + } + + // We will need capacity for *at least* the number of components on the + // SP --- it will probably be more, as several components have multiple + // measurement channels which will produce independent samples (e.g. a + // power rail will likely have both voltage and current measurements, + // and a device may have multiple rails...) but, this way, we can avoid + // *some* amount of reallocating... + samples.reserve(self.components.len()); + for (c, metrics) in &mut self.components { + // Metrics samples *should* always be well-formed. If we ever emit a + // messed up one, this is a programmer error, and therefore should + // fail in test, but should probably *not* take down the whole + // management gateway in a real-life rack, especially because it's + // probably going to happen again if we were to get restarted. + const BAD_SAMPLE: &str = + "we emitted a bad metrics sample! this should never happen"; + macro_rules! try_sample { + ($sample:expr) => { + match $sample { + Ok(sample) => samples.push(sample), + + Err(err) => { + slog::error!( + &self.log, + "{BAD_SAMPLE}!"; + "error" => %err, + ); + #[cfg(debug_assertions)] + unreachable!("{BAD_SAMPLE}: {err}"); + } + } + } + } + let details = match sp.component_details(*c).await { + Ok(deets) => deets, + // SP seems gone! + Err(CommunicationError::NoSpDiscovered) => { + return Err(CommunicationError::NoSpDiscovered) + } + Err(error) => { + slog::warn!( + &self.log, + "failed to read details on SP component"; + "sp_component" => %c, + "error" => %error, + ); + try_sample!(metrics.poll_error(comms_error_str(error))); + continue; + } + }; + if details.entries.is_empty() { + slog::warn!( + &self.log, + "a component which claimed to have measurement channels \ + had empty details. this seems weird..."; + "sp_component" => %c, + ); + try_sample!(metrics.poll_error("no_measurement_channels")); + continue; + } + + let ComponentMetrics { sensor_errors, target, .. } = metrics; + for d in details.entries { + let ComponentDetails::Measurement(m) = d else { + // If the component details are switch port details rather + // than measurement channels, ignore it for now. + continue; + }; + let sensor: Cow<'static, str> = Cow::Owned(m.name); + + // First, if there's a measurement error, increment the + // error count metric. We will synthesize a missing sample + // for the sensor's metric as well, after we produce the + // measurement error sample. 
+ // + // We do this first so that we only have to clone the + // sensor's name if there's an error, rather than always + // cloning it in *case* there's an error. + if let Err(error) = m.value { + let kind = match m.kind { + MeasurementKind::Temperature => "temperature", + MeasurementKind::Current => "current", + MeasurementKind::Voltage => "voltage", + MeasurementKind::Power => "power", + MeasurementKind::InputCurrent => "input_current", + MeasurementKind::InputVoltage => "input_voltage", + MeasurementKind::Speed => "fan_speed", + }; + let error = match error { + MeasurementError::InvalidSensor => "invalid_sensor", + MeasurementError::NoReading => "no_reading", + MeasurementError::NotPresent => "not_present", + MeasurementError::DeviceError => "device_error", + MeasurementError::DeviceUnavailable => { + "device_unavailable" + } + MeasurementError::DeviceTimeout => "device_timeout", + MeasurementError::DeviceOff => "device_off", + }; + let datum = sensor_errors + .entry(SensorErrorKey { + name: sensor.clone(), + kind, + error, + }) + .or_insert(Cumulative::new(0)); + // TODO(eliza): perhaps we should treat this as + // "level-triggered" and only increment the counter + // when the sensor has *changed* to an errored + // state after we have seen at least one good + // measurement from it since the last time the error + // was observed? + datum.increment(); + try_sample!(Sample::new( + target, + &metric::SensorErrorCount { + error: Cow::Borrowed(error), + sensor: sensor.clone(), + datum: *datum, + sensor_kind: Cow::Borrowed(kind), + }, + )); + } + + // I don't love this massive `match`, but because the + // `Sample::new_missing` constructor is a different function + // from `Sample::new`, we need separate branches for the + // error and not-error cases, rather than just doing + // something to produce a datum from both the `Ok` and + // `Error` cases... 
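+                    // A hypothetical refactor (sketched for illustration
+                    // only, not used here): the per-kind duplication below
+                    // could be collapsed with a small macro dispatched from
+                    // a `match m.kind`, e.g.:
+                    //
+                    //     macro_rules! kind_sample {
+                    //         ($metric:path) => {
+                    //             match m.value {
+                    //                 Ok(datum) => Sample::new(
+                    //                     target,
+                    //                     &$metric { sensor, datum },
+                    //                 ),
+                    //                 Err(_) => Sample::new_missing(
+                    //                     target,
+                    //                     &$metric { sensor, datum: 0.0 },
+                    //                 ),
+                    //             }
+                    //         };
+                    //     }
+                    //
+                    // Only one arm runs, so `sensor` would still be moved at
+                    // most once; the explicit match is kept for clarity.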
+                    let sample = match (m.value, m.kind) {
+                        (Ok(datum), MeasurementKind::Temperature) => {
+                            Sample::new(
+                                target,
+                                &metric::Temperature { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::Temperature) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Temperature { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Current) => Sample::new(
+                            target,
+                            &metric::Current { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Current) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Current { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Voltage) => Sample::new(
+                            target,
+                            &metric::Voltage { sensor, datum },
+                        ),
+
+                        (Err(_), MeasurementKind::Voltage) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Voltage { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Power) => Sample::new(
+                            target,
+                            &metric::Power { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Power) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Power { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::InputCurrent) => {
+                            Sample::new(
+                                target,
+                                &metric::InputCurrent { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::InputCurrent) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::InputCurrent { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::InputVoltage) => {
+                            Sample::new(
+                                target,
+                                &metric::InputVoltage { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::InputVoltage) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::InputVoltage { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Speed) => Sample::new(
+                            target,
+                            &metric::FanSpeed { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Speed) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::FanSpeed { sensor, datum: 0.0 },
+                            )
+                        }
+                    };
+                    try_sample!(sample);
+                }
+            }
+
+            // Now, fetch the SP's state *again*. It is possible that, while we
+            // were scraping the SP's samples, the SP's identity changed in some
+            // way: perhaps its version was updated during the poll, or it
+            // was removed from the rack and replaced with an entirely different
+            // chassis! If that's the case, some of the samples we collected may
+            // have a metrics target describing the wrong thing (e.g. they could
+            // still have the previous firmware's `hubris_archive_id`, if the SP
+            // was updated). In that case, we need to throw away the samples we
+            // collected and try again, potentially rebuilding our understanding
+            // of the SP's inventory.
+            let state = SpUnderstanding::from(sp.state().await?);
+            if state == current_state {
+                // All good, the SP is still who we thought it was! We can
+                // "commit" this batch of samples
+                return Ok(samples);
+            }
+
+            slog::info!(
+                &self.log,
+                "SP's state changed mid-poll! discarding current samples and \
+                 starting over!";
+                "new_state" => ?state,
+                "current_state" => ?current_state,
+            );
+            // Let's reuse the buffer we already have for the next batch of
+            // samples.
+            samples.clear();
+            // ...and try again with the new state.
+            current_state = state;
+        }
+    }
+}
+
+/// The fields of the `gateway_messages` `VersionedSpState` and
+/// `SpStateV1`/`SpStateV2`/`SpStateV3` that we actually care about for purposes
+/// of determining whether our understanding of the SP's components is still
+/// valid.
+///
+/// In particular, we throw out the RoT state and the SP's power state, because
+/// those changing won't actually invalidate our understanding of the SP's
+/// components.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+struct SpUnderstanding {
+    hubris_archive_id: [u8; 8],
+    serial_number: [u8; 32],
+    model: [u8; 32],
+    revision: u32,
+}
+
+impl From<VersionedSpState> for SpUnderstanding {
+    fn from(v: VersionedSpState) -> Self {
+        match v {
+            VersionedSpState::V1(gateway_messages::SpStateV1 {
+                hubris_archive_id,
+                serial_number,
+                model,
+                revision,
+                ..
+            }) => Self { hubris_archive_id, serial_number, model, revision },
+            VersionedSpState::V2(gateway_messages::SpStateV2 {
+                hubris_archive_id,
+                serial_number,
+                model,
+                revision,
+                ..
+            }) => Self { hubris_archive_id, serial_number, model, revision },
+            VersionedSpState::V3(gateway_messages::SpStateV3 {
+                hubris_archive_id,
+                serial_number,
+                model,
+                revision,
+                ..
+            }) => Self { hubris_archive_id, serial_number, model, revision },
+        }
+    }
+}
+
+// Reimplement this ourselves because we don't really care about
+// reading the RoT state at present. This is unfortunately copied
+// from `gateway_messages`.
+fn stringify_byte_string(bytes: &[u8]) -> String {
+    // We expect serial and model numbers to be ASCII and 0-padded: find the first 0
+    // byte and convert to a string. If that fails, hexlify the entire slice.
+    let first_zero = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
+
+    std::str::from_utf8(&bytes[..first_zero])
+        .map(|s| s.to_string())
+        .unwrap_or_else(|_err| hex::encode(bytes))
+}
+
+impl ServerManager {
+    async fn run(mut self, cfg: Option<MetricsConfig>) -> anyhow::Result<()> {
+        let (registration_address, bind_loopback) =
+            if let Some(MetricsConfig {
+                dev_bind_loopback,
+                dev_nexus_address,
+                ..
+            }) = cfg
+            {
+                if dev_bind_loopback || dev_nexus_address.is_some() {
+                    slog::warn!(
+                        &self.log,
+                        "using development metrics configuration overrides!";
+                        "nexus_address" => ?dev_nexus_address,
+                        "bind_loopback" => dev_bind_loopback,
+                    );
+                }
+                (dev_nexus_address, dev_bind_loopback)
+            } else {
+                (None, false)
+            };
+        let id = self.registry.producer_id();
+
+        let mut current_server: Option<oximeter_producer::Server> = None;
+        loop {
+            let current_ip = current_server.as_ref().map(|s| s.address().ip());
+            let mut new_ip = None;
+            for addr in self.addrs.borrow_and_update().iter() {
+                let &ip = addr.ip();
+                // Don't bind the metrics endpoint on ::1
+                if ip.is_loopback() && !bind_loopback {
+                    continue;
+                }
+                // If our current address is contained in the new addresses,
+                // no need to rebind.
+                if current_ip == Some(IpAddr::V6(ip)) {
+                    new_ip = None;
+                    break;
+                } else {
+                    new_ip = Some(ip);
+                }
+            }
+
+            if let Some(ip) = new_ip {
+                slog::debug!(
+                    &self.log,
+                    "rebinding producer server on new IP";
+                    "new_ip" => ?ip,
+                    "current_ip" => ?current_ip,
+                    "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL,
+                    "producer_id" => ?id,
+                );
+                let server = {
+                    // Listen on any available socket, using the provided underlay IP.
+                    let address = SocketAddr::new(ip.into(), 0);
+
+                    let server_info = ProducerEndpoint {
+                        id,
+                        kind: ProducerKind::ManagementGateway,
+                        address,
+                        interval: OXIMETER_COLLECTION_INTERVAL,
+                    };
+                    let config = oximeter_producer::Config {
+                        server_info,
+                        registration_address,
+                        request_body_max_bytes: METRIC_REQUEST_MAX_SIZE,
+                        log: oximeter_producer::LogConfig::Logger(
+                            self.log.clone(),
+                        ),
+                    };
+                    oximeter_producer::Server::with_registry(
+                        self.registry.clone(),
+                        &config,
+                    )
+                    .context("failed to start producer server")?
+                };
+
+                slog::info!(
+                    &self.log,
+                    "bound metrics producer server";
+                    "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL,
+                    "producer_id" => ?id,
+                    "address" => %server.address(),
+                );
+
+                if let Some(old_server) = current_server.replace(server) {
+                    let old_addr = old_server.address();
+                    if let Err(error) = old_server.close().await {
+                        slog::error!(
+                            &self.log,
+                            "failed to close old metrics producer server";
+                            "address" => %old_addr,
+                            "error" => %error,
+                        );
+                    } else {
+                        slog::debug!(
+                            &self.log,
+                            "old metrics producer server shut down";
+                            "address" => %old_addr,
+                        )
+                    }
+                }
+            }
+
+            // Wait for a subsequent address change.
+            self.addrs.changed().await?;
+        }
+    }
+}
+
+impl ComponentMetrics {
+    fn poll_error(
+        &mut self,
+        error_str: &'static str,
+    ) -> Result<Sample, MetricsError> {
+        let datum = self
+            .poll_errors
+            .entry(error_str)
+            .or_insert_with(|| Cumulative::new(0));
+        datum.increment();
+        Sample::new(
+            &self.target,
+            &metric::PollErrorCount {
+                error: Cow::Borrowed(error_str),
+                datum: *datum,
+            },
+        )
+    }
+}
+
+fn comms_error_str(error: CommunicationError) -> &'static str {
+    // TODO(eliza): a bunch of these probably can't be returned by the specific
+    // operations we try to do. It could be good to make the methods this code
+    // calls return a smaller enum of just the errors it might actually
+    // encounter? Figure this out later.
+    match error {
+        CommunicationError::NoSpDiscovered => "no_sp_discovered",
+        CommunicationError::InterfaceError(_) => "interface",
+        CommunicationError::ScopeIdChangingFrequently { .. } => {
+            "scope_id_changing_frequently"
+        }
+        CommunicationError::JoinMulticast { .. } => "join_multicast",
+        CommunicationError::UdpSendTo { .. } => "udp_send_to",
+        CommunicationError::UdpRecv(_) => "udp_recv",
+        CommunicationError::Deserialize { .. } => "deserialize",
+        CommunicationError::ExhaustedNumAttempts(_) => "exhausted_num_attempts",
+        CommunicationError::BadResponseType { .. } => "bad_response_type",
+        CommunicationError::SpError { .. } => "sp_error",
+        CommunicationError::BogusSerialConsoleState { .. } => {
+            "bogus_serial_console_state"
+        }
+        CommunicationError::VersionMismatch { .. } => {
+            "protocol_version_mismatch"
+        }
+        CommunicationError::TlvDeserialize { .. } => "tlv_deserialize",
+        CommunicationError::TlvDecode(_) => "tlv_decode",
+        CommunicationError::TlvPagination { .. } => "tlv_pagination",
+        CommunicationError::IpccKeyLookupValueTooLarge => {
+            "ipcc_key_lookup_value_too_large"
+        }
+        CommunicationError::UnexpectedTrailingData(_) => {
+            "unexpected_trailing_data"
+        }
+        CommunicationError::BadTrailingDataSize { ..
} => { + "bad_trailing_data_size" + } + } +} diff --git a/gateway/tests/integration_tests/component_list.rs b/gateway/tests/integration_tests/component_list.rs index ec876c07836..993dcc9e93c 100644 --- a/gateway/tests/integration_tests/component_list.rs +++ b/gateway/tests/integration_tests/component_list.rs @@ -57,7 +57,71 @@ async fn component_list() { capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS .bits(), presence: SpComponentPresence::Failed, - } + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-2".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE Southeast temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-6".to_string(), + device: "at24csw080".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A VPD".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-7".to_string(), + device: "max5970".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A hot swap controller" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-8".to_string(), + device: "nvme_bmc".to_string(), + serial_number: None, + description: "FAKE U.2 A NVMe Basic Management Command" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-39".to_string(), + device: "tmp451".to_string(), + serial_number: None, + description: "FAKE T6 temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-53".to_string(), + device: "max31790".to_string(), + serial_number: None, + description: "FAKE Fan controller".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, ] ); @@ -67,14 +131,89 @@ async fn component_list() { assert_eq!( resp.components, - &[SpComponentInfo { - component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - serial_number: None, - description: "FAKE host cpu".to_string(), - capabilities: 0, - presence: SpComponentPresence::Present, - },] + &[ + SpComponentInfo { + component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + serial_number: None, + description: "FAKE host cpu".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-0".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature 
sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-2".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE Southeast temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-6".to_string(),
+                device: "at24csw080".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A VPD".to_string(),
+                capabilities: 0,
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-7".to_string(),
+                device: "max5970".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A hot swap controller"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-8".to_string(),
+                device: "nvme_bmc".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 A NVMe Basic Management Command"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-39".to_string(),
+                device: "tmp451".to_string(),
+                serial_number: None,
+                description: "FAKE T6 temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-53".to_string(),
+                device: "max31790".to_string(),
+                serial_number: None,
+                description: "FAKE Fan controller".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+        ]
     );
 
     // Get the component list for switch 0.
diff --git a/illumos-utils/src/smf_helper.rs b/illumos-utils/src/smf_helper.rs
index 2c24ceaa4d5..2d293769506 100644
--- a/illumos-utils/src/smf_helper.rs
+++ b/illumos-utils/src/smf_helper.rs
@@ -77,7 +77,7 @@ impl<'t> SmfHelper<'t> {
                 "addpropvalue",
                 &prop.to_string(),
                 &format!("{}:", valtype.to_string()),
-                &val.to_string(),
+                &format!("\"{}\"", val.to_string()),
             ])
             .map_err(|err| Error::ZoneCommand {
                 intent: format!("add {} smf property value", prop.to_string()),
diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs
index 139e6fe6071..5d512677f8f 100644
--- a/illumos-utils/src/zfs.rs
+++ b/illumos-utils/src/zfs.rs
@@ -6,6 +6,7 @@
 
 use crate::{execute, PFEXEC};
 use camino::{Utf8Path, Utf8PathBuf};
+use omicron_common::disk::CompressionAlgorithm;
 use omicron_common::disk::DiskIdentity;
 use std::fmt;
 
@@ -203,7 +204,8 @@ pub struct EncryptionDetails {
 #[derive(Debug, Default)]
 pub struct SizeDetails {
     pub quota: Option,
-    pub compression: Option<&'static str>,
+    pub reservation: Option,
+    pub compression: CompressionAlgorithm,
 }
 
 #[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))]
@@ -259,9 +261,27 @@ impl Zfs {
         Ok(())
     }
 
-    /// Creates a new ZFS filesystem named `name`, unless one already exists.
+    /// Creates a new ZFS filesystem unless one already exists.
     ///
-    /// Applies an optional quota, provided _in bytes_.
+    /// - `name`: the full path to the ZFS dataset
+    /// - `mountpoint`: The expected mountpoint of this filesystem.
+    /// If the filesystem already exists and is not mounted here, an error is returned.
+    /// - `zoned`: identifies whether or not this filesystem should be
+    /// used in a zone. Only used when creating a new filesystem; ignored
+    /// if the filesystem already exists.
+    /// - `do_format`: if "false", prevents a new filesystem from being created,
+    /// and returns an error if it is not found.
+    /// - `encryption_details`: Ensures the filesystem is an encryption root.
+    /// For new filesystems, this supplies the key, and all datasets within this
+    /// root are implicitly encrypted. For existing filesystems, ensures that
+    /// they are mounted (and that keys are loaded), but does not verify the
+    /// input details.
+    /// - `size_details`: If supplied, sets size-related information. These
+    /// values are set both when creating new filesystems and when loading
+    /// existing ones.
+    /// - `additional_options`: Additional ZFS options, which are only set when
+    /// creating new filesystems.
     #[allow(clippy::too_many_arguments)]
     pub fn ensure_filesystem(
         name: &str,
@@ -274,10 +294,18 @@ impl Zfs {
     ) -> Result<(), EnsureFilesystemError> {
         let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?;
         if exists {
-            if let Some(SizeDetails { quota, compression }) = size_details {
+            if let Some(SizeDetails { quota, reservation, compression }) =
+                size_details
+            {
                 // apply quota and compression mode (in case they've changed across
                 // sled-agent versions since creation)
-                Self::apply_properties(name, &mountpoint, quota, compression)?;
+                Self::apply_properties(
+                    name,
+                    &mountpoint,
+                    quota,
+                    reservation,
+                    compression,
+                )?;
             }
 
             if encryption_details.is_none() {
@@ -351,42 +379,64 @@ impl Zfs {
             })?;
         }
 
-        if let Some(SizeDetails { quota, compression }) = size_details {
+        if let Some(SizeDetails { quota, reservation, compression }) =
+            size_details
+        {
             // Apply any quota and compression mode.
-            Self::apply_properties(name, &mountpoint, quota, compression)?;
+            Self::apply_properties(
+                name,
+                &mountpoint,
+                quota,
+                reservation,
+                compression,
+            )?;
         }
 
         Ok(())
     }
 
+    /// Applies the following properties to the filesystem.
+    ///
+    /// If any of the options are not supplied, a default "none" or "off"
+    /// value is supplied.
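+    ///
+    /// For example (an illustrative sketch, not taken from this change), a
+    /// call like `Zfs::apply_properties(name, &mountpoint, Some(quota), None,
+    /// compression)` sets the `quota` property explicitly while resetting
+    /// `reservation` to "none" on the dataset.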
     fn apply_properties(
         name: &str,
         mountpoint: &Mountpoint,
         quota: Option,
-        compression: Option<&'static str>,
+        reservation: Option,
+        compression: CompressionAlgorithm,
     ) -> Result<(), EnsureFilesystemError> {
-        if let Some(quota) = quota {
-            if let Err(err) =
-                Self::set_value(name, "quota", &format!("{quota}"))
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        let quota = quota
+            .map(|q| q.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let reservation = reservation
+            .map(|r| r.to_string())
+            .unwrap_or_else(|| String::from("none"));
+        let compression = compression.to_string();
+
+        if let Err(err) = Self::set_value(name, "quota", &quota) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
-        if let Some(compression) = compression {
-            if let Err(err) = Self::set_value(name, "compression", compression)
-            {
-                return Err(EnsureFilesystemError {
-                    name: name.to_string(),
-                    mountpoint: mountpoint.clone(),
-                    // Take the execution error from the SetValueError
-                    err: err.err.into(),
-                });
-            }
+        if let Err(err) = Self::set_value(name, "reservation", &reservation) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
+        }
+        if let Err(err) = Self::set_value(name, "compression", &compression) {
+            return Err(EnsureFilesystemError {
+                name: name.to_string(),
+                mountpoint: mountpoint.clone(),
+                // Take the execution error from the SetValueError
+                err: err.err.into(),
+            });
         }
         Ok(())
     }
diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml
index 00dfb6440ba..0d59950a2a6 100644
--- a/installinator/Cargo.toml
+++ b/installinator/Cargo.toml
@@ -13,6 +13,7 @@ async-trait.workspace = true
 buf-list.workspace = true
 bytes.workspace = true
 camino.workspace = true
+camino-tempfile.workspace = true
 cancel-safe-futures.workspace = true
 clap.workspace = true
 display-error-chain.workspace = true
@@ -37,7 +38,6 @@ slog-async.workspace = true
 slog-envlogger.workspace = true
 slog-term.workspace = true
 smf.workspace = true
-tempfile.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["full"] }
 tufaceous-lib.workspace = true
@@ -50,7 +50,6 @@ omicron-test-utils.workspace = true
 hex-literal.workspace = true
 partial-io.workspace = true
 proptest.workspace = true
-tempfile.workspace = true
 test-strategy.workspace = true
 tokio = { workspace = true, features = ["test-util"] }
 tokio-stream.workspace = true
diff --git a/installinator/src/async_temp_file.rs b/installinator/src/async_temp_file.rs
index c884908ac88..168fffa2aac 100644
--- a/installinator/src/async_temp_file.rs
+++ b/installinator/src/async_temp_file.rs
@@ -3,13 +3,13 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 use camino::Utf8PathBuf;
+use camino_tempfile::NamedUtf8TempFile;
+use camino_tempfile::Utf8PathPersistError;
+use camino_tempfile::Utf8TempPath;
 use std::io;
 use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
-use tempfile::NamedTempFile;
-use tempfile::PathPersistError;
-use tempfile::TempPath;
 use tokio::fs::File;
 use tokio::io::AsyncWrite;
 
@@ -18,7 +18,7 @@ pub(crate) struct AsyncNamedTempFile {
     // in our `persist()` method below. This allows us to drop the temp path
    // (deleting the temporary file) if we're dropped before `persist()` is
    // called.
-    temp_path: Option<TempPath>,
+    temp_path: Option<Utf8TempPath>,
     destination: Utf8PathBuf,
     inner: File,
 }
@@ -41,7 +41,7 @@ impl AsyncNamedTempFile {
             .to_owned();
 
         let temp_file =
-            tokio::task::spawn_blocking(|| NamedTempFile::new_in(parent))
+            tokio::task::spawn_blocking(|| NamedUtf8TempFile::new_in(parent))
                 .await
                 .unwrap()?;
         let temp_path = temp_file.into_temp_path();
@@ -62,7 +62,7 @@ impl AsyncNamedTempFile {
         tokio::task::spawn_blocking(move || temp_path.persist(&destination))
             .await
             .unwrap()
-            .map_err(|PathPersistError { error, .. }| error)
+            .map_err(|Utf8PathPersistError { error, .. }| error)
     }
 }
diff --git a/installinator/src/write.rs b/installinator/src/write.rs
index fdc83cffa21..583c5a7b517 100644
--- a/installinator/src/write.rs
+++ b/installinator/src/write.rs
@@ -918,6 +918,7 @@ mod tests {
     use anyhow::Result;
     use bytes::{Buf, Bytes};
     use camino::Utf8Path;
+    use camino_tempfile::tempdir;
     use futures::StreamExt;
     use installinator_common::{
         Event, InstallinatorCompletionMetadata, InstallinatorComponent,
@@ -934,7 +935,6 @@ mod tests {
         PartialAsyncWrite, PartialOp,
     };
     use proptest::prelude::*;
-    use tempfile::tempdir;
     use test_strategy::proptest;
     use tokio::io::AsyncReadExt;
     use tokio::sync::Mutex;
@@ -1032,7 +1032,7 @@ mod tests {
     ) -> Result<()> {
         let logctx = test_setup_log("test_write_artifact");
         let tempdir = tempdir()?;
-        let tempdir_path: &Utf8Path = tempdir.path().try_into()?;
+        let tempdir_path = tempdir.path();
 
         let destination_host = tempdir_path.join("test-host.bin");
         let destination_control_plane =
diff --git a/internal-dns-cli/Cargo.toml b/internal-dns-cli/Cargo.toml
index dae0af02802..3e34c216226 100644
--- a/internal-dns-cli/Cargo.toml
+++ b/internal-dns-cli/Cargo.toml
@@ -11,9 +11,9 @@ workspace = true
 anyhow.workspace = true
 clap.workspace = true
 dropshot.workspace = true
+hickory-resolver.workspace = true
 internal-dns.workspace = true
 omicron-common.workspace = true
 slog.workspace = true
 tokio.workspace = true
-trust-dns-resolver.workspace = true
 omicron-workspace-hack.workspace = true
diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs
index 9e003ed14f9..f9875e71a04 100644
--- a/internal-dns-cli/src/bin/dnswait.rs
+++ b/internal-dns-cli/src/bin/dnswait.rs
@@ -36,15 +36,17 @@ struct Opt {
 #[value(rename_all = "kebab-case")]
 enum ServiceName {
     Cockroach,
-    Clickhouse,
     ClickhouseKeeper,
+    ClickhouseServer,
 }
 
 impl From<ServiceName> for internal_dns::ServiceName {
     fn from(value: ServiceName) -> Self {
         match value {
             ServiceName::Cockroach => internal_dns::ServiceName::Cockroach,
-            ServiceName::Clickhouse => internal_dns::ServiceName::Clickhouse,
+            ServiceName::ClickhouseServer => {
+                internal_dns::ServiceName::ClickhouseServer
+            }
             ServiceName::ClickhouseKeeper => {
                 internal_dns::ServiceName::ClickhouseKeeper
             }
@@ -65,10 +67,8 @@ async fn main() -> Result<()> {
 
     let resolver = if opt.nameserver_addresses.is_empty() {
         info!(&log, "using system configuration");
-        let async_resolver =
-            trust_dns_resolver::AsyncResolver::tokio_from_system_conf()
-                .context("initializing resolver from system configuration")?;
-        Resolver::new_with_resolver(log.clone(), async_resolver)
+        Resolver::new_from_system_conf(log.clone())
+            .context("initializing resolver from system configuration")?
     } else {
         let addrs = opt.nameserver_addresses;
         info!(&log, "using explicit nameservers"; "nameservers" => ?addrs);
diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml
index c08cc012c1f..c12035e2cb0 100644
--- a/internal-dns/Cargo.toml
+++ b/internal-dns/Cargo.toml
@@ -18,7 +18,7 @@ omicron-uuid-kinds.workspace = true
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
 slog.workspace = true
 thiserror.workspace = true
-trust-dns-resolver.workspace = true
+hickory-resolver.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs
index a9ff664030e..e9d7ed873d3 100644
--- a/internal-dns/src/config.rs
+++ b/internal-dns/src/config.rs
@@ -510,6 +510,10 @@ mod test {
             ServiceName::ClickhouseKeeper.dns_name(),
             "_clickhouse-keeper._tcp",
         );
+        assert_eq!(
+            ServiceName::ClickhouseServer.dns_name(),
+            "_clickhouse-server._tcp",
+        );
         assert_eq!(ServiceName::Cockroach.dns_name(), "_cockroach._tcp",);
         assert_eq!(ServiceName::InternalDns.dns_name(), "_nameservice._tcp",);
         assert_eq!(ServiceName::Nexus.dns_name(), "_nexus._tcp",);
diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs
index fdd5dce428c..5d3832a4175 100644
--- a/internal-dns/src/resolver.rs
+++ b/internal-dns/src/resolver.rs
@@ -2,24 +2,24 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+use hickory_resolver::config::{
+    LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts,
+};
+use hickory_resolver::lookup::SrvLookup;
+use hickory_resolver::TokioAsyncResolver;
 use hyper::client::connect::dns::Name;
 use omicron_common::address::{
     Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT,
 };
 use slog::{debug, error, info, trace};
 use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};
-use trust_dns_resolver::config::{
-    LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts,
-};
-use trust_dns_resolver::lookup::SrvLookup;
-use trust_dns_resolver::TokioAsyncResolver;
 
 pub type DnsError = dns_service_client::Error;
 
 #[derive(Debug, Clone, thiserror::Error)]
 pub enum ResolveError {
     #[error(transparent)]
-    Resolve(#[from] trust_dns_resolver::error::ResolveError),
+    Resolve(#[from] hickory_resolver::error::ResolveError),
 
     #[error("Record not found for SRV key: {}", .0.dns_name())]
     NotFound(crate::ServiceName),
@@ -52,6 +52,19 @@ impl reqwest::dns::Resolve for Resolver {
 }
 
 impl Resolver {
+    /// Construct a new DNS resolver from the system configuration.
+    pub fn new_from_system_conf(
+        log: slog::Logger,
+    ) -> Result<Self, ResolveError> {
+        let (rc, mut opts) = hickory_resolver::system_conf::read_system_conf()?;
+        // Enable edns for potentially larger records
+        opts.edns0 = true;
+
+        let resolver = TokioAsyncResolver::tokio(rc, opts);
+
+        Ok(Self { log, resolver })
+    }
+
     /// Construct a new DNS resolver from specific DNS server addresses.
     pub fn new_from_addrs(
         log: slog::Logger,
@@ -66,18 +79,20 @@ impl Resolver {
                 socket_addr,
                 protocol: Protocol::Udp,
                 tls_dns_name: None,
-                trust_nx_responses: false,
+                trust_negative_responses: false,
                 bind_addr: None,
             });
         }
 
         let mut opts = ResolverOpts::default();
+        // Enable edns for potentially larger records
+        opts.edns0 = true;
         opts.use_hosts_file = false;
         opts.num_concurrent_reqs = dns_server_count;
         // The underlay is IPv6 only, so this helps avoid needless lookups of
        // the IPv4 variant.
         opts.ip_strategy = LookupIpStrategy::Ipv6Only;
         opts.negative_max_ttl = Some(std::time::Duration::from_secs(15));
-        let resolver = TokioAsyncResolver::tokio(rc, opts)?;
+        let resolver = TokioAsyncResolver::tokio(rc, opts);
 
         Ok(Self { log, resolver })
     }
@@ -145,27 +160,6 @@ impl Resolver {
         self.resolver.clear_cache();
     }
 
-    /// Looks up a single [`Ipv6Addr`] based on the SRV name.
-    /// Returns an error if the record does not exist.
-    // TODO: There are lots of ways this API can expand: Caching,
-    // actually respecting TTL, looking up ports, etc.
-    //
-    // For now, however, it serves as a very simple "get everyone using DNS"
-    // API that can be improved upon later.
-    pub async fn lookup_ipv6(
-        &self,
-        srv: crate::ServiceName,
-    ) -> Result<Ipv6Addr, ResolveError> {
-        let name = srv.srv_name();
-        debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name);
-        let response = self.resolver.ipv6_lookup(&name).await?;
-        let address = response
-            .iter()
-            .next()
-            .ok_or_else(|| ResolveError::NotFound(srv))?;
-        Ok(*address)
-    }
-
     /// Returns the targets of the SRV records for a DNS name
     ///
     /// The returned values are generally other DNS names that themselves would
@@ -220,6 +214,12 @@ impl Resolver {
     // TODO-robustness: any callers of this should probably be using
     // all the targets for a given SRV and not just the first one
     // we get, see [`Resolver::lookup_all_socket_v6`].
+    //
+    // TODO: There are lots of ways this API can expand: Caching,
+    // actually respecting TTL, looking up ports, etc.
+    //
+    // For now, however, it serves as a very simple "get everyone using DNS"
+    // API that can be improved upon later.
     pub async fn lookup_socket_v6(
         &self,
         service: crate::ServiceName,
@@ -313,7 +313,7 @@ impl Resolver {
     // (1) it returns `IpAddr`'s rather than `SocketAddr`'s
     // (2) it doesn't actually return all the addresses from the Additional
     //     section of the DNS server's response.
-    //     See bluejekyll/trust-dns#1980
+    //     See hickory-dns/hickory-dns#1980
     //
     // (1) is not a huge deal as we can try to match up the targets ourselves
     // to grab the port for creating a `SocketAddr` but (2) means we need to do
@@ -350,10 +350,9 @@ impl Resolver {
             .await
             .into_iter()
             .flat_map(move |target| match target {
-                Ok((ips, port)) => Some(
-                    ips.into_iter()
-                        .map(move |ip| SocketAddrV6::new(ip, port, 0, 0)),
-                ),
+                Ok((ips, port)) => Some(ips.into_iter().map(move |aaaa| {
+                    SocketAddrV6::new(aaaa.into(), port, 0, 0)
+                })),
                 Err((target, err)) => {
                     error!(
                         log,
@@ -511,7 +510,7 @@ mod test {
         assert!(
             matches!(
                 dns_error.kind(),
-                trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { ..
}, ), "Saw error: {dns_error}", ); @@ -535,11 +534,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); let resolver = dns_server.resolver().unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip,); + assert_eq!(found_addr.ip(), &ip,); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -617,11 +616,13 @@ mod test { // Look up Cockroach let resolver = dns_server.resolver().unwrap(); - let ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let resolved_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + assert!(cockroach_addrs + .iter() + .any(|addr| addr.ip() == resolved_addr.ip())); // Look up all the Cockroach addresses. let mut ips = @@ -635,18 +636,18 @@ mod test { ); // Look up Clickhouse - let ip = resolver - .lookup_ipv6(ServiceName::Clickhouse) + let addr = resolver + .lookup_socket_v6(ServiceName::Clickhouse) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, clickhouse_addr.ip()); + assert_eq!(addr.ip(), clickhouse_addr.ip()); // Look up Backend Service - let ip = resolver - .lookup_ipv6(srv_backend) + let addr = resolver + .lookup_socket_v6(srv_backend) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, crucible_addr.ip()); + assert_eq!(addr.ip(), crucible_addr.ip()); // If we deploy a new generation that removes all records, then we don't // find anything any more. @@ -657,14 +658,14 @@ mod test { // If we remove the records for all services, we won't find them any // more. (e.g., there's no hidden caching going on) let error = resolver - .lookup_ipv6(ServiceName::Cockroach) + .lookup_socket_v6(ServiceName::Cockroach) .await .expect_err("unexpectedly found records"); assert_matches!( error, ResolveError::Resolve(error) if matches!(error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. } + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. } ) ); @@ -694,11 +695,11 @@ mod test { dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip1); + assert_eq!(found_addr.ip(), &ip1); // If we insert the same record with a new address, it should be // updated. @@ -712,11 +713,11 @@ mod test { dns_builder.build_full_config_for_initial_generation(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip2); + assert_eq!(found_addr.ip(), &ip2); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -847,11 +848,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); // Confirm that we can access this record manually. 
- let found_ip = resolver - .lookup_ipv6(ServiceName::Nexus) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Nexus) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip); + assert_eq!(found_addr.ip(), &ip); // Confirm that the progenitor client can access this record too. let value = client.test_endpoint().await.unwrap(); diff --git a/ipcc/Cargo.toml b/ipcc/Cargo.toml index a9278349e1e..cfde3f737a1 100644 --- a/ipcc/Cargo.toml +++ b/ipcc/Cargo.toml @@ -9,13 +9,12 @@ workspace = true [dependencies] ciborium.workspace = true -libc.workspace = true omicron-common.workspace = true serde.workspace = true thiserror.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true -cfg-if.workspace = true +libipcc.workspace = true [dev-dependencies] omicron-common = { workspace = true, features = ["testing"] } diff --git a/ipcc/build.rs b/ipcc/build.rs deleted file mode 100644 index a64133dac2c..00000000000 --- a/ipcc/build.rs +++ /dev/null @@ -1,16 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -/// This path is where Oxide specific libraries live on helios systems. -#[cfg(target_os = "illumos")] -static OXIDE_PLATFORM: &str = "/usr/platform/oxide/lib/amd64/"; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - #[cfg(target_os = "illumos")] - { - println!("cargo:rustc-link-arg=-Wl,-R{}", OXIDE_PLATFORM); - println!("cargo:rustc-link-search={}", OXIDE_PLATFORM); - } -} diff --git a/ipcc/src/ffi.rs b/ipcc/src/ffi.rs deleted file mode 100644 index 420c1ddcdea..00000000000 --- a/ipcc/src/ffi.rs +++ /dev/null @@ -1,83 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] - -use std::ffi::{c_char, c_int, c_uint}; - -/// Opaque libipcc handle -#[repr(C)] -pub(crate) struct libipcc_handle_t { - _data: [u8; 0], - _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, -} - -/// Indicates that there was no error. Used as the initialized value when -/// calling into libipcc. -pub(crate) const LIBIPCC_ERR_OK: libipcc_err_t = 0; - -/// Indicates that there was a memory allocation error. The system error -/// contains the specific errno. -pub(crate) const LIBIPCC_ERR_NO_MEM: libipcc_err_t = 1; - -/// One of the function parameters does not pass validation. There will be more -/// detail available via libipcc_errmsg(). -pub(crate) const LIBIPCC_ERR_INVALID_PARAM: libipcc_err_t = 2; - -/// An internal error occurred. There will be more detail available via -/// libipcc_errmsg() and libipcc_syserr(). -pub(crate) const LIBIPCC_ERR_INTERNAL: libipcc_err_t = 3; - -/// The requested lookup key was not known to the SP. -pub(crate) const LIBIPCC_ERR_KEY_UNKNOWN: libipcc_err_t = 4; - -/// The value for the requested lookup key was too large for the -/// supplied buffer. -pub(crate) const LIBIPCC_ERR_KEY_BUFTOOSMALL: libipcc_err_t = 5; - -/// An attempt to write to a key failed because the key is read-only. -pub(crate) const LIBIPCC_ERR_KEY_READONLY: libipcc_err_t = 6; - -/// An attempt to write to a key failed because the passed value is too -/// long. 
-pub(crate) const LIBIPCC_ERR_KEY_VALTOOLONG: libipcc_err_t = 7;
-
-/// Compression or decompression failed. If appropriate, libipcc_syserr() will
-/// return the Z_ error from zlib.
-pub(crate) const LIBIPCC_ERR_KEY_ZERR: libipcc_err_t = 8;
-pub(crate) type libipcc_err_t = c_uint;
-
-/// Maxium length of an error message retrieved by libipcc_errmsg().
-pub(crate) const LIBIPCC_ERR_LEN: usize = 1024;
-
-/// Flags that can be passed to libipcc when looking up a key. Today this is
-/// used for looking up a compressed key, however nothing in the public API of
-/// this crate takes advantage of this.
-pub(crate) type libipcc_key_flag_t = ::std::os::raw::c_uint;
-
-#[link(name = "ipcc")]
-extern "C" {
-    pub(crate) fn libipcc_init(
-        lihp: *mut *mut libipcc_handle_t,
-        libipcc_errp: *mut libipcc_err_t,
-        syserrp: *mut c_int,
-        errmsg: *const c_char,
-        errlen: usize,
-    ) -> bool;
-    pub(crate) fn libipcc_fini(lih: *mut libipcc_handle_t);
-    pub(crate) fn libipcc_err(lih: *mut libipcc_handle_t) -> libipcc_err_t;
-    pub(crate) fn libipcc_syserr(lih: *mut libipcc_handle_t) -> c_int;
-    pub(crate) fn libipcc_errmsg(lih: *mut libipcc_handle_t) -> *const c_char;
-    pub(crate) fn libipcc_keylookup(
-        lih: *mut libipcc_handle_t,
-        key: u8,
-        bufp: *mut *mut u8,
-        lenp: *mut usize,
-        flags: libipcc_key_flag_t,
-    ) -> bool;
-}
diff --git a/ipcc/src/handle.rs b/ipcc/src/handle.rs
deleted file mode 100644
index 91b71a6ce33..00000000000
--- a/ipcc/src/handle.rs
+++ /dev/null
@@ -1,129 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
-// Copyright 2023 Oxide Computer Company
-
-use std::{
-    ffi::{c_int, CStr, CString},
-    ptr,
-};
-
-use crate::IpccError;
-use crate::{ffi::*, IpccErrorInner};
-
-pub struct IpccHandle(*mut libipcc_handle_t);
-
-impl Drop for IpccHandle {
-    fn drop(&mut self) {
-        unsafe {
-            libipcc_fini(self.0);
-        }
-    }
-}
-fn ipcc_fatal_error<C: Into<String>>(
-    context: C,
-    lerr: libipcc_err_t,
-    syserr: c_int,
-    errmsg: CString,
-) -> IpccError {
-    let context = context.into();
-    let syserr = if syserr == 0 {
-        "no system errno".to_string()
-    } else {
-        std::io::Error::from_raw_os_error(syserr).to_string()
-    };
-    let inner = IpccErrorInner {
-        context,
-        errmsg: errmsg.to_string_lossy().into_owned(),
-        syserr,
-    };
-    match lerr {
-        LIBIPCC_ERR_OK => panic!("called fatal on LIBIPCC_ERR_OK"),
-        LIBIPCC_ERR_NO_MEM => IpccError::NoMem(inner),
-        LIBIPCC_ERR_INVALID_PARAM => IpccError::InvalidParam(inner),
-        LIBIPCC_ERR_INTERNAL => IpccError::Internal(inner),
-        LIBIPCC_ERR_KEY_UNKNOWN => IpccError::KeyUnknown(inner),
-        LIBIPCC_ERR_KEY_BUFTOOSMALL => IpccError::KeyBufTooSmall(inner),
-        LIBIPCC_ERR_KEY_READONLY => IpccError::KeyReadonly(inner),
-        LIBIPCC_ERR_KEY_VALTOOLONG => IpccError::KeyValTooLong(inner),
-        LIBIPCC_ERR_KEY_ZERR => IpccError::KeyZerr(inner),
-        _ => IpccError::UnknownErr(inner),
-    }
-}
-
-impl IpccHandle {
-    pub fn new() -> Result<Self, IpccError> {
-        let mut ipcc_handle: *mut libipcc_handle_t = ptr::null_mut();
-        // We subtract 1 from the length of the inital vector since CString::new
-        // will append a nul for us.
-        // Safety: Unwrapped because we guarantee that the supplied bytes
-        // contain no 0 bytes up front.
-        let errmsg = CString::new(vec![1; LIBIPCC_ERR_LEN - 1]).unwrap();
-        let errmsg_len = errmsg.as_bytes().len();
-        let errmsg_ptr = errmsg.into_raw();
-        let mut lerr = LIBIPCC_ERR_OK;
-        let mut syserr = 0;
-        if !unsafe {
-            libipcc_init(
-                &mut ipcc_handle,
-                &mut lerr,
-                &mut syserr,
-                errmsg_ptr,
-                errmsg_len,
-            )
-        } {
-            // Safety: CString::from_raw retakes ownership of a CString
-            // transferred to C via CString::into_raw. We are calling into_raw()
-            // above so it is safe to turn this back into it's owned variant.
-            let errmsg = unsafe { CString::from_raw(errmsg_ptr) };
-            return Err(ipcc_fatal_error(
-                "Could not init libipcc handle",
-                lerr,
-                syserr,
-                errmsg,
-            ));
-        }
-
-        Ok(IpccHandle(ipcc_handle))
-    }
-
-    fn fatal<C: Into<String>>(&self, context: C) -> IpccError {
-        let lerr = unsafe { libipcc_err(self.0) };
-        let errmsg = unsafe { libipcc_errmsg(self.0) };
-        // Safety: CStr::from_ptr is documented as safe if:
-        // 1. The pointer contains a valid null terminator at the end of
-        //    the string
-        // 2. The pointer is valid for reads of bytes up to and including
-        //    the null terminator
-        // 3. The memory referenced by the return CStr is not mutated for
-        //    the duration of lifetime 'a
-        //
-        // (1) is true because this crate initializes space for an error message
-        // via CString::new which adds a terminator on our behalf.
-        // (2) should be guaranteed by libipcc itself since it is writing error
-        // messages into the CString backed buffer that we gave it.
-        // (3) We aren't currently mutating the memory referenced by the
-        // CStr, and we are creating an owned copy of the data immediately so
-        // that it can outlive the lifetime of the libipcc handle if needed.
-        let errmsg = unsafe { CStr::from_ptr(errmsg) }.to_owned();
-        let syserr = unsafe { libipcc_syserr(self.0) };
-        ipcc_fatal_error(context, lerr, syserr, errmsg)
-    }
-
-    pub(crate) fn key_lookup(
-        &self,
-        key: u8,
-        buf: &mut [u8],
-    ) -> Result<usize, IpccError> {
-        let mut lenp = buf.len();
-
-        if !unsafe {
-            libipcc_keylookup(self.0, key, &mut buf.as_mut_ptr(), &mut lenp, 0)
-        } {
-            return Err(self.fatal(format!("lookup of key {key} failed")));
-        }
-
-        Ok(lenp)
-    }
-}
diff --git a/ipcc/src/handle_stub.rs b/ipcc/src/handle_stub.rs
deleted file mode 100644
index bc4b84b7fee..00000000000
--- a/ipcc/src/handle_stub.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
-// Copyright 2023 Oxide Computer Company
-
-use crate::IpccError;
-
-/// This stub and it's implementation are used for non-illumos platforms which
-/// lack libipcc.
-pub struct IpccHandle;
-
-impl IpccHandle {
-    pub fn new() -> Result<Self, IpccError> {
-        panic!("ipcc unavailable on this platform")
-    }
-
-    pub(crate) fn key_lookup(
-        &self,
-        _key: u8,
-        _buf: &mut [u8],
-    ) -> Result<usize, IpccError> {
-        panic!("ipcc unavailable on this platform")
-    }
-}
diff --git a/ipcc/src/lib.rs b/ipcc/src/lib.rs
index e997c512300..2693929834a 100644
--- a/ipcc/src/lib.rs
+++ b/ipcc/src/lib.rs
@@ -9,24 +9,13 @@
 //! values are variously static, passed from the control plane to the SP
 //! (through MGS) or set from userland via libipcc.
 
-use cfg_if::cfg_if;
+use libipcc::{IpccError, IpccHandle};
 use omicron_common::update::ArtifactHash;
 use serde::Deserialize;
 use serde::Serialize;
 use thiserror::Error;
 use uuid::Uuid;
 
-cfg_if!
{ - if #[cfg(target_os = "illumos")] { - mod ffi; - mod handle; - use handle::IpccHandle; - } else { - mod handle_stub; - use handle_stub::IpccHandle; - } -} - #[cfg(test)] use proptest::arbitrary::any; #[cfg(test)] @@ -145,36 +134,6 @@ pub enum InstallinatorImageIdError { DeserializationFailed(String), } -#[derive(Error, Debug)] -pub enum IpccError { - #[error("Memory allocation error")] - NoMem(#[source] IpccErrorInner), - #[error("Invalid parameter")] - InvalidParam(#[source] IpccErrorInner), - #[error("Internal error occurred")] - Internal(#[source] IpccErrorInner), - #[error("Requested lookup key was not known to the SP")] - KeyUnknown(#[source] IpccErrorInner), - #[error("Value for the requested lookup key was too large for the supplied buffer")] - KeyBufTooSmall(#[source] IpccErrorInner), - #[error("Attempted to write to read-only key")] - KeyReadonly(#[source] IpccErrorInner), - #[error("Attempted write to key failed because the value is too long")] - KeyValTooLong(#[source] IpccErrorInner), - #[error("Compression or decompression failed")] - KeyZerr(#[source] IpccErrorInner), - #[error("Unknown libipcc error")] - UnknownErr(#[source] IpccErrorInner), -} - -#[derive(Error, Debug)] -#[error("{context}: {errmsg} ({syserr})")] -pub struct IpccErrorInner { - pub context: String, - pub errmsg: String, - pub syserr: String, -} - /// These are the IPCC keys we can look up. /// NB: These keys match the definitions found in libipcc (RFD 316) and should /// match the values in `[ipcc::Key]` one-to-one. diff --git a/live-tests/Cargo.toml b/live-tests/Cargo.toml new file mode 100644 index 00000000000..e0eaf2c338b --- /dev/null +++ b/live-tests/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "omicron-live-tests" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +omicron-workspace-hack.workspace = true + +[dev-dependencies] +anyhow.workspace = true +assert_matches.workspace = true +dropshot.workspace = true +futures.workspace = true +internal-dns.workspace = true +live-tests-macros.workspace = true +nexus-client.workspace = true +nexus-config.workspace = true +nexus-db-model.workspace = true +nexus-db-queries.workspace = true +nexus-reconfigurator-planning.workspace = true +nexus-reconfigurator-preparation.workspace = true +nexus-sled-agent-shared.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-test-utils.workspace = true +reqwest.workspace = true +serde.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +textwrap.workspace = true +tokio.workspace = true +uuid.workspace = true + +[lints] +workspace = true diff --git a/live-tests/README.adoc b/live-tests/README.adoc new file mode 100644 index 00000000000..56f9554bb7e --- /dev/null +++ b/live-tests/README.adoc @@ -0,0 +1,78 @@ += Omicron live tests + +The `omicron-live-tests` package contains automated tests that operate in the context of an already-deployed "real" Oxide system (e.g., `a4x2` or our `london` or `madrid` test environments). This is a home for automated tests for all kinds of Reconfigurator behavior (e.g., add/expunge of all zones, add/expunge sled, upgrades, etc.). It can probably be used for non-Reconfigurator behavior, too. 
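+
+A test in this suite is an ordinary Rust integration test wrapped in the
+`live_test` macro (defined in `live-tests/macros`). As a rough sketch -- the
+test name and body here are illustrative, not an actual test in the suite:
+
+```
+use common::LiveTestContext; // from live-tests/tests/common
+use live_tests_macros::live_test;
+
+#[live_test]
+async fn test_example(lc: &LiveTestContext) {
+    // `lc` provides access to the deployed system: a logger, an `OpContext`,
+    // internal DNS, and a `DataStore` pointed at its CockroachDB cluster.
+    let _datastore = lc.datastore();
+}
+```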
+
+This package is not built or tested by default because the tests generally
+can't work in a dev environment, and there's no way to have `cargo` build and
+check them but not run them by default.
+
+== Why a separate test suite?
+
+What makes these tests different from the rest of the test suite is that they
+require connectivity to the underlay network of the deployed system, make API
+calls to various components in that system, and assume that it will behave
+like a real production system. By contrast, the normal tests instead _set up_
+a bunch of components using simulated sled agents and localhost networking,
+which is great for starting from a predictable state and running tests in
+parallel, but the simulated sled agents and networking make it impossible to
+exercise quite a lot of Reconfigurator's functionality.
+
+There are also the `end-to-end-tests`. That environment is more realistic
+than the main test suite, but not faithful enough for many Reconfigurator
+tests.
+
+== Production systems
+
+There are some safeguards so that these tests won't run on production systems:
+they refuse to run if they find any Oxide-hardware sleds in the system whose
+serial numbers don't correspond to known test environments.
+
+== Usage
+
+These tests are not currently run automatically (though they are _built_ in
+CI).
+
+You can run them yourself. First, deploy Omicron using `a4x2` or one of the
+hardware test rigs. In your Omicron workspace, run `cargo xtask live-tests` to
+build an archive and then follow the instructions:
+
+```
+$ cargo xtask live-tests
+    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.96s
+     Running `target/debug/xtask live-tests`
+using temporary directory: /dangerzone/omicron_tmp/.tmp0ItZUD
+will create archive file: /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst
+output tarball: /home/dap/omicron-work/target/live-tests-archive.tgz
+
+running: /home/dap/.rustup/toolchains/1.80.1-x86_64-unknown-illumos/bin/cargo "nextest" "archive" "--package" "omicron-live-tests" "--archive-file" "/dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst"
+    Finished `test` profile [unoptimized + debuginfo] target(s) in 0.89s
+info: experimental features enabled: setup-scripts
+    Archiving 1 binary, 1 build script output directory, and 1 linked path to /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst
+    Archived 35 files to /dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive/omicron-live-tests.tar.zst in 0.31s
+running: bash "-c" "tar cf - Cargo.toml .config/nextest.toml live-tests | tar xf - -C \"/dangerzone/omicron_tmp/.tmp0ItZUD/live-tests-archive\""
+running: tar "cf" "/home/dap/omicron-work/target/live-tests-archive.tgz" "-C" "/dangerzone/omicron_tmp/.tmp0ItZUD" "live-tests-archive"
+created: /home/dap/omicron-work/target/live-tests-archive.tgz
+
+To use this:
+
+1. Copy the tarball to the switch zone in a deployed Omicron system.
+
+   e.g., scp \
+       /home/dap/omicron-work/target/live-tests-archive.tgz \
+       root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root
+
+2. Copy the `cargo-nextest` binary to the same place.
+
+   e.g., scp \
+       $(which cargo-nextest) \
+       root@YOUR_SCRIMLET_GZ_IP:/zone/oxz_switch/root/root
+
+3. On that system, unpack the tarball with:
+
+   tar xzf live-tests-archive.tgz
+
+4.
On that system, run tests with: + + TMPDIR=/var/tmp ./cargo-nextest nextest run --profile=live-tests \ + --archive-file live-tests-archive/omicron-live-tests.tar.zst \ + --workspace-remap live-tests-archive +``` + +Follow the instructions, run the tests, and you'll see the usual `nextest`-style output: + +``` +root@oxz_switch:~# TMPDIR=/var/tmp ./cargo-nextest nextest run --archive-file live-tests-archive/omicron-live-tests.tar.zst --workspace-remap live-tests-archive + Extracting 1 binary, 1 build script output directory, and 1 linked path to /var/tmp/nextest-archive-Lqx9VZ + Extracted 35 files to /var/tmp/nextest-archive-Lqx9VZ in 1.01s +info: experimental features enabled: setup-scripts + Starting 1 test across 1 binary (run ID: a5fc9163-9dd5-4b23-b89f-55f8f39ebbbc, nextest profile: default) + SLOW [> 60.000s] omicron-live-tests::test_nexus_add_remove test_nexus_add_remove + PASS [ 61.975s] omicron-live-tests::test_nexus_add_remove test_nexus_add_remove +------------ + Summary [ 61.983s] 1 test run: 1 passed (1 slow), 0 skipped +root@oxz_switch:~# +``` diff --git a/live-tests/build.rs b/live-tests/build.rs new file mode 100644 index 00000000000..1ba9acd41c9 --- /dev/null +++ b/live-tests/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/live-tests/macros/Cargo.toml b/live-tests/macros/Cargo.toml new file mode 100644 index 00000000000..81d094d9261 --- /dev/null +++ b/live-tests/macros/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "live-tests-macros" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lib] +proc-macro = true + +[lints] +workspace = true + +[dependencies] +quote.workspace = true +syn = { workspace = true, features = [ "fold", "parsing" ] } +omicron-workspace-hack.workspace = true diff --git a/live-tests/macros/src/lib.rs b/live-tests/macros/src/lib.rs new file mode 100644 index 00000000000..4fdd4029b57 --- /dev/null +++ b/live-tests/macros/src/lib.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Macro to wrap a live test function that automatically creates and cleans up +//! the `LiveTestContext` + +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, ItemFn}; + +/// Define a test function that uses `LiveTestContext` +/// +/// This is usable only within the `omicron-live-tests` crate. +/// +/// Similar to `nexus_test`, this macro lets you define a test function that +/// behaves like `tokio::test` except that it accepts an argument of type +/// `&LiveTestContext`. The `LiveTestContext` is cleaned up on _successful_ +/// return of the test function. On failure, debugging information is +/// deliberately left around. +/// +/// Example usage: +/// +/// ```ignore +/// #[live_test] +/// async fn test_my_test_case(lc: &LiveTestContext) { +/// assert!(true); +/// } +/// ``` +/// +/// We use this instead of implementing Drop on LiveTestContext because we want +/// the teardown to only happen when the test doesn't fail (which causes a panic +/// and unwind). 
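+///
+/// Roughly speaking, the generated code looks like the following sketch
+/// (slightly simplified; see the implementation below for the exact
+/// expansion):
+///
+/// ```ignore
+/// #[::tokio::test]
+/// async fn test_my_test_case() {
+///     async fn test_my_test_case(lc: &LiveTestContext) {
+///         assert!(true);
+///     }
+///     let ctx = crate::common::LiveTestContext::new("test_my_test_case")
+///         .await
+///         .expect("setting up LiveTestContext");
+///     test_my_test_case(&ctx).await;
+///     ctx.cleanup_successful();
+/// }
+/// ```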
+#[proc_macro_attribute]
+pub fn live_test(_attrs: TokenStream, input: TokenStream) -> TokenStream {
+    let input_func = parse_macro_input!(input as ItemFn);
+
+    let mut correct_signature = true;
+    if input_func.sig.variadic.is_some()
+        || input_func.sig.inputs.len() != 1
+        || input_func.sig.asyncness.is_none()
+    {
+        correct_signature = false;
+    }
+
+    // Verify we're returning an empty tuple
+    correct_signature &= match input_func.sig.output {
+        syn::ReturnType::Default => true,
+        syn::ReturnType::Type(_, ref t) => {
+            if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &**t {
+                elems.is_empty()
+            } else {
+                false
+            }
+        }
+    };
+    if !correct_signature {
+        panic!("func signature must be async fn(&LiveTestContext)");
+    }
+
+    let func_ident_string = input_func.sig.ident.to_string();
+    let func_ident = input_func.sig.ident.clone();
+    let new_block = quote! {
+        {
+            #input_func
+
+            let ctx = crate::common::LiveTestContext::new(
+                #func_ident_string
+            ).await.expect("setting up LiveTestContext");
+            #func_ident(&ctx).await;
+            ctx.cleanup_successful();
+        }
+    };
+    let mut sig = input_func.sig.clone();
+    sig.inputs.clear();
+    let func = ItemFn {
+        attrs: input_func.attrs,
+        vis: input_func.vis,
+        sig,
+        block: Box::new(syn::parse2(new_block).unwrap()),
+    };
+    TokenStream::from(quote!(
+        #[::tokio::test]
+        #func
+    ))
+}
diff --git a/live-tests/tests/common/mod.rs b/live-tests/tests/common/mod.rs
new file mode 100644
index 00000000000..28f677f5edf
--- /dev/null
+++ b/live-tests/tests/common/mod.rs
@@ -0,0 +1,249 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+pub mod reconfigurator;
+
+use anyhow::{anyhow, ensure, Context};
+use dropshot::test_util::LogContext;
+use internal_dns::resolver::Resolver;
+use internal_dns::ServiceName;
+use nexus_config::PostgresConfigWithUrl;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::SledFilter;
+use omicron_common::address::Ipv6Subnet;
+use slog::info;
+use slog::o;
+use std::ffi::OsStr;
+use std::net::SocketAddrV6;
+use std::path::Component;
+use std::sync::Arc;
+
+/// Contains data and interfaces useful for running tests against an existing
+/// deployed control plane
+pub struct LiveTestContext {
+    logctx: LogContext,
+    opctx: OpContext,
+    resolver: Resolver,
+    datastore: Arc<DataStore>,
+}
+
+impl LiveTestContext {
+    /// Make a new `LiveTestContext` for a test called `test_name`.
+    pub async fn new(
+        test_name: &'static str,
+    ) -> Result<LiveTestContext, anyhow::Error> {
+        let logctx = omicron_test_utils::dev::test_setup_log(test_name);
+        let log = &logctx.log;
+        let resolver = create_resolver(log)?;
+        check_execution_environment(&resolver).await?;
+        let datastore = create_datastore(&log, &resolver).await?;
+        let opctx = OpContext::for_tests(log.clone(), datastore.clone());
+        check_hardware_environment(&opctx, &datastore).await?;
+        Ok(LiveTestContext { logctx, opctx, resolver, datastore })
+    }
+
+    /// Clean up this `LiveTestContext`
+    ///
+    /// This mainly removes log files created by the test.  We do this in this
+    /// explicit cleanup function rather than on `Drop` because we want the log
+    /// files preserved on test failure.
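+    ///
+    /// (Tests don't normally call this directly: the `#[live_test]` macro
+    /// inserts a call to it after the wrapped test function returns
+    /// successfully.)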
+    pub fn cleanup_successful(self) {
+        self.logctx.cleanup_successful();
+    }
+
+    /// Returns a logger suitable for use in the test
+    pub fn log(&self) -> &slog::Logger {
+        &self.logctx.log
+    }
+
+    /// Returns an `OpContext` suitable for use in tests
+    pub fn opctx(&self) -> &OpContext {
+        &self.opctx
+    }
+
+    /// Returns a `DataStore` pointing at this deployed system's database
+    pub fn datastore(&self) -> &DataStore {
+        &self.datastore
+    }
+
+    /// Returns a client for a Nexus internal API at the given socket address
+    pub fn specific_internal_nexus_client(
+        &self,
+        sockaddr: SocketAddrV6,
+    ) -> nexus_client::Client {
+        let url = format!("http://{}", sockaddr);
+        let log = self.logctx.log.new(o!("nexus_internal_url" => url.clone()));
+        nexus_client::Client::new(&url, log)
+    }
+
+    /// Returns a list of clients for the internal APIs for all Nexus instances
+    /// found in DNS
+    pub async fn all_internal_nexus_clients(
+        &self,
+    ) -> Result<Vec<nexus_client::Client>, anyhow::Error> {
+        Ok(self
+            .resolver
+            .lookup_all_socket_v6(ServiceName::Nexus)
+            .await
+            .context("looking up Nexus in internal DNS")?
+            .into_iter()
+            .map(|s| self.specific_internal_nexus_client(s))
+            .collect())
+    }
+}
+
+fn create_resolver(log: &slog::Logger) -> Result<Resolver, anyhow::Error> {
+    // In principle, we should look at /etc/resolv.conf to find the DNS
+    // servers.  In practice, this usually isn't populated today.  See
+    // oxidecomputer/omicron#2122.
+    //
+    // However, the address selected below should work for most existing
+    // Omicron deployments today.  That's because while the base subnet is in
+    // principle configurable in config-rss.toml, it's very uncommon to change
+    // it from the default value used here.
+    let subnet = Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap());
+    eprintln!("note: using DNS server for subnet {}", subnet.net());
+    internal_dns::resolver::Resolver::new_from_subnet(log.clone(), subnet)
+        .with_context(|| {
+            format!("creating DNS resolver for subnet {}", subnet.net())
+        })
+}
+
+/// Creates a DataStore pointing at the CockroachDB cluster that's in DNS
+async fn create_datastore(
+    log: &slog::Logger,
+    resolver: &Resolver,
+) -> Result<Arc<DataStore>, anyhow::Error> {
+    let sockaddrs = resolver
+        .lookup_all_socket_v6(ServiceName::Cockroach)
+        .await
+        .context("resolving CockroachDB")?;
+
+    let url = format!(
+        "postgresql://root@{}/omicron?sslmode=disable",
+        sockaddrs
+            .into_iter()
+            .map(|a| a.to_string())
+            .collect::<Vec<_>>()
+            .join(",")
+    )
+    .parse::<PostgresConfigWithUrl>()
+    .context("failed to parse constructed postgres URL")?;
+
+    let db_config = nexus_db_queries::db::Config { url };
+    let pool =
+        Arc::new(nexus_db_queries::db::Pool::new_single_host(log, &db_config));
+    DataStore::new_failfast(log, pool)
+        .await
+        .context("creating DataStore")
+        .map(Arc::new)
+}
+
+/// Performs quick checks to determine if the user is running these tests in
+/// the wrong place and bails out if so
+///
+/// This isn't perfect but seeks to fail fast in obviously bogus environments
+/// that someone might accidentally try to run this in.
+async fn check_execution_environment(
+    resolver: &Resolver,
+) -> Result<(), anyhow::Error> {
+    ensure!(
+        cfg!(target_os = "illumos"),
+        "live tests can only be run on deployed systems, which run illumos"
+    );
+
+    // The only real requirement for these tests is that they're run from a
+    // place with connectivity to the underlay network of a deployed control
+    // plane.  The easiest way to tell is to look up something in internal DNS.
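+    //
+    // (Concretely, the lookup below resolves the internal DNS service itself
+    // using the resolver we just created; if that fails, we wrap the error in
+    // a wordier message explaining the likely cause.)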
+    resolver.lookup_ip(ServiceName::InternalDns).await.map_err(|e| {
+        let text = format!(
+            "check_execution_environment(): failed to look up internal DNS \
+             in the internal DNS servers.\n\n \
+             Are you trying to run this in a development environment? \
+             This test can only be run on deployed systems and only from a \
+             context with connectivity to the underlay network.\n\n \
+             raw error: {}",
+            slog_error_chain::InlineErrorChain::new(&e)
+        );
+        anyhow!("{}", textwrap::wrap(&text, 80).join("\n"))
+    })?;
+
+    // Warn the user if the temporary directory is /tmp.  This check is
+    // heuristic.  There are other ways they may have specified a tmpfs
+    // temporary directory and we don't claim to catch all of them.
+    //
+    // We could also just go ahead and use /var/tmp, but it's not clear we can
+    // reliably do that at this point (if Rust or other components have cached
+    // TMPDIR) and it would be hard to override.
+    let tmpdir = std::env::temp_dir();
+    let mut tmpdir_components = tmpdir.components().take(2);
+    if let Some(first) = tmpdir_components.next() {
+        if let Some(next) = tmpdir_components.next() {
+            if first == Component::RootDir
+                && next == Component::Normal(OsStr::new("tmp"))
+            {
+                eprintln!(
+                    "WARNING: temporary directory appears to be under /tmp, \
+                    which is generally tmpfs.  Consider setting \
+                    TMPDIR=/var/tmp to avoid runaway tests using too much \
+                    memory and swap."
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Performs additional checks to determine if we're running in an environment
+/// that we believe is safe for running tests
+///
+/// These tests may make arbitrary modifications to the system.  We don't want
+/// to run this in dogfood or other pre-production or production environments.
+/// This function uses an allowlist of Oxide serials corresponding to test
+/// environments so that it never accidentally runs on a production system.
+///
+/// Non-Oxide hardware (e.g., PCs, a4x2, etc.) is always allowed.
+async fn check_hardware_environment(
+    opctx: &OpContext,
+    datastore: &DataStore,
+) -> Result<(), anyhow::Error> {
+    const ALLOWED_GIMLET_SERIALS: &[&str] = &[
+        // test rig: "madrid"
+        "BRM42220004",
+        "BRM42220081",
+        "BRM42220007",
+        "BRM42220046",
+        // test rig: "london"
+        "BRM42220036",
+        "BRM42220062",
+        "BRM42220030",
+        "BRM44220007",
+    ];
+
+    // Refuse to operate in an environment that might contain real Oxide
+    // hardware that's not known to be part of a test rig.  This is
+    // deliberately conservative.
+    let scary_sleds = datastore
+        .sled_list_all_batched(opctx, SledFilter::Commissioned)
+        .await
+        .context("check_hardware_environment: listing commissioned sleds")?
+        .into_iter()
+        .filter_map(|s| {
+            (s.part_number() != "i86pc"
+                && !ALLOWED_GIMLET_SERIALS.contains(&s.serial_number()))
+            .then(|| s.serial_number().to_owned())
+        })
+        .collect::<Vec<_>>();
+    if scary_sleds.is_empty() {
+        info!(&opctx.log, "environment verified");
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "refusing to operate in an environment with an unknown system: {}",
+            scary_sleds.join(", ")
+        ))
+    }
+}
diff --git a/live-tests/tests/common/reconfigurator.rs b/live-tests/tests/common/reconfigurator.rs
new file mode 100644
index 00000000000..8f2560bb49c
--- /dev/null
+++ b/live-tests/tests/common/reconfigurator.rs
@@ -0,0 +1,103 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! 
Helpers common to Reconfigurator tests + +use anyhow::{ensure, Context}; +use nexus_client::types::BlueprintTargetSet; +use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_types::deployment::{Blueprint, PlanningInput}; +use slog::{debug, info}; + +/// Modify the system by editing the current target blueprint +/// +/// More precisely, this function: +/// +/// - fetches the current target blueprint +/// - creates a new BlueprintBuilder based on it +/// - invokes the caller's `edit_fn`, which may modify the builder however it +/// likes +/// - generates a new blueprint (thus based on the current target) +/// - uploads the new blueprint +/// - sets the new blueprint as the current target +/// - enables the new blueprint +/// +/// ## Errors +/// +/// This function fails if the current target blueprint is not already enabled. +/// That's because a disabled target blueprint means somebody doesn't want +/// Reconfigurator running or doesn't want it using that blueprint. We don't +/// want the test to inadvertently override that behavior. In a typical use +/// case, a developer enables the initial target blueprint before running these +/// tests and then doesn't need to think about it again for the lifetime of +/// their test environment. +pub async fn blueprint_edit_current_target( + log: &slog::Logger, + planning_input: &PlanningInput, + nexus: &nexus_client::Client, + edit_fn: &dyn Fn(&mut BlueprintBuilder) -> Result<(), anyhow::Error>, +) -> Result<(Blueprint, Blueprint), anyhow::Error> { + // Fetch the current target configuration. + info!(log, "editing current target blueprint"); + let target_blueprint = nexus + .blueprint_target_view() + .await + .context("fetch current target config")? + .into_inner(); + debug!(log, "found current target blueprint"; + "blueprint_id" => %target_blueprint.target_id + ); + ensure!( + target_blueprint.enabled, + "refusing to modify a system with target blueprint disabled" + ); + + // Fetch the actual blueprint. + let blueprint1 = nexus + .blueprint_view(&target_blueprint.target_id) + .await + .context("fetch current target blueprint")? + .into_inner(); + debug!(log, "fetched current target blueprint"; + "blueprint_id" => %target_blueprint.target_id + ); + + // Make a new builder based on that blueprint and use `edit_fn` to edit it. + let mut builder = BlueprintBuilder::new_based_on( + log, + &blueprint1, + &planning_input, + "test-suite", + ) + .context("creating BlueprintBuilder")?; + + edit_fn(&mut builder)?; + + // Assemble the new blueprint, import it, and make it the new target. 
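+    //
+    // (These are separate Nexus API calls: `blueprint_import` uploads the new
+    // blueprint, and `blueprint_target_set` then makes it the enabled
+    // target.)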
+ let blueprint2 = builder.build(); + info!(log, "assembled new blueprint based on target"; + "current_target_id" => %target_blueprint.target_id, + "new_blueprint_id" => %blueprint2.id, + ); + nexus + .blueprint_import(&blueprint2) + .await + .context("importing new blueprint")?; + debug!(log, "imported new blueprint"; + "blueprint_id" => %blueprint2.id, + ); + nexus + .blueprint_target_set(&BlueprintTargetSet { + enabled: true, + target_id: blueprint2.id, + }) + .await + .expect("setting new target"); + info!(log, "finished editing target blueprint"; + "old_target_id" => %blueprint1.id, + "new_target_id" => %blueprint2.id, + ); + + Ok((blueprint1, blueprint2)) +} diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs new file mode 100644 index 00000000000..70e55b704ab --- /dev/null +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -0,0 +1,229 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; + +use anyhow::Context; +use assert_matches::assert_matches; +use common::reconfigurator::blueprint_edit_current_target; +use common::LiveTestContext; +use futures::TryStreamExt; +use live_tests_macros::live_test; +use nexus_client::types::Saga; +use nexus_client::types::SagaState; +use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; +use nexus_reconfigurator_preparation::PlanningInputFromDb; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::SledFilter; +use omicron_common::address::NEXUS_INTERNAL_PORT; +use omicron_test_utils::dev::poll::wait_for_condition; +use omicron_test_utils::dev::poll::CondCheckError; +use slog::{debug, info}; +use std::net::SocketAddrV6; +use std::time::Duration; + +// TODO-coverage This test could check other stuff: +// +// - that after adding: +// - the new Nexus appears in external DNS +// - we can _use_ the new Nexus from the outside +// (e.g., using an `oxide_client` using a custom reqwest resolver that +// points only at that one IP so that we can make sure we're always getting +// that one) +// - that after expungement, it doesn't appear in external DNS any more +// +#[live_test] +async fn test_nexus_add_remove(lc: &LiveTestContext) { + // Test setup + let log = lc.log(); + let opctx = lc.opctx(); + let datastore = lc.datastore(); + let planning_input = PlanningInputFromDb::assemble(&opctx, &datastore) + .await + .expect("planning input"); + let initial_nexus_clients = lc.all_internal_nexus_clients().await.unwrap(); + let nexus = initial_nexus_clients.first().expect("internal Nexus client"); + + // First, deploy a new Nexus zone to an arbitrary sled. + let sled_id = planning_input + .all_sled_ids(SledFilter::Commissioned) + .next() + .expect("any sled id"); + let (blueprint1, blueprint2) = blueprint_edit_current_target( + log, + &planning_input, + &nexus, + &|builder: &mut BlueprintBuilder| { + let nnexus = builder + .sled_num_running_zones_of_kind(sled_id, ZoneKind::Nexus); + let count = builder + .sled_ensure_zone_multiple_nexus(sled_id, nnexus + 1) + .context("adding Nexus zone")?; + assert_matches!( + count, + EnsureMultiple::Changed { added: 1, removed: 0 } + ); + Ok(()) + }, + ) + .await + .expect("editing blueprint to add zone"); + + // Figure out which zone is new and make a new client for it. 
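+    // (The only edit made above was adding a single Nexus zone, so the
+    // blueprint diff should show exactly one sled with exactly one added
+    // zone.)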
+ let diff = blueprint2.diff_since_blueprint(&blueprint1); + let new_zone = diff + .zones + .added + .values() + .next() + .expect("at least one sled with added zones") + .zones + .first() + .expect("at least one added zone on that sled"); + assert_eq!(new_zone.kind(), ZoneKind::Nexus); + let new_zone_addr = new_zone.underlay_address(); + let new_zone_sockaddr = + SocketAddrV6::new(new_zone_addr, NEXUS_INTERNAL_PORT, 0, 0); + let new_zone_client = lc.specific_internal_nexus_client(new_zone_sockaddr); + + // Wait for the new Nexus zone to show up and be usable. + let initial_sagas_list = wait_for_condition( + || async { + list_sagas(&new_zone_client).await.map_err(|e| { + debug!(log, + "waiting for new Nexus to be available: listing sagas: {e:#}" + ); + CondCheckError::<()>::NotYet + }) + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .expect("new Nexus to be usable"); + assert!(initial_sagas_list.is_empty()); + info!(log, "new Nexus is online"); + + // Create a demo saga from the new Nexus zone. We'll use this to test that + // when the zone is expunged, its saga gets moved to a different Nexus. + let demo_saga = new_zone_client + .saga_demo_create() + .await + .expect("new Nexus saga demo create"); + let saga_id = demo_saga.saga_id; + let sagas_list = + list_sagas(&new_zone_client).await.expect("new Nexus sagas_list"); + assert_eq!(sagas_list.len(), 1); + assert_eq!(sagas_list[0].id, saga_id); + info!(log, "created demo saga"; "demo_saga" => ?demo_saga); + + // Now expunge the zone we just created. + let _ = blueprint_edit_current_target( + log, + &planning_input, + &nexus, + &|builder: &mut BlueprintBuilder| { + builder + .sled_expunge_zone(sled_id, new_zone.id()) + .context("expunging zone") + }, + ) + .await + .expect("editing blueprint to expunge zone"); + + // At some point, we should be unable to reach this Nexus any more. + wait_for_condition( + || async { + match new_zone_client.saga_list(None, None, None).await { + Err(nexus_client::Error::CommunicationError(error)) => { + info!(log, "expunged Nexus no longer reachable"; + "error" => slog_error_chain::InlineErrorChain::new(&error), + ); + Ok(()) + } + Ok(_) => { + debug!(log, "expunged Nexus is still reachable"); + Err(CondCheckError::<()>::NotYet) + } + Err(error) => { + debug!(log, "expunged Nexus is still reachable"; + "error" => slog_error_chain::InlineErrorChain::new(&error), + ); + Err(CondCheckError::NotYet) + } + } + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + // Wait for some other Nexus instance to pick up the saga. + let nexus_found = wait_for_condition( + || async { + for nexus_client in &initial_nexus_clients { + assert!(nexus_client.baseurl() != new_zone_client.baseurl()); + let Ok(sagas) = list_sagas(&nexus_client).await else { + continue; + }; + + debug!(log, "found sagas (last): {:?}", sagas); + if sagas.into_iter().any(|s| s.id == saga_id) { + return Ok(nexus_client); + } + } + + return Err(CondCheckError::<()>::NotYet); + }, + &Duration::from_millis(50), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + info!(log, "found saga in a different Nexus instance"; + "saga_id" => %saga_id, + "found_nexus" => nexus_found.baseurl(), + ); + assert!(nexus_found.baseurl() != new_zone_client.baseurl()); + + // Now, complete the demo saga on whichever instance is running it now. + // `saga_demo_complete` is not synchronous. It just unblocks the saga. + // We'll need to poll a bit to wait for it to finish. 
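+    // (From its use here, the demo saga appears to exist for exactly this
+    // kind of test: it does no real work of its own and waits until it is
+    // explicitly completed.)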
+    nexus_found
+        .saga_demo_complete(&demo_saga.demo_saga_id)
+        .await
+        .expect("complete demo saga");
+    let found = wait_for_condition(
+        || async {
+            let sagas = list_sagas(&nexus_found).await.expect("listing sagas");
+            debug!(log, "found sagas (last): {:?}", sagas);
+            let found = sagas.into_iter().find(|s| s.id == saga_id).unwrap();
+            if matches!(found.state, SagaState::Succeeded) {
+                Ok(found)
+            } else {
+                Err(CondCheckError::<()>::NotYet)
+            }
+        },
+        &Duration::from_millis(50),
+        &Duration::from_secs(30),
+    )
+    .await
+    .unwrap();
+
+    assert_eq!(found.id, saga_id);
+    assert!(matches!(found.state, SagaState::Succeeded));
+}
+
+async fn list_sagas(
+    client: &nexus_client::Client,
+) -> Result<Vec<Saga>, anyhow::Error> {
+    client
+        .saga_list_stream(None, None)
+        .try_collect::<Vec<_>>()
+        .await
+        .context("listing sagas")
+}
diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs
index 9d8bf1ac9ba..7f2726cc595 100644
--- a/nexus-config/src/nexus_config.rs
+++ b/nexus-config/src/nexus_config.rs
@@ -391,6 +391,16 @@ pub struct BackgroundTaskConfig {
     pub saga_recovery: SagaRecoveryConfig,
     /// configuration for lookup region port task
     pub lookup_region_port: LookupRegionPortConfig,
+    /// configuration for region snapshot replacement starter task
+    pub region_snapshot_replacement_start: RegionSnapshotReplacementStartConfig,
+    /// configuration for region snapshot replacement garbage collection
+    pub region_snapshot_replacement_garbage_collection:
+        RegionSnapshotReplacementGarbageCollectionConfig,
+    /// configuration for region snapshot replacement step task
+    pub region_snapshot_replacement_step: RegionSnapshotReplacementStepConfig,
+    /// configuration for region snapshot replacement finisher task
+    pub region_snapshot_replacement_finish:
+        RegionSnapshotReplacementFinishConfig,
 }
 
 #[serde_as]
@@ -627,6 +637,38 @@ pub struct LookupRegionPortConfig {
     pub period_secs: Duration,
 }
 
+#[serde_as]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct RegionSnapshotReplacementStartConfig {
+    /// period (in seconds) for periodic activations of this background task
+    #[serde_as(as = "DurationSeconds<u64>")]
+    pub period_secs: Duration,
+}
+
+#[serde_as]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct RegionSnapshotReplacementGarbageCollectionConfig {
+    /// period (in seconds) for periodic activations of this background task
+    #[serde_as(as = "DurationSeconds<u64>")]
+    pub period_secs: Duration,
+}
+
+#[serde_as]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct RegionSnapshotReplacementStepConfig {
+    /// period (in seconds) for periodic activations of this background task
+    #[serde_as(as = "DurationSeconds<u64>")]
+    pub period_secs: Duration,
+}
+
+#[serde_as]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct RegionSnapshotReplacementFinishConfig {
+    /// period (in seconds) for periodic activations of this background task
+    #[serde_as(as = "DurationSeconds<u64>")]
+    pub period_secs: Duration,
+}
+
 /// Configuration for a nexus server
 #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct PackageConfig {
@@ -874,6 +916,10 @@ mod test {
             abandoned_vmm_reaper.period_secs = 60
             saga_recovery.period_secs = 60
             lookup_region_port.period_secs = 60
+            region_snapshot_replacement_start.period_secs = 30
+            region_snapshot_replacement_garbage_collection.period_secs = 30
+            region_snapshot_replacement_step.period_secs = 30
+            region_snapshot_replacement_finish.period_secs = 30
 
             [default_region_allocation_strategy] 
type = "random" seed = 0 @@ -1036,6 +1082,22 @@ mod test { lookup_region_port: LookupRegionPortConfig { period_secs: Duration::from_secs(60), }, + region_snapshot_replacement_start: + RegionSnapshotReplacementStartConfig { + period_secs: Duration::from_secs(30), + }, + region_snapshot_replacement_garbage_collection: + RegionSnapshotReplacementGarbageCollectionConfig { + period_secs: Duration::from_secs(30), + }, + region_snapshot_replacement_step: + RegionSnapshotReplacementStepConfig { + period_secs: Duration::from_secs(30), + }, + region_snapshot_replacement_finish: + RegionSnapshotReplacementFinishConfig { + period_secs: Duration::from_secs(30), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -1112,6 +1174,10 @@ mod test { abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 60 lookup_region_port.period_secs = 60 + region_snapshot_replacement_start.period_secs = 30 + region_snapshot_replacement_garbage_collection.period_secs = 30 + region_snapshot_replacement_step.period_secs = 30 + region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus-config/src/postgres_config.rs b/nexus-config/src/postgres_config.rs index 2509ae4fca2..0c72d2ba9e6 100644 --- a/nexus-config/src/postgres_config.rs +++ b/nexus-config/src/postgres_config.rs @@ -5,6 +5,7 @@ //! Common objects used for configuration use std::fmt; +use std::net::SocketAddr; use std::ops::Deref; use std::str::FromStr; @@ -32,6 +33,29 @@ impl PostgresConfigWithUrl { pub fn url(&self) -> String { self.url_raw.clone() } + + /// Accesses the first ip / port pair within the URL. + /// + /// # Panics + /// + /// This method makes the assumption that the hostname has at least one + /// "host IP / port" pair which can be extracted. If the supplied URL + /// does not have such a pair, this function will panic. + // Yes, panicking in the above scenario sucks. But this type is already + // pretty ubiquitous within Omicron, and integration with the qorb + // connection pooling library requires access to database by SocketAddr. + pub fn address(&self) -> SocketAddr { + let tokio_postgres::config::Host::Tcp(host) = + &self.config.get_hosts()[0] + else { + panic!("Non-TCP hostname"); + }; + let ip: std::net::IpAddr = + host.parse().expect("Failed to parse host as IP address"); + + let port = self.config.get_ports()[0]; + SocketAddr::new(ip, port) + } } impl FromStr for PostgresConfigWithUrl { diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 86d9abc460e..d6f97adc393 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -25,6 +25,7 @@ chrono.workspace = true cockroach-admin-client.workspace = true crucible-agent-client.workspace = true crucible-pantry-client.workspace = true +crucible-common.workspace = true dns-service-client.workspace = true dpd-client.workspace = true mg-admin-client.workspace = true @@ -34,6 +35,7 @@ futures.workspace = true gateway-client.workspace = true headers.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true @@ -45,6 +47,7 @@ macaddr.workspace = true # integration tests. 
nexus-client.workspace = true nexus-config.workspace = true +nexus-external-api.workspace = true nexus-internal-api.workspace = true nexus-networking.workspace = true nexus-saga-recovery.workspace = true @@ -55,6 +58,7 @@ openssl.workspace = true oximeter-client.workspace = true oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] } oxnet.workspace = true +oxql-types.workspace = true parse-display.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. @@ -86,7 +90,6 @@ tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } tokio-util = { workspace = true, features = ["codec"] } tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true nexus-auth.workspace = true @@ -142,7 +145,7 @@ sp-sim.workspace = true rustls.workspace = true subprocess.workspace = true term.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true tufaceous.workspace = true tufaceous-lib.workspace = true httptest.workspace = true diff --git a/nexus/auth/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs index ccb72182857..5c7fc7af05d 100644 --- a/nexus/auth/src/authn/external/mod.rs +++ b/nexus/auth/src/authn/external/mod.rs @@ -13,7 +13,6 @@ use slog::trace; use std::borrow::Borrow; use uuid::Uuid; -pub mod cookies; pub mod session_cookie; pub mod spoof; pub mod token; diff --git a/nexus/auth/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs index 7811bf2826c..f6b23308a0a 100644 --- a/nexus/auth/src/authn/external/session_cookie.rs +++ b/nexus/auth/src/authn/external/session_cookie.rs @@ -4,7 +4,6 @@ //! authn scheme for console that looks up cookie values in a session table -use super::cookies::parse_cookies; use super::{HttpAuthnScheme, Reason, SchemeResult}; use crate::authn; use crate::authn::{Actor, Details}; @@ -13,6 +12,7 @@ use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; use dropshot::HttpError; use http::HeaderValue; +use nexus_types::authn::cookies::parse_cookies; use slog::debug; use uuid::Uuid; diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index a9dee990b94..f896f11c5b2 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -8,6 +8,7 @@ use crate::ipv6; use crate::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; +use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind; use serde::{Deserialize, Serialize}; use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; @@ -41,6 +42,7 @@ pub struct Dataset { pub kind: DatasetKind, pub size_used: Option, + zone_name: Option, } impl Dataset { @@ -48,12 +50,15 @@ impl Dataset { id: Uuid, pool_id: Uuid, addr: Option, - kind: DatasetKind, + api_kind: ApiDatasetKind, ) -> Self { - let size_used = match kind { - DatasetKind::Crucible => Some(0), - _ => None, + let kind = DatasetKind::from(&api_kind); + let (size_used, zone_name) = match api_kind { + ApiDatasetKind::Crucible => (Some(0), None), + ApiDatasetKind::Zone { name } => (None, Some(name)), + _ => (None, None), }; + Self { identity: DatasetIdentity::new(id), time_deleted: None, @@ -63,6 +68,7 @@ impl Dataset { port: addr.map(|addr| addr.port().into()), kind, size_used, + zone_name, } } diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs index 4a86efaca13..40ec76ded34 100644 --- a/nexus/db-model/src/dataset_kind.rs +++ 
b/nexus/db-model/src/dataset_kind.rs @@ -23,10 +23,13 @@ impl_enum_type!( ClickhouseServer => b"clickhouse_server" ExternalDns => b"external_dns" InternalDns => b"internal_dns" + ZoneRoot => b"zone_root" + Zone => b"zone" + Debug => b"debug" ); -impl From for DatasetKind { - fn from(k: internal::shared::DatasetKind) -> Self { +impl From<&internal::shared::DatasetKind> for DatasetKind { + fn from(k: &internal::shared::DatasetKind) -> Self { match k { internal::shared::DatasetKind::Crucible => DatasetKind::Crucible, internal::shared::DatasetKind::Cockroach => DatasetKind::Cockroach, @@ -45,6 +48,13 @@ impl From for DatasetKind { internal::shared::DatasetKind::InternalDns => { DatasetKind::InternalDns } + internal::shared::DatasetKind::ZoneRoot => DatasetKind::ZoneRoot, + // Enums in the database do not have associated data, so this drops + // the "name" of the zone and only considers the type. + // + // The zone name, if it exists, is stored in a separate column. + internal::shared::DatasetKind::Zone { .. } => DatasetKind::Zone, + internal::shared::DatasetKind::Debug => DatasetKind::Debug, } } } diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index 6bef893a5ba..b4c60e12ef0 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -6,7 +6,7 @@ //! database use crate::inventory::ZoneType; -use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; +use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::schema::{ blueprint, bp_omicron_physical_disk, bp_omicron_zone, bp_omicron_zone_nic, bp_sled_omicron_physical_disks, bp_sled_omicron_zones, bp_sled_state, @@ -17,21 +17,31 @@ use crate::{ impl_enum_type, ipv6, Generation, MacAddr, Name, SledState, SqlU16, SqlU32, SqlU8, }; +use anyhow::{anyhow, bail, Context, Result}; use chrono::{DateTime, Utc}; use ipnetwork::IpNetwork; -use nexus_types::deployment::BlueprintPhysicalDiskConfig; -use nexus_types::deployment::BlueprintPhysicalDisksConfig; +use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; +use nexus_types::deployment::{ + blueprint_zone_type, BlueprintPhysicalDisksConfig, +}; +use nexus_types::deployment::{BlueprintPhysicalDiskConfig, BlueprintZoneType}; +use nexus_types::deployment::{ + OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, + OmicronZoneExternalSnatIp, +}; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::disk::DiskIdentity; -use omicron_uuid_kinds::GenericUuid; +use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use omicron_uuid_kinds::{ExternalIpKind, SledKind, ZpoolKind}; +use omicron_uuid_kinds::{ExternalIpUuid, GenericUuid, OmicronZoneUuid}; +use std::net::{IpAddr, SocketAddrV6}; use uuid::Uuid; /// See [`nexus_types::deployment::Blueprint`]. 
@@ -256,82 +266,435 @@ impl BpOmicronZone { blueprint_id: Uuid, sled_id: SledUuid, blueprint_zone: &BlueprintZoneConfig, - ) -> Result { + ) -> anyhow::Result { let external_ip_id = blueprint_zone .zone_type .external_networking() - .map(|(ip, _)| ip.id()); - let zone = OmicronZone::new( - sled_id, - blueprint_zone.id.into_untyped_uuid(), - blueprint_zone.underlay_address, - blueprint_zone.filesystem_pool.as_ref().map(|pool| pool.id()), - &blueprint_zone.zone_type.clone().into(), - external_ip_id, - )?; - Ok(Self { + .map(|(ip, _)| ip.id().into()); + + // Create a dummy record to start, then fill in the rest + let mut bp_omicron_zone = BpOmicronZone { + // Fill in the known fields that don't require inspecting + // `blueprint_zone.zone_type` blueprint_id, - sled_id: zone.sled_id.into(), - id: zone.id, - underlay_address: zone.underlay_address, - zone_type: zone.zone_type, - primary_service_ip: zone.primary_service_ip, - primary_service_port: zone.primary_service_port, - second_service_ip: zone.second_service_ip, - second_service_port: zone.second_service_port, - dataset_zpool_name: zone.dataset_zpool_name, - bp_nic_id: zone.nic_id, - dns_gz_address: zone.dns_gz_address, - dns_gz_address_index: zone.dns_gz_address_index, - ntp_ntp_servers: zone.ntp_ntp_servers, - ntp_dns_servers: zone.ntp_dns_servers, - ntp_domain: zone.ntp_domain, - nexus_external_tls: zone.nexus_external_tls, - nexus_external_dns_servers: zone.nexus_external_dns_servers, - snat_ip: zone.snat_ip, - snat_first_port: zone.snat_first_port, - snat_last_port: zone.snat_last_port, - disposition: to_db_bp_zone_disposition(blueprint_zone.disposition), - external_ip_id: zone.external_ip_id.map(From::from), + sled_id: sled_id.into(), + id: blueprint_zone.id.into_untyped_uuid(), + underlay_address: blueprint_zone.underlay_address.into(), + external_ip_id, filesystem_pool: blueprint_zone .filesystem_pool .as_ref() .map(|pool| pool.id().into()), - }) + disposition: to_db_bp_zone_disposition(blueprint_zone.disposition), + zone_type: blueprint_zone.zone_type.kind().into(), + + // Set the remainder of the fields to a default + primary_service_ip: "::1" + .parse::() + .unwrap() + .into(), + primary_service_port: 0.into(), + second_service_ip: None, + second_service_port: None, + dataset_zpool_name: None, + bp_nic_id: None, + dns_gz_address: None, + dns_gz_address_index: None, + ntp_ntp_servers: None, + ntp_dns_servers: None, + ntp_domain: None, + nexus_external_tls: None, + nexus_external_dns_servers: None, + snat_ip: None, + snat_first_port: None, + snat_last_port: None, + }; + + match &blueprint_zone.zone_type { + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + external_ip, + }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + + // Set the zone specific fields + let snat_cfg = external_ip.snat_cfg; + let (first_port, last_port) = snat_cfg.port_range_raw(); + bp_omicron_zone.ntp_ntp_servers = Some(ntp_servers.clone()); + bp_omicron_zone.ntp_dns_servers = Some( + dns_servers + .into_iter() + .cloned() + .map(IpNetwork::from) + .collect(), + ); + bp_omicron_zone.ntp_domain.clone_from(domain); + bp_omicron_zone.snat_ip = Some(IpNetwork::from(snat_cfg.ip)); + bp_omicron_zone.snat_first_port = + Some(SqlU16::from(first_port)); + bp_omicron_zone.snat_last_port = Some(SqlU16::from(last_port)); + bp_omicron_zone.bp_nic_id = Some(nic.id); + } + BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { address, 
dataset }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + bp_omicron_zone.set_zpool_name(dataset); + } + BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { address, dataset }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + bp_omicron_zone.set_zpool_name(dataset); + } + BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { address, dataset }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + bp_omicron_zone.set_zpool_name(dataset); + } + BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { address, dataset }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + bp_omicron_zone.set_zpool_name(dataset); + } + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, + dataset, + }) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + bp_omicron_zone.set_zpool_name(dataset); + } + BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { address }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + } + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset, + http_address, + dns_address, + nic, + }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(http_address); + bp_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + bp_omicron_zone.bp_nic_id = Some(nic.id); + bp_omicron_zone.second_service_ip = + Some(IpNetwork::from(dns_address.addr.ip())); + bp_omicron_zone.second_service_port = + Some(SqlU16::from(dns_address.addr.port())); + } + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(http_address); + bp_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + bp_omicron_zone.second_service_ip = + Some(IpNetwork::from(IpAddr::V6(*dns_address.ip()))); + bp_omicron_zone.second_service_port = + Some(SqlU16::from(dns_address.port())); + + bp_omicron_zone.dns_gz_address = + Some(ipv6::Ipv6Addr::from(gz_address)); + bp_omicron_zone.dns_gz_address_index = + Some(SqlU32::from(*gz_address_index)); + } + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + }, + ) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + + // Set the zone specific fields + bp_omicron_zone.ntp_ntp_servers = Some(ntp_servers.clone()); + bp_omicron_zone.ntp_dns_servers = Some( + dns_servers.iter().cloned().map(IpNetwork::from).collect(), + ); + bp_omicron_zone.ntp_domain.clone_from(domain); + } + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + }) => { + // Set the common fields + bp_omicron_zone + .set_primary_service_ip_and_port(internal_address); + + // Set the zone specific fields + bp_omicron_zone.bp_nic_id = Some(nic.id); + bp_omicron_zone.second_service_ip = + Some(IpNetwork::from(external_ip.ip)); + bp_omicron_zone.nexus_external_tls = Some(*external_tls); + bp_omicron_zone.nexus_external_dns_servers = Some( + external_dns_servers + .iter() + .cloned() + 
.map(IpNetwork::from) + .collect(), + ); + } + BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { + address, + }) => { + // Set the common fields + bp_omicron_zone.set_primary_service_ip_and_port(address); + } + } + + Ok(bp_omicron_zone) + } + + fn set_primary_service_ip_and_port(&mut self, address: &SocketAddrV6) { + let (primary_service_ip, primary_service_port) = + (ipv6::Ipv6Addr::from(*address.ip()), SqlU16::from(address.port())); + self.primary_service_ip = primary_service_ip; + self.primary_service_port = primary_service_port; + } + + fn set_zpool_name(&mut self, dataset: &OmicronZoneDataset) { + self.dataset_zpool_name = Some(dataset.pool_name.to_string()); + } + /// Convert an external ip from a `BpOmicronZone` to a `BlueprintZoneType` + /// representation. + fn external_ip_to_blueprint_zone_type( + external_ip: Option>, + ) -> anyhow::Result { + external_ip + .map(Into::into) + .ok_or_else(|| anyhow!("expected an external IP ID")) } pub fn into_blueprint_zone_config( self, nic_row: Option, - ) -> Result { - let zone = OmicronZone { - sled_id: self.sled_id.into(), - id: self.id, - underlay_address: self.underlay_address, - filesystem_pool: self.filesystem_pool.map(|id| id.into()), - zone_type: self.zone_type, - primary_service_ip: self.primary_service_ip, - primary_service_port: self.primary_service_port, - second_service_ip: self.second_service_ip, - second_service_port: self.second_service_port, - dataset_zpool_name: self.dataset_zpool_name, - nic_id: self.bp_nic_id, - dns_gz_address: self.dns_gz_address, - dns_gz_address_index: self.dns_gz_address_index, - ntp_ntp_servers: self.ntp_ntp_servers, - ntp_dns_servers: self.ntp_dns_servers, - ntp_domain: self.ntp_domain, - nexus_external_tls: self.nexus_external_tls, - nexus_external_dns_servers: self.nexus_external_dns_servers, - snat_ip: self.snat_ip, - snat_first_port: self.snat_first_port, - snat_last_port: self.snat_last_port, - external_ip_id: self.external_ip_id.map(From::from), + ) -> anyhow::Result { + // Build up a set of common fields for our `BlueprintZoneType`s + // + // Some of these are results that we only evaluate when used, because + // not all zone types use all common fields. + let primary_address = SocketAddrV6::new( + self.primary_service_ip.into(), + *self.primary_service_port, + 0, + 0, + ); + let dataset = + omicron_zone_config::dataset_zpool_name_to_omicron_zone_dataset( + self.dataset_zpool_name, + ); + + // There is a nested result here. If there is a caller error (the outer + // Result) we immediately return. We check the inner result later, but + // only if some code path tries to use `nic` and it's not present. 
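+        // (Concretely: the `?` on the call below handles the outer Result,
+        // leaving `nic` itself a Result that the match arms below unwrap
+        // with `nic?` only for zone types that actually carry a NIC.)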
+ let nic = omicron_zone_config::nic_row_to_network_interface( + self.id, + self.bp_nic_id, + nic_row.map(Into::into), + )?; + + let external_ip_id = + Self::external_ip_to_blueprint_zone_type(self.external_ip_id); + + let dns_address = + omicron_zone_config::secondary_ip_and_port_to_dns_address( + self.second_service_ip, + self.second_service_port, + ); + + let ntp_dns_servers = + omicron_zone_config::ntp_dns_servers_to_omicron_internal( + self.ntp_dns_servers, + ); + + let ntp_servers = omicron_zone_config::ntp_servers_to_omicron_internal( + self.ntp_ntp_servers, + ); + + let zone_type = match self.zone_type { + ZoneType::BoundaryNtp => { + let snat_cfg = match ( + self.snat_ip, + self.snat_first_port, + self.snat_last_port, + ) { + (Some(ip), Some(first_port), Some(last_port)) => { + nexus_types::inventory::SourceNatConfig::new( + ip.ip(), + *first_port, + *last_port, + ) + .context("bad SNAT config for boundary NTP")? + } + _ => bail!( + "expected non-NULL snat properties, \ + found at least one NULL" + ), + }; + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: primary_address, + ntp_servers: ntp_servers?, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + nic: nic?, + external_ip: OmicronZoneExternalSnatIp { + id: external_ip_id?, + snat_cfg, + }, + }, + ) + } + ZoneType::Clickhouse => { + BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { + address: primary_address, + dataset: dataset?, + }) + } + ZoneType::ClickhouseKeeper => BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + address: primary_address, + dataset: dataset?, + }, + ), + ZoneType::ClickhouseServer => BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address: primary_address, + dataset: dataset?, + }, + ), + + ZoneType::CockroachDb => BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address: primary_address, + dataset: dataset?, + }, + ), + ZoneType::Crucible => { + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address: primary_address, + dataset: dataset?, + }) + } + ZoneType::CruciblePantry => BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { + address: primary_address, + }, + ), + ZoneType::ExternalDns => BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: dataset?, + http_address: primary_address, + dns_address: OmicronZoneExternalFloatingAddr { + id: external_ip_id?, + addr: dns_address?, + }, + nic: nic?, + }, + ), + ZoneType::InternalDns => BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: dataset?, + http_address: primary_address, + dns_address: omicron_zone_config::to_internal_dns_address( + dns_address?, + )?, + gz_address: self + .dns_gz_address + .map(Into::into) + .ok_or_else(|| { + anyhow!("expected dns_gz_address, found none") + })?, + gz_address_index: *self.dns_gz_address_index.ok_or_else( + || anyhow!("expected dns_gz_address_index, found none"), + )?, + }, + ), + ZoneType::InternalNtp => BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: primary_address, + ntp_servers: ntp_servers?, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + }, + ), + ZoneType::Nexus => { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address: primary_address, + external_ip: OmicronZoneExternalFloatingIp { + id: external_ip_id?, + ip: self + .second_service_ip + .ok_or_else(|| { + anyhow!("expected second service IP") + })? 
+ .ip(), + }, + nic: nic?, + external_tls: self + .nexus_external_tls + .ok_or_else(|| anyhow!("expected 'external_tls'"))?, + external_dns_servers: self + .nexus_external_dns_servers + .ok_or_else(|| { + anyhow!("expected 'external_dns_servers'") + })? + .into_iter() + .map(|i| i.ip()) + .collect(), + }) + } + ZoneType::Oximeter => { + BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { + address: primary_address, + }) + } }; - zone.into_blueprint_zone_config( - self.disposition.into(), - nic_row.map(OmicronZoneNic::from), - ) + + Ok(BlueprintZoneConfig { + disposition: self.disposition.into(), + id: OmicronZoneUuid::from_untyped_uuid(self.id), + underlay_address: self.underlay_address.into(), + filesystem_pool: self + .filesystem_pool + .map(|id| ZpoolName::new_external(id.into())), + zone_type, + }) } } @@ -394,21 +757,6 @@ pub struct BpOmicronZoneNic { slot: SqlU8, } -impl From for OmicronZoneNic { - fn from(value: BpOmicronZoneNic) -> Self { - OmicronZoneNic { - id: value.id, - name: value.name, - ip: value.ip, - mac: value.mac, - subnet: value.subnet, - vni: value.vni, - is_primary: value.is_primary, - slot: value.slot, - } - } -} - impl BpOmicronZoneNic { pub fn new( blueprint_id: Uuid, @@ -440,6 +788,21 @@ impl BpOmicronZoneNic { } } +impl From for OmicronZoneNic { + fn from(value: BpOmicronZoneNic) -> Self { + OmicronZoneNic { + id: value.id, + name: value.name, + ip: value.ip, + mac: value.mac, + subnet: value.subnet, + vni: value.vni, + is_primary: value.is_primary, + slot: value.slot, + } + } +} + mod diesel_util { use crate::{ schema::bp_omicron_zone::disposition, to_db_bp_zone_disposition, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 4f0ff60f186..07374f7761f 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -4,7 +4,7 @@ //! Types for representing the hardware/software inventory in the database -use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; +use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, inv_nvme_disk_firmware, inv_omicron_zone, inv_omicron_zone_nic, @@ -18,7 +18,7 @@ use crate::{ impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, ServiceKind, SqlU16, SqlU32, SqlU8, }; -use anyhow::anyhow; +use anyhow::{anyhow, bail, Context, Result}; use chrono::DateTime; use chrono::Utc; use diesel::backend::Backend; @@ -28,14 +28,16 @@ use diesel::pg::Pg; use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; use ipnetwork::IpNetwork; +use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZonesConfig, + OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::inventory::{ BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionKind; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; @@ -43,6 +45,7 @@ use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolKind; use omicron_uuid_kinds::ZpoolUuid; +use std::net::{IpAddr, SocketAddrV6}; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. @@ -1136,73 +1139,351 @@ impl InvOmicronZone { sled_id: SledUuid, zone: &OmicronZoneConfig, ) -> Result { - // Inventory zones do not know the external IP ID. 
- let external_ip_id = None; - let zone = OmicronZone::new( - sled_id, - zone.id, - zone.underlay_address, - zone.filesystem_pool.as_ref().map(|pool| pool.id()), - &zone.zone_type, - external_ip_id, - )?; - Ok(Self { + // Create a dummy record to start, then fill in the rest + // according to the zone type + let mut inv_omicron_zone = InvOmicronZone { + // Fill in the known fields that don't require inspecting + // `zone.zone_type` inv_collection_id: inv_collection_id.into(), - sled_id: zone.sled_id.into(), + sled_id: sled_id.into(), id: zone.id, - underlay_address: zone.underlay_address, - zone_type: zone.zone_type, - primary_service_ip: zone.primary_service_ip, - primary_service_port: zone.primary_service_port, - second_service_ip: zone.second_service_ip, - second_service_port: zone.second_service_port, - dataset_zpool_name: zone.dataset_zpool_name, - nic_id: zone.nic_id, - dns_gz_address: zone.dns_gz_address, - dns_gz_address_index: zone.dns_gz_address_index, - ntp_ntp_servers: zone.ntp_ntp_servers, - ntp_dns_servers: zone.ntp_dns_servers, - ntp_domain: zone.ntp_domain, - nexus_external_tls: zone.nexus_external_tls, - nexus_external_dns_servers: zone.nexus_external_dns_servers, - snat_ip: zone.snat_ip, - snat_first_port: zone.snat_first_port, - snat_last_port: zone.snat_last_port, - filesystem_pool: zone.filesystem_pool.map(|id| id.into()), - }) + underlay_address: zone.underlay_address.into(), + filesystem_pool: zone + .filesystem_pool + .as_ref() + .map(|pool| pool.id().into()), + zone_type: zone.zone_type.kind().into(), + + // Set the remainder of the fields to a default + primary_service_ip: "::1" + .parse::() + .unwrap() + .into(), + primary_service_port: 0.into(), + second_service_ip: None, + second_service_port: None, + dataset_zpool_name: None, + nic_id: None, + dns_gz_address: None, + dns_gz_address_index: None, + ntp_ntp_servers: None, + ntp_dns_servers: None, + ntp_domain: None, + nexus_external_tls: None, + nexus_external_dns_servers: None, + snat_ip: None, + snat_first_port: None, + snat_last_port: None, + }; + + match &zone.zone_type { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + + // Set the zone specific fields + let (first_port, last_port) = snat_cfg.port_range_raw(); + inv_omicron_zone.ntp_ntp_servers = Some(ntp_servers.clone()); + inv_omicron_zone.ntp_dns_servers = Some( + dns_servers + .into_iter() + .cloned() + .map(IpNetwork::from) + .collect(), + ); + inv_omicron_zone.ntp_domain.clone_from(domain); + inv_omicron_zone.snat_ip = Some(IpNetwork::from(snat_cfg.ip)); + inv_omicron_zone.snat_first_port = + Some(SqlU16::from(first_port)); + inv_omicron_zone.snat_last_port = Some(SqlU16::from(last_port)); + inv_omicron_zone.nic_id = Some(nic.id); + } + OmicronZoneType::Clickhouse { address, dataset } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + inv_omicron_zone.set_zpool_name(dataset); + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + inv_omicron_zone.set_zpool_name(dataset); + } + OmicronZoneType::ClickhouseServer { address, dataset } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + inv_omicron_zone.set_zpool_name(dataset); + } + OmicronZoneType::CockroachDb { address, dataset } => { + // Set the common fields + 
inv_omicron_zone.set_primary_service_ip_and_port(address); + inv_omicron_zone.set_zpool_name(dataset); + } + OmicronZoneType::Crucible { address, dataset } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + inv_omicron_zone.set_zpool_name(dataset); + } + OmicronZoneType::CruciblePantry { address } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + } + OmicronZoneType::ExternalDns { + dataset, + http_address, + dns_address, + nic, + } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(http_address); + inv_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + inv_omicron_zone.nic_id = Some(nic.id); + inv_omicron_zone.second_service_ip = + Some(IpNetwork::from(dns_address.ip())); + inv_omicron_zone.second_service_port = + Some(SqlU16::from(dns_address.port())); + } + OmicronZoneType::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(http_address); + inv_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + inv_omicron_zone.second_service_ip = + Some(IpNetwork::from(IpAddr::V6(*dns_address.ip()))); + inv_omicron_zone.second_service_port = + Some(SqlU16::from(dns_address.port())); + + inv_omicron_zone.dns_gz_address = + Some(ipv6::Ipv6Addr::from(gz_address)); + inv_omicron_zone.dns_gz_address_index = + Some(SqlU32::from(*gz_address_index)); + } + OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + + // Set the zone specific fields + inv_omicron_zone.ntp_ntp_servers = Some(ntp_servers.clone()); + inv_omicron_zone.ntp_dns_servers = Some( + dns_servers.iter().cloned().map(IpNetwork::from).collect(), + ); + inv_omicron_zone.ntp_domain.clone_from(domain); + } + OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => { + // Set the common fields + inv_omicron_zone + .set_primary_service_ip_and_port(internal_address); + + // Set the zone specific fields + inv_omicron_zone.nic_id = Some(nic.id); + inv_omicron_zone.second_service_ip = + Some(IpNetwork::from(*external_ip)); + inv_omicron_zone.nexus_external_tls = Some(*external_tls); + inv_omicron_zone.nexus_external_dns_servers = Some( + external_dns_servers + .iter() + .cloned() + .map(IpNetwork::from) + .collect(), + ); + } + OmicronZoneType::Oximeter { address } => { + // Set the common fields + inv_omicron_zone.set_primary_service_ip_and_port(address); + } + } + + Ok(inv_omicron_zone) + } + + fn set_primary_service_ip_and_port(&mut self, address: &SocketAddrV6) { + let (primary_service_ip, primary_service_port) = + (ipv6::Ipv6Addr::from(*address.ip()), SqlU16::from(address.port())); + self.primary_service_ip = primary_service_ip; + self.primary_service_port = primary_service_port; + } + + fn set_zpool_name(&mut self, dataset: &OmicronZoneDataset) { + self.dataset_zpool_name = Some(dataset.pool_name.to_string()); } pub fn into_omicron_zone_config( self, nic_row: Option, ) -> Result { - let zone = OmicronZone { - sled_id: self.sled_id.into(), - id: self.id, - underlay_address: self.underlay_address, - filesystem_pool: self.filesystem_pool.map(|id| id.into()), - zone_type: self.zone_type, - primary_service_ip: self.primary_service_ip, - primary_service_port: 
self.primary_service_port, - second_service_ip: self.second_service_ip, - second_service_port: self.second_service_port, - dataset_zpool_name: self.dataset_zpool_name, - nic_id: self.nic_id, - dns_gz_address: self.dns_gz_address, - dns_gz_address_index: self.dns_gz_address_index, - ntp_ntp_servers: self.ntp_ntp_servers, - ntp_dns_servers: self.ntp_dns_servers, - ntp_domain: self.ntp_domain, - nexus_external_tls: self.nexus_external_tls, - nexus_external_dns_servers: self.nexus_external_dns_servers, - snat_ip: self.snat_ip, - snat_first_port: self.snat_first_port, - snat_last_port: self.snat_last_port, - // Inventory zones don't know an external IP ID, and Omicron zone - // configs don't need it. - external_ip_id: None, + // Build up a set of common fields for our `OmicronZoneType`s + // + // Some of these are results that we only evaluate when used, because + // not all zone types use all common fields. + let primary_address = SocketAddrV6::new( + self.primary_service_ip.into(), + *self.primary_service_port, + 0, + 0, + ); + + let dataset = + omicron_zone_config::dataset_zpool_name_to_omicron_zone_dataset( + self.dataset_zpool_name, + ); + + // There is a nested result here. If there is a caller error (the outer + // Result) we immediately return. We check the inner result later, but + // only if some code path tries to use `nic` and it's not present. + let nic = omicron_zone_config::nic_row_to_network_interface( + self.id, + self.nic_id, + nic_row.map(Into::into), + )?; + + let dns_address = + omicron_zone_config::secondary_ip_and_port_to_dns_address( + self.second_service_ip, + self.second_service_port, + ); + + let ntp_dns_servers = + omicron_zone_config::ntp_dns_servers_to_omicron_internal( + self.ntp_dns_servers, + ); + + let ntp_servers = omicron_zone_config::ntp_servers_to_omicron_internal( + self.ntp_ntp_servers, + ); + + let zone_type = match self.zone_type { + ZoneType::BoundaryNtp => { + let snat_cfg = match ( + self.snat_ip, + self.snat_first_port, + self.snat_last_port, + ) { + (Some(ip), Some(first_port), Some(last_port)) => { + nexus_types::inventory::SourceNatConfig::new( + ip.ip(), + *first_port, + *last_port, + ) + .context("bad SNAT config for boundary NTP")? 
+ } + _ => bail!( + "expected non-NULL snat properties, \ + found at least one NULL" + ), + }; + OmicronZoneType::BoundaryNtp { + address: primary_address, + ntp_servers: ntp_servers?, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + nic: nic?, + snat_cfg, + } + } + ZoneType::Clickhouse => OmicronZoneType::Clickhouse { + address: primary_address, + dataset: dataset?, + }, + ZoneType::ClickhouseKeeper => OmicronZoneType::ClickhouseKeeper { + address: primary_address, + dataset: dataset?, + }, + ZoneType::ClickhouseServer => OmicronZoneType::ClickhouseServer { + address: primary_address, + dataset: dataset?, + }, + ZoneType::CockroachDb => OmicronZoneType::CockroachDb { + address: primary_address, + dataset: dataset?, + }, + ZoneType::Crucible => OmicronZoneType::Crucible { + address: primary_address, + dataset: dataset?, + }, + ZoneType::CruciblePantry => { + OmicronZoneType::CruciblePantry { address: primary_address } + } + ZoneType::ExternalDns => OmicronZoneType::ExternalDns { + dataset: dataset?, + http_address: primary_address, + dns_address: dns_address?, + nic: nic?, + }, + ZoneType::InternalDns => OmicronZoneType::InternalDns { + dataset: dataset?, + http_address: primary_address, + dns_address: omicron_zone_config::to_internal_dns_address( + dns_address?, + )?, + gz_address: self.dns_gz_address.map(Into::into).ok_or_else( + || anyhow!("expected dns_gz_address, found none"), + )?, + gz_address_index: *self.dns_gz_address_index.ok_or_else( + || anyhow!("expected dns_gz_address_index, found none"), + )?, + }, + ZoneType::InternalNtp => OmicronZoneType::InternalNtp { + address: primary_address, + ntp_servers: ntp_servers?, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + }, + ZoneType::Nexus => OmicronZoneType::Nexus { + internal_address: primary_address, + external_ip: self + .second_service_ip + .ok_or_else(|| anyhow!("expected second service IP"))? + .ip(), + nic: nic?, + external_tls: self + .nexus_external_tls + .ok_or_else(|| anyhow!("expected 'external_tls'"))?, + external_dns_servers: self + .nexus_external_dns_servers + .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? + .into_iter() + .map(|i| i.ip()) + .collect(), + }, + ZoneType::Oximeter => { + OmicronZoneType::Oximeter { address: primary_address } + } }; - zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from)) + + Ok(OmicronZoneConfig { + id: self.id, + underlay_address: self.underlay_address.into(), + filesystem_pool: self + .filesystem_pool + .map(|id| ZpoolName::new_external(id.into())), + zone_type, + }) } } diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index 23e1ef2dd91..0abc2bb4eca 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -2,613 +2,113 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Types for sharing nontrivial conversions between various `OmicronZoneConfig` -//! database serializations and the corresponding Nexus/sled-agent type +//! Helper types and methods for sharing nontrivial conversions between various +//! `OmicronZoneConfig` database serializations and the corresponding Nexus/ +//! sled-agent type //! //! Both inventory and deployment have nearly-identical tables to serialize -//! `OmicronZoneConfigs` that are collected or generated, respectively. We -//! expect those tables to diverge over time (e.g., inventory may start +//! 
`OmicronZoneConfigs` that are collected or generated, respectively.
+//! We expect those tables to diverge over time (e.g., inventory may start
 //! collecting extra metadata like uptime). This module provides conversion
 //! helpers for the parts of those tables that are common between the two.
 
-use crate::inventory::ZoneType;
-use crate::{ipv6, MacAddr, Name, SqlU16, SqlU32, SqlU8};
+use crate::{MacAddr, Name, SqlU16, SqlU32, SqlU8};
 use anyhow::{anyhow, bail, ensure, Context};
 use ipnetwork::IpNetwork;
-use nexus_sled_agent_shared::inventory::{
-    OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType,
-};
-use nexus_types::deployment::{
-    blueprint_zone_type, BlueprintZoneDisposition, BlueprintZoneType,
-    OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp,
-    OmicronZoneExternalSnatIp,
-};
+use nexus_sled_agent_shared::inventory::OmicronZoneDataset;
 use nexus_types::inventory::NetworkInterface;
 use omicron_common::api::internal::shared::NetworkInterfaceKind;
-use omicron_common::zpool_name::ZpoolName;
-use omicron_uuid_kinds::{
-    ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid,
-};
-use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};
+use std::net::{IpAddr, SocketAddr, SocketAddrV6};
 use uuid::Uuid;
 
-#[derive(Debug)]
-pub(crate) struct OmicronZone {
-    pub(crate) sled_id: SledUuid,
-    pub(crate) id: Uuid,
-    pub(crate) underlay_address: ipv6::Ipv6Addr,
-    pub(crate) filesystem_pool: Option<ZpoolUuid>,
-    pub(crate) zone_type: ZoneType,
-    pub(crate) primary_service_ip: ipv6::Ipv6Addr,
-    pub(crate) primary_service_port: SqlU16,
-    pub(crate) second_service_ip: Option<IpNetwork>,
-    pub(crate) second_service_port: Option<SqlU16>,
-    pub(crate) dataset_zpool_name: Option<String>,
-    pub(crate) nic_id: Option<Uuid>,
-    pub(crate) dns_gz_address: Option<ipv6::Ipv6Addr>,
-    pub(crate) dns_gz_address_index: Option<SqlU32>,
-    pub(crate) ntp_ntp_servers: Option<Vec<String>>,
-    pub(crate) ntp_dns_servers: Option<Vec<IpNetwork>>,
-    pub(crate) ntp_domain: Option<String>,
-    pub(crate) nexus_external_tls: Option<bool>,
-    pub(crate) nexus_external_dns_servers: Option<Vec<IpNetwork>>,
-    pub(crate) snat_ip: Option<IpNetwork>,
-    pub(crate) snat_first_port: Option<SqlU16>,
-    pub(crate) snat_last_port: Option<SqlU16>,
-    // Only present for BlueprintZoneConfig; always `None` for OmicronZoneConfig
-    pub(crate) external_ip_id: Option<ExternalIpUuid>,
+/// Convert ntp server config from the DB representation to the
+/// omicron internal representation
+pub fn ntp_servers_to_omicron_internal(
+    ntp_ntp_servers: Option<Vec<String>>,
+) -> anyhow::Result<Vec<String>> {
+    ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp servers"))
 }
 
-impl OmicronZone {
-    pub(crate) fn new(
-        sled_id: SledUuid,
-        zone_id: Uuid,
-        zone_underlay_address: Ipv6Addr,
-        filesystem_pool: Option<ZpoolUuid>,
-        zone_type: &OmicronZoneType,
-        external_ip_id: Option<ExternalIpUuid>,
-    ) -> anyhow::Result<Self> {
-        let id = zone_id;
-        let underlay_address = ipv6::Ipv6Addr::from(zone_underlay_address);
-        let mut nic_id = None;
-        let mut dns_gz_address = None;
-        let mut dns_gz_address_index = None;
-        let mut ntp_ntp_servers = None;
-        let mut ntp_dns_servers = None;
-        let mut ntp_ntp_domain = None;
-        let mut nexus_external_tls = None;
-        let mut nexus_external_dns_servers = None;
-        let mut snat_ip = None;
-        let mut snat_first_port = None;
-        let mut snat_last_port = None;
-        let mut second_service_ip = None;
-        let mut second_service_port = None;
-
-        let (zone_type, primary_service_sockaddr, dataset) = match zone_type {
-            OmicronZoneType::BoundaryNtp {
-                address,
-                ntp_servers,
-                dns_servers,
-                domain,
-                nic,
-                snat_cfg,
-            } => {
-                let (first_port, last_port) = snat_cfg.port_range_raw();
-                ntp_ntp_servers = Some(ntp_servers.clone());
-
ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain.clone_from(domain); - snat_ip = Some(IpNetwork::from(snat_cfg.ip)); - snat_first_port = Some(SqlU16::from(first_port)); - snat_last_port = Some(SqlU16::from(last_port)); - nic_id = Some(nic.id); - (ZoneType::BoundaryNtp, address, None) - } - OmicronZoneType::Clickhouse { address, dataset } => { - (ZoneType::Clickhouse, address, Some(dataset)) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - (ZoneType::ClickhouseKeeper, address, Some(dataset)) - } - OmicronZoneType::ClickhouseServer { address, dataset } => { - (ZoneType::ClickhouseServer, address, Some(dataset)) - } - OmicronZoneType::CockroachDb { address, dataset } => { - (ZoneType::CockroachDb, address, Some(dataset)) - } - OmicronZoneType::Crucible { address, dataset } => { - (ZoneType::Crucible, address, Some(dataset)) - } - OmicronZoneType::CruciblePantry { address } => { - (ZoneType::CruciblePantry, address, None) - } - OmicronZoneType::ExternalDns { - dataset, - http_address, - dns_address, - nic, - } => { - nic_id = Some(nic.id); - second_service_ip = Some(dns_address.ip()); - second_service_port = Some(SqlU16::from(dns_address.port())); - (ZoneType::ExternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - } => { - dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); - dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); - second_service_ip = Some(IpAddr::V6(*dns_address.ip())); - second_service_port = Some(SqlU16::from(dns_address.port())); - (ZoneType::InternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain.clone_from(domain); - (ZoneType::InternalNtp, address, None) - } - OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - } => { - nic_id = Some(nic.id); - nexus_external_tls = Some(*external_tls); - nexus_external_dns_servers = Some(external_dns_servers.clone()); - second_service_ip = Some(*external_ip); - (ZoneType::Nexus, internal_address, None) - } - OmicronZoneType::Oximeter { address } => { - (ZoneType::Oximeter, address, None) - } - }; - - let dataset_zpool_name = dataset.map(|d| d.pool_name.to_string()); - let (primary_service_ip, primary_service_port) = ( - ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), - SqlU16::from(primary_service_sockaddr.port()), - ); - - Ok(Self { - sled_id, - id, - underlay_address, - filesystem_pool, - zone_type, - primary_service_ip, - primary_service_port, - second_service_ip: second_service_ip.map(IpNetwork::from), - second_service_port, - dataset_zpool_name, - nic_id, - dns_gz_address, - dns_gz_address_index, - ntp_ntp_servers, - ntp_dns_servers: ntp_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - ntp_domain: ntp_ntp_domain, - nexus_external_tls, - nexus_external_dns_servers: nexus_external_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - snat_ip, - snat_first_port, - snat_last_port, - external_ip_id, - }) - } +/// Convert ntp dns server config from the DB representation to +/// the omicron internal representation. 
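These extracted helpers all share one shape: each converts a nullable database column into a `Result` eagerly, but the error is only propagated if the zone type being rebuilt actually consumes that field. A minimal, self-contained sketch of that idea (the names here are illustrative, not this module's API):

```rust
use anyhow::{anyhow, Result};

// Illustrative stand-in for converting one nullable DB column.
fn required<T>(col: Option<T>, what: &str) -> Result<T> {
    col.ok_or_else(|| anyhow!("expected {what}, found null"))
}

fn main() -> Result<()> {
    // Both conversions are evaluated up front...
    let ntp_servers =
        required(Some(vec!["ntp.example".to_string()]), "ntp servers");
    let ntp_domain = required(None::<String>, "ntp domain");

    // ...but only fields this zone type consumes can fail the conversion.
    // An NTP zone would unwrap both; a Crucible zone would unwrap neither.
    let _servers = ntp_servers?;
    let _ = ntp_domain; // never unwrapped, so its error never surfaces
    Ok(())
}
```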
+pub fn ntp_dns_servers_to_omicron_internal( + ntp_dns_servers: Option>, +) -> anyhow::Result> { + ntp_dns_servers + .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) + .map(|list| list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect()) +} - pub(crate) fn into_blueprint_zone_config( - self, - disposition: BlueprintZoneDisposition, - nic_row: Option, - ) -> anyhow::Result { - let common = self.into_zone_config_common(nic_row)?; - let address = common.primary_service_address; - let zone_type = match common.zone_type { - ZoneType::BoundaryNtp => { - let snat_cfg = match ( - common.snat_ip, - common.snat_first_port, - common.snat_last_port, - ) { - (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig::new( - ip.ip(), - *first_port, - *last_port, - ) - .context("bad SNAT config for boundary NTP")? - } - _ => bail!( - "expected non-NULL snat properties, \ - found at least one NULL" - ), - }; - BlueprintZoneType::BoundaryNtp( - blueprint_zone_type::BoundaryNtp { - address, - dns_servers: common.ntp_dns_servers?, - domain: common.ntp_domain, - nic: common.nic?, - ntp_servers: common.ntp_ntp_servers?, - external_ip: OmicronZoneExternalSnatIp { - id: common.external_ip_id?, - snat_cfg, - }, - }, - ) - } - ZoneType::Clickhouse => { - BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { - address, - dataset: common.dataset?, - }) - } - ZoneType::ClickhouseKeeper => BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { - address, - dataset: common.dataset?, - }, - ), - ZoneType::ClickhouseServer => BlueprintZoneType::ClickhouseServer( - blueprint_zone_type::ClickhouseServer { - address, - dataset: common.dataset?, - }, - ), - ZoneType::CockroachDb => BlueprintZoneType::CockroachDb( - blueprint_zone_type::CockroachDb { - address, - dataset: common.dataset?, - }, - ), - ZoneType::Crucible => { - BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { - address, - dataset: common.dataset?, - }) - } - ZoneType::CruciblePantry => BlueprintZoneType::CruciblePantry( - blueprint_zone_type::CruciblePantry { address }, - ), - ZoneType::ExternalDns => BlueprintZoneType::ExternalDns( - blueprint_zone_type::ExternalDns { - dataset: common.dataset?, - dns_address: OmicronZoneExternalFloatingAddr { - id: common.external_ip_id?, - addr: common.dns_address?, - }, - http_address: address, - nic: common.nic?, - }, - ), - ZoneType::InternalDns => BlueprintZoneType::InternalDns( - blueprint_zone_type::InternalDns { - dataset: common.dataset?, - dns_address: to_internal_dns_address(common.dns_address?)?, - http_address: address, - gz_address: *common.dns_gz_address.ok_or_else(|| { - anyhow!("expected dns_gz_address, found none") - })?, - gz_address_index: *common.dns_gz_address_index.ok_or_else( - || anyhow!("expected dns_gz_address_index, found none"), - )?, - }, - ), - ZoneType::InternalNtp => BlueprintZoneType::InternalNtp( - blueprint_zone_type::InternalNtp { - address, - dns_servers: common.ntp_dns_servers?, - domain: common.ntp_domain, - ntp_servers: common.ntp_ntp_servers?, - }, - ), - ZoneType::Nexus => { - BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { - internal_address: address, - nic: common.nic?, - external_tls: common - .nexus_external_tls - .ok_or_else(|| anyhow!("expected 'external_tls'"))?, - external_ip: OmicronZoneExternalFloatingIp { - id: common.external_ip_id?, - ip: common - .second_service_ip - .ok_or_else(|| { - anyhow!("expected second service IP") - })? 
- .ip(), - }, - external_dns_servers: common - .nexus_external_dns_servers - .ok_or_else(|| { - anyhow!("expected 'external_dns_servers'") - })? - .into_iter() - .map(|i| i.ip()) - .collect(), - }) - } - ZoneType::Oximeter => { - BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { - address, - }) - } - }; - Ok(nexus_types::deployment::BlueprintZoneConfig { - disposition, - id: OmicronZoneUuid::from_untyped_uuid(common.id), - underlay_address: std::net::Ipv6Addr::from(common.underlay_address), - filesystem_pool: common - .filesystem_pool - .map(|id| ZpoolName::new_external(id)), - zone_type, - }) +/// Assemble a value that we can use to extract the NIC _if necessary_ +/// and report an error if it was needed but not found. +/// +/// Any error here should be impossible. By the time we get here, the +/// caller should have provided `nic_row` iff there's a corresponding +/// `nic_id` in this row, and the ids should match up. And whoever +/// created this row ought to have provided a nic_id iff this type of +/// zone needs a NIC. This last issue is not under our control, though, +/// so we definitely want to handle that as an operational error. The +/// others could arguably be programmer errors (i.e., we could `assert`), +/// but it seems excessive to crash here. +/// +/// The outer result represents a programmer error and should be unwrapped +/// immediately. The inner result represents an operational error and should +/// only be unwrapped when the nic is used. +pub fn nic_row_to_network_interface( + zone_id: Uuid, + nic_id: Option, + nic_row: Option, +) -> anyhow::Result> { + match (nic_id, nic_row) { + (Some(expected_id), Some(nic_row)) => { + ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); + Ok(nic_row.into_network_interface_for_zone(zone_id)) + } + (None, None) => Ok(Err(anyhow!( + "expected zone to have an associated NIC, but it doesn't" + ))), + (Some(_), None) => bail!("caller provided no NIC"), + (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), } +} - pub(crate) fn into_omicron_zone_config( - self, - nic_row: Option, - ) -> anyhow::Result { - let common = self.into_zone_config_common(nic_row)?; - let address = common.primary_service_address; - - let zone_type = match common.zone_type { - ZoneType::BoundaryNtp => { - let snat_cfg = match ( - common.snat_ip, - common.snat_first_port, - common.snat_last_port, - ) { - (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig::new( - ip.ip(), - *first_port, - *last_port, - ) - .context("bad SNAT config for boundary NTP")? - } - _ => bail!( - "expected non-NULL snat properties, \ - found at least one NULL" - ), - }; - OmicronZoneType::BoundaryNtp { - address, - dns_servers: common.ntp_dns_servers?, - domain: common.ntp_domain, - nic: common.nic?, - ntp_servers: common.ntp_ntp_servers?, - snat_cfg, - } - } - ZoneType::Clickhouse => OmicronZoneType::Clickhouse { - address, - dataset: common.dataset?, - }, - ZoneType::ClickhouseKeeper => OmicronZoneType::ClickhouseKeeper { - address, - dataset: common.dataset?, - }, - ZoneType::ClickhouseServer => OmicronZoneType::ClickhouseServer { - address, - dataset: common.dataset?, - }, - ZoneType::CockroachDb => OmicronZoneType::CockroachDb { - address, - dataset: common.dataset?, - }, - ZoneType::Crucible => { - OmicronZoneType::Crucible { address, dataset: common.dataset? 
} - } - ZoneType::CruciblePantry => { - OmicronZoneType::CruciblePantry { address } - } - ZoneType::ExternalDns => OmicronZoneType::ExternalDns { - dataset: common.dataset?, - dns_address: common.dns_address?, - http_address: address, - nic: common.nic?, - }, - ZoneType::InternalDns => OmicronZoneType::InternalDns { - dataset: common.dataset?, - dns_address: to_internal_dns_address(common.dns_address?)?, - http_address: address, - gz_address: *common.dns_gz_address.ok_or_else(|| { - anyhow!("expected dns_gz_address, found none") +/// Convert a dataset from a DB representation to a an Omicron internal +/// representation +pub fn dataset_zpool_name_to_omicron_zone_dataset( + dataset_zpool_name: Option, +) -> anyhow::Result { + dataset_zpool_name + .map(|zpool_name| -> Result<_, anyhow::Error> { + Ok(OmicronZoneDataset { + pool_name: zpool_name.parse().map_err(|e| { + anyhow!("parsing zpool name {:?}: {}", zpool_name, e) })?, - gz_address_index: *common.dns_gz_address_index.ok_or_else( - || anyhow!("expected dns_gz_address_index, found none"), - )?, - }, - ZoneType::InternalNtp => OmicronZoneType::InternalNtp { - address, - dns_servers: common.ntp_dns_servers?, - domain: common.ntp_domain, - ntp_servers: common.ntp_ntp_servers?, - }, - ZoneType::Nexus => OmicronZoneType::Nexus { - internal_address: address, - nic: common.nic?, - external_tls: common - .nexus_external_tls - .ok_or_else(|| anyhow!("expected 'external_tls'"))?, - external_ip: common - .second_service_ip - .ok_or_else(|| anyhow!("expected second service IP"))? - .ip(), - external_dns_servers: common - .nexus_external_dns_servers - .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? - .into_iter() - .map(|i| i.ip()) - .collect(), - }, - ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, - }; - Ok(OmicronZoneConfig { - id: common.id, - underlay_address: std::net::Ipv6Addr::from(common.underlay_address), - filesystem_pool: common - .filesystem_pool - .map(|id| ZpoolName::new_external(id)), - zone_type, - }) - } - - fn into_zone_config_common( - self, - nic_row: Option, - ) -> anyhow::Result { - let primary_service_address = SocketAddrV6::new( - std::net::Ipv6Addr::from(self.primary_service_ip), - *self.primary_service_port, - 0, - 0, - ); - - // Assemble a value that we can use to extract the NIC _if necessary_ - // and report an error if it was needed but not found. - // - // Any error here should be impossible. By the time we get here, the - // caller should have provided `nic_row` iff there's a corresponding - // `nic_id` in this row, and the ids should match up. And whoever - // created this row ought to have provided a nic_id iff this type of - // zone needs a NIC. This last issue is not under our control, though, - // so we definitely want to handle that as an operational error. The - // others could arguably be programmer errors (i.e., we could `assert`), - // but it seems excessive to crash here. - // - // Note that we immediately return for any of the caller errors here. - // For the other error, we will return only later, if some code path - // below tries to use `nic` when it's not present. - let nic = match (self.nic_id, nic_row) { - (Some(expected_id), Some(nic_row)) => { - ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); - Ok(nic_row.into_network_interface_for_zone(self.id)?) - } - // We don't expect and don't have a NIC. This is reasonable, so we - // don't `bail!` like we do in the next two cases, but we also - // _don't have a NIC_. 
Put an error into `nic`, and then if we land - // in a zone below that expects one, we'll fail then. - (None, None) => Err(anyhow!( - "expected zone to have an associated NIC, but it doesn't" - )), - (Some(_), None) => bail!("caller provided no NIC"), - (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), - }; - - // Similarly, assemble a value that we can use to extract the dataset, - // if necessary. We only return this error if code below tries to use - // this value. - let dataset = self - .dataset_zpool_name - .map(|zpool_name| -> Result<_, anyhow::Error> { - Ok(OmicronZoneDataset { - pool_name: zpool_name.parse().map_err(|e| { - anyhow!("parsing zpool name {:?}: {}", zpool_name, e) - })?, - }) }) - .transpose()? - .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); - - // Do the same for the DNS server address. - let dns_address = - match (self.second_service_ip, self.second_service_port) { - (Some(dns_ip), Some(dns_port)) => { - Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port)) - } - _ => Err(anyhow!( - "expected second service IP and port, \ - found one missing" - )), - }; - - // Do the same for NTP zone properties. - let ntp_dns_servers = self - .ntp_dns_servers - .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) - .map(|list| { - list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() - }); - let ntp_ntp_servers = - self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); - - // Do the same for the external IP ID. - let external_ip_id = - self.external_ip_id.context("expected an external IP ID"); - - Ok(ZoneConfigCommon { - id: self.id, - underlay_address: self.underlay_address, - filesystem_pool: self.filesystem_pool, - zone_type: self.zone_type, - primary_service_address, - snat_ip: self.snat_ip, - snat_first_port: self.snat_first_port, - snat_last_port: self.snat_last_port, - ntp_domain: self.ntp_domain, - dns_gz_address: self.dns_gz_address, - dns_gz_address_index: self.dns_gz_address_index, - nexus_external_tls: self.nexus_external_tls, - nexus_external_dns_servers: self.nexus_external_dns_servers, - second_service_ip: self.second_service_ip, - nic, - dataset, - dns_address, - ntp_dns_servers, - ntp_ntp_servers, - external_ip_id, }) - } + .transpose()? + .ok_or_else(|| anyhow!("expected dataset zpool name, found none")) } -struct ZoneConfigCommon { - id: Uuid, - underlay_address: ipv6::Ipv6Addr, - filesystem_pool: Option, - zone_type: ZoneType, - primary_service_address: SocketAddrV6, - snat_ip: Option, - snat_first_port: Option, - snat_last_port: Option, - ntp_domain: Option, - dns_gz_address: Option, - dns_gz_address_index: Option, - nexus_external_tls: Option, - nexus_external_dns_servers: Option>, +/// Convert the secondary ip and port to a dns address +pub fn secondary_ip_and_port_to_dns_address( second_service_ip: Option, - // These properties may or may not be needed, depending on the zone type. We - // store results here that can be unpacked once we determine our zone type. - nic: anyhow::Result, - dataset: anyhow::Result, - // Note that external DNS is SocketAddr (also supports v4) while internal - // DNS is always v6. 
- dns_address: anyhow::Result, - ntp_dns_servers: anyhow::Result>, - ntp_ntp_servers: anyhow::Result>, - external_ip_id: anyhow::Result, + second_service_port: Option, +) -> anyhow::Result { + match (second_service_ip, second_service_port) { + (Some(dns_ip), Some(dns_port)) => { + Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port)) + } + _ => Err(anyhow!( + "expected second service IP and port, found one missing" + )), + } } -// Ideally this would be a method on `ZoneConfigCommon`, but that's more -// annoying to deal with because often, at the time this function is called, -// part of `ZoneConfigCommon` has already been moved out. -fn to_internal_dns_address( - external_address: SocketAddr, +/// Extract a SocketAddrV6 from a SocketAddr for a given dns address +pub fn to_internal_dns_address( + address: SocketAddr, ) -> anyhow::Result { - match external_address { + match address { SocketAddr::V4(address) => { bail!( "expected internal DNS address to be v6, found v4: {:?}", diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 74a7356adbd..c2fab2de5ad 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -22,6 +22,7 @@ impl_enum_type!( #[diesel(sql_type = ProducerKindEnum)] pub enum ProducerKind; + ManagementGateway => b"management_gateway" SledAgent => b"sled_agent" Service => b"service" Instance => b"instance" @@ -30,6 +31,9 @@ impl_enum_type!( impl From for ProducerKind { fn from(kind: internal::nexus::ProducerKind) -> Self { match kind { + internal::nexus::ProducerKind::ManagementGateway => { + ProducerKind::ManagementGateway + } internal::nexus::ProducerKind::SledAgent => ProducerKind::SledAgent, internal::nexus::ProducerKind::Service => ProducerKind::Service, internal::nexus::ProducerKind::Instance => ProducerKind::Instance, @@ -40,6 +44,9 @@ impl From for ProducerKind { impl From for internal::nexus::ProducerKind { fn from(kind: ProducerKind) -> Self { match kind { + ProducerKind::ManagementGateway => { + internal::nexus::ProducerKind::ManagementGateway + } ProducerKind::SledAgent => internal::nexus::ProducerKind::SledAgent, ProducerKind::Service => internal::nexus::ProducerKind::Service, ProducerKind::Instance => internal::nexus::ProducerKind::Instance, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 4da879fa799..f8bf6bc32ec 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -139,35 +139,28 @@ table! { table! { switch_port_settings_link_config (port_settings_id, link_name) { port_settings_id -> Uuid, - lldp_service_config_id -> Uuid, link_name -> Text, mtu -> Int4, fec -> crate::SwitchLinkFecEnum, speed -> crate::SwitchLinkSpeedEnum, autoneg -> Bool, + lldp_link_config_id -> Nullable, } } table! { - lldp_service_config (id) { + lldp_link_config (id) { id -> Uuid, enabled -> Bool, - lldp_config_id -> Nullable, - } -} - -table! { - lldp_config (id) { - id -> Uuid, - name -> Text, - description -> Text, + link_name -> Nullable, + link_description -> Nullable, + chassis_id -> Nullable, + system_name -> Nullable, + system_description -> Nullable, + management_ip -> Nullable, time_created -> Timestamptz, time_modified -> Timestamptz, time_deleted -> Nullable, - chassis_id -> Text, - system_name -> Text, - system_description -> Text, - management_ip -> Inet, } } @@ -195,6 +188,7 @@ table! { dst -> Inet, gw -> Inet, vid -> Nullable, + local_pref -> Nullable, } } @@ -1029,6 +1023,7 @@ table! 
{
         kind -> crate::DatasetKindEnum,
         size_used -> Nullable<Int8>,
+        zone_name -> Nullable<Text>,
     }
 }
 
@@ -1910,7 +1905,8 @@ allow_tables_to_appear_in_same_query!(
 allow_tables_to_appear_in_same_query!(
     switch_port,
-    switch_port_settings_bgp_peer_config
+    switch_port_settings_bgp_peer_config,
+    bgp_config
 );
 
 allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource);
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index c2de9a7a5df..66cd2c49d97 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(88, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(94, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,7 +29,13 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
     //  | leaving the first copy as an example for the next person.
     //  v
     // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
-    KnownVersion::new(88, "inventory-nvme-firmware"),
+    KnownVersion::new(94, "inventory-nvme-firmware"),
+    KnownVersion::new(93, "dataset-kinds-zone-and-debug"),
+    KnownVersion::new(92, "lldp-link-config-nullable"),
+    KnownVersion::new(91, "add-management-gateway-producer-kind"),
+    KnownVersion::new(90, "lookup-bgp-config-by-asn"),
+    KnownVersion::new(89, "collapse_lldp_settings"),
+    KnownVersion::new(88, "route-local-pref"),
     KnownVersion::new(87, "add-clickhouse-server-enum-variants"),
     KnownVersion::new(86, "snapshot-replacement"),
     KnownVersion::new(85, "add-migrations-by-time-created-index"),
diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs
index f790d7d527d..bbcbb0748a7 100644
--- a/nexus/db-model/src/switch_port.rs
+++ b/nexus/db-model/src/switch_port.rs
@@ -3,7 +3,7 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
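The schema_versions hunk above jumps six versions at once, so it is worth restating the invariants that `KNOWN_VERSIONS` relies on: entries are newest-first, contiguous, and the head must agree with `SCHEMA_VERSION`. A simplified check using the values from this diff (plain tuples stand in for omicron's `KnownVersion`/`SemverVersion` types):

```rust
fn main() {
    // (version, migration directory under schema/crdb/), newest first.
    let known_versions: &[(u64, &str)] = &[
        (94, "inventory-nvme-firmware"),
        (93, "dataset-kinds-zone-and-debug"),
        (92, "lldp-link-config-nullable"),
        (91, "add-management-gateway-producer-kind"),
        (90, "lookup-bgp-config-by-asn"),
        (89, "collapse_lldp_settings"),
        (88, "route-local-pref"),
    ];
    let schema_version: u64 = 94;

    // SCHEMA_VERSION must match the newest known version...
    assert_eq!(schema_version, known_versions[0].0);
    // ...and the history must stay contiguous and strictly decreasing.
    assert!(known_versions.windows(2).all(|w| w[0].0 == w[1].0 + 1));
}
```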
 use crate::schema::{
-    lldp_config, lldp_service_config, switch_port, switch_port_settings,
+    lldp_link_config, switch_port, switch_port_settings,
     switch_port_settings_address_config, switch_port_settings_bgp_peer_config,
     switch_port_settings_bgp_peer_config_allow_export,
     switch_port_settings_bgp_peer_config_allow_import,
@@ -14,6 +14,7 @@ use crate::schema::{
 };
 use crate::{impl_enum_type, SqlU32};
 use crate::{SqlU16, SqlU8};
+use chrono::{DateTime, Utc};
 use db_macros::Resource;
 use diesel::AsChangeset;
 use ipnetwork::IpNetwork;
@@ -380,7 +381,7 @@ impl Into<external::SwitchPortConfig> for SwitchPortConfig {
 #[diesel(table_name = switch_port_settings_link_config)]
 pub struct SwitchPortLinkConfig {
     pub port_settings_id: Uuid,
-    pub lldp_service_config_id: Uuid,
+    pub lldp_link_config_id: Option<Uuid>,
     pub link_name: String,
     pub mtu: SqlU16,
     pub fec: SwitchLinkFec,
@@ -391,7 +392,7 @@ pub struct SwitchPortLinkConfig {
 impl SwitchPortLinkConfig {
     pub fn new(
         port_settings_id: Uuid,
-        lldp_service_config_id: Uuid,
+        lldp_link_config_id: Uuid,
         link_name: String,
         mtu: u16,
         fec: SwitchLinkFec,
@@ -400,7 +401,7 @@ impl SwitchPortLinkConfig {
     ) -> Self {
         Self {
             port_settings_id,
-            lldp_service_config_id,
+            lldp_link_config_id: Some(lldp_link_config_id),
             link_name,
             fec,
             speed,
@@ -414,7 +415,7 @@ impl Into<external::SwitchPortLinkConfig> for SwitchPortLinkConfig {
     fn into(self) -> external::SwitchPortLinkConfig {
         external::SwitchPortLinkConfig {
             port_settings_id: self.port_settings_id,
-            lldp_service_config_id: self.lldp_service_config_id,
+            lldp_link_config_id: self.lldp_link_config_id,
             link_name: self.link_name.clone(),
             mtu: self.mtu.into(),
             fec: self.fec.into(),
@@ -434,57 +435,61 @@ impl Into<external::SwitchPortLinkConfig> for SwitchPortLinkConfig {
     Deserialize,
     AsChangeset,
 )]
-#[diesel(table_name = lldp_service_config)]
-pub struct LldpServiceConfig {
+#[diesel(table_name = lldp_link_config)]
+pub struct LldpLinkConfig {
     pub id: Uuid,
     pub enabled: bool,
-    pub lldp_config_id: Option<Uuid>,
-}
-
-impl LldpServiceConfig {
-    pub fn new(enabled: bool, lldp_config_id: Option<Uuid>) -> Self {
-        Self { id: Uuid::new_v4(), enabled, lldp_config_id }
+    pub link_name: Option<String>,
+    pub link_description: Option<String>,
+    pub chassis_id: Option<String>,
+    pub system_name: Option<String>,
+    pub system_description: Option<String>,
+    pub management_ip: Option<IpNetwork>,
+    pub time_created: DateTime<Utc>,
+    pub time_modified: DateTime<Utc>,
+    pub time_deleted: Option<DateTime<Utc>>,
+}
+
+impl LldpLinkConfig {
+    pub fn new(
+        enabled: bool,
+        link_name: Option<String>,
+        link_description: Option<String>,
+        chassis_id: Option<String>,
+        system_name: Option<String>,
+        system_description: Option<String>,
+        management_ip: Option<IpNetwork>,
+    ) -> Self {
+        let now = Utc::now();
+        Self {
+            id: Uuid::new_v4(),
+            enabled,
+            link_name,
+            link_description,
+            chassis_id,
+            system_name,
+            system_description,
+            management_ip,
+            time_created: now,
+            time_modified: now,
+            time_deleted: None,
+        }
     }
 }
 
-impl Into<external::LldpServiceConfig> for LldpServiceConfig {
-    fn into(self) -> external::LldpServiceConfig {
-        external::LldpServiceConfig {
+// This converts the internal database version of the config into the
+// user-facing version.
+impl Into for LldpLinkConfig { + fn into(self) -> external::LldpLinkConfig { + external::LldpLinkConfig { id: self.id, - lldp_config_id: self.lldp_config_id, enabled: self.enabled, - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Resource, - Serialize, - Deserialize, -)] -#[diesel(table_name = lldp_config)] -pub struct LldpConfig { - #[diesel(embed)] - pub identity: LldpConfigIdentity, - pub chassis_id: String, - pub system_name: String, - pub system_description: String, - pub management_ip: IpNetwork, -} - -impl Into for LldpConfig { - fn into(self) -> external::LldpConfig { - external::LldpConfig { - identity: self.identity(), + link_name: self.link_name.clone(), + link_description: self.link_description.clone(), chassis_id: self.chassis_id.clone(), system_name: self.system_name.clone(), system_description: self.system_description.clone(), - management_ip: self.management_ip.into(), + management_ip: self.management_ip.map(|a| a.into()), } } } @@ -554,6 +559,7 @@ pub struct SwitchPortRouteConfig { pub dst: IpNetwork, pub gw: IpNetwork, pub vid: Option, + pub local_pref: Option, } impl SwitchPortRouteConfig { @@ -563,8 +569,9 @@ impl SwitchPortRouteConfig { dst: IpNetwork, gw: IpNetwork, vid: Option, + local_pref: Option, ) -> Self { - Self { port_settings_id, interface_name, dst, gw, vid } + Self { port_settings_id, interface_name, dst, gw, vid, local_pref } } } @@ -576,6 +583,7 @@ impl Into for SwitchPortRouteConfig { dst: self.dst.into(), gw: self.gw.into(), vlan_id: self.vid.map(Into::into), + local_pref: self.local_pref.map(Into::into), } } } diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 51925289449..c6c5caab6a3 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -14,7 +14,6 @@ omicron-rpaths.workspace = true anyhow.workspace = true async-bb8-diesel.workspace = true async-trait.workspace = true -bb8.workspace = true camino.workspace = true chrono.workspace = true const_format.workspace = true @@ -22,6 +21,7 @@ diesel.workspace = true diesel-dtrace.workspace = true dropshot.workspace = true futures.workspace = true +internal-dns.workspace = true ipnetwork.workspace = true macaddr.workspace = true once_cell.workspace = true @@ -29,6 +29,7 @@ oxnet.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
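The dependency changes here drop the direct bb8 dependency and, just below, add qorb; the test hunks that follow migrate accordingly, constructing pools with `db::Pool::new_single_host` and obtaining connections via `pool.claim()` rather than `pool.pool().get()`. A toy model of the new calling convention (these types are stand-ins, not the nexus-db-queries ones):

```rust
// Stand-in types sketching the shape of the migration in the test hunks below.
struct Pool;
struct Connection;

impl Pool {
    // Replaces the old bb8-backed db::Pool::new(log, cfg).
    fn new_single_host(_log: &str, _cfg: &str) -> Pool {
        Pool
    }
    // Replaces pool.pool().get(): hands out one claimed connection.
    async fn claim(&self) -> Result<Connection, &'static str> {
        Ok(Connection)
    }
}

#[tokio::main]
async fn main() {
    let pool = Pool::new_single_host("logctx", "pg-config");
    let _conn = pool.claim().await.unwrap();
}
```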
pq-sys = "*" +qorb = { workspace = true, features = [ "qtop" ] } rand.workspace = true ref-cast.workspace = true schemars.workspace = true @@ -45,8 +46,9 @@ strum.workspace = true swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } -uuid.workspace = true +url.workspace = true usdt.workspace = true +uuid.workspace = true db-macros.workspace = true nexus-auth.workspace = true diff --git a/nexus/db-queries/src/db/collection_attach.rs b/nexus/db-queries/src/db/collection_attach.rs index 95e6afeb4b7..c009d60483d 100644 --- a/nexus/db-queries/src/db/collection_attach.rs +++ b/nexus/db-queries/src/db/collection_attach.rs @@ -578,9 +578,7 @@ where mod test { use super::*; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -617,8 +615,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -873,7 +871,7 @@ mod test { dev::test_setup_log("test_attach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -902,7 +900,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_missing_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -939,7 +937,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -987,7 +985,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_once_synchronous"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1036,7 +1034,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_multiple_times"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1092,7 +1090,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_beyond_capacity_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1156,7 +1154,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_while_already_attached"); let mut db = 
test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1263,7 +1261,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1318,7 +1316,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_deleted_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1363,7 +1361,7 @@ mod test { let logctx = dev::test_setup_log("test_attach_without_update_filter"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_detach.rs b/nexus/db-queries/src/db/collection_detach.rs index 03e09d41ca1..bc547d5127f 100644 --- a/nexus/db-queries/src/db/collection_detach.rs +++ b/nexus/db-queries/src/db/collection_detach.rs @@ -482,9 +482,7 @@ mod test { use super::*; use crate::db::collection_attach::DatastoreAttachTarget; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -521,8 +519,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -786,7 +784,7 @@ mod test { dev::test_setup_log("test_detach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -814,7 +812,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_missing_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -850,7 +848,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -890,7 +888,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_while_already_detached"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + 
let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -954,7 +952,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_deleted_resource_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -998,7 +996,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_without_update_filter"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_detach_many.rs b/nexus/db-queries/src/db/collection_detach_many.rs index 986cfb70b7f..36755599d44 100644 --- a/nexus/db-queries/src/db/collection_detach_many.rs +++ b/nexus/db-queries/src/db/collection_detach_many.rs @@ -480,9 +480,7 @@ mod test { use super::*; use crate::db::collection_attach::DatastoreAttachTarget; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::Utc; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -519,8 +517,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -778,7 +776,7 @@ mod test { dev::test_setup_log("test_detach_missing_collection_fails"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -808,7 +806,7 @@ mod test { dev::test_setup_log("test_detach_missing_resource_succeeds"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -849,7 +847,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -892,7 +890,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_once_synchronous"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -937,7 +935,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_while_already_detached"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -993,7 +991,7 @@ mod test { let 
logctx = dev::test_setup_log("test_detach_filter_collection"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1044,7 +1042,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_deleted_resource"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -1102,7 +1100,7 @@ mod test { let logctx = dev::test_setup_log("test_detach_many"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/collection_insert.rs b/nexus/db-queries/src/db/collection_insert.rs index 69906e6498f..3aaea6aeb1a 100644 --- a/nexus/db-queries/src/db/collection_insert.rs +++ b/nexus/db-queries/src/db/collection_insert.rs @@ -406,9 +406,7 @@ where mod test { use super::*; use crate::db::{self, identity::Resource as IdentityResource}; - use async_bb8_diesel::{ - AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, - }; + use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use chrono::{DateTime, Utc}; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; @@ -443,8 +441,8 @@ mod test { async fn setup_db( pool: &crate::db::Pool, - ) -> bb8::PooledConnection> { - let connection = pool.pool().get().await.unwrap(); + ) -> crate::db::datastore::DataStoreConnection { + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -560,7 +558,7 @@ mod test { let logctx = dev::test_setup_log("test_collection_not_present"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; @@ -590,7 +588,7 @@ mod test { let logctx = dev::test_setup_log("test_collection_present"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let conn = setup_db(&pool).await; diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index f4bea0f6052..fdb96295435 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -28,7 +28,7 @@ use ref_cast::RefCast; use uuid::Uuid; impl DataStore { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: &OpContext, config: ¶ms::BgpConfigCreate, @@ -37,80 +37,187 @@ impl DataStore { use db::schema::{ bgp_announce_set, bgp_announce_set::dsl as announce_set_dsl, }; - use diesel::sql_types; - use diesel::IntoSql; let conn = self.pool_connection_authorized(opctx).await?; - self.transaction_retry_wrapper("bgp_config_set") - .transaction(&conn, |conn| async move { - let announce_set_id: Uuid = match &config.bgp_announce_set_id { - NameOrId::Name(name) => { - announce_set_dsl::bgp_announce_set + let err = OptionalError::new(); + 
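The `OptionalError` created just above is the mechanism the rewritten datastore methods lean on: the retryable-transaction closure that follows records a typed, user-facing error via `err.bail(..)`, and the caller recovers it with `err.take()` once the transaction machinery reports failure. A simplified, self-contained model of that flow (a sketch, not the actual nexus-db-queries implementation):

```rust
use std::sync::{Arc, Mutex};

// Cloneable slot for smuggling a typed error out of a retried closure.
#[derive(Clone)]
struct OptionalError<E>(Arc<Mutex<Option<E>>>);

#[derive(Debug)]
enum TxnError {
    RollbackTransaction,
}

impl<E> OptionalError<E> {
    fn new() -> Self {
        Self(Arc::new(Mutex::new(None)))
    }
    // Record the typed error; return a sentinel the closure aborts with.
    fn bail(&self, e: E) -> TxnError {
        *self.0.lock().unwrap() = Some(e);
        TxnError::RollbackTransaction
    }
    fn take(&self) -> Option<E> {
        self.0.lock().unwrap().take()
    }
}

fn main() {
    let err = OptionalError::<String>::new();
    // Inside the transaction closure:
    let result: Result<(), TxnError> =
        Err(err.bail("BGP config is in use".to_string()));
    // After the transaction fails, surface the typed error if one was set:
    if result.is_err() {
        if let Some(user_error) = err.take() {
            println!("typed error for the caller: {user_error}");
        }
    }
}
```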
self.transaction_retry_wrapper("bgp_config_create") + .transaction(&conn, |conn| { + + let err = err.clone(); + async move { + let announce_set_id = match config.bgp_announce_set_id.clone() { + // Resolve Name to UUID + NameOrId::Name(name) => announce_set_dsl::bgp_announce_set .filter(bgp_announce_set::time_deleted.is_null()) .filter(bgp_announce_set::name.eq(name.to_string())) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) - .await? + .await + .map_err(|e| { + let msg = "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + + // We cannot assume that the provided UUID is actually real. + // Lookup the parent record by UUID to verify that it is valid. + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; + + let config = + BgpConfig::from_config_create(config, announce_set_id); + + // Idempotency: + // Check to see if an exact match for the config already exists + let query = dsl::bgp_config + .filter(dsl::name.eq(config.name().to_string())) + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::bgp_announce_set_id.eq(config.bgp_announce_set_id)) + .into_boxed(); + + let query = match config.vrf.clone() { + Some(v) => query.filter(dsl::vrf.eq(v)), + None => query.filter(dsl::vrf.is_null()), + }; + + let query = match config.shaper.clone() { + Some(v) => query.filter(dsl::shaper.eq(v)), + None => query.filter(dsl::shaper.is_null()), + }; + + let query = match config.checker.clone() { + Some(v) => query.filter(dsl::checker.eq(v)), + None => query.filter(dsl::checker.is_null()), + }; + + let matching_config = match query + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + match e { + diesel::result::Error::NotFound => { + info!(opctx.log, "no matching bgp config found"); + Ok(None) + } + _ => { + let msg = "error while checking if bgp config exists"; + error!(opctx.log, "{msg}"; "error" => ?e); + Err(err.bail(Error::internal_error(msg))) + } + } + } + }?; + + // If so, we're done! + if let Some(existing_config) = matching_config { + return Ok(existing_config); } - NameOrId::Id(id) => *id, - }; - let config = - BgpConfig::from_config_create(config, announce_set_id); - - let matching_entry_subquery = dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(dsl::name); - - // SELECT exactly the values we're trying to INSERT, but only - // if it does not already exist. 
- let new_entry_subquery = diesel::dsl::select(( - config.id().into_sql::(), - config.name().to_string().into_sql::(), - config - .description() - .to_string() - .into_sql::(), - config.asn.into_sql::(), - config.bgp_announce_set_id.into_sql::(), - config - .vrf - .clone() - .into_sql::>(), - Utc::now().into_sql::(), - Utc::now().into_sql::(), - )) - .filter(diesel::dsl::not(diesel::dsl::exists( - matching_entry_subquery, - ))); - - diesel::insert_into(dsl::bgp_config) - .values(new_entry_subquery) - .into_columns(( - dsl::id, - dsl::name, - dsl::description, - dsl::asn, - dsl::bgp_announce_set_id, - dsl::vrf, - dsl::time_created, - dsl::time_modified, - )) - .execute_async(&conn) - .await?; + // TODO: remove once per-switch-multi-asn support is added + // Bail if a conflicting config for this ASN already exists. + // This is a temporary measure until multi-asn-per-switch is supported. + let configs_with_asn: Vec = dsl::bgp_config + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !configs_with_asn.is_empty() { + error!( + opctx.log, + "different config for asn already exists"; + "asn" => ?config.asn, + "requested_config" => ?config, + "conflicting_configs" => ?configs_with_asn + ); + return Err(err.bail(Error::conflict("cannot have more than one configuration per ASN"))); + } - dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(BgpConfig::as_select()) - .limit(1) - .first_async(&conn) - .await + diesel::insert_into(dsl::bgp_config) + .values(config.clone()) + .returning(BgpConfig::as_returning()) + .get_result_async(&conn) + .await + .map_err(|e | { + let msg = "failed to insert bgp config"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::DatabaseError(kind, _) => { + match kind { + diesel::result::DatabaseErrorKind::UniqueViolation => { + err.bail(Error::conflict("a field that must be unique conflicts with an existing record")) + }, + // technically we don't use Foreign Keys but it doesn't hurt to match on them + // instead of returning a 500 by default in the event that we do switch to Foreign Keys + diesel::result::DatabaseErrorKind::ForeignKeyViolation => { + err.bail(Error::conflict("an id field references an object that does not exist")) + } + diesel::result::DatabaseErrorKind::NotNullViolation => { + err.bail(Error::invalid_request("a required field was not provided")) + } + diesel::result::DatabaseErrorKind::CheckViolation => { + err.bail(Error::invalid_request("one or more fields are not valid values")) + }, + _ => err.bail(Error::internal_error(msg)), + } + } + _ => err.bail(Error::internal_error(msg)), + } + }) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e|{ + let msg = "bgp_config_create failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + // The transaction handler errors along with any errors emitted via "?" + // will fall through to here. These errors should truly be 500s + // because they are an internal hiccup that likely was not triggered by + // user input. 
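The fallback comment above is half of a two-branch error policy: typed errors recorded with `err.bail(..)` surface as-is, while everything else becomes a 500. The other half is the insert path's match on `diesel::result::DatabaseErrorKind`, condensed below with stand-in error types (the real code builds `omicron_common`'s external `Error` values):

```rust
// Stand-in mirror of diesel::result::DatabaseErrorKind's relevant variants.
#[derive(Debug)]
enum DbErrorKind {
    UniqueViolation,
    ForeignKeyViolation,
    NotNullViolation,
    CheckViolation,
    Other,
}

#[derive(Debug)]
enum ApiError {
    Conflict(&'static str),
    InvalidRequest(&'static str),
    Internal(&'static str),
}

fn map_insert_error(kind: DbErrorKind) -> ApiError {
    match kind {
        DbErrorKind::UniqueViolation => ApiError::Conflict(
            "a field that must be unique conflicts with an existing record",
        ),
        DbErrorKind::ForeignKeyViolation => ApiError::Conflict(
            "an id field references an object that does not exist",
        ),
        DbErrorKind::NotNullViolation => {
            ApiError::InvalidRequest("a required field was not provided")
        }
        DbErrorKind::CheckViolation => {
            ApiError::InvalidRequest("one or more fields are not valid values")
        }
        DbErrorKind::Other => ApiError::Internal("failed to insert bgp config"),
    }
}

fn main() {
    println!("{:?}", map_insert_error(DbErrorKind::UniqueViolation));
}
```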
+ error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn bgp_config_delete( @@ -124,11 +231,6 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config as sps_bgp_peer_config; use db::schema::switch_port_settings_bgp_peer_config::dsl as sps_bgp_peer_config_dsl; - #[derive(Debug)] - enum BgpConfigDeleteError { - ConfigInUse, - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_config_delete") @@ -138,26 +240,60 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let id: Uuid = match name_or_id { - NameOrId::Id(id) => id, - NameOrId::Name(name) => { + NameOrId::Id(id) => bgp_config_dsl::bgp_config + .filter(bgp_config::id.eq(id)) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpConfig, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + NameOrId::Name(name) => bgp_config_dsl::bgp_config - .filter(bgp_config::name.eq(name.to_string())) - .select(bgp_config::id) - .limit(1) - .first_async::(&conn) - .await? - } - }; + .filter(bgp_config::name.eq(name.to_string())) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; let count = sps_bgp_peer_config_dsl::switch_port_settings_bgp_peer_config - .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) - .count() - .execute_async(&conn) - .await?; + .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) + .count() + .execute_async(&conn) + .await?; if count > 0 { - return Err(err.bail(BgpConfigDeleteError::ConfigInUse)); + return Err(err.bail(Error::conflict("BGP Config is in use and cannot be deleted"))); } diesel::update(bgp_config_dsl::bgp_config) @@ -171,13 +307,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_config_delete failed"; if let Some(err) = err.take() { - match err { - BgpConfigDeleteError::ConfigInUse => { - Error::invalid_request("BGP config in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -194,24 +329,45 @@ impl DataStore { let name_or_id = name_or_id.clone(); - let config = match name_or_id { + match name_or_id { NameOrId::Name(name) => dsl::bgp_config .filter(bgp_config::name.eq(name.to_string())) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + ) + } + _ => Error::internal_error(msg), + } + }), NameOrId::Id(id) => dsl::bgp_config .filter(bgp_config::id.eq(id)) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), - }?; + .map_err(|e| { + let msg = 
"failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); - Ok(config) + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_id(ResourceType::BgpConfig, &id) + } + _ => Error::internal_error(msg), + } + }), + } } pub async fn bgp_config_list( @@ -237,10 +393,42 @@ impl DataStore { .select(BgpConfig::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + error!(opctx.log, "bgp_config_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) + } + + pub async fn bgp_announce_set_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + use db::schema::bgp_announce_set::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::bgp_announce_set, dsl::id, &pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::bgp_announce_set, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::time_deleted.is_null()) + .select(BgpAnnounceSet::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + error!(opctx.log, "bgp_announce_set_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } - pub async fn bgp_announce_list( + pub async fn bgp_announcement_list( &self, opctx: &OpContext, sel: ¶ms::BgpAnnounceSetSelector, @@ -250,11 +438,6 @@ impl DataStore { bgp_announcement::dsl as announce_dsl, }; - #[derive(Debug)] - enum BgpAnnounceListError { - AnnounceSetNotFound(Name), - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_announce_list") @@ -264,7 +447,26 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let announce_id: Uuid = match name_or_id { - NameOrId::Id(id) => id, + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set .filter( @@ -278,15 +480,23 @@ impl DataStore { .first_async::(&conn) .await .map_err(|e| { - err.bail_retryable_or( - e, - BgpAnnounceListError::AnnounceSetNotFound( - Name::from(name.clone()), - ) - ) - })? 
+ let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - }; + }?; let result = announce_dsl::bgp_announcement .filter(announce_dsl::announce_set_id.eq(announce_id)) @@ -299,21 +509,18 @@ impl DataStore { }) .await .map_err(|e| { + error!(opctx.log, "bgp_announce_list failed"; "error" => ?e); if let Some(err) = err.take() { - match err { - BgpAnnounceListError::AnnounceSetNotFound(name) => { - Error::not_found_by_name( - ResourceType::BgpAnnounceSet, - &name, - ) - } - } + err } else { public_error_from_diesel(e, ErrorHandler::Server) } }) } + // TODO: it seems this logic actually performs a find OR create for an announce set, and then replaces its child announcements. + // This might be changed in omicron#6016 to an api that creates an announce set then allows adding / removal of announcements + // to match how our other APIs work. pub async fn bgp_update_announce_set( &self, opctx: &OpContext, @@ -383,9 +590,16 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_update_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } + // TODO: it seems this logic actually performs a create OR update of an announce set and its child announcements + // (for example, it will add missing announcements). This might be changed in omicron#6016 to an api that creates an announce set + // then allows adding / removal of announcements to match how our other APIs work. pub async fn bgp_create_announce_set( &self, opctx: &OpContext, @@ -466,7 +680,11 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_create_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } pub async fn bgp_delete_announce_set( @@ -481,11 +699,6 @@ impl DataStore { use db::schema::bgp_config; use db::schema::bgp_config::dsl as bgp_config_dsl; - #[derive(Debug)] - enum BgpAnnounceSetDeleteError { - AnnounceSetInUse, - } - let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = sel.name_or_id.clone(); @@ -496,18 +709,56 @@ impl DataStore { let name_or_id = name_or_id.clone(); async move { let id: Uuid = match name_or_id { + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set + .filter( + bgp_announce_set::time_deleted.is_null(), + ) .filter( bgp_announce_set::name.eq(name.to_string()), ) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) - .await? 
+ .await + .map_err(|e| { + let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - NameOrId::Id(id) => id, - }; + }?; let count = bgp_config_dsl::bgp_config .filter(bgp_config::bgp_announce_set_id.eq(id)) @@ -516,9 +767,9 @@ impl DataStore { .await?; if count > 0 { - return Err(err.bail( - BgpAnnounceSetDeleteError::AnnounceSetInUse, - )); + return Err( + err.bail(Error::conflict("announce set in use")) + ); } diesel::update(announce_set_dsl::bgp_announce_set) @@ -537,13 +788,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_delete_announce_set failed"; if let Some(err) = err.take() { - match err { - BgpAnnounceSetDeleteError::AnnounceSetInUse => { - Error::invalid_request("BGP announce set in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -563,7 +813,11 @@ impl DataStore { .select(BgpPeerView::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "bgp_peer_configs failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -583,7 +837,11 @@ impl DataStore { .filter(dsl::addr.eq(addr)) .load_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "communities_for_peer failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -601,24 +859,40 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_export::dsl; let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .filter(db_peer::addr.eq(addr)) - .select(db_peer::allow_export_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_allow_export_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_export_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup export settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_export - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_export + .filter( + 
db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -627,12 +901,20 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_export_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn allow_import_for_peer( @@ -647,25 +929,42 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_import as db_allow; use db::schema::switch_port_settings_bgp_peer_config_allow_import::dsl; + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .filter(db_peer::addr.eq(addr)) - .select(db_peer::allow_import_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + self + .transaction_retry_wrapper("bgp_allow_import_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_import_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup import settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_import - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_import + .filter( + db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -674,11 +973,19 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_import_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } } diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs index e7a975fa691..a38cfb89350 100644 --- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs +++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs @@ -153,10 +153,22 @@ mod test { ); let settings = datastore.cockroachdb_settings(&opctx).await.unwrap(); - // With a fresh cluster, this is the expected state - let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(); - assert_eq!(settings.version, version); - assert_eq!(settings.preserve_downgrade, ""); + let version: 
CockroachDbClusterVersion = + settings.version.parse().expect("unexpected cluster version"); + if settings.preserve_downgrade == "" { + // This is the expected value while running tests normally. + assert_eq!(version, CockroachDbClusterVersion::NEWLY_INITIALIZED); + } else if settings.preserve_downgrade == version.to_string() { + // This is the expected value if the cluster was created on a + // previous version and `cluster.preserve_downgrade_option` was set. + assert_eq!(version, CockroachDbClusterVersion::POLICY); + } else { + panic!( + "`cluster.preserve_downgrade_option` is {:?}, + but it should be empty or \"{}\"", + settings.preserve_downgrade, version + ); + } // Verify that if a fingerprint is wrong, we get the expected SQL error // back. @@ -165,7 +177,7 @@ mod test { &opctx, String::new(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await else { @@ -190,7 +202,7 @@ mod test { &opctx, settings.state_fingerprint.clone(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await .unwrap(); @@ -198,8 +210,8 @@ mod test { datastore.cockroachdb_settings(&opctx).await.unwrap(), CockroachDbSettings { state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: version.clone(), + version: version.to_string(), + preserve_downgrade: version.to_string(), } ); } @@ -215,14 +227,24 @@ mod test { ) .await .unwrap(); - assert_eq!( - datastore.cockroachdb_settings(&opctx).await.unwrap(), - CockroachDbSettings { - state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: String::new(), - } - ); + let settings = + datastore.cockroachdb_settings(&opctx).await.unwrap(); + if version == CockroachDbClusterVersion::NEWLY_INITIALIZED { + assert_eq!( + settings, + CockroachDbSettings { + state_fingerprint: settings.state_fingerprint.clone(), + version: version.to_string(), + preserve_downgrade: String::new(), + } + ); + } else { + // Resetting it permits auto-finalization, so the state + // fingerprint and version are not predictable until that + // completes, but we can still verify that the variable was + // reset. 
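The test above depends on the compare-and-set discipline of `cockroachdb_setting_set_string`: a write only applies if the caller still holds the current state fingerprint, so two concurrent writers cannot silently clobber each other's settings changes. Here is a toy model of that guard; all types are invented for illustration, and the real fingerprint is derived from the observed cluster state rather than a counter.

```rust
use std::collections::HashMap;

/// Invented stand-in for the settings store: values plus a fingerprint of
/// the state they were last read at.
struct Settings {
    values: HashMap<String, String>,
    fingerprint: String,
}

#[derive(Debug)]
enum SetError {
    /// The caller read stale state; it must re-read and retry.
    StaleFingerprint,
}

impl Settings {
    /// Apply the update only if the caller still holds the current
    /// fingerprint (compare-and-set). A successful write produces a new
    /// fingerprint, invalidating other in-flight writers.
    fn set_string(
        &mut self,
        fingerprint: &str,
        name: &str,
        value: String,
    ) -> Result<(), SetError> {
        if fingerprint != self.fingerprint {
            return Err(SetError::StaleFingerprint);
        }
        self.values.insert(name.to_string(), value);
        self.fingerprint = format!("fp-{}", self.values.len());
        Ok(())
    }
}

fn main() {
    let mut settings = Settings {
        values: HashMap::new(),
        fingerprint: "fp-0".to_string(),
    };

    // A writer holding the current fingerprint succeeds...
    settings
        .set_string("fp-0", "cluster.preserve_downgrade_option", "22.1".into())
        .unwrap();

    // ...and a writer holding the old fingerprint is rejected.
    let err = settings
        .set_string("fp-0", "cluster.preserve_downgrade_option", "23.1".into())
        .unwrap_err();
    println!("stale writer rejected: {err:?}");
}
```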
+ assert!(settings.preserve_downgrade.is_empty()); + } } db.cleanup().await.unwrap(); diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index a08e346fe8c..0fe1c7912e9 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -241,6 +241,7 @@ mod test { use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_test_utils::db::test_setup_database; + use omicron_common::api::internal::shared::DatasetKind as ApiDatasetKind; use omicron_test_utils::dev; #[tokio::test] @@ -291,7 +292,7 @@ mod test { Uuid::new_v4(), zpool_id, Some("[::1]:0".parse().unwrap()), - DatasetKind::Crucible, + ApiDatasetKind::Crucible, )) .await .expect("failed to insert dataset") @@ -324,7 +325,7 @@ mod test { dataset1.id(), zpool_id, Some("[::1]:12345".parse().unwrap()), - DatasetKind::Cockroach, + ApiDatasetKind::Cockroach, )) .await .expect("failed to do-nothing insert dataset"); @@ -340,7 +341,7 @@ mod test { Uuid::new_v4(), zpool_id, Some("[::1]:0".parse().unwrap()), - DatasetKind::Cockroach, + ApiDatasetKind::Cockroach, )) .await .expect("failed to upsert dataset"); @@ -372,7 +373,7 @@ mod test { dataset1.id(), zpool_id, Some("[::1]:12345".parse().unwrap()), - DatasetKind::Cockroach, + ApiDatasetKind::Cockroach, )) .await .expect("failed to do-nothing insert dataset"); diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 4169cc06bd1..b997bf384f3 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -511,7 +511,7 @@ mod test { let mut crdb = test_db::test_setup_database(&logctx.log).await; let cfg = db::Config { url: crdb.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); let datastore = Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); @@ -559,8 +559,8 @@ mod test { let mut crdb = test_db::test_setup_database(&logctx.log).await; let cfg = db::Config { url: crdb.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // Mimic the layout of "schema/crdb". let config_dir = Utf8TempDir::new().unwrap(); @@ -671,8 +671,8 @@ mod test { let mut crdb = test_db::test_setup_database(&logctx.log).await; let cfg = db::Config { url: crdb.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // Mimic the layout of "schema/crdb". 
 let config_dir = Utf8TempDir::new().unwrap();
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 3b767a804f2..6d7a2801378 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -2288,7 +2288,7 @@ mod test {
     }

     impl CollectionCounts {
-        async fn new(conn: &DataStoreConnection<'_>) -> anyhow::Result<CollectionCounts> {
+        async fn new(conn: &DataStoreConnection) -> anyhow::Result<CollectionCounts> {
             conn.transaction_async(|conn| async move {
                 conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL)
                     .await
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 58259be7ee2..5b1163dc8b6 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -27,7 +27,8 @@ use crate::db::{
     error::{public_error_from_diesel, ErrorHandler},
 };
 use ::oximeter::types::ProducerRegistry;
-use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionManager};
+use anyhow::{anyhow, bail, Context};
+use async_bb8_diesel::AsyncRunQueryDsl;
 use diesel::pg::Pg;
 use diesel::prelude::*;
 use diesel::query_builder::{QueryFragment, QueryId};
@@ -127,8 +128,12 @@
 pub use vmm::VmmStateUpdateResult;
 pub use volume::read_only_resources_associated_with_volume;
 pub use volume::CrucibleResources;
 pub use volume::CrucibleTargets;
+pub use volume::ExistingTarget;
+pub use volume::ReplacementTarget;
 pub use volume::VolumeCheckoutReason;
 pub use volume::VolumeReplacementParams;
+pub use volume::VolumeToDelete;
+pub use volume::VolumeWithTarget;

 // Number of unique datasets required to back a region.
 // TODO: This should likely turn into a configuration option.
@@ -170,8 +175,8 @@ impl<U, T> RunnableQuery<U> for T where
 {
 }

-pub type DataStoreConnection<'a> =
-    bb8::PooledConnection<'a, ConnectionManager<DbConnection>>;
+pub type DataStoreConnection =
+    qorb::claim::Handle<async_bb8_diesel::Connection<DbConnection>>;

 pub struct DataStore {
     log: Logger,
@@ -203,7 +208,7 @@ impl DataStore {
     /// Constructs a new Datastore object.
     ///
-    /// Only returns if the database schema is compatible with Nexus's known
+    /// Only returns when the database schema is compatible with Nexus's known
     /// schema version.
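The recurring `pool.pool().get()` to `pool.claim()` rewrites in this diff track the move from bb8 to qorb: the connection type is now a claim `Handle` that dereferences to the underlying connection and releases its claim on drop. A toy version of that RAII shape follows; every name in it is invented for illustration and none of it is qorb's actual implementation.

```rust
use std::ops::Deref;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

/// Toy "pool": only tracks how many claims are outstanding.
struct Pool {
    outstanding: Arc<AtomicUsize>,
}

/// Toy connection type.
struct Conn {
    id: usize,
}

/// RAII claim handle: derefs to the connection, returns the claim on drop.
struct Handle {
    conn: Conn,
    outstanding: Arc<AtomicUsize>,
}

impl Deref for Handle {
    type Target = Conn;
    fn deref(&self) -> &Conn {
        &self.conn
    }
}

impl Drop for Handle {
    fn drop(&mut self) {
        self.outstanding.fetch_sub(1, Ordering::SeqCst);
    }
}

impl Pool {
    fn new() -> Self {
        Pool { outstanding: Arc::new(AtomicUsize::new(0)) }
    }

    fn claim(&self) -> Handle {
        let n = self.outstanding.fetch_add(1, Ordering::SeqCst);
        Handle {
            conn: Conn { id: n },
            outstanding: Arc::clone(&self.outstanding),
        }
    }
}

fn main() {
    let pool = Pool::new();
    {
        let conn = pool.claim();
        println!("using connection {}", conn.id); // deref through the handle
        assert_eq!(pool.outstanding.load(Ordering::SeqCst), 1);
    }
    // The claim is released when the handle goes out of scope.
    assert_eq!(pool.outstanding.load(Ordering::SeqCst), 0);
}
```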
pub async fn new( log: &Logger, @@ -237,6 +242,38 @@ impl DataStore { Ok(datastore) } + /// Constructs a new Datastore, failing if the schema version does not match + /// this program's expected version + pub async fn new_failfast( + log: &Logger, + pool: Arc, + ) -> Result { + let datastore = + Self::new_unchecked(log.new(o!("component" => "datastore")), pool) + .map_err(|e| anyhow!("{}", e))?; + const EXPECTED_VERSION: SemverVersion = nexus_db_model::SCHEMA_VERSION; + let (found_version, found_target) = datastore + .database_schema_version() + .await + .context("loading database schema version")?; + + if let Some(found_target) = found_target { + bail!( + "database schema check failed: apparently mid-upgrade \ + (found_target = {found_target})" + ); + } + + if found_version != EXPECTED_VERSION { + bail!( + "database schema check failed: \ + expected {EXPECTED_VERSION}, found {found_version}", + ); + } + + Ok(datastore) + } + pub fn register_producers(&self, registry: &ProducerRegistry) { registry .register_producer( @@ -275,8 +312,7 @@ impl DataStore { opctx: &OpContext, ) -> Result { opctx.authorize(authz::Action::Query, &authz::DATABASE).await?; - let pool = self.pool.pool(); - let connection = pool.get().await.map_err(|err| { + let connection = self.pool.claim().await.map_err(|err| { Error::unavail(&format!("Failed to access DB connection: {err}")) })?; Ok(connection) @@ -290,7 +326,7 @@ impl DataStore { pub(super) async fn pool_connection_unauthorized( &self, ) -> Result { - let connection = self.pool.pool().get().await.map_err(|err| { + let connection = self.pool.claim().await.map_err(|err| { Error::unavail(&format!("Failed to access DB connection: {err}")) })?; Ok(connection) @@ -362,6 +398,7 @@ impl DataStore { } } +#[derive(Clone, Copy, Debug)] pub enum UpdatePrecondition { DontCare, Null, @@ -394,10 +431,10 @@ mod test { use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ - BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, - PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, - Project, Rack, Region, SiloUser, SledBaseboard, SledSystemHardware, - SledUpdate, SshKey, Zpool, + BlockSize, ConsoleSession, Dataset, ExternalIp, PhysicalDisk, + PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, Project, Rack, + Region, SiloUser, SledBaseboard, SledSystemHardware, SledUpdate, + SshKey, Zpool, }; use crate::db::queries::vpc_subnet::InsertVpcSubnetQuery; use chrono::{Duration, Utc}; @@ -413,6 +450,7 @@ mod test { use omicron_common::api::external::{ ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; + use omicron_common::api::internal::shared::DatasetKind; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; @@ -1582,7 +1620,7 @@ mod test { dev::test_setup_log("test_queries_do_not_require_full_table_scan"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); let datastore = DataStore::new(&logctx.log, Arc::new(pool), None).await.unwrap(); let conn = datastore.pool_connection_for_tests().await.unwrap(); @@ -1627,7 +1665,7 @@ mod test { let logctx = dev::test_setup_log("test_sled_ipv6_address_allocation"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let pool = 
Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); let datastore = Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); let opctx = OpContext::for_tests( diff --git a/nexus/db-queries/src/db/datastore/probe.rs b/nexus/db-queries/src/db/datastore/probe.rs index f3e0614552b..434bf25760f 100644 --- a/nexus/db-queries/src/db/datastore/probe.rs +++ b/nexus/db-queries/src/db/datastore/probe.rs @@ -62,7 +62,7 @@ impl super::DataStore { use db::schema::probe::dsl; use db::schema::vpc_subnet::dsl as vpc_subnet_dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let probes = match pagparams { PaginatedBy::Id(pagparams) => { @@ -77,7 +77,7 @@ impl super::DataStore { .filter(dsl::project_id.eq(authz_project.id())) .filter(dsl::time_deleted.is_null()) .select(Probe::as_select()) - .load_async(&*pool) + .load_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; @@ -99,7 +99,7 @@ impl super::DataStore { let db_subnet = vpc_subnet_dsl::vpc_subnet .filter(vpc_subnet_dsl::id.eq(interface.subnet_id)) .select(VpcSubnet::as_select()) - .first_async(&*pool) + .first_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) @@ -126,7 +126,7 @@ impl super::DataStore { &self, opctx: &OpContext, probe: &Probe, - pool: &DataStoreConnection<'_>, + conn: &DataStoreConnection, ) -> LookupResult { use db::schema::vpc_subnet::dsl as vpc_subnet_dsl; @@ -143,7 +143,7 @@ impl super::DataStore { let db_subnet = vpc_subnet_dsl::vpc_subnet .filter(vpc_subnet_dsl::id.eq(interface.subnet_id)) .select(VpcSubnet::as_select()) - .first_async(&**pool) + .first_async(&**conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; @@ -172,20 +172,20 @@ impl super::DataStore { ) -> ListResultVec { use db::schema::probe::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let probes = paginated(dsl::probe, dsl::id, pagparams) .filter(dsl::time_deleted.is_null()) .filter(dsl::sled.eq(sled)) .select(Probe::as_select()) - .load_async(&*pool) + .load_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; let mut result = Vec::with_capacity(probes.len()); for probe in probes.into_iter() { - result.push(self.resolve_probe_info(opctx, &probe, &pool).await?); + result.push(self.resolve_probe_info(opctx, &probe, &conn).await?); } Ok(result) @@ -200,7 +200,7 @@ impl super::DataStore { ) -> LookupResult { use db::schema::probe; use db::schema::probe::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = name_or_id.clone(); @@ -211,7 +211,7 @@ impl super::DataStore { .filter(probe::project_id.eq(authz_project.id())) .select(Probe::as_select()) .limit(1) - .first_async::(&*pool) + .first_async::(&*conn) .await .map_err(|e| { public_error_from_diesel( @@ -227,7 +227,7 @@ impl super::DataStore { .filter(probe::project_id.eq(authz_project.id())) .select(Probe::as_select()) .limit(1) - .first_async::(&*pool) + .first_async::(&*conn) .await .map_err(|e| { public_error_from_diesel( @@ -240,7 +240,7 @@ impl super::DataStore { }), }?; - self.resolve_probe_info(opctx, &probe, &pool).await + self.resolve_probe_info(opctx, &probe, &conn).await } /// Add a probe to the data store. 
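The probe lookups above repeat the `NameOrId` dispatch used earlier for BGP configs and announce sets: a name is resolved to an ID with a query, and a caller-supplied ID is verified to exist rather than trusted. A condensed model of that dispatch over in-memory maps; the types here are illustrative, not Nexus's real lookup API.

```rust
use std::collections::HashMap;

/// The two ways callers may identify a resource.
enum NameOrId {
    Name(String),
    Id(u64),
}

#[derive(Debug)]
enum LookupError {
    NotFoundByName(String),
    NotFoundById(u64),
}

/// Resolve a `NameOrId` to a verified ID. The important detail mirrored
/// from the datastore code: an ID supplied by the caller is *looked up*,
/// not assumed valid, so a dangling ID produces a not-found error rather
/// than being silently written into a child record.
fn resolve(
    by_name: &HashMap<String, u64>,
    ids: &HashMap<u64, String>,
    selector: NameOrId,
) -> Result<u64, LookupError> {
    match selector {
        NameOrId::Name(name) => by_name
            .get(&name)
            .copied()
            .ok_or(LookupError::NotFoundByName(name)),
        NameOrId::Id(id) => ids
            .contains_key(&id)
            .then_some(id)
            .ok_or(LookupError::NotFoundById(id)),
    }
}

fn main() {
    let mut by_name = HashMap::new();
    let mut ids = HashMap::new();
    by_name.insert("external".to_string(), 7);
    ids.insert(7, "external".to_string());

    let got = resolve(&by_name, &ids, NameOrId::Name("external".into()));
    assert_eq!(got.unwrap(), 7);
    assert!(resolve(&by_name, &ids, NameOrId::Id(42)).is_err());
    println!("lookups behave as expected");
}
```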
@@ -253,7 +253,7 @@ impl super::DataStore { ) -> CreateResult { //TODO in transaction use db::schema::probe::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let _eip = self .allocate_probe_ephemeral_ip( @@ -306,7 +306,7 @@ impl super::DataStore { let result = diesel::insert_into(dsl::probe) .values(probe.clone()) .returning(Probe::as_returning()) - .get_result_async(&*pool) + .get_result_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; @@ -322,7 +322,7 @@ impl super::DataStore { ) -> DeleteResult { use db::schema::probe; use db::schema::probe::dsl; - let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = name_or_id.clone(); @@ -334,7 +334,7 @@ impl super::DataStore { .filter(probe::project_id.eq(authz_project.id())) .select(probe::id) .limit(1) - .first_async::(&*pool) + .first_async::(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) @@ -350,7 +350,7 @@ impl super::DataStore { .filter(dsl::id.eq(id)) .filter(dsl::project_id.eq(authz_project.id())) .set(dsl::time_deleted.eq(Utc::now())) - .execute_async(&*pool) + .execute_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; diff --git a/nexus/db-queries/src/db/datastore/pub_test_utils.rs b/nexus/db-queries/src/db/datastore/pub_test_utils.rs index 93a172bd152..bcf6a6c80f1 100644 --- a/nexus/db-queries/src/db/datastore/pub_test_utils.rs +++ b/nexus/db-queries/src/db/datastore/pub_test_utils.rs @@ -29,7 +29,7 @@ pub async fn datastore_test( use crate::authn; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); let datastore = Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 939929e665e..0b626804e1e 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -9,7 +9,6 @@ use super::SQL_BATCH_SIZE; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::model::Generation; use crate::db::pagination::paginated; use crate::db::pagination::paginated_multicolumn; use crate::db::pagination::Paginator; @@ -17,10 +16,12 @@ use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_auth::authz; use nexus_auth::context::OpContext; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::ops::Add; impl DataStore { pub async fn saga_create( @@ -80,21 +81,15 @@ impl DataStore { /// now, we're implementing saga adoption only in cases where the original /// SEC/Nexus has been expunged.) /// - /// However, in the future, it may be possible for multiple SECs to try and - /// update the same saga, and overwrite each other's state. For example, - /// one SEC might try and update the state to Running while the other one - /// updates it to Done. That case would have to be carefully considered and - /// tested here, probably using the (currently unused) - /// `current_adopt_generation` field to enable optimistic concurrency. 
-    ///
-    /// To reiterate, we are *not* considering the case where several SECs try
-    /// to update the same saga. That will be a future enhancement.
+    /// It's conceivable that multiple SECs do try to update the same saga
+    /// concurrently. That would be a bug. This is noticed and prevented by
+    /// making this query conditional on current_sec and failing with a conflict
+    /// if the current SEC has changed.
     pub async fn saga_update_state(
         &self,
         saga_id: steno::SagaId,
         new_state: steno::SagaCachedState,
         current_sec: db::saga_types::SecId,
-        current_adopt_generation: Generation,
     ) -> Result<(), Error> {
         use db::schema::saga::dsl;
@@ -102,7 +97,6 @@
         let result = diesel::update(dsl::saga)
             .filter(dsl::id.eq(saga_id))
             .filter(dsl::current_sec.eq(current_sec))
-            .filter(dsl::adopt_generation.eq(current_adopt_generation))
             .set(dsl::saga_state.eq(db::saga_types::SagaCachedState(new_state)))
             .check_if_exists::<db::saga_types::Saga>(saga_id)
             .execute_and_check(&*self.pool_connection_unauthorized().await?)
@@ -119,20 +113,19 @@
         match result.status {
             UpdateStatus::Updated => Ok(()),
-            UpdateStatus::NotUpdatedButExists => Err(Error::invalid_request(
-                format!(
-                    "failed to update saga {:?} with state {:?}: preconditions not met: \
-                    expected current_sec = {:?}, adopt_generation = {:?}, \
-                    but found current_sec = {:?}, adopt_generation = {:?}, state = {:?}",
+            UpdateStatus::NotUpdatedButExists => {
+                Err(Error::invalid_request(format!(
+                    "failed to update saga {:?} with state {:?}: \
+                    preconditions not met: \
+                    expected current_sec = {:?}, \
+                    but found current_sec = {:?}, state = {:?}",
                     saga_id,
                     new_state,
                     current_sec,
-                    current_adopt_generation,
                     result.found.current_sec,
-                    result.found.adopt_generation,
                     result.found.saga_state,
-                )
-            )),
+                )))
+            }
         }
     }
@@ -207,16 +200,75 @@
         Ok(events)
     }
+
+    /// Updates all sagas that are currently assigned to any of the SEC ids in
+    /// `sec_ids`, assigning them to `new_sec_id` instead.
+    ///
+    /// Generally, an SEC id corresponds to a Nexus id. This change causes the
+    /// Nexus instance `new_sec_id` to discover these sagas and resume executing
+    /// them the next time it performs saga recovery (which is normally on
+    /// startup and periodically). Generally, `new_sec_id` is the _current_
+    /// Nexus instance and the caller should activate the saga recovery
+    /// background task after calling this function to immediately resume the
+    /// newly-assigned sagas.
+    ///
+    /// **Warning:** This operation is only safe if the other SECs `sec_ids` are
+    /// not currently running. If those SECs are still running, then two (or
+    /// more) SECs may wind up running the same saga concurrently. This would
+    /// likely violate implicit assumptions made by various saga actions,
+    /// leading to hard-to-debug errors and state corruption.
+    pub async fn sagas_reassign_sec(
+        &self,
+        opctx: &OpContext,
+        sec_ids: &[db::saga_types::SecId],
+        new_sec_id: db::saga_types::SecId,
+    ) -> Result<usize, Error> {
+        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
+
+        let now = chrono::Utc::now();
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        // It would be more robust to do this in batches. However, Diesel does
+        // not appear to support the UPDATE ... LIMIT syntax using the normal
+        // builder. In practice, it's extremely unlikely we'd have so many
+        // in-progress sagas that this would be a problem.
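To make the reassignment semantics concrete before the query itself (which continues below), here is an in-memory model of the UPDATE: only non-done sagas owned by one of the source SECs move, each moved row bumps its adoption generation, and a second identical call is a no-op. All types here are simplified stand-ins for the real saga rows.

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum SagaState {
    Running,
    Unwinding,
    Done,
}

#[derive(Debug)]
struct SagaRow {
    current_sec: u32,
    state: SagaState,
    adopt_generation: u64,
}

/// Mirror of the reassignment UPDATE: move non-done sagas owned by any of
/// `from` over to `to`, bumping `adopt_generation` on each row touched.
/// Returns how many rows changed, as `execute_async` does.
fn reassign(sagas: &mut [SagaRow], from: &[u32], to: u32) -> usize {
    let mut n = 0;
    for saga in sagas.iter_mut() {
        if from.contains(&saga.current_sec) && saga.state != SagaState::Done {
            saga.current_sec = to;
            saga.adopt_generation += 1;
            n += 1;
        }
    }
    n
}

fn main() {
    let (sec_a, sec_b, sec_c) = (1, 2, 3);
    let mut sagas = vec![
        SagaRow { current_sec: sec_b, state: SagaState::Running, adopt_generation: 1 },
        SagaRow { current_sec: sec_b, state: SagaState::Done, adopt_generation: 1 },
        SagaRow { current_sec: sec_c, state: SagaState::Unwinding, adopt_generation: 1 },
        SagaRow { current_sec: sec_a, state: SagaState::Running, adopt_generation: 1 },
    ];

    // Two rows move: B's running saga and C's unwinding saga.
    assert_eq!(reassign(&mut sagas, &[sec_b, sec_c], sec_a), 2);
    // Reassignment is idempotent: a second call finds nothing left to move.
    assert_eq!(reassign(&mut sagas, &[sec_b, sec_c], sec_a), 0);
    println!("after reassignment: {sagas:?}");
}
```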
+        use db::schema::saga::dsl;
+        diesel::update(
+            dsl::saga
+                .filter(dsl::current_sec.is_not_null())
+                .filter(
+                    dsl::current_sec.eq_any(
+                        sec_ids.into_iter().cloned().collect::<Vec<_>>(),
+                    ),
+                )
+                .filter(dsl::saga_state.ne(db::saga_types::SagaCachedState(
+                    steno::SagaCachedState::Done,
+                ))),
+        )
+        .set((
+            dsl::current_sec.eq(Some(new_sec_id)),
+            dsl::adopt_generation.eq(dsl::adopt_generation.add(1)),
+            dsl::adopt_time.eq(now),
+        ))
+        .execute_async(&*conn)
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }

 #[cfg(test)]
 mod test {
     use super::*;
     use crate::db::datastore::test_utils::datastore_test;
+    use async_bb8_diesel::AsyncConnection;
+    use async_bb8_diesel::AsyncSimpleConnection;
+    use db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
     use nexus_db_model::{SagaNodeEvent, SecId};
     use nexus_test_utils::db::test_setup_database;
+    use omicron_common::api::external::Generation;
     use omicron_test_utils::dev;
     use rand::seq::SliceRandom;
+    use std::collections::BTreeSet;
     use uuid::Uuid;

     // Tests pagination in listing sagas that are candidates for recovery
@@ -440,7 +492,6 @@
                 node_cx.saga_id,
                 steno::SagaCachedState::Running,
                 node_cx.sec_id,
-                db::model::Generation::new(),
             )
             .await
             .expect("updating state to Running again");
@@ -451,7 +502,6 @@
                 node_cx.saga_id,
                 steno::SagaCachedState::Done,
                 node_cx.sec_id,
-                db::model::Generation::new(),
             )
             .await
             .expect("updating state to Done");
@@ -463,7 +513,6 @@
                 node_cx.saga_id,
                 steno::SagaCachedState::Done,
                 node_cx.sec_id,
-                db::model::Generation::new(),
             )
             .await
             .expect("updating state to Done again");
@@ -509,4 +558,156 @@
             SagaNodeEvent::new(event, self.sec_id)
         }
     }
+
+    #[tokio::test]
+    async fn test_saga_reassignment() {
+        // Test setup
+        let logctx = dev::test_setup_log("test_saga_reassignment");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (_, datastore) = datastore_test(&logctx, &db).await;
+        let opctx = OpContext::for_tests(logctx.log.clone(), datastore.clone());
+
+        // Populate the database with a few different sagas:
+        //
+        // - assigned to SEC A: done, running, and unwinding
+        // - assigned to SEC B: done, running, and unwinding
+        // - assigned to SEC C: done, running, and unwinding
+        // - assigned to SEC D: done, running, and unwinding
+        //
+        // Then we'll reassign SECs B's and C's sagas to SEC A and check exactly
+        // which sagas were changed by this. This exercises:
+        // - that we don't touch A's sagas (the one we're assigning *to*)
+        // - that we do touch both B's and C's sagas (the ones we're assigning
+        //   *from*)
+        // - that we don't touch D's sagas (some other SEC)
+        // - that we don't touch any "done" sagas
+        // - that we do touch both running and unwinding sagas
+        let mut sagas_to_insert = Vec::new();
+        let sec_a = SecId(Uuid::new_v4());
+        let sec_b = SecId(Uuid::new_v4());
+        let sec_c = SecId(Uuid::new_v4());
+        let sec_d = SecId(Uuid::new_v4());
+
+        for sec_id in [sec_a, sec_b, sec_c, sec_d] {
+            for state in [
+                steno::SagaCachedState::Running,
+                steno::SagaCachedState::Unwinding,
+                steno::SagaCachedState::Done,
+            ] {
+                let params = steno::SagaCreateParams {
+                    id: steno::SagaId(Uuid::new_v4()),
+                    name: steno::SagaName::new("test saga"),
+                    dag: serde_json::value::Value::Null,
+                    state,
+                };
+
+                sagas_to_insert
+                    .push(db::model::saga_types::Saga::new(sec_id, params));
+            }
+        }
+        println!("sagas to insert: {:?}", sagas_to_insert);
+
+        // These two sets are complements, but we write out the conditions to
+        // double-check that we've got it right.
+ let sagas_affected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + ((saga.creator == sec_b || saga.creator == sec_c) + && (saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding)) + .then(|| saga.id) + }) + .collect(); + let sagas_unaffected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + (saga.creator == sec_a + || saga.creator == sec_d + || saga.saga_state.0 == steno::SagaCachedState::Done) + .then(|| saga.id) + }) + .collect(); + println!("sagas affected: {:?}", sagas_affected); + println!("sagas UNaffected: {:?}", sagas_unaffected); + assert_eq!(sagas_affected.intersection(&sagas_unaffected).count(), 0); + assert_eq!( + sagas_affected.len() + sagas_unaffected.len(), + sagas_to_insert.len() + ); + + // Insert the sagas. + let count = { + use db::schema::saga::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + diesel::insert_into(dsl::saga) + .values(sagas_to_insert) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .expect("successful insertion") + }; + assert_eq!(count, sagas_affected.len() + sagas_unaffected.len()); + + // Reassign uncompleted sagas from SECs B and C to SEC A. + let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + + // Fetch all the sagas and check their states. + let all_sagas: Vec<_> = datastore + .pool_connection_for_tests() + .await + .unwrap() + .transaction_async(|conn| async move { + use db::schema::saga::dsl; + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + dsl::saga + .select(nexus_db_model::Saga::as_select()) + .load_async(&conn) + .await + }) + .await + .unwrap(); + + for saga in all_sagas { + println!("checking saga: {:?}", saga); + let current_sec = saga.current_sec.unwrap(); + if sagas_affected.contains(&saga.id) { + assert!(saga.creator == sec_b || saga.creator == sec_c); + assert_eq!(current_sec, sec_a); + assert_eq!(*saga.adopt_generation, Generation::from(2)); + assert!( + saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding + ); + } else if sagas_unaffected.contains(&saga.id) { + assert_eq!(current_sec, saga.creator); + assert_eq!(*saga.adopt_generation, Generation::from(1)); + // Its SEC and state could be anything since we've deliberately + // included sagas with various states and SECs that should not + // be affected by the reassignment. + } else { + println!( + "ignoring saga that was not created by this test: {:?}", + saga + ); + } + } + + assert_eq!(nreassigned, sagas_affected.len()); + + // If we do it again, we should make no changes. 
+ let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + assert_eq!(nreassigned, 0); + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 159933dce00..59748aa4db2 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -15,7 +15,7 @@ use crate::db::datastore::UpdatePrecondition; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::{ - LldpServiceConfig, Name, SwitchInterfaceConfig, SwitchPort, + LldpLinkConfig, Name, SwitchInterfaceConfig, SwitchPort, SwitchPortAddressConfig, SwitchPortBgpPeerConfig, SwitchPortConfig, SwitchPortLinkConfig, SwitchPortRouteConfig, SwitchPortSettings, SwitchPortSettingsGroup, SwitchPortSettingsGroups, @@ -31,7 +31,7 @@ use diesel::{ use diesel_dtrace::DTraceConnection; use ipnetwork::IpNetwork; use nexus_db_model::{ - SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, + BgpConfig, SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, SwitchPortBgpPeerConfigAllowImport, SwitchPortBgpPeerConfigCommunity, }; use nexus_types::external_api::params; @@ -101,7 +101,7 @@ pub struct SwitchPortSettingsCombinedResult { pub groups: Vec, pub port: SwitchPortConfig, pub links: Vec, - pub link_lldp: Vec, + pub link_lldp: Vec, pub interfaces: Vec, pub vlan_interfaces: Vec, pub routes: Vec, @@ -333,6 +333,7 @@ impl DataStore { SwitchPortSettingsCreateError::ReserveBlock( ReserveBlockError::AddressNotInLot, ) => Error::invalid_request("address not in lot"), + } } else { @@ -451,19 +452,18 @@ impl DataStore { .load_async::(&conn) .await?; - let lldp_svc_ids: Vec = result + let lldp_link_ids: Vec = result .links .iter() - .map(|link| link.lldp_service_config_id) + .filter_map(|link| link.lldp_link_config_id) .collect(); - use db::schema::lldp_service_config as lldp_config; - use db::schema::lldp_service_config::dsl as lldp_dsl; - result.link_lldp = lldp_dsl::lldp_service_config - .filter(lldp_config::id.eq_any(lldp_svc_ids)) - .select(LldpServiceConfig::as_select()) + use db::schema::lldp_link_config; + result.link_lldp = lldp_link_config::dsl::lldp_link_config + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) + .select(LldpLinkConfig::as_select()) .limit(1) - .load_async::(&conn) + .load_async::(&conn) .await?; // get the interface configs @@ -829,45 +829,158 @@ impl DataStore { port_settings_id: Option, current: UpdatePrecondition, ) -> UpdateResult<()> { + use db::schema::bgp_config::dsl as bgp_config_dsl; use db::schema::switch_port; use db::schema::switch_port::dsl as switch_port_dsl; + use db::schema::switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl; let conn = self.pool_connection_authorized(opctx).await?; - match current { - UpdatePrecondition::DontCare => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Null => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.is_null()) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - 
public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Value(current_id) => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.eq(current_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("switch_port_set_settings_id") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // TODO: remove once per-switch-multi-asn support is added + // Bail if user attempts to assign multiple ASNs to a switch via switch port settings + // This is a temporary measure until multi-asn-per-switch is supported. + + // what switch are we adding a configuration to? + let switch = switch_port_dsl::switch_port + .filter(switch_port_dsl::id.eq(switch_port_id)) + .select(switch_port_dsl::switch_location) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e: diesel::result::Error| { + let msg = "failed to look up switch port by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::SwitchPort, + &switch_port_id, + )) + } + _ => err.bail(Error::internal_error(msg)), + } + })?; + + // if we're setting a port settings id (and therefore activating a configuration + // on a port) we need to make sure there aren't any conflicting bgp configurations + if let Some(psid) = port_settings_id { + let bgp_config: Option = + match bgp_peer_dsl::switch_port_settings_bgp_peer_config + .inner_join( + bgp_config_dsl::bgp_config + .on(bgp_peer_dsl::bgp_config_id + .eq(bgp_config_dsl::id)), + ) + .filter( + bgp_peer_dsl::port_settings_id + .eq(psid), + ) + .select(BgpConfig::as_select()) + .limit(1) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + let msg = "failed to check if bgp peer exists in switch port settings"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + Ok(None) + } + _ => Err(err.bail(Error::internal_error(msg))), + } + } + }?; + + // find all port settings for the targeted switch + // switch port + // inner join bgp peer on port settings id + // inner join bgp config on bgp config id + // filter switch location eq switch + // filter port settings id not null + // filter asn doesn't equal our asn + + if let Some(config) = bgp_config { + let conflicting_bgp_configs: Vec = switch_port_dsl::switch_port + .inner_join( + bgp_peer_dsl::switch_port_settings_bgp_peer_config + .on(bgp_peer_dsl::port_settings_id + .nullable() + .eq(switch_port_dsl::port_settings_id)), + ) + .inner_join(bgp_config_dsl::bgp_config.on( + bgp_peer_dsl::bgp_config_id.eq(bgp_config_dsl::id), + )) + .filter(switch_port_dsl::switch_location.eq(switch)) + .filter(switch_port_dsl::port_settings_id.is_not_null()) + .filter(bgp_config_dsl::asn.ne(config.asn)) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !conflicting_bgp_configs.is_empty() { + return Err(err.bail(Error::conflict("a different asn is already configured on this switch"))); + } + } + + } + + // perform the requested update + match current { + UpdatePrecondition::DontCare => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + 
UpdatePrecondition::Null => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter(switch_port::port_settings_id.is_null()) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + UpdatePrecondition::Value(current_id) => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter( + switch_port::port_settings_id + .eq(current_id), + ) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + } + } + }) + .await + .map_err(|e| { + let msg = "switch_port_set_settings_id failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; Ok(()) } @@ -946,10 +1059,10 @@ impl DataStore { .eq(route_config_dsl::port_settings_id.nullable())), ) .select(SwitchPort::as_select()) - // TODO: #3592 Correctness - // In single rack deployments there are only 64 ports. We'll need - // pagination in the future, or maybe a way to constrain the query to - // a rack? + // TODO: #3592 Correctness + // In single rack deployments there are only 64 ports. We'll need + // pagination in the future, or maybe a way to constrain the query to + // a rack? .limit(64) .union( switch_port_dsl::switch_port @@ -958,7 +1071,7 @@ impl DataStore { bgp_peer_config_dsl::switch_port_settings_bgp_peer_config .on(switch_port_dsl::port_settings_id .eq(bgp_peer_config_dsl::port_settings_id.nullable()), - ), + ), ) .select(SwitchPort::as_select()) .limit(64), @@ -987,7 +1100,7 @@ async fn do_switch_port_settings_create( ) -> Result { use db::schema::{ address_lot::dsl as address_lot_dsl, bgp_config::dsl as bgp_config_dsl, - lldp_service_config::dsl as lldp_config_dsl, + lldp_link_config::dsl as lldp_link_config_dsl, switch_port_settings::dsl as port_settings_dsl, switch_port_settings_address_config::dsl as address_config_dsl, switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl, @@ -1047,17 +1160,21 @@ async fn do_switch_port_settings_create( let mut link_config = Vec::with_capacity(params.links.len()); for (link_name, c) in ¶ms.links { - let lldp_config_id = match c.lldp.lldp_config { - Some(_) => todo!(), // TODO actual lldp support - None => None, - }; - let lldp_svc_config = - LldpServiceConfig::new(c.lldp.enabled, lldp_config_id); + let lldp_link_config = LldpLinkConfig::new( + c.lldp.enabled, + c.lldp.link_name.clone(), + c.lldp.link_description.clone(), + c.lldp.chassis_id.clone(), + c.lldp.system_name.clone(), + c.lldp.system_description.clone(), + c.lldp.management_ip.map(|a| a.into()), + ); + let lldp_config_id = lldp_link_config.id; + lldp_config.push(lldp_link_config); - lldp_config.push(lldp_svc_config.clone()); link_config.push(SwitchPortLinkConfig::new( psid, - lldp_svc_config.id, + lldp_config_id, link_name.clone(), c.mtu, c.fec.into(), @@ -1066,9 +1183,9 @@ async fn do_switch_port_settings_create( )); } result.link_lldp = - diesel::insert_into(lldp_config_dsl::lldp_service_config) + diesel::insert_into(lldp_link_config_dsl::lldp_link_config) .values(lldp_config.clone()) - .returning(LldpServiceConfig::as_returning()) + .returning(LldpLinkConfig::as_returning()) .get_results_async(conn) .await?; @@ -1120,6 +1237,7 @@ async fn do_switch_port_settings_create( route.dst.into(), route.gw.into(), route.vid.map(Into::into), + route.local_pref.map(Into::into), )); } } @@ -1144,18 
+1262,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); bgp_config_dsl::bgp_config - .filter(bgp_config::time_deleted.is_null()) - .filter(bgp_config::name.eq(name)) - .select(bgp_config::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::BgpConfigNotFound - ) - })? + .filter(bgp_config::time_deleted.is_null()) + .filter(bgp_config::name.eq(name)) + .select(bgp_config::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::BgpConfigNotFound + ) + })? } }; @@ -1173,9 +1291,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_import_dsl::switch_port_settings_bgp_peer_config_allow_import) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if let ImportExportPolicy::Allow(list) = &p.allowed_export { @@ -1192,9 +1310,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_export_dsl::switch_port_settings_bgp_peer_config_allow_export) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if !p.communities.is_empty() { @@ -1212,9 +1330,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(bgp_communities_dsl::switch_port_settings_bgp_peer_config_communities) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } bgp_peer_config.push(SwitchPortBgpPeerConfig::new( @@ -1225,6 +1343,7 @@ async fn do_switch_port_settings_create( )); } } + let db_bgp_peers: Vec = diesel::insert_into(bgp_peer_dsl::switch_port_settings_bgp_peer_config) .values(bgp_peer_config) @@ -1278,18 +1397,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); address_lot_dsl::address_lot - .filter(address_lot::time_deleted.is_null()) - .filter(address_lot::name.eq(name)) - .select(address_lot::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::AddressLotNotFound - ) - })? + .filter(address_lot::time_deleted.is_null()) + .filter(address_lot::name.eq(name)) + .select(address_lot::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::AddressLotNotFound + ) + })? } }; // TODO: Reduce DB round trips needed for reserving ip blocks @@ -1349,18 +1468,18 @@ async fn do_switch_port_settings_delete( NameOrId::Name(name) => { let name = name.to_string(); port_settings_dsl::switch_port_settings - .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::name.eq(name)) - .select(switch_port_settings::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound - ) - })? + .filter(switch_port_settings::time_deleted.is_null()) + .filter(switch_port_settings::name.eq(name)) + .select(switch_port_settings::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound + ) + })? 
} }; @@ -1389,13 +1508,12 @@ async fn do_switch_port_settings_delete( .returning(SwitchPortLinkConfig::as_returning()) .get_results_async(conn) .await?; - // delete lldp configs - use db::schema::lldp_service_config::{self, dsl as lldp_config_dsl}; - let lldp_svc_ids: Vec = - links.iter().map(|link| link.lldp_service_config_id).collect(); - diesel::delete(lldp_config_dsl::lldp_service_config) - .filter(lldp_service_config::id.eq_any(lldp_svc_ids)) + use db::schema::lldp_link_config; + let lldp_link_ids: Vec = + links.iter().filter_map(|link| link.lldp_link_config_id).collect(); + diesel::delete(lldp_link_config::dsl::lldp_link_config) + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) .execute_async(conn) .await?; @@ -1556,7 +1674,7 @@ mod test { shaper: None, }; - datastore.bgp_config_set(&opctx, &bgp_config).await.unwrap(); + datastore.bgp_config_create(&opctx, &bgp_config).await.unwrap(); let settings = SwitchPortSettingsCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index 14c3405a705..089a2914be2 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -5,7 +5,6 @@ //! [`DataStore`] helpers for working with VMM records. use super::DataStore; -use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; @@ -40,8 +39,13 @@ use uuid::Uuid; /// The result of an [`DataStore::vmm_and_migration_update_runtime`] call, /// indicating which records were updated. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub struct VmmStateUpdateResult { + /// The VMM record that the update query found and possibly updated. + /// + /// NOTE: This is the record prior to the update! + pub found_vmm: Vmm, + /// `true` if the VMM record was updated, `false` otherwise. pub vmm_updated: bool, @@ -108,14 +112,10 @@ impl DataStore { pub async fn vmm_fetch( &self, opctx: &OpContext, - authz_instance: &authz::Instance, vmm_id: &PropolisUuid, ) -> LookupResult { - opctx.authorize(authz::Action::Read, authz_instance).await?; - let vmm = dsl::vmm .filter(dsl::id.eq(vmm_id.into_untyped_uuid())) - .filter(dsl::instance_id.eq(authz_instance.id())) .filter(dsl::time_deleted.is_null()) .select(Vmm::as_select()) .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
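The new `found_vmm` field changes what callers can see after a runtime-state update. A minimal sketch of a hypothetical call site (only the `VmmStateUpdateResult` fields and `UpdateStatus` variants come from this diff; the argument list is illustrative):

```rust
// Hypothetical caller; arguments are illustrative.
let result = datastore
    .vmm_and_migration_update_runtime(opctx, vmm_id, &new_runtime, migrations)
    .await?;

// `found_vmm` is the VMM record as it existed *before* the update, so the
// prior state remains inspectable even when `vmm_updated` is false
// (i.e., UpdateStatus::NotUpdatedButExists).
let prior = &result.found_vmm;
if !result.vmm_updated {
    debug!(opctx.log, "VMM runtime state already current"; "vmm_id" => %prior.id);
}
```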
@@ -233,13 +233,21 @@ impl DataStore { .transaction(&conn, |conn| { let err = err.clone(); async move { - let vmm_updated = self + let vmm_update_result = self .vmm_update_runtime_on_connection( &conn, &vmm_id, new_runtime, ) - .await.map(|r| match r.status { UpdateStatus::Updated => true, UpdateStatus::NotUpdatedButExists => false })?; + .await?; + + + let found_vmm = vmm_update_result.found; + let vmm_updated = match vmm_update_result.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false + }; + let migration_out_updated = match migration_out { Some(migration) => { let r = self.migration_update_source_on_connection( @@ -287,6 +295,7 @@ impl DataStore { None => false, }; Ok(VmmStateUpdateResult { + found_vmm, vmm_updated, migration_in_updated, migration_out_updated, diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index f777384b7ba..f5c1f121e45 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -1795,16 +1795,16 @@ pub struct VolumeReplacementParams { // parameters #[derive(Debug, Clone, Copy)] -pub struct VolumeWithTarget(Uuid); +pub struct VolumeWithTarget(pub Uuid); #[derive(Debug, Clone, Copy)] -pub struct ExistingTarget(SocketAddrV6); +pub struct ExistingTarget(pub SocketAddrV6); #[derive(Debug, Clone, Copy)] -pub struct ReplacementTarget(SocketAddrV6); +pub struct ReplacementTarget(pub SocketAddrV6); #[derive(Debug, Clone, Copy)] -pub struct VolumeToDelete(Uuid); +pub struct VolumeToDelete(pub Uuid); impl DataStore { /// Replace a read-write region in a Volume with a new region. diff --git a/nexus/db-queries/src/db/explain.rs b/nexus/db-queries/src/db/explain.rs index 24fd9930407..52844c204f7 100644 --- a/nexus/db-queries/src/db/explain.rs +++ b/nexus/db-queries/src/db/explain.rs @@ -124,8 +124,7 @@ mod test { } async fn create_schema(pool: &db::Pool) { - pool.pool() - .get() + pool.claim() .await .unwrap() .batch_execute_async( @@ -145,8 +144,8 @@ mod test { let logctx = dev::test_setup_log("test_explain_async"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); create_schema(&pool).await; @@ -170,8 +169,8 @@ mod test { let logctx = dev::test_setup_log("test_explain_full_table_scan"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); create_schema(&pool).await; diff --git a/nexus/db-queries/src/db/pagination.rs b/nexus/db-queries/src/db/pagination.rs index 4fc1cf59669..9920440ade4 100644 --- a/nexus/db-queries/src/db/pagination.rs +++ b/nexus/db-queries/src/db/pagination.rs @@ -354,7 +354,7 @@ mod test { async fn populate_users(pool: &db::Pool, values: &Vec<(i64, i64)>) { use schema::test_users::dsl; - let conn = pool.pool().get().await.unwrap(); + let conn = pool.claim().await.unwrap(); // The indexes here work around the check that prevents full table // scans. 
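Across the test modules below, the connection-pool change is mechanical; both lines are taken verbatim from the hunks in this diff:

```rust
// Before: reach through the Pool wrapper to the underlying bb8 pool.
let conn = pool.pool().get().await.unwrap();

// After: the qorb-backed Pool (introduced in pool.rs below) hands out
// claims directly.
let conn = pool.claim().await.unwrap();
```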
@@ -392,7 +392,7 @@ mod test { pool: &db::Pool, query: BoxedQuery, ) -> Vec { - let conn = pool.pool().get().await.unwrap(); + let conn = pool.claim().await.unwrap(); query.select(User::as_select()).load_async(&*conn).await.unwrap() } @@ -402,7 +402,7 @@ mod test { dev::test_setup_log("test_paginated_single_column_ascending"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); use schema::test_users::dsl; @@ -437,7 +437,7 @@ mod test { dev::test_setup_log("test_paginated_single_column_descending"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); use schema::test_users::dsl; @@ -472,7 +472,7 @@ mod test { dev::test_setup_log("test_paginated_multicolumn_ascending"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); use schema::test_users::dsl; @@ -526,7 +526,7 @@ mod test { dev::test_setup_log("test_paginated_multicolumn_descending"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = db::Pool::new(&logctx.log, &cfg); + let pool = db::Pool::new_single_host(&logctx.log, &cfg); use schema::test_users::dsl; diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs index 497c8d97c50..dccee6fa3fc 100644 --- a/nexus/db-queries/src/db/pool.rs +++ b/nexus/db-queries/src/db/pool.rs @@ -3,108 +3,155 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Database connection pooling -// This whole thing is a placeholder for prototyping. -// -// TODO-robustness TODO-resilience We will want to carefully think about the -// connection pool that we use and its parameters. It's not clear from the -// survey so far whether an existing module is suitable for our purposes. See -// the Cueball Internals document for details on the sorts of behaviors we'd -// like here. Even if by luck we stick with bb8, we definitely want to think -// through the various parameters. -// -// Notes about bb8's behavior: -// * When the database is completely offline, and somebody wants a connection, -// it still waits for the connection timeout before giving up. That seems -// like not what we want. (To be clear, this is a failure mode where we know -// the database is offline, not one where it's partitioned and we can't tell.) -// * Although the `build_unchecked()` builder allows the pool to start up with -// no connections established (good), it also _seems_ to not establish any -// connections even when it could, resulting in a latency bubble for the first -// operation after startup. That's not what we're looking for. -// // TODO-design Need TLS support (the types below hardcode NoTls). 
 use super::Config as DbConfig;
-use async_bb8_diesel::ConnectionError;
-use async_bb8_diesel::ConnectionManager;
+use crate::db::pool_connection::{DieselPgConnector, DieselPgConnectorArgs};
+
+use qorb::backend;
+use qorb::policy::Policy;
+use qorb::resolver::{AllBackends, Resolver};
+use qorb::resolvers::dns::{DnsResolver, DnsResolverConfig};
+use qorb::service;
+use slog::Logger;
+use std::collections::BTreeMap;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tokio::sync::watch;
 
 pub use super::pool_connection::DbConnection;
 
+type QorbConnection = async_bb8_diesel::Connection<DbConnection>;
+type QorbPool = qorb::pool::Pool<QorbConnection>;
+
 /// Wrapper around a database connection pool.
 ///
 /// Expected to be used as the primary interface to the database.
 pub struct Pool {
-    pool: bb8::Pool<ConnectionManager<DbConnection>>,
+    inner: QorbPool,
 }
 
-impl Pool {
-    pub fn new(log: &slog::Logger, db_config: &DbConfig) -> Self {
-        // Make sure diesel-dtrace's USDT probes are enabled.
-        usdt::register_probes().expect("Failed to register USDT DTrace probes");
-        Self::new_builder(log, db_config, bb8::Builder::new())
-    }
+// Provides an alternative to the DNS resolver for cases where we want to
+// contact the database without performing resolution.
+struct SingleHostResolver {
+    tx: watch::Sender<AllBackends>,
+}
 
-    pub fn new_failfast_for_tests(
-        log: &slog::Logger,
-        db_config: &DbConfig,
-    ) -> Self {
-        Self::new_builder(
-            log,
-            db_config,
-            bb8::Builder::new()
-                .connection_timeout(std::time::Duration::from_millis(1)),
-        )
+impl SingleHostResolver {
+    fn new(config: &DbConfig) -> Self {
+        let backends = Arc::new(BTreeMap::from([(
+            backend::Name::new("singleton"),
+            backend::Backend { address: config.url.address() },
+        )]));
+        let (tx, _rx) = watch::channel(backends.clone());
+        Self { tx }
     }
+}
 
-    fn new_builder(
-        log: &slog::Logger,
-        db_config: &DbConfig,
-        builder: bb8::Builder<ConnectionManager<DbConnection>>,
-    ) -> Self {
-        let url = db_config.url.url();
-        let log = log.new(o!(
-            "database_url" => url.clone(),
-            "component" => "db::Pool"
-        ));
-        info!(&log, "database connection pool");
-        let error_sink = LoggingErrorSink::new(log);
-        let manager = ConnectionManager::<DbConnection>::new(url);
-        let pool = builder
-            .connection_customizer(Box::new(
-                super::pool_connection::ConnectionCustomizer::new(),
-            ))
-            .error_sink(Box::new(error_sink))
-            .build_unchecked(manager);
-        Pool { pool }
+impl Resolver for SingleHostResolver {
+    fn monitor(&mut self) -> watch::Receiver<AllBackends> {
+        self.tx.subscribe()
     }
+}
 
-    /// Returns a reference to the underlying pool.
-    pub fn pool(&self) -> &bb8::Pool<ConnectionManager<DbConnection>> {
-        &self.pool
-    }
+fn make_dns_resolver(
+    bootstrap_dns: Vec<SocketAddr>,
+) -> qorb::resolver::BoxedResolver {
+    Box::new(DnsResolver::new(
+        service::Name(internal_dns::ServiceName::Cockroach.srv_name()),
+        bootstrap_dns,
+        DnsResolverConfig {
+            hardcoded_ttl: Some(tokio::time::Duration::MAX),
+            ..Default::default()
+        },
+    ))
 }
 
-#[derive(Clone, Debug)]
-struct LoggingErrorSink {
-    log: slog::Logger,
+fn make_single_host_resolver(
+    config: &DbConfig,
+) -> qorb::resolver::BoxedResolver {
+    Box::new(SingleHostResolver::new(config))
 }
 
-impl LoggingErrorSink {
-    fn new(log: slog::Logger) -> LoggingErrorSink {
-        LoggingErrorSink { log }
-    }
+fn make_postgres_connector(
+    log: &Logger,
+) -> qorb::backend::SharedConnector<QorbConnection> {
+    // Create postgres connections.
+    //
+    // We're currently relying on the DieselPgConnector doing the following:
+    // - Disallowing full table scans in its implementation of "on_acquire"
+    // - Creating async_bb8_diesel connections that also wrap DTraceConnections.
+    let user = "root";
+    let db = "omicron";
+    let args = vec![("sslmode", "disable")];
+    Arc::new(DieselPgConnector::new(
+        log,
+        DieselPgConnectorArgs { user, db, args },
+    ))
 }
 
-impl bb8::ErrorSink<ConnectionError> for LoggingErrorSink {
-    fn sink(&self, error: ConnectionError) {
-        error!(
-            &self.log,
-            "database connection error";
-            "error_message" => #%error
-        );
+impl Pool {
+    /// Creates a new qorb-backed connection pool to the database.
+    ///
+    /// Creating this pool does not necessarily wait for connections to become
+    /// available, as backends may shift over time.
+    pub fn new(log: &Logger, bootstrap_dns: Vec<SocketAddr>) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_dns_resolver(bootstrap_dns);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy::default();
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
+    }
+
+    /// Creates a new qorb-backed connection pool to a single instance of the
+    /// database.
+    ///
+    /// This is intended for tests that want to skip DNS resolution, relying
+    /// on a single instance of the database.
+    ///
+    /// In production, [Self::new] should be preferred.
+    pub fn new_single_host(log: &Logger, db_config: &DbConfig) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_single_host_resolver(db_config);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy::default();
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
+    }
+
+    /// Creates a new qorb-backed connection pool which returns an error
+    /// if claims are not available within one millisecond.
+    ///
+    /// This is intended for test-only usage, in particular for tests where
+    /// claim requests should rapidly return errors when a backend has been
+    /// intentionally disabled.
+    #[cfg(any(test, feature = "testing"))]
+    pub fn new_single_host_failfast(
+        log: &Logger,
+        db_config: &DbConfig,
+    ) -> Self {
+        // Make sure diesel-dtrace's USDT probes are enabled.
+        usdt::register_probes().expect("Failed to register USDT DTrace probes");
+
+        let resolver = make_single_host_resolver(db_config);
+        let connector = make_postgres_connector(log);
+
+        let policy = Policy {
+            claim_timeout: tokio::time::Duration::from_millis(1),
+            ..Default::default()
+        };
+        Pool { inner: qorb::pool::Pool::new(resolver, connector, policy) }
     }
 
-    fn boxed_clone(&self) -> Box<dyn bb8::ErrorSink<ConnectionError>> {
-        Box::new(self.clone())
+    /// Returns a connection from the pool
+    pub async fn claim(
+        &self,
+    ) -> anyhow::Result<qorb::claim::Handle<QorbConnection>> {
+        Ok(self.inner.claim().await?)
     }
 }
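Taken together with the test updates earlier in this diff, the new constructors are used roughly like this; a minimal sketch, assuming a test context with `logctx` and a `test_setup_database` handle `db` as in the hunks above (`bootstrap_dns_addrs` is an assumed variable):

```rust
// Single-host pool for tests: skip DNS resolution, point straight at the
// test database, and claim a connection (unwrap() mirrors the tests).
let cfg = db::Config { url: db.pg_config().clone() };
let pool = db::Pool::new_single_host(&logctx.log, &cfg);
let conn = pool.claim().await.unwrap();

// Production path: resolve CockroachDB backends through internal DNS.
// `bootstrap_dns_addrs` (Vec<SocketAddr>) comes from the caller.
let pool = db::Pool::new(&logctx.log, bootstrap_dns_addrs);

// Fail-fast variant for tests that intentionally disable a backend and
// expect claim errors within ~1ms rather than hanging.
let pool = db::Pool::new_single_host_failfast(&logctx.log, &cfg);
```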
diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs
index dae6a0ee51a..9a33370a5a7 100644
--- a/nexus/db-queries/src/db/pool_connection.rs
+++ b/nexus/db-queries/src/db/pool_connection.rs
@@ -4,46 +4,139 @@
 //! Customization that happens on each connection as they're acquired.
 
+use anyhow::anyhow;
+use async_bb8_diesel::AsyncR2D2Connection;
 use async_bb8_diesel::AsyncSimpleConnection;
-use async_bb8_diesel::Connection;
-use async_bb8_diesel::ConnectionError;
 use async_trait::async_trait;
-use bb8::CustomizeConnection;
+use diesel::Connection;
 use diesel::PgConnection;
 use diesel_dtrace::DTraceConnection;
+use qorb::backend::{self, Backend, Error};
+use slog::Logger;
+use url::Url;
 
 pub type DbConnection = DTraceConnection<PgConnection>;
 
 pub const DISALLOW_FULL_TABLE_SCAN_SQL: &str =
     "set disallow_full_table_scans = on; set large_full_scan_rows = 0;";
 
-/// A customizer for all new connections made to CockroachDB, from Diesel.
-#[derive(Debug)]
-pub(crate) struct ConnectionCustomizer {}
+/// A [backend::Connector] which provides access to [PgConnection].
+pub(crate) struct DieselPgConnector {
+    log: Logger,
+    user: String,
+    db: String,
+    args: Vec<(String, String)>,
+}
+
+pub(crate) struct DieselPgConnectorArgs<'a> {
+    pub(crate) user: &'a str,
+    pub(crate) db: &'a str,
+    pub(crate) args: Vec<(&'a str, &'a str)>,
+}
 
-impl ConnectionCustomizer {
-    pub(crate) fn new() -> Self {
-        Self {}
+impl DieselPgConnector {
+    /// Creates a new "connector" to a database, which
+    /// swaps out the IP address at runtime depending on the selected backend.
+    ///
+    /// Format of the url is:
+    ///
+    /// - postgresql://{user}@{address}/{db}
+    ///
+    /// Or, if arguments are supplied:
+    ///
+    /// - postgresql://{user}@{address}/{db}?{args}
+    pub(crate) fn new(log: &Logger, args: DieselPgConnectorArgs<'_>) -> Self {
+        let DieselPgConnectorArgs { user, db, args } = args;
+        Self {
+            log: log.clone(),
+            user: user.to_string(),
+            db: db.to_string(),
+            args: args
+                .into_iter()
+                .map(|(k, v)| (k.to_string(), v.to_string()))
+                .collect(),
+        }
     }
 
-    async fn disallow_full_table_scans(
+    fn to_url(
         &self,
-        conn: &mut Connection<DbConnection>,
-    ) -> Result<(), ConnectionError> {
-        conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await?;
-        Ok(())
+        address: std::net::SocketAddr,
+    ) -> Result<String, anyhow::Error> {
+        let user = &self.user;
+        let db = &self.db;
+        let mut url =
+            Url::parse(&format!("postgresql://{user}@{address}/{db}"))?;
+
+        for (k, v) in &self.args {
+            url.query_pairs_mut().append_pair(k, v);
+        }
+
+        Ok(url.as_str().to_string())
     }
 }
 
 #[async_trait]
-impl CustomizeConnection<Connection<DbConnection>, ConnectionError>
-    for ConnectionCustomizer
-{
+impl backend::Connector for DieselPgConnector {
+    type Connection = async_bb8_diesel::Connection<DbConnection>;
+
+    async fn connect(
+        &self,
+        backend: &Backend,
+    ) -> Result<Self::Connection, Error> {
+        let url = self.to_url(backend.address).map_err(Error::Other)?;
+
+        let conn = tokio::task::spawn_blocking(move || {
+            let pg_conn = DbConnection::establish(&url)
+                .map_err(|e| Error::Other(anyhow!(e)))?;
+            Ok::<_, Error>(async_bb8_diesel::Connection::new(pg_conn))
+        })
+        .await
+        .expect("Task panicked establishing connection")
+        .map_err(|e| {
+            warn!(
+                self.log,
+                "Failed to make connection";
+                "error" => e.to_string(),
+                "backend" => backend.address,
+            );
+            e
+        })?;
+        Ok(conn)
+    }
+
     async fn on_acquire(
         &self,
-        conn: &mut Connection<DbConnection>,
-    ) -> Result<(), ConnectionError> {
-        self.disallow_full_table_scans(conn).await?;
+        conn: &mut Self::Connection,
+    ) -> Result<(), Error> {
+        conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await.map_err(
+            |e| {
+                warn!(
+                    self.log,
+                    "Failed on_acquire execution";
+                    "error" => e.to_string()
+                );
+                Error::Other(anyhow!(e))
+            },
+        )?;
         Ok(())
     }
+
+    async fn is_valid(&self, conn: &mut Self::Connection) -> Result<(), Error> {
+        let is_broken = conn.is_broken_async().await;
+        if is_broken {
+            warn!(
+ self.log, + "Failed is_valid check; connection known to be broken" + ); + return Err(Error::Other(anyhow!("Connection broken"))); + } + conn.ping_async().await.map_err(|e| { + warn!( + self.log, + "Failed is_valid check; connection failed ping"; + "error" => e.to_string() + ); + Error::Other(anyhow!(e)) + }) + } } diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 7ea44b33fb9..4d752d451b3 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -918,7 +918,8 @@ mod tests { crate::db::datastore::test_utils::datastore_test(&logctx, &db) .await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); let db_datastore = Arc::new( crate::db::DataStore::new(&logctx.log, Arc::clone(&pool), None) .await diff --git a/nexus/db-queries/src/db/queries/next_item.rs b/nexus/db-queries/src/db/queries/next_item.rs index 769c8913493..658d151a5b2 100644 --- a/nexus/db-queries/src/db/queries/next_item.rs +++ b/nexus/db-queries/src/db/queries/next_item.rs @@ -616,7 +616,7 @@ mod tests { } async fn setup_test_schema(pool: &db::Pool) { - let connection = pool.pool().get().await.unwrap(); + let connection = pool.claim().await.unwrap(); (*connection) .batch_execute_async( "CREATE SCHEMA IF NOT EXISTS test_schema; \ @@ -708,8 +708,9 @@ mod tests { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // We're going to operate on a separate table, for simplicity. setup_test_schema(&pool).await; @@ -770,8 +771,9 @@ mod tests { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); // We're going to operate on a separate table, for simplicity. 
setup_test_schema(&pool).await; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 7cf378d53bd..dbf37fda2e6 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -507,8 +507,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let volume_id = Uuid::new_v4(); let params = RegionParameters { diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 902d955a796..9d2ed04c850 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -568,8 +568,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = Uuid::nil(); let project_id = Uuid::nil(); @@ -597,8 +597,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = Uuid::nil(); let project_id = Uuid::nil(); @@ -624,8 +624,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = InstanceUuid::nil(); let project_id = Uuid::nil(); @@ -650,8 +650,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = crate::db::Pool::new(&logctx.log, &cfg); - let conn = pool.pool().get().await.unwrap(); + let pool = crate::db::Pool::new_single_host(&logctx.log, &cfg); + let conn = pool.claim().await.unwrap(); let id = InstanceUuid::nil(); let project_id = Uuid::nil(); diff --git a/nexus/db-queries/src/db/queries/vpc_subnet.rs b/nexus/db-queries/src/db/queries/vpc_subnet.rs index 8cbf4495ca1..85c771c0503 100644 --- a/nexus/db-queries/src/db/queries/vpc_subnet.rs +++ b/nexus/db-queries/src/db/queries/vpc_subnet.rs @@ -313,8 +313,9 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); - let conn = pool.pool().get().await.unwrap(); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); + let conn = pool.claim().await.unwrap(); let explain = 
query.explain_async(&conn).await.unwrap(); println!("{explain}"); db.cleanup().await.unwrap(); @@ -352,7 +353,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); let db_datastore = Arc::new( crate::db::DataStore::new(&log, Arc::clone(&pool), None) .await @@ -544,7 +546,8 @@ mod test { let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let pool = + Arc::new(crate::db::Pool::new_single_host(&logctx.log, &cfg)); let db_datastore = Arc::new( crate::db::DataStore::new(&log, Arc::clone(&pool), None) .await diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 0dcc3aa7171..920ff3aee10 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -4,7 +4,7 @@ //! Implementation of [`steno::SecStore`] backed by Omicron's database -use crate::db::{self, model::Generation}; +use crate::db; use anyhow::Context; use async_trait::async_trait; use dropshot::HttpError; @@ -102,12 +102,7 @@ impl steno::SecStore for CockroachDbSecStore { &log, || { self.datastore - .saga_update_state( - id, - update, - self.sec_id, - Generation::new(), - ) + .saga_update_state(id, update, self.sec_id) .map_err(backoff::BackoffError::transient) }, "updating saga state", diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 6331770ef56..4e7dde244bd 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -270,7 +270,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -284,6 +285,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -310,6 +312,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index e713121d34b..b2c164a6d90 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -268,7 +268,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -282,6 +283,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -308,6 +310,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 0b8dc4fca68..97ee23f82e6 100644 --- 
a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -281,7 +281,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -295,6 +296,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -321,6 +323,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index 9ac945f71d4..a1cc1035940 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -279,7 +279,8 @@ WITH dataset.ip, dataset.port, dataset.kind, - dataset.size_used + dataset.size_used, + dataset.zone_name ) ( SELECT @@ -293,6 +294,7 @@ WITH dataset.port, dataset.kind, dataset.size_used, + dataset.zone_name, old_regions.id, old_regions.time_created, old_regions.time_modified, @@ -319,6 +321,7 @@ UNION updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, + updated_datasets.zone_name, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index 754f37c064f..4c181ef3a28 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -139,6 +139,10 @@ v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 +region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index bd50e846bdc..d25408e6e34 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -125,6 +125,10 @@ v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 +region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
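All of these `period_secs` knobs follow the same shape: a background task wakes on a fixed interval and performs one pass of work. A generic sketch of that pattern (illustrative only, not the actual Nexus background-task framework):

```rust
use std::time::Duration;

/// Illustrative period-driven loop, e.g. for a task configured with
/// `region_snapshot_replacement_start.period_secs = 30`.
async fn run_periodic(period_secs: u64, mut one_pass: impl FnMut()) {
    let mut ticker = tokio::time::interval(Duration::from_secs(period_secs));
    loop {
        // The first tick completes immediately; later ticks fire once per
        // period, so one pass runs roughly every `period_secs` seconds.
        ticker.tick().await;
        one_pass();
    }
}
```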
diff --git a/nexus/external-api/Cargo.toml b/nexus/external-api/Cargo.toml new file mode 100644 index 00000000000..0875e1f5742 --- /dev/null +++ b/nexus/external-api/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "nexus-external-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +dropshot.workspace = true +http.workspace = true +hyper.workspace = true +ipnetwork.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true +openapiv3.workspace = true +openapi-manager-types.workspace = true +oximeter-types.workspace = true +oxql-types.workspace = true diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt similarity index 98% rename from nexus/tests/output/nexus_tags.txt rename to nexus/external-api/output/nexus_tags.txt index 4af018c5af4..bde11e2de3e 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -51,7 +51,6 @@ instance_ephemeral_ip_attach POST /v1/instances/{instance}/exter instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances -instance_migrate POST /v1/instances/{instance}/migrate instance_network_interface_create POST /v1/network-interfaces instance_network_interface_delete DELETE /v1/network-interfaces/{interface} instance_network_interface_list GET /v1/network-interfaces @@ -179,12 +178,14 @@ networking_allow_list_view GET /v1/system/networking/allow-li networking_bfd_disable POST /v1/system/networking/bfd-disable networking_bfd_enable POST /v1/system/networking/bfd-enable networking_bfd_status GET /v1/system/networking/bfd-status -networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce -networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce -networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce +networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce-set/{name_or_id} +networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce-set +networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce-set +networking_bgp_announcement_list GET /v1/system/networking/bgp-announce-set/{name_or_id}/announcement networking_bgp_config_create POST /v1/system/networking/bgp networking_bgp_config_delete DELETE /v1/system/networking/bgp networking_bgp_config_list GET /v1/system/networking/bgp +networking_bgp_exported GET /v1/system/networking/bgp-exported networking_bgp_imported_routes_ipv4 GET /v1/system/networking/bgp-routes-ipv4 networking_bgp_message_history GET /v1/system/networking/bgp-message-history networking_bgp_status GET /v1/system/networking/bgp-status diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs new file mode 100644 index 00000000000..669b25145f4 --- /dev/null +++ b/nexus/external-api/src/lib.rs @@ -0,0 +1,3032 @@ +use std::collections::BTreeMap; + +use anyhow::anyhow; +use dropshot::{ + EmptyScanParams, EndpointTagPolicy, HttpError, HttpResponseAccepted, + HttpResponseCreated, HttpResponseDeleted, HttpResponseFound, + HttpResponseHeaders, HttpResponseOk, HttpResponseSeeOther, + HttpResponseUpdatedNoContent, PaginationParams, Path, Query, + RequestContext, ResultsPage, StreamingBody, TypedBody, + WebsocketChannelResult, WebsocketConnection, +}; +use http::Response; +use hyper::Body; +use 
ipnetwork::IpNetwork; +use nexus_types::{ + authn::cookies::Cookies, + external_api::{params, shared, views}, +}; +use omicron_common::api::external::{ + http_pagination::{PaginatedById, PaginatedByName, PaginatedByNameOrId}, + *, +}; +use openapi_manager_types::ValidationContext; +use openapiv3::OpenAPI; + +pub const API_VERSION: &str = "20240821.0"; + +// API ENDPOINT FUNCTION NAMING CONVENTIONS +// +// Generally, HTTP resources are grouped within some collection. For a +// relatively simple example: +// +// GET v1/projects (list the projects in the collection) +// POST v1/projects (create a project in the collection) +// GET v1/projects/{project} (look up a project in the collection) +// DELETE v1/projects/{project} (delete a project in the collection) +// PUT v1/projects/{project} (update a project in the collection) +// +// We pick a name for the function that implements a given API entrypoint +// based on how we expect it to appear in the CLI subcommand hierarchy. For +// example: +// +// GET v1/projects -> project_list() +// POST v1/projects -> project_create() +// GET v1/projects/{project} -> project_view() +// DELETE v1/projects/{project} -> project_delete() +// PUT v1/projects/{project} -> project_update() +// +// Note that the path typically uses the entity's plural form while the +// function name uses its singular. +// +// Operations beyond list, create, view, delete, and update should use a +// descriptive noun or verb, again bearing in mind that this will be +// transcribed into the CLI and SDKs: +// +// POST -> instance_reboot +// POST -> instance_stop +// GET -> instance_serial_console +// +// Note that these function names end up in generated OpenAPI spec as the +// operationId for each endpoint, and therefore represent a contract with +// clients. Client generators use operationId to name API methods, so changing +// a function name is a breaking change from a client perspective. + +#[dropshot::api_description { + tag_config = { + allow_other_tags = false, + policy = EndpointTagPolicy::ExactlyOne, + tags = { + "disks" = { + description = "Virtual disks are used to store instance-local data which includes the operating system.", + external_docs = { + url = "http://docs.oxide.computer/api/disks" + } + }, + "floating-ips" = { + description = "Floating IPs allow a project to allocate well-known IPs to instances.", + external_docs = { + url = "http://docs.oxide.computer/api/floating-ips" + } + }, + "hidden" = { + description = "TODO operations that will not ship to customers", + external_docs = { + url = "http://docs.oxide.computer/api" + } + }, + "images" = { + description = "Images are read-only virtual disks that may be used to boot virtual machines.", + external_docs = { + url = "http://docs.oxide.computer/api/images" + } + }, + "instances" = { + description = "Virtual machine instances are the basic unit of computation. 
These operations are used for provisioning, controlling, and destroying instances.", + external_docs = { + url = "http://docs.oxide.computer/api/instances" + } + }, + "login" = { + description = "Authentication endpoints", + external_docs = { + url = "http://docs.oxide.computer/api/login" + } + }, + "metrics" = { + description = "Silo-scoped metrics", + external_docs = { + url = "http://docs.oxide.computer/api/metrics" + } + }, + "policy" = { + description = "System-wide IAM policy", + external_docs = { + url = "http://docs.oxide.computer/api/policy" + } + }, + "projects" = { + description = "Projects are a grouping of associated resources such as instances and disks within a silo for purposes of billing and access control.", + external_docs = { + url = "http://docs.oxide.computer/api/projects" + } + }, + "roles" = { + description = "Roles are a component of Identity and Access Management (IAM) that allow a user or agent account access to additional permissions.", + external_docs = { + url = "http://docs.oxide.computer/api/roles" + } + }, + "session" = { + description = "Information pertaining to the current session.", + external_docs = { + url = "http://docs.oxide.computer/api/session" + } + }, + "silos" = { + description = "Silos represent a logical partition of users and resources.", + external_docs = { + url = "http://docs.oxide.computer/api/silos" + } + }, + "snapshots" = { + description = "Snapshots of virtual disks at a particular point in time.", + external_docs = { + url = "http://docs.oxide.computer/api/snapshots" + } + }, + "vpcs" = { + description = "Virtual Private Clouds (VPCs) provide isolated network environments for managing and deploying services.", + external_docs = { + url = "http://docs.oxide.computer/api/vpcs" + } + }, + "system/probes" = { + description = "Probes for testing network connectivity", + external_docs = { + url = "http://docs.oxide.computer/api/probes" + } + }, + "system/status" = { + description = "Endpoints related to system health", + external_docs = { + url = "http://docs.oxide.computer/api/system-status" + } + }, + "system/hardware" = { + description = "These operations pertain to hardware inventory and management. Racks are the unit of expansion of an Oxide deployment. Racks are in turn composed of sleds, switches, power supplies, and a cabled backplane.", + external_docs = { + url = "http://docs.oxide.computer/api/system-hardware" + } + }, + "system/metrics" = { + description = "Metrics provide insight into the operation of the Oxide deployment. These include telemetry on hardware and software components that can be used to understand the current state as well as to diagnose issues.", + external_docs = { + url = "http://docs.oxide.computer/api/system-metrics" + } + }, + "system/networking" = { + description = "This provides rack-level network configuration.", + external_docs = { + url = "http://docs.oxide.computer/api/system-networking" + } + }, + "system/silos" = { + description = "Silos represent a logical partition of users and resources.", + external_docs = { + url = "http://docs.oxide.computer/api/system-silos" + } + } + } + } +}] +pub trait NexusExternalApi { + type Context; + + /// Ping API + /// + /// Always responds with Ok if it responds at all. 
+ #[endpoint { + method = GET, + path = "/v1/ping", + tags = ["system/status"], + }] + async fn ping( + _rqctx: RequestContext, + ) -> Result, HttpError> { + Ok(HttpResponseOk(views::Ping { status: views::PingStatus::Ok })) + } + + /// Fetch top-level IAM policy + #[endpoint { + method = GET, + path = "/v1/system/policy", + tags = ["policy"], + }] + async fn system_policy_view( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Update top-level IAM policy + #[endpoint { + method = PUT, + path = "/v1/system/policy", + tags = ["policy"], + }] + async fn system_policy_update( + rqctx: RequestContext, + new_policy: TypedBody>, + ) -> Result>, HttpError>; + + /// Fetch current silo's IAM policy + #[endpoint { + method = GET, + path = "/v1/policy", + tags = ["silos"], + }] + async fn policy_view( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Update current silo's IAM policy + #[endpoint { + method = PUT, + path = "/v1/policy", + tags = ["silos"], + }] + async fn policy_update( + rqctx: RequestContext, + new_policy: TypedBody>, + ) -> Result>, HttpError>; + + /// Fetch resource utilization for user's current silo + #[endpoint { + method = GET, + path = "/v1/utilization", + tags = ["silos"], + }] + async fn utilization_view( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Fetch current utilization for given silo + #[endpoint { + method = GET, + path = "/v1/system/utilization/silos/{silo}", + tags = ["system/silos"], + }] + async fn silo_utilization_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List current utilization state for all silos + #[endpoint { + method = GET, + path = "/v1/system/utilization/silos", + tags = ["system/silos"], + }] + async fn silo_utilization_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Lists resource quotas for all silos + #[endpoint { + method = GET, + path = "/v1/system/silo-quotas", + tags = ["system/silos"], + }] + async fn system_quotas_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch resource quotas for silo + #[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], + }] + async fn silo_quotas_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Update resource quotas for silo + /// + /// If a quota value is not specified, it will remain unchanged. + #[endpoint { + method = PUT, + path = "/v1/system/silos/{silo}/quotas", + tags = ["system/silos"], + }] + async fn silo_quotas_update( + rqctx: RequestContext, + path_params: Path, + new_quota: TypedBody, + ) -> Result, HttpError>; + + /// List silos + /// + /// Lists silos that are discoverable based on the current permissions. + #[endpoint { + method = GET, + path = "/v1/system/silos", + tags = ["system/silos"], + }] + async fn silo_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create a silo + #[endpoint { + method = POST, + path = "/v1/system/silos", + tags = ["system/silos"], + }] + async fn silo_create( + rqctx: RequestContext, + new_silo_params: TypedBody, + ) -> Result, HttpError>; + + /// Fetch silo + /// + /// Fetch silo by name or ID. 
+ #[endpoint { + method = GET, + path = "/v1/system/silos/{silo}", + tags = ["system/silos"], + }] + async fn silo_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List IP pools linked to silo + /// + /// Linked IP pools are available to users in the specified silo. A silo + /// can have at most one default pool. IPs are allocated from the default + /// pool when users ask for one without specifying a pool. + #[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/ip-pools", + tags = ["system/silos"], + }] + async fn silo_ip_pool_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + /// Delete a silo + /// + /// Delete a silo by name or ID. + #[endpoint { + method = DELETE, + path = "/v1/system/silos/{silo}", + tags = ["system/silos"], + }] + async fn silo_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Fetch silo IAM policy + #[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/policy", + tags = ["system/silos"], + }] + async fn silo_policy_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result>, HttpError>; + + /// Update silo IAM policy + #[endpoint { + method = PUT, + path = "/v1/system/silos/{silo}/policy", + tags = ["system/silos"], + }] + async fn silo_policy_update( + rqctx: RequestContext, + path_params: Path, + new_policy: TypedBody>, + ) -> Result>, HttpError>; + + // Silo-specific user endpoints + + /// List built-in (system) users in silo + #[endpoint { + method = GET, + path = "/v1/system/users", + tags = ["system/silos"], + }] + async fn silo_user_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Fetch built-in (system) user + #[endpoint { + method = GET, + path = "/v1/system/users/{user_id}", + tags = ["system/silos"], + }] + async fn silo_user_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + // Silo identity providers + + /// List a silo's IdP's name + #[endpoint { + method = GET, + path = "/v1/system/identity-providers", + tags = ["system/silos"], + }] + async fn silo_identity_provider_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + // Silo SAML identity providers + + /// Create SAML IdP + #[endpoint { + method = POST, + path = "/v1/system/identity-providers/saml", + tags = ["system/silos"], + }] + async fn saml_identity_provider_create( + rqctx: RequestContext, + query_params: Query, + new_provider: TypedBody, + ) -> Result, HttpError>; + + /// Fetch SAML IdP + #[endpoint { + method = GET, + path = "/v1/system/identity-providers/saml/{provider}", + tags = ["system/silos"], + }] + async fn saml_identity_provider_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + // TODO: no DELETE for identity providers? + + // "Local" Identity Provider + + /// Create user + /// + /// Users can only be created in Silos with `provision_type` == `Fixed`. + /// Otherwise, Silo users are just-in-time (JIT) provisioned when a user + /// first logs in using an external Identity Provider. 
+ #[endpoint { + method = POST, + path = "/v1/system/identity-providers/local/users", + tags = ["system/silos"], + }] + async fn local_idp_user_create( + rqctx: RequestContext, + query_params: Query, + new_user_params: TypedBody, + ) -> Result, HttpError>; + + /// Delete user + #[endpoint { + method = DELETE, + path = "/v1/system/identity-providers/local/users/{user_id}", + tags = ["system/silos"], + }] + async fn local_idp_user_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Set or invalidate user's password + /// + /// Passwords can only be updated for users in Silos with identity mode + /// `LocalOnly`. + #[endpoint { + method = POST, + path = "/v1/system/identity-providers/local/users/{user_id}/set-password", + tags = ["system/silos"], + }] + async fn local_idp_user_set_password( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + update: TypedBody, + ) -> Result; + + /// List projects + #[endpoint { + method = GET, + path = "/v1/projects", + tags = ["projects"], + }] + async fn project_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create project + #[endpoint { + method = POST, + path = "/v1/projects", + tags = ["projects"], + }] + async fn project_create( + rqctx: RequestContext, + new_project: TypedBody, + ) -> Result, HttpError>; + + /// Fetch project + #[endpoint { + method = GET, + path = "/v1/projects/{project}", + tags = ["projects"], + }] + async fn project_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Delete project + #[endpoint { + method = DELETE, + path = "/v1/projects/{project}", + tags = ["projects"], + }] + async fn project_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + // TODO-correctness: Is it valid for PUT to accept application/json that's + // a subset of what the resource actually represents? If not, is that a + // problem? (HTTP may require that this be idempotent.) If so, can we get + // around that having this be a slightly different content-type (e.g., + // "application/json-patch")? We should see what other APIs do. 
+ /// Update a project + #[endpoint { + method = PUT, + path = "/v1/projects/{project}", + tags = ["projects"], + }] + async fn project_update( + rqctx: RequestContext, + path_params: Path, + updated_project: TypedBody, + ) -> Result, HttpError>; + + /// Fetch project's IAM policy + #[endpoint { + method = GET, + path = "/v1/projects/{project}/policy", + tags = ["projects"], + }] + async fn project_policy_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result>, HttpError>; + + /// Update project's IAM policy + #[endpoint { + method = PUT, + path = "/v1/projects/{project}/policy", + tags = ["projects"], + }] + async fn project_policy_update( + rqctx: RequestContext, + path_params: Path, + new_policy: TypedBody>, + ) -> Result>, HttpError>; + + // IP Pools + + /// List IP pools + #[endpoint { + method = GET, + path = "/v1/ip-pools", + tags = ["projects"], + }] + async fn project_ip_pool_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch IP pool + #[endpoint { + method = GET, + path = "/v1/ip-pools/{pool}", + tags = ["projects"], + }] + async fn project_ip_pool_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List IP pools + #[endpoint { + method = GET, + path = "/v1/system/ip-pools", + tags = ["system/networking"], + }] + async fn ip_pool_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create IP pool + #[endpoint { + method = POST, + path = "/v1/system/ip-pools", + tags = ["system/networking"], + }] + async fn ip_pool_create( + rqctx: RequestContext, + pool_params: TypedBody, + ) -> Result, HttpError>; + + /// Fetch IP pool + #[endpoint { + method = GET, + path = "/v1/system/ip-pools/{pool}", + tags = ["system/networking"], + }] + async fn ip_pool_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Delete IP pool + #[endpoint { + method = DELETE, + path = "/v1/system/ip-pools/{pool}", + tags = ["system/networking"], + }] + async fn ip_pool_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Update IP pool + #[endpoint { + method = PUT, + path = "/v1/system/ip-pools/{pool}", + tags = ["system/networking"], + }] + async fn ip_pool_update( + rqctx: RequestContext, + path_params: Path, + updates: TypedBody, + ) -> Result, HttpError>; + + /// Fetch IP pool utilization + #[endpoint { + method = GET, + path = "/v1/system/ip-pools/{pool}/utilization", + tags = ["system/networking"], + }] + async fn ip_pool_utilization_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List IP pool's linked silos + #[endpoint { + method = GET, + path = "/v1/system/ip-pools/{pool}/silos", + tags = ["system/networking"], + }] + async fn ip_pool_silo_list( + rqctx: RequestContext, + path_params: Path, + // paginating by resource_id because they're unique per pool. most robust + // option would be to paginate by a composite key representing the (pool, + // resource_type, resource) + query_params: Query, + // TODO: this could just list views::Silo -- it's not like knowing silo_id + // and nothing else is particularly useful -- except we also want to say + // whether the pool is marked default on each silo. So one option would + // be to do the same as we did with SiloIpPool -- include is_default on + // whatever the thing is. Still... 
all we'd have to do to make this usable + // in both places would be to make it { ...IpPool, silo_id, silo_name, + // is_default } + ) -> Result>, HttpError>; + + /// Link IP pool to silo + /// + /// Users in linked silos can allocate external IPs from this pool for their + /// instances. A silo can have at most one default pool. IPs are allocated from + /// the default pool when users ask for one without specifying a pool. + #[endpoint { + method = POST, + path = "/v1/system/ip-pools/{pool}/silos", + tags = ["system/networking"], + }] + async fn ip_pool_silo_link( + rqctx: RequestContext, + path_params: Path, + resource_assoc: TypedBody, + ) -> Result, HttpError>; + + /// Unlink IP pool from silo + /// + /// Will fail if there are any outstanding IPs allocated in the silo. + #[endpoint { + method = DELETE, + path = "/v1/system/ip-pools/{pool}/silos/{silo}", + tags = ["system/networking"], + }] + async fn ip_pool_silo_unlink( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Make IP pool default for silo + /// + /// When a user asks for an IP (e.g., at instance create time) without + /// specifying a pool, the IP comes from the default pool if a default is + /// configured. When a pool is made the default for a silo, any existing + /// default will remain linked to the silo, but will no longer be the + /// default. + #[endpoint { + method = PUT, + path = "/v1/system/ip-pools/{pool}/silos/{silo}", + tags = ["system/networking"], + }] + async fn ip_pool_silo_update( + rqctx: RequestContext, + path_params: Path, + update: TypedBody, + ) -> Result, HttpError>; + + /// Fetch Oxide service IP pool + #[endpoint { + method = GET, + path = "/v1/system/ip-pools-service", + tags = ["system/networking"], + }] + async fn ip_pool_service_view( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// List ranges for IP pool + /// + /// Ranges are ordered by their first address. + #[endpoint { + method = GET, + path = "/v1/system/ip-pools/{pool}/ranges", + tags = ["system/networking"], + }] + async fn ip_pool_range_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + /// Add range to IP pool + /// + /// IPv6 ranges are not allowed yet. + #[endpoint { + method = POST, + path = "/v1/system/ip-pools/{pool}/ranges/add", + tags = ["system/networking"], + }] + async fn ip_pool_range_add( + rqctx: RequestContext, + path_params: Path, + range_params: TypedBody, + ) -> Result, HttpError>; + + /// Remove range from IP pool + #[endpoint { + method = POST, + path = "/v1/system/ip-pools/{pool}/ranges/remove", + tags = ["system/networking"], + }] + async fn ip_pool_range_remove( + rqctx: RequestContext, + path_params: Path, + range_params: TypedBody, + ) -> Result; + + /// List IP ranges for the Oxide service pool + /// + /// Ranges are ordered by their first address. + #[endpoint { + method = GET, + path = "/v1/system/ip-pools-service/ranges", + tags = ["system/networking"], + }] + async fn ip_pool_service_range_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Add IP range to Oxide service pool + /// + /// IPv6 ranges are not allowed yet. 
+    #[endpoint {
+        method = POST,
+        path = "/v1/system/ip-pools-service/ranges/add",
+        tags = ["system/networking"],
+    }]
+    async fn ip_pool_service_range_add(
+        rqctx: RequestContext<Self::Context>,
+        range_params: TypedBody<shared::IpRange>,
+    ) -> Result<HttpResponseCreated<views::IpPoolRange>, HttpError>;
+
+    /// Remove IP range from Oxide service pool
+    #[endpoint {
+        method = POST,
+        path = "/v1/system/ip-pools-service/ranges/remove",
+        tags = ["system/networking"],
+    }]
+    async fn ip_pool_service_range_remove(
+        rqctx: RequestContext<Self::Context>,
+        range_params: TypedBody<shared::IpRange>,
+    ) -> Result<HttpResponseUpdatedNoContent, HttpError>;
+
+    // Floating IP Addresses
+
+    /// List floating IPs
+    #[endpoint {
+        method = GET,
+        path = "/v1/floating-ips",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_list(
+        rqctx: RequestContext<Self::Context>,
+        query_params: Query<PaginatedByNameOrId<params::ProjectSelector>>,
+    ) -> Result<HttpResponseOk<ResultsPage<views::FloatingIp>>, HttpError>;
+
+    /// Create floating IP
+    #[endpoint {
+        method = POST,
+        path = "/v1/floating-ips",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_create(
+        rqctx: RequestContext<Self::Context>,
+        query_params: Query<params::ProjectSelector>,
+        floating_params: TypedBody<params::FloatingIpCreate>,
+    ) -> Result<HttpResponseCreated<views::FloatingIp>, HttpError>;
+
+    /// Update floating IP
+    #[endpoint {
+        method = PUT,
+        path = "/v1/floating-ips/{floating_ip}",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_update(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<params::FloatingIpPath>,
+        query_params: Query<params::OptionalProjectSelector>,
+        updated_floating_ip: TypedBody<params::FloatingIpUpdate>,
+    ) -> Result<HttpResponseOk<views::FloatingIp>, HttpError>;
+
+    /// Delete floating IP
+    #[endpoint {
+        method = DELETE,
+        path = "/v1/floating-ips/{floating_ip}",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_delete(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<params::FloatingIpPath>,
+        query_params: Query<params::OptionalProjectSelector>,
+    ) -> Result<HttpResponseDeleted, HttpError>;
+
+    /// Fetch floating IP
+    #[endpoint {
+        method = GET,
+        path = "/v1/floating-ips/{floating_ip}",
+        tags = ["floating-ips"]
+    }]
+    async fn floating_ip_view(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<params::FloatingIpPath>,
+        query_params: Query<params::OptionalProjectSelector>,
+    ) -> Result<HttpResponseOk<views::FloatingIp>, HttpError>;
+
+    /// Attach floating IP
+    ///
+    /// Attach floating IP to an instance or other resource.
+    #[endpoint {
+        method = POST,
+        path = "/v1/floating-ips/{floating_ip}/attach",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_attach(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<params::FloatingIpPath>,
+        query_params: Query<params::OptionalProjectSelector>,
+        target: TypedBody<params::FloatingIpAttach>,
+    ) -> Result<HttpResponseAccepted<views::FloatingIp>, HttpError>;
+
+    /// Detach floating IP
+    ///
+    /// Detach floating IP from instance or other resource.
+    #[endpoint {
+        method = POST,
+        path = "/v1/floating-ips/{floating_ip}/detach",
+        tags = ["floating-ips"],
+    }]
+    async fn floating_ip_detach(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<params::FloatingIpPath>,
+        query_params: Query<params::OptionalProjectSelector>,
+    ) -> Result<HttpResponseAccepted<views::FloatingIp>, HttpError>;
+
+    // Disks
+
+    /// List disks
+    #[endpoint {
+        method = GET,
+        path = "/v1/disks",
+        tags = ["disks"],
+    }]
+    async fn disk_list(
+        rqctx: RequestContext<Self::Context>,
+        query_params: Query<PaginatedByNameOrId<params::ProjectSelector>>,
+    ) -> Result<HttpResponseOk<ResultsPage<Disk>>, HttpError>;
+
+    // TODO-correctness See note about instance create.  This should be async.
+ /// Create a disk + #[endpoint { + method = POST, + path = "/v1/disks", + tags = ["disks"] + }] + async fn disk_create( + rqctx: RequestContext, + query_params: Query, + new_disk: TypedBody, + ) -> Result, HttpError>; + + /// Fetch disk + #[endpoint { + method = GET, + path = "/v1/disks/{disk}", + tags = ["disks"] + }] + async fn disk_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Delete disk + #[endpoint { + method = DELETE, + path = "/v1/disks/{disk}", + tags = ["disks"], + }] + async fn disk_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Fetch disk metrics + #[endpoint { + method = GET, + path = "/v1/disks/{disk}/metrics/{metric}", + tags = ["disks"], + }] + async fn disk_metrics_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query< + PaginationParams, + >, + selector_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// Start importing blocks into disk + /// + /// Start the process of importing blocks into a disk + #[endpoint { + method = POST, + path = "/v1/disks/{disk}/bulk-write-start", + tags = ["disks"], + }] + async fn disk_bulk_write_import_start( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Import blocks into disk + #[endpoint { + method = POST, + path = "/v1/disks/{disk}/bulk-write", + tags = ["disks"], + }] + async fn disk_bulk_write_import( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + import_params: TypedBody, + ) -> Result; + + /// Stop importing blocks into disk + /// + /// Stop the process of importing blocks into a disk + #[endpoint { + method = POST, + path = "/v1/disks/{disk}/bulk-write-stop", + tags = ["disks"], + }] + async fn disk_bulk_write_import_stop( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Confirm disk block import completion + #[endpoint { + method = POST, + path = "/v1/disks/{disk}/finalize", + tags = ["disks"], + }] + async fn disk_finalize_import( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + finalize_params: TypedBody, + ) -> Result; + + // Instances + + /// List instances + #[endpoint { + method = GET, + path = "/v1/instances", + tags = ["instances"], + }] + async fn instance_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Create instance + #[endpoint { + method = POST, + path = "/v1/instances", + tags = ["instances"], + }] + async fn instance_create( + rqctx: RequestContext, + query_params: Query, + new_instance: TypedBody, + ) -> Result, HttpError>; + + /// Fetch instance + #[endpoint { + method = GET, + path = "/v1/instances/{instance}", + tags = ["instances"], + }] + async fn instance_view( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result, HttpError>; + + /// Delete instance + #[endpoint { + method = DELETE, + path = "/v1/instances/{instance}", + tags = ["instances"], + }] + async fn instance_delete( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result; + + /// Reboot an instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/reboot", + tags = ["instances"], + }] + async fn instance_reboot( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result, HttpError>; + + /// Boot instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/start", + tags = ["instances"], + }] + async fn 
instance_start( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result, HttpError>; + + /// Stop instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/stop", + tags = ["instances"], + }] + async fn instance_stop( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result, HttpError>; + + /// Fetch instance serial console + #[endpoint { + method = GET, + path = "/v1/instances/{instance}/serial-console", + tags = ["instances"], + }] + async fn instance_serial_console( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Stream instance serial console + #[channel { + protocol = WEBSOCKETS, + path = "/v1/instances/{instance}/serial-console/stream", + tags = ["instances"], + }] + async fn instance_serial_console_stream( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + conn: WebsocketConnection, + ) -> WebsocketChannelResult; + + /// List SSH public keys for instance + /// + /// List SSH public keys injected via cloud-init during instance creation. + /// Note that this list is a snapshot in time and will not reflect updates + /// made after the instance is created. + #[endpoint { + method = GET, + path = "/v1/instances/{instance}/ssh-public-keys", + tags = ["instances"], + }] + async fn instance_ssh_public_key_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError>; + + /// List disks for instance + #[endpoint { + method = GET, + path = "/v1/instances/{instance}/disks", + tags = ["instances"], + }] + async fn instance_disk_list( + rqctx: RequestContext, + query_params: Query< + PaginatedByNameOrId, + >, + path_params: Path, + ) -> Result>, HttpError>; + + /// Attach disk to instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/disks/attach", + tags = ["instances"], + }] + async fn instance_disk_attach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + disk_to_attach: TypedBody, + ) -> Result, HttpError>; + + /// Detach disk from instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/disks/detach", + tags = ["instances"], + }] + async fn instance_disk_detach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + disk_to_detach: TypedBody, + ) -> Result, HttpError>; + + // Certificates + + /// List certificates for external endpoints + /// + /// Returns a list of TLS certificates used for the external API (for the + /// current Silo). These are sorted by creation date, with the most recent + /// certificates appearing first. + #[endpoint { + method = GET, + path = "/v1/certificates", + tags = ["silos"], + }] + async fn certificate_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create new system-wide x.509 certificate + /// + /// This certificate is automatically used by the Oxide Control plane to serve + /// external connections. 
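The "snapshot in time" note on `instance_ssh_public_key_list` above is easy to miss: keys are copied into the instance at create time rather than referenced. A minimal sketch of that semantic, with hypothetical types:

```rust
/// Hypothetical sketch: keys are captured once, at creation, via
/// cloud-init, so later edits to the user's key list do not propagate.
struct SshKeys(Vec<String>);

struct Instance {
    // Captured at create time; never refreshed afterward.
    ssh_keys_at_create: SshKeys,
}

fn create_instance(current_user_keys: &SshKeys) -> Instance {
    // Clone, not reference: the instance keeps its own copy.
    Instance { ssh_keys_at_create: SshKeys(current_user_keys.0.clone()) }
}
```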
+ #[endpoint { + method = POST, + path = "/v1/certificates", + tags = ["silos"] + }] + async fn certificate_create( + rqctx: RequestContext, + new_cert: TypedBody, + ) -> Result, HttpError>; + + /// Fetch certificate + /// + /// Returns the details of a specific certificate + #[endpoint { + method = GET, + path = "/v1/certificates/{certificate}", + tags = ["silos"], + }] + async fn certificate_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Delete certificate + /// + /// Permanently delete a certificate. This operation cannot be undone. + #[endpoint { + method = DELETE, + path = "/v1/certificates/{certificate}", + tags = ["silos"], + }] + async fn certificate_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// Create address lot + #[endpoint { + method = POST, + path = "/v1/system/networking/address-lot", + tags = ["system/networking"], + }] + async fn networking_address_lot_create( + rqctx: RequestContext, + new_address_lot: TypedBody, + ) -> Result, HttpError>; + + /// Delete address lot + #[endpoint { + method = DELETE, + path = "/v1/system/networking/address-lot/{address_lot}", + tags = ["system/networking"], + }] + async fn networking_address_lot_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + /// List address lots + #[endpoint { + method = GET, + path = "/v1/system/networking/address-lot", + tags = ["system/networking"], + }] + async fn networking_address_lot_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// List blocks in address lot + #[endpoint { + method = GET, + path = "/v1/system/networking/address-lot/{address_lot}/blocks", + tags = ["system/networking"], + }] + async fn networking_address_lot_block_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create loopback address + #[endpoint { + method = POST, + path = "/v1/system/networking/loopback-address", + tags = ["system/networking"], + }] + async fn networking_loopback_address_create( + rqctx: RequestContext, + new_loopback_address: TypedBody, + ) -> Result, HttpError>; + + /// Delete loopback address + #[endpoint { + method = DELETE, + path = "/v1/system/networking/loopback-address/{rack_id}/{switch_location}/{address}/{subnet_mask}", + tags = ["system/networking"], + }] + async fn networking_loopback_address_delete( + rqctx: RequestContext, + path: Path, + ) -> Result; + + /// List loopback addresses + #[endpoint { + method = GET, + path = "/v1/system/networking/loopback-address", + tags = ["system/networking"], + }] + async fn networking_loopback_address_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create switch port settings + #[endpoint { + method = POST, + path = "/v1/system/networking/switch-port-settings", + tags = ["system/networking"], + }] + async fn networking_switch_port_settings_create( + rqctx: RequestContext, + new_settings: TypedBody, + ) -> Result, HttpError>; + + /// Delete switch port settings + #[endpoint { + method = DELETE, + path = "/v1/system/networking/switch-port-settings", + tags = ["system/networking"], + }] + async fn networking_switch_port_settings_delete( + rqctx: RequestContext, + query_params: Query, + ) -> Result; + + /// List switch port settings + #[endpoint { + method = GET, + path = "/v1/system/networking/switch-port-settings", + tags = ["system/networking"], + }] + async fn networking_switch_port_settings_list( + rqctx: RequestContext, + query_params: 
Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError>; + + /// Get information about switch port + #[endpoint { + method = GET, + path = "/v1/system/networking/switch-port-settings/{port}", + tags = ["system/networking"], + }] + async fn networking_switch_port_settings_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List switch ports + #[endpoint { + method = GET, + path = "/v1/system/hardware/switch-port", + tags = ["system/hardware"], + }] + async fn networking_switch_port_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Get switch port status + #[endpoint { + method = GET, + path = "/v1/system/hardware/switch-port/{port}/status", + tags = ["system/hardware"], + }] + async fn networking_switch_port_status( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Apply switch port settings + #[endpoint { + method = POST, + path = "/v1/system/hardware/switch-port/{port}/settings", + tags = ["system/hardware"], + }] + async fn networking_switch_port_apply_settings( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + settings_body: TypedBody, + ) -> Result; + + /// Clear switch port settings + #[endpoint { + method = DELETE, + path = "/v1/system/hardware/switch-port/{port}/settings", + tags = ["system/hardware"], + }] + async fn networking_switch_port_clear_settings( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Create new BGP configuration + #[endpoint { + method = POST, + path = "/v1/system/networking/bgp", + tags = ["system/networking"], + }] + async fn networking_bgp_config_create( + rqctx: RequestContext, + config: TypedBody, + ) -> Result, HttpError>; + + /// List BGP configurations + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp", + tags = ["system/networking"], + }] + async fn networking_bgp_config_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + //TODO pagination? the normal by-name/by-id stuff does not work here + /// Get BGP peer status + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-status", + tags = ["system/networking"], + }] + async fn networking_bgp_status( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + //TODO pagination? the normal by-name/by-id stuff does not work here + /// Get BGP exported routes + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-exported", + tags = ["system/networking"], + }] + async fn networking_bgp_exported( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Get BGP router message history + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-message-history", + tags = ["system/networking"], + }] + async fn networking_bgp_message_history( + rqctx: RequestContext, + query_params: Query, + ) -> Result, HttpError>; + + //TODO pagination? 
the normal by-name/by-id stuff does not work here + /// Get imported IPv4 BGP routes + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-routes-ipv4", + tags = ["system/networking"], + }] + async fn networking_bgp_imported_routes_ipv4( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Delete BGP configuration + #[endpoint { + method = DELETE, + path = "/v1/system/networking/bgp", + tags = ["system/networking"], + }] + async fn networking_bgp_config_delete( + rqctx: RequestContext, + sel: Query, + ) -> Result; + + /// Update BGP announce set + /// + /// If the announce set exists, this endpoint replaces the existing announce + /// set with the one specified. + #[endpoint { + method = PUT, + path = "/v1/system/networking/bgp-announce-set", + tags = ["system/networking"], + }] + async fn networking_bgp_announce_set_update( + rqctx: RequestContext, + config: TypedBody, + ) -> Result, HttpError>; + + /// List BGP announce sets + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-announce-set", + tags = ["system/networking"], + }] + async fn networking_bgp_announce_set_list( + rqctx: RequestContext, + query_params: Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError>; + + /// Delete BGP announce set + #[endpoint { + method = DELETE, + path = "/v1/system/networking/bgp-announce-set/{name_or_id}", + tags = ["system/networking"], + }] + async fn networking_bgp_announce_set_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + // TODO: is pagination necessary here? How large do we expect the list of + // announcements to become in real usage? + /// Get originated routes for a specified BGP announce set + #[endpoint { + method = GET, + path = "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement", + tags = ["system/networking"], + }] + async fn networking_bgp_announcement_list( + rqctx: RequestContext, + path_params: Path, + ) -> Result>, HttpError>; + + /// Enable a BFD session + #[endpoint { + method = POST, + path = "/v1/system/networking/bfd-enable", + tags = ["system/networking"], + }] + async fn networking_bfd_enable( + rqctx: RequestContext, + session: TypedBody, + ) -> Result; + + /// Disable a BFD session + #[endpoint { + method = POST, + path = "/v1/system/networking/bfd-disable", + tags = ["system/networking"], + }] + async fn networking_bfd_disable( + rqctx: RequestContext, + session: TypedBody, + ) -> Result; + + /// Get BFD status + #[endpoint { + method = GET, + path = "/v1/system/networking/bfd-status", + tags = ["system/networking"], + }] + async fn networking_bfd_status( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Get user-facing services IP allowlist + #[endpoint { + method = GET, + path = "/v1/system/networking/allow-list", + tags = ["system/networking"], + }] + async fn networking_allow_list_view( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Update user-facing services IP allowlist + #[endpoint { + method = PUT, + path = "/v1/system/networking/allow-list", + tags = ["system/networking"], + }] + async fn networking_allow_list_update( + rqctx: RequestContext, + params: TypedBody, + ) -> Result, HttpError>; + + // Images + + /// List images + /// + /// List images which are global or scoped to the specified project. The images + /// are returned sorted by creation date, with the most recent images appearing first. 
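Note that `networking_bgp_announce_set_update` above is a wholesale replace, not a merge. A minimal sketch of that PUT semantic, using a plain map in place of the real datastore:

```rust
use std::collections::BTreeMap;

/// Sketch of the documented PUT semantics: create the announce set if it
/// doesn't exist, otherwise replace it entirely (no merging).
fn put_announce_set(
    sets: &mut BTreeMap<String, Vec<String>>, // name -> announcements
    name: &str,
    announcements: Vec<String>,
) {
    sets.insert(name.to_string(), announcements);
}
```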
+ #[endpoint { + method = GET, + path = "/v1/images", + tags = ["images"], + }] + async fn image_list( + rqctx: RequestContext, + query_params: Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError>; + + /// Create image + /// + /// Create a new image in a project. + #[endpoint { + method = POST, + path = "/v1/images", + tags = ["images"] + }] + async fn image_create( + rqctx: RequestContext, + query_params: Query, + new_image: TypedBody, + ) -> Result, HttpError>; + + /// Fetch image + /// + /// Fetch the details for a specific image in a project. + #[endpoint { + method = GET, + path = "/v1/images/{image}", + tags = ["images"], + }] + async fn image_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Delete image + /// + /// Permanently delete an image from a project. This operation cannot be undone. + /// Any instances in the project using the image will continue to run; however, + /// new instances cannot be created with this image. + #[endpoint { + method = DELETE, + path = "/v1/images/{image}", + tags = ["images"], + }] + async fn image_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Promote project image + /// + /// Promote project image to be visible to all projects in the silo + #[endpoint { + method = POST, + path = "/v1/images/{image}/promote", + tags = ["images"] + }] + async fn image_promote( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Demote silo image + /// + /// Demote silo image to be visible only to a specified project + #[endpoint { + method = POST, + path = "/v1/images/{image}/demote", + tags = ["images"] + }] + async fn image_demote( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// List network interfaces + #[endpoint { + method = GET, + path = "/v1/network-interfaces", + tags = ["instances"], + }] + async fn instance_network_interface_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Create network interface + #[endpoint { + method = POST, + path = "/v1/network-interfaces", + tags = ["instances"], + }] + async fn instance_network_interface_create( + rqctx: RequestContext, + query_params: Query, + interface_params: TypedBody, + ) -> Result, HttpError>; + + /// Delete network interface + /// + /// Note that the primary interface for an instance cannot be deleted if there + /// are any secondary interfaces. A new primary interface must be designated + /// first. The primary interface can be deleted if there are no secondary + /// interfaces.
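The deletion rule documented above reduces to a single invariant. A hypothetical check (not the Nexus implementation):

```rust
/// Hypothetical check mirroring the documented rule: a primary interface
/// can only be deleted once no secondary interfaces remain.
fn can_delete_interface(
    is_primary: bool,
    secondary_count: usize,
) -> Result<(), &'static str> {
    if is_primary && secondary_count > 0 {
        return Err("designate another interface as primary first");
    }
    Ok(())
}
```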
+ #[endpoint { + method = DELETE, + path = "/v1/network-interfaces/{interface}", + tags = ["instances"], + }] + async fn instance_network_interface_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Fetch network interface + #[endpoint { + method = GET, + path = "/v1/network-interfaces/{interface}", + tags = ["instances"], + }] + async fn instance_network_interface_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Update network interface + #[endpoint { + method = PUT, + path = "/v1/network-interfaces/{interface}", + tags = ["instances"], + }] + async fn instance_network_interface_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + updated_iface: TypedBody, + ) -> Result, HttpError>; + + // External IP addresses for instances + + /// List external IP addresses + #[endpoint { + method = GET, + path = "/v1/instances/{instance}/external-ips", + tags = ["instances"], + }] + async fn instance_external_ip_list( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result>, HttpError>; + + /// Allocate and attach ephemeral IP to instance + #[endpoint { + method = POST, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], + }] + async fn instance_ephemeral_ip_attach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ip_to_create: TypedBody, + ) -> Result, HttpError>; + + /// Detach and deallocate ephemeral IP from instance + #[endpoint { + method = DELETE, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], + }] + async fn instance_ephemeral_ip_detach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + // Snapshots + + /// List snapshots + #[endpoint { + method = GET, + path = "/v1/snapshots", + tags = ["snapshots"], + }] + async fn snapshot_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Create snapshot + /// + /// Creates a point-in-time snapshot from a disk. 
+ #[endpoint { + method = POST, + path = "/v1/snapshots", + tags = ["snapshots"], + }] + async fn snapshot_create( + rqctx: RequestContext, + query_params: Query, + new_snapshot: TypedBody, + ) -> Result, HttpError>; + + /// Fetch snapshot + #[endpoint { + method = GET, + path = "/v1/snapshots/{snapshot}", + tags = ["snapshots"], + }] + async fn snapshot_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Delete snapshot + #[endpoint { + method = DELETE, + path = "/v1/snapshots/{snapshot}", + tags = ["snapshots"], + }] + async fn snapshot_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + // VPCs + + /// List VPCs + #[endpoint { + method = GET, + path = "/v1/vpcs", + tags = ["vpcs"], + }] + async fn vpc_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Create VPC + #[endpoint { + method = POST, + path = "/v1/vpcs", + tags = ["vpcs"], + }] + async fn vpc_create( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError>; + + /// Fetch VPC + #[endpoint { + method = GET, + path = "/v1/vpcs/{vpc}", + tags = ["vpcs"], + }] + async fn vpc_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Update a VPC + #[endpoint { + method = PUT, + path = "/v1/vpcs/{vpc}", + tags = ["vpcs"], + }] + async fn vpc_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + updated_vpc: TypedBody, + ) -> Result, HttpError>; + + /// Delete VPC + #[endpoint { + method = DELETE, + path = "/v1/vpcs/{vpc}", + tags = ["vpcs"], + }] + async fn vpc_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// List subnets + #[endpoint { + method = GET, + path = "/v1/vpc-subnets", + tags = ["vpcs"], + }] + async fn vpc_subnet_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Create subnet + #[endpoint { + method = POST, + path = "/v1/vpc-subnets", + tags = ["vpcs"], + }] + async fn vpc_subnet_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError>; + + /// Fetch subnet + #[endpoint { + method = GET, + path = "/v1/vpc-subnets/{subnet}", + tags = ["vpcs"], + }] + async fn vpc_subnet_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Delete subnet + #[endpoint { + method = DELETE, + path = "/v1/vpc-subnets/{subnet}", + tags = ["vpcs"], + }] + async fn vpc_subnet_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Update subnet + #[endpoint { + method = PUT, + path = "/v1/vpc-subnets/{subnet}", + tags = ["vpcs"], + }] + async fn vpc_subnet_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + subnet_params: TypedBody, + ) -> Result, HttpError>; + + // This endpoint is likely temporary. We would rather list all IPs allocated in + // a subnet whether they come from NICs or something else. 
See + // https://github.com/oxidecomputer/omicron/issues/2476 + + /// List network interfaces + #[endpoint { + method = GET, + path = "/v1/vpc-subnets/{subnet}/network-interfaces", + tags = ["vpcs"], + }] + async fn vpc_subnet_list_network_interfaces( + rqctx: RequestContext, + path_params: Path, + query_params: Query>, + ) -> Result>, HttpError>; + + // VPC Firewalls + + /// List firewall rules + #[endpoint { + method = GET, + path = "/v1/vpc-firewall-rules", + tags = ["vpcs"], + }] + async fn vpc_firewall_rules_view( + rqctx: RequestContext, + query_params: Query, + ) -> Result, HttpError>; + + // Note: the limits in the below comment come from the firewall rules model + // file, nexus/db-model/src/vpc_firewall_rule.rs. + + /// Replace firewall rules + /// + /// The maximum number of rules per VPC is 1024. + /// + /// Targets are used to specify the set of instances to which a firewall rule + /// applies. You can target instances directly by name, or specify a VPC, VPC + /// subnet, IP, or IP subnet, which will apply the rule to traffic going to + /// all matching instances. Targets are additive: the rule applies to instances + /// matching ANY target. The maximum number of targets is 256. + /// + /// Filters reduce the scope of a firewall rule. Without filters, the rule + /// applies to all packets to the targets (or from the targets, if it's an + /// outbound rule). With multiple filters, the rule applies only to packets + /// matching ALL filters. The maximum number of each type of filter is 256. + #[endpoint { + method = PUT, + path = "/v1/vpc-firewall-rules", + tags = ["vpcs"], + }] + async fn vpc_firewall_rules_update( + rqctx: RequestContext, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError>; + + // VPC Routers + + /// List routers + #[endpoint { + method = GET, + path = "/v1/vpc-routers", + tags = ["vpcs"], + }] + async fn vpc_router_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// Fetch router + #[endpoint { + method = GET, + path = "/v1/vpc-routers/{router}", + tags = ["vpcs"], + }] + async fn vpc_router_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Create VPC router + #[endpoint { + method = POST, + path = "/v1/vpc-routers", + tags = ["vpcs"], + }] + async fn vpc_router_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError>; + + /// Delete router + #[endpoint { + method = DELETE, + path = "/v1/vpc-routers/{router}", + tags = ["vpcs"], + }] + async fn vpc_router_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Update router + #[endpoint { + method = PUT, + path = "/v1/vpc-routers/{router}", + tags = ["vpcs"], + }] + async fn vpc_router_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError>; + + /// List routes + /// + /// List the routes associated with a router in a particular VPC. 
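The target/filter semantics documented on `vpc_firewall_rules_update` above amount to an any/all split. In the illustrative sketch below, the boolean slices stand in for a packet's per-target and per-filter match results; the constants simply restate the documented limits.

```rust
/// Documented limits, restated for illustration.
const MAX_RULES_PER_VPC: usize = 1024;
const MAX_TARGETS_PER_RULE: usize = 256;

/// A rule applies to a packet that matches ANY target and ALL filters.
fn rule_applies(target_matches: &[bool], filter_matches: &[bool]) -> bool {
    target_matches.iter().any(|&m| m) && filter_matches.iter().all(|&m| m)
}
```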
+ #[endpoint { + method = GET, + path = "/v1/vpc-router-routes", + tags = ["vpcs"], + }] + async fn vpc_router_route_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + // Vpc Router Routes + + /// Fetch route + #[endpoint { + method = GET, + path = "/v1/vpc-router-routes/{route}", + tags = ["vpcs"], + }] + async fn vpc_router_route_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Create route + #[endpoint { + method = POST, + path = "/v1/vpc-router-routes", + tags = ["vpcs"], + }] + async fn vpc_router_route_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError>; + + /// Delete route + #[endpoint { + method = DELETE, + path = "/v1/vpc-router-routes/{route}", + tags = ["vpcs"], + }] + async fn vpc_router_route_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Update route + #[endpoint { + method = PUT, + path = "/v1/vpc-router-routes/{route}", + tags = ["vpcs"], + }] + async fn vpc_router_route_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError>; + + // Racks + + /// List racks + #[endpoint { + method = GET, + path = "/v1/system/hardware/racks", + tags = ["system/hardware"], + }] + async fn rack_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch rack + #[endpoint { + method = GET, + path = "/v1/system/hardware/racks/{rack_id}", + tags = ["system/hardware"], + }] + async fn rack_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List uninitialized sleds + #[endpoint { + method = GET, + path = "/v1/system/hardware/sleds-uninitialized", + tags = ["system/hardware"] + }] + async fn sled_list_uninitialized( + rqctx: RequestContext, + query: Query>, + ) -> Result>, HttpError>; + + /// Add sled to initialized rack + // + // TODO: In the future this should really be a PUT request, once we resolve + // https://github.com/oxidecomputer/omicron/issues/4494. It should also + // explicitly be tied to a rack via a `rack_id` path param. For now we assume + // we are only operating on single rack systems. 
+ #[endpoint { + method = POST, + path = "/v1/system/hardware/sleds", + tags = ["system/hardware"] + }] + async fn sled_add( + rqctx: RequestContext, + sled: TypedBody, + ) -> Result, HttpError>; + + // Sleds + + /// List sleds + #[endpoint { + method = GET, + path = "/v1/system/hardware/sleds", + tags = ["system/hardware"], + }] + async fn sled_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch sled + #[endpoint { + method = GET, + path = "/v1/system/hardware/sleds/{sled_id}", + tags = ["system/hardware"], + }] + async fn sled_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Set sled provision policy + #[endpoint { + method = PUT, + path = "/v1/system/hardware/sleds/{sled_id}/provision-policy", + tags = ["system/hardware"], + }] + async fn sled_set_provision_policy( + rqctx: RequestContext, + path_params: Path, + new_provision_state: TypedBody, + ) -> Result, HttpError>; + + /// List instances running on given sled + #[endpoint { + method = GET, + path = "/v1/system/hardware/sleds/{sled_id}/instances", + tags = ["system/hardware"], + }] + async fn sled_instance_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + // Physical disks + + /// List physical disks + #[endpoint { + method = GET, + path = "/v1/system/hardware/disks", + tags = ["system/hardware"], + }] + async fn physical_disk_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Get a physical disk + #[endpoint { + method = GET, + path = "/v1/system/hardware/disks/{disk_id}", + tags = ["system/hardware"], + }] + async fn physical_disk_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + // Switches + + /// List switches + #[endpoint { + method = GET, + path = "/v1/system/hardware/switches", + tags = ["system/hardware"], + }] + async fn switch_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch switch + #[endpoint { + method = GET, + path = "/v1/system/hardware/switches/{switch_id}", + tags = ["system/hardware"], + }] + async fn switch_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// List physical disks attached to sleds + #[endpoint { + method = GET, + path = "/v1/system/hardware/sleds/{sled_id}/disks", + tags = ["system/hardware"], + }] + async fn sled_physical_disk_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError>; + + // Metrics + + /// View metrics + /// + /// View CPU, memory, or storage utilization metrics at the fleet or silo level. + #[endpoint { + method = GET, + path = "/v1/system/metrics/{metric_name}", + tags = ["system/metrics"], + }] + async fn system_metric( + rqctx: RequestContext, + path_params: Path, + pag_params: Query< + PaginationParams, + >, + other_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// View metrics + /// + /// View CPU, memory, or storage utilization metrics at the silo or project level. 
+ #[endpoint { + method = GET, + path = "/v1/metrics/{metric_name}", + tags = ["metrics"], + }] + async fn silo_metric( + rqctx: RequestContext, + path_params: Path, + pag_params: Query< + PaginationParams, + >, + other_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + /// List timeseries schemas + #[endpoint { + method = GET, + path = "/v1/timeseries/schema", + tags = ["metrics"], + }] + async fn timeseries_schema_list( + rqctx: RequestContext, + pag_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + >; + + // TODO: can we link to an OxQL reference? Do we have one? Can we even do links? + + /// Run timeseries query + /// + /// Queries are written in OxQL. + #[endpoint { + method = POST, + path = "/v1/timeseries/query", + tags = ["metrics"], + }] + async fn timeseries_query( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError>; + + // Updates + + /// Upload TUF repository + #[endpoint { + method = PUT, + path = "/v1/system/update/repository", + tags = ["system/update"], + unpublished = true, + }] + async fn system_update_put_repository( + rqctx: RequestContext, + query: Query, + body: StreamingBody, + ) -> Result, HttpError>; + + /// Fetch TUF repository description + /// + /// Fetch description of TUF repository by system version. + #[endpoint { + method = GET, + path = "/v1/system/update/repository/{system_version}", + tags = ["system/update"], + unpublished = true, + }] + async fn system_update_get_repository( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + // Silo users + + /// List users + #[endpoint { + method = GET, + path = "/v1/users", + tags = ["silos"], + }] + async fn user_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + // Silo groups + + /// List groups + #[endpoint { + method = GET, + path = "/v1/groups", + tags = ["silos"], + }] + async fn group_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch group + #[endpoint { + method = GET, + path = "/v1/groups/{group_id}", + tags = ["silos"], + }] + async fn group_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + // Built-in (system) users + + /// List built-in users + #[endpoint { + method = GET, + path = "/v1/system/users-builtin", + tags = ["system/silos"], + }] + async fn user_builtin_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Fetch built-in user + #[endpoint { + method = GET, + path = "/v1/system/users-builtin/{user}", + tags = ["system/silos"], + }] + async fn user_builtin_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + // Built-in roles + + /// List built-in roles + #[endpoint { + method = GET, + path = "/v1/system/roles", + tags = ["roles"], + }] + async fn role_list( + rqctx: RequestContext, + query_params: Query< + PaginationParams, + >, + ) -> Result>, HttpError>; + + /// Fetch built-in role + #[endpoint { + method = GET, + path = "/v1/system/roles/{role_name}", + tags = ["roles"], + }] + async fn role_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + // Current user + + /// Fetch user for current session + #[endpoint { + method = GET, + path = "/v1/me", + tags = ["session"], + }] + async fn current_user_view( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Fetch current user's groups + #[endpoint { + method = GET, + path = "/v1/me/groups", + tags = ["session"], + }] + async fn current_user_groups( + 
rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + // Per-user SSH public keys + + /// List SSH public keys + /// + /// Lists SSH public keys for the currently authenticated user. + #[endpoint { + method = GET, + path = "/v1/me/ssh-keys", + tags = ["session"], + }] + async fn current_user_ssh_key_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError>; + + /// Create SSH public key + /// + /// Create an SSH public key for the currently authenticated user. + #[endpoint { + method = POST, + path = "/v1/me/ssh-keys", + tags = ["session"], + }] + async fn current_user_ssh_key_create( + rqctx: RequestContext, + new_key: TypedBody, + ) -> Result, HttpError>; + + /// Fetch SSH public key + /// + /// Fetch SSH public key associated with the currently authenticated user. + #[endpoint { + method = GET, + path = "/v1/me/ssh-keys/{ssh_key}", + tags = ["session"], + }] + async fn current_user_ssh_key_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Delete SSH public key + /// + /// Delete an SSH public key associated with the currently authenticated user. + #[endpoint { + method = DELETE, + path = "/v1/me/ssh-keys/{ssh_key}", + tags = ["session"], + }] + async fn current_user_ssh_key_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result; + + // Probes (experimental) + + /// List instrumentation probes + #[endpoint { + method = GET, + path = "/experimental/v1/probes", + tags = ["hidden"], // system/probes: only one tag is allowed + }] + async fn probe_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError>; + + /// View instrumentation probe + #[endpoint { + method = GET, + path = "/experimental/v1/probes/{probe}", + tags = ["hidden"], // system/probes: only one tag is allowed + }] + async fn probe_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Create instrumentation probe + #[endpoint { + method = POST, + path = "/experimental/v1/probes", + tags = ["hidden"], // system/probes: only one tag is allowed + }] + async fn probe_create( + rqctx: RequestContext, + query_params: Query, + new_probe: TypedBody, + ) -> Result, HttpError>; + + /// Delete instrumentation probe + #[endpoint { + method = DELETE, + path = "/experimental/v1/probes/{probe}", + tags = ["hidden"], // system/probes: only one tag is allowed + }] + async fn probe_delete( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result; + + // Console API: logins + + /// SAML login console page (just a link to the IdP) + #[endpoint { + method = GET, + path = "/login/{silo_name}/saml/{provider_name}", + tags = ["login"], + unpublished = true, + }] + async fn login_saml_begin( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Get a redirect straight to the IdP + /// + /// Console uses this to avoid having to ask the API anything about the IdP. It + /// already knows the IdP name from the path, so it can just link to this path + /// and rely on Nexus to redirect to the actual IdP. 
+ #[endpoint { + method = GET, + path = "/login/{silo_name}/saml/{provider_name}/redirect", + tags = ["login"], + unpublished = true, + }] + async fn login_saml_redirect( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result; + + /// Authenticate a user via SAML + #[endpoint { + method = POST, + path = "/login/{silo_name}/saml/{provider_name}", + tags = ["login"], + }] + async fn login_saml( + rqctx: RequestContext, + path_params: Path, + body_bytes: dropshot::UntypedBody, + ) -> Result; + + #[endpoint { + method = GET, + path = "/login/{silo_name}/local", + tags = ["login"], + unpublished = true, + }] + async fn login_local_begin( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError>; + + /// Authenticate a user via username and password + #[endpoint { + method = POST, + path = "/v1/login/{silo_name}/local", + tags = ["login"], + }] + async fn login_local( + rqctx: RequestContext, + path_params: Path, + credentials: TypedBody, + ) -> Result, HttpError>; + + /// Log user out of web console by deleting session on client and server + #[endpoint { + // important for security that this be a POST despite the empty req body + method = POST, + path = "/v1/logout", + tags = ["hidden"], + }] + async fn logout( + rqctx: RequestContext, + cookies: Cookies, + ) -> Result, HttpError>; + + /// Redirect to a login page for the current Silo (if that can be determined) + #[endpoint { + method = GET, + path = "/login", + unpublished = true, + }] + async fn login_begin( + rqctx: RequestContext, + query_params: Query, + ) -> Result; + + // Console API: Pages + + #[endpoint { + method = GET, + path = "/projects/{path:.*}", + unpublished = true, + }] + async fn console_projects( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/settings/{path:.*}", + unpublished = true, + }] + async fn console_settings_page( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/system/{path:.*}", + unpublished = true, + }] + async fn console_system_page( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/lookup/{path:.*}", + unpublished = true, + }] + async fn console_lookup( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/", + unpublished = true, + }] + async fn console_root( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/projects-new", + unpublished = true, + }] + async fn console_projects_new( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/images", + unpublished = true, + }] + async fn console_silo_images( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/utilization", + unpublished = true, + }] + async fn console_silo_utilization( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/access", + unpublished = true, + }] + async fn console_silo_access( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Serve a static asset + #[endpoint { + method = GET, + path = "/assets/{path:.*}", + unpublished = true, + }] + async fn asset( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError>; + + /// Start an OAuth 2.0 Device Authorization Grant + /// + /// This endpoint is designed to be accessed 
from an *unauthenticated* + /// API client. It generates and records a `device_code` and `user_code` + /// which must be verified and confirmed prior to a token being granted. + #[endpoint { + method = POST, + path = "/device/auth", + content_type = "application/x-www-form-urlencoded", + tags = ["hidden"], // "token" + }] + async fn device_auth_request( + rqctx: RequestContext, + params: TypedBody, + ) -> Result, HttpError>; + + /// Verify an OAuth 2.0 Device Authorization Grant + /// + /// This endpoint should be accessed in a full user agent (e.g., + /// a browser). If the user is not logged in, we redirect them to + /// the login page and use the `state` parameter to get them back + /// here on completion. If they are logged in, serve up the console + /// verification page so they can verify the user code. + #[endpoint { + method = GET, + path = "/device/verify", + unpublished = true, + }] + async fn device_auth_verify( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/device/success", + unpublished = true, + }] + async fn device_auth_success( + rqctx: RequestContext, + ) -> Result, HttpError>; + + /// Confirm an OAuth 2.0 Device Authorization Grant + /// + /// This endpoint is designed to be accessed by the user agent (browser), + /// not the client requesting the token. So we do not actually return the + /// token here; it will be returned in response to the poll on `/device/token`. + #[endpoint { + method = POST, + path = "/device/confirm", + tags = ["hidden"], // "token" + }] + async fn device_auth_confirm( + rqctx: RequestContext, + params: TypedBody, + ) -> Result; + + /// Request a device access token + /// + /// This endpoint should be polled by the client until the user code + /// is verified and the grant is confirmed. + #[endpoint { + method = POST, + path = "/device/token", + content_type = "application/x-www-form-urlencoded", + tags = ["hidden"], // "token" + }] + async fn device_access_token( + rqctx: RequestContext, + params: TypedBody, + ) -> Result, HttpError>; +} + +/// Perform extra validations on the OpenAPI spec. +pub fn validate_api(spec: &OpenAPI, mut cx: ValidationContext<'_>) { + if spec.openapi != "3.0.3" { + cx.report_error(anyhow!( + "Expected OpenAPI version to be 3.0.3, found {}", + spec.openapi, + )); + } + if spec.info.title != "Oxide Region API" { + cx.report_error(anyhow!( + "Expected OpenAPI title to be 'Oxide Region API', found '{}'", + spec.info.title, + )); + } + if spec.info.version != API_VERSION { + cx.report_error(anyhow!( + "Expected OpenAPI version to be '{}', found '{}'", + API_VERSION, + spec.info.version, + )); + } + + // Spot check a couple of items. + if spec.paths.paths.is_empty() { + cx.report_error(anyhow!("Expected at least one path in the spec")); + } + if spec.paths.paths.get("/v1/projects").is_none() { + cx.report_error(anyhow!("Expected a path for /v1/projects")); + } + + // Construct a string that helps us identify the organization of tags and + // operations. + let mut ops_by_tag = + BTreeMap::>::new(); + + let mut ops_by_tag_valid = true; + for (path, method, op) in spec.operations() { + // Make sure each operation has exactly one tag. Note, we intentionally + // do this before validating the OpenAPI output as fixing an error here + // would necessitate refreshing the spec file again.
+ if op.tags.len() != 1 { + cx.report_error(anyhow!( + "operation '{}' has {} tags rather than 1", + op.operation_id.as_ref().unwrap(), + op.tags.len() + )); + ops_by_tag_valid = false; + continue; + } + + // Every non-hidden endpoint must have a summary + if !op.tags.contains(&"hidden".to_string()) && op.summary.is_none() { + cx.report_error(anyhow!( + "operation '{}' is missing a summary doc comment", + op.operation_id.as_ref().unwrap() + )); + // This error does not prevent `ops_by_tag` from being populated + // correctly, so we can continue. + } + + ops_by_tag + .entry(op.tags.first().unwrap().to_string()) + .or_default() + .push(( + op.operation_id.as_ref().unwrap().to_string(), + method.to_string().to_uppercase(), + path.to_string(), + )); + } + + if ops_by_tag_valid { + let mut tags = String::new(); + for (tag, mut ops) in ops_by_tag { + ops.sort(); + tags.push_str(&format!( + r#"API operations found with tag "{}""#, + tag + )); + tags.push_str(&format!( + "\n{:40} {:8} {}\n", + "OPERATION ID", "METHOD", "URL PATH" + )); + for (operation_id, method, path) in ops { + tags.push_str(&format!( + "{:40} {:8} {}\n", + operation_id, method, path + )); + } + tags.push('\n'); + } + + // When this fails, verify that the operations whose tags you're adding, + // renaming, or changing are what you intend. + cx.record_file_contents( + "nexus/external-api/output/nexus_tags.txt", + tags.into_bytes(), + ); + } +} + +pub type IpPoolRangePaginationParams = + PaginationParams; + +/// Type used to paginate requests to list timeseries schema. +pub type TimeseriesSchemaPaginationParams = + PaginationParams; diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index 6a98c446142..12e99ba23b4 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -20,27 +20,27 @@ use nexus_types::{ }, internal_api::{ params::{ - OximeterInfo, RackInitializationRequest, SledAgentInfo, - SwitchPutRequest, SwitchPutResponse, + InstanceMigrateRequest, OximeterInfo, RackInitializationRequest, + SledAgentInfo, SwitchPutRequest, SwitchPutResponse, }, views::{BackgroundTask, DemoSaga, Ipv4NatEntryView, Saga}, }, }; use omicron_common::{ api::{ - external::http_pagination::PaginatedById, + external::{http_pagination::PaginatedById, Instance}, internal::nexus::{ DiskRuntimeState, DownstairsClientStopRequest, DownstairsClientStopped, ProducerEndpoint, ProducerRegistrationResponse, RepairFinishInfo, RepairProgress, - RepairStartInfo, SledInstanceState, + RepairStartInfo, SledVmmState, }, }, update::ArtifactId, }; use omicron_uuid_kinds::{ - DemoSagaUuid, DownstairsKind, SledUuid, TypedUuid, UpstairsKind, - UpstairsRepairKind, + DemoSagaUuid, DownstairsKind, PropolisUuid, SledUuid, TypedUuid, + UpstairsKind, UpstairsRepairKind, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -108,17 +108,27 @@ pub trait NexusInternalApi { body: TypedBody, ) -> Result, HttpError>; - /// Report updated state for an instance. + /// Report updated state for a VMM. #[endpoint { method = PUT, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] async fn cpapi_instances_put( rqctx: RequestContext, - path_params: Path, - new_runtime_state: TypedBody, + path_params: Path, + new_runtime_state: TypedBody, ) -> Result; + #[endpoint { + method = POST, + path = "/instances/{instance_id}/migrate", + }] + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> Result, HttpError>; + /// Report updated state for a disk.
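For the tag summary assembled in `validate_api` above, the recorded `nexus_tags.txt` consists of one table per tag, laid out by the `format!` calls shown. An illustrative excerpt, using disk operations from this trait (the real file's contents depend on the full spec):

```
API operations found with tag "disks"
OPERATION ID                             METHOD   URL PATH
disk_create                              POST     /v1/disks
disk_delete                              DELETE   /v1/disks/{disk}
disk_list                                GET      /v1/disks
```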
#[endpoint { method = PUT, @@ -558,6 +568,12 @@ pub struct InstancePathParam { pub instance_id: Uuid, } +/// Path parameters for VMM requests (internal API) +#[derive(Deserialize, JsonSchema)] +pub struct VmmPathParam { + pub propolis_id: PropolisUuid, +} + #[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] pub struct CollectorIdPathParams { /// The ID of the oximeter collector. diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index a531b66df4d..1c62e553a83 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -16,6 +16,7 @@ dns-service-client.workspace = true chrono.workspace = true futures.workspace = true internal-dns.workspace = true +newtype-uuid.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 498944598df..33b8176df68 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -34,10 +34,11 @@ pub(crate) async fn ensure_settings( mod test { use super::*; use crate::overridables::Overridables; + use crate::test_utils::realize_blueprint_and_expect; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::CockroachDbClusterVersion; + use nexus_types::deployment::CockroachDbPreserveDowngrade; use std::sync::Arc; type ControlPlaneTestContext = @@ -69,24 +70,26 @@ mod test { .await .expect("failed to get blueprint from datastore"); eprintln!("blueprint: {}", blueprint.display()); - // The initial blueprint should already have these filled in. + // The initial blueprint should already have the state fingerprint + // filled in. assert_eq!( blueprint.cockroachdb_fingerprint, settings.state_fingerprint ); - assert_eq!( - blueprint.cockroachdb_setting_preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.into() - ); - // The cluster version, preserve downgrade setting, and - // `NEWLY_INITIALIZED` should all match. - assert_eq!( - settings.version, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() - ); + // The initial blueprint should already have the preserve downgrade + // setting filled in. (It might be the current or previous version, but + // it should be `Set` regardless.) + let CockroachDbPreserveDowngrade::Set(bp_preserve_downgrade) = + blueprint.cockroachdb_setting_preserve_downgrade + else { + panic!("blueprint does not set preserve downgrade option"); + }; + // The cluster version, preserve downgrade setting, and the value in the + // blueprint should all match. + assert_eq!(settings.version, bp_preserve_downgrade.to_string()); assert_eq!( settings.preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() + bp_preserve_downgrade.to_string() ); // Record the zpools so we don't fail to ensure datasets (unrelated to // crdb settings) during blueprint execution. @@ -96,16 +99,10 @@ mod test { .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, + _ = realize_blueprint_and_expect( + &opctx, datastore, resolver, &blueprint, &overrides, ) - .await - .expect("failed to execute initial blueprint"); + .await; // The CockroachDB settings should not have changed. 
assert_eq!( settings, diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 6444934ba6d..2f84378a13a 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -67,7 +67,7 @@ pub(crate) async fn ensure_dataset_records_exist( id.into_untyped_uuid(), pool_id.into_untyped_uuid(), Some(address), - kind.into(), + kind.clone(), ); let maybe_inserted = datastore .dataset_insert_if_not_exists(dataset) diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 885ffa67d17..aab3839bd04 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -458,6 +458,7 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { mod test { use super::*; use crate::overridables::Overridables; + use crate::test_utils::realize_blueprint_and_expect; use crate::Sled; use dns_service_client::DnsDiff; use internal_dns::config::Host; @@ -466,6 +467,7 @@ mod test { use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use internal_dns::DNS_ZONE; + use newtype_uuid::GenericUuid; use nexus_db_model::DnsGroup; use nexus_db_model::Silo; use nexus_db_queries::authn; @@ -477,6 +479,8 @@ mod test { use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::example; use nexus_reconfigurator_preparation::PlanningInputFromDb; + use nexus_sled_agent_shared::inventory::OmicronZoneConfig; + use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_test_utils::resource_helpers::create_silo; use nexus_test_utils::resource_helpers::DiskTestBuilder; @@ -489,6 +493,9 @@ mod test { use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::CockroachDbSettings; + pub use nexus_types::deployment::OmicronZoneExternalFloatingAddr; + pub use nexus_types::deployment::OmicronZoneExternalFloatingIp; + pub use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::SledFilter; use nexus_types::external_api::params; use nexus_types::external_api::shared; @@ -502,13 +509,14 @@ mod test { use omicron_common::address::get_switch_zone_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; - use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; - use omicron_common::address::COCKROACHDB_REDUNDANCY; - use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; + use omicron_common::policy::COCKROACHDB_REDUNDANCY; + use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; + use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ExternalIpUuid; @@ -538,6 +546,212 @@ mod test { } } + /// ********************************************************************** + /// DEPRECATION WARNING: + /// + /// Remove when `deprecated_omicron_zone_config_to_blueprint_zone_config` + /// is deleted. + /// ********************************************************************** + /// + /// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`]. 
+ #[derive(Debug, Clone)]
+ pub enum InvalidOmicronZoneType {
+ #[allow(unused)]
+ ExternalIpIdRequired { kind: ZoneKind },
+ }
+
+ /// **********************************************************************
+ /// DEPRECATION WARNING: Do not call this function in new code!!!
+ /// **********************************************************************
+ ///
+ /// Convert an [`OmicronZoneConfig`] to a [`BlueprintZoneConfig`].
+ ///
+ /// A `BlueprintZoneConfig` is a superset of `OmicronZoneConfig` and
+ /// contains auxiliary information not present in an `OmicronZoneConfig`.
+ /// Therefore, the only valid direction for a real system to take is a
+ /// lossy conversion from `BlueprintZoneConfig` to `OmicronZoneConfig`.
+ /// This function, however, does the opposite. We therefore have to inject
+ /// fake information to fill in the unknown fields of the generated
+ /// `BlueprintZoneConfig`.
+ ///
+ /// This is bad, and we should generally feel bad for doing it :). At
+ /// the time this was done we were backporting the blueprint system into
+ /// RSS while trying not to change too much code. This was a judicious
+ /// shortcut used right before a release for stability reasons. As the
+ /// number of zones managed by the reconfigurator has grown, the use
+ /// of this function has become more egregious, so it was removed from
+ /// the production code path and moved into this test module. That move
+ /// is itself a judicious shortcut: the test in this module,
+ /// `test_blueprint_internal_dns_basic`, is the last remaining caller of
+ /// this function.
+ ///
+ /// Ideally, we would get rid of this function altogether and use another
+ /// method for generating `BlueprintZoneConfig` structures. Unfortunately,
+ /// there are still a few remaining zones that need to be implemented in the
+ /// `BlueprintBuilder`, and some of them require custom code. Until that is
+ /// done, we don't have a good way of generating a test representation of
+ /// the real system that would properly serve this test. We could generate
+ /// a `BlueprintZoneConfig` by hand for each zone type in this test, on
+ /// top of the more modern `SystemDescription` setup, but that isn't much
+ /// different from what this function already does, and we'd eventually
+ /// replace it with a better test setup once our `BlueprintBuilder` is
+ /// capable of properly constructing all zone types. Instead, we do the
+ /// simple thing and reuse what we already have.
+ ///
+ /// # Errors
+ ///
+ /// If `config.zone_type` is a zone that has an external IP address (Nexus,
+ /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this
+ /// method will return an error.
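For orientation, a minimal usage sketch of the helper defined below. `zone` is a hypothetical `OmicronZoneConfig` for a Nexus zone; the disposition and the fabricated external IP ID are exactly the "fake information" the warning above describes:

```rust
// Hedged sketch (not part of the diff): inventory does not carry a
// disposition or an external IP ID, so the caller must invent both.
let bp_zone = deprecated_omicron_zone_config_to_blueprint_zone_config(
    zone,
    BlueprintZoneDisposition::InService,
    // Nexus zones have an external IP, so `Some(_)` is required; a fresh
    // random ID is acceptable in a test but would be wrong on a real system.
    Some(ExternalIpUuid::new_v4()),
)
.expect("Nexus zones carry an external IP, so Some(_) satisfies the check");
assert_eq!(bp_zone.disposition, BlueprintZoneDisposition::InService);
```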
+ pub fn deprecated_omicron_zone_config_to_blueprint_zone_config(
+ config: OmicronZoneConfig,
+ disposition: BlueprintZoneDisposition,
+ external_ip_id: Option<ExternalIpUuid>,
+ ) -> Result<BlueprintZoneConfig, InvalidOmicronZoneType> {
+ let kind = config.zone_type.kind();
+ let zone_type = match config.zone_type {
+ OmicronZoneType::BoundaryNtp {
+ address,
+ dns_servers,
+ domain,
+ nic,
+ ntp_servers,
+ snat_cfg,
+ } => {
+ let external_ip_id = external_ip_id.ok_or(
+ InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+ )?;
+ BlueprintZoneType::BoundaryNtp(
+ blueprint_zone_type::BoundaryNtp {
+ address,
+ ntp_servers,
+ dns_servers,
+ domain,
+ nic,
+ external_ip: OmicronZoneExternalSnatIp {
+ id: external_ip_id,
+ snat_cfg,
+ },
+ },
+ )
+ }
+ OmicronZoneType::Clickhouse { address, dataset } => {
+ BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse {
+ address,
+ dataset,
+ })
+ }
+ OmicronZoneType::ClickhouseKeeper { address, dataset } => {
+ BlueprintZoneType::ClickhouseKeeper(
+ blueprint_zone_type::ClickhouseKeeper { address, dataset },
+ )
+ }
+ OmicronZoneType::ClickhouseServer { address, dataset } => {
+ BlueprintZoneType::ClickhouseServer(
+ blueprint_zone_type::ClickhouseServer { address, dataset },
+ )
+ }
+ OmicronZoneType::CockroachDb { address, dataset } => {
+ BlueprintZoneType::CockroachDb(
+ blueprint_zone_type::CockroachDb { address, dataset },
+ )
+ }
+ OmicronZoneType::Crucible { address, dataset } => {
+ BlueprintZoneType::Crucible(blueprint_zone_type::Crucible {
+ address,
+ dataset,
+ })
+ }
+ OmicronZoneType::CruciblePantry { address } => {
+ BlueprintZoneType::CruciblePantry(
+ blueprint_zone_type::CruciblePantry { address },
+ )
+ }
+ OmicronZoneType::ExternalDns {
+ dataset,
+ dns_address,
+ http_address,
+ nic,
+ } => {
+ let external_ip_id = external_ip_id.ok_or(
+ InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+ )?;
+ BlueprintZoneType::ExternalDns(
+ blueprint_zone_type::ExternalDns {
+ dataset,
+ http_address,
+ dns_address: OmicronZoneExternalFloatingAddr {
+ id: external_ip_id,
+ addr: dns_address,
+ },
+ nic,
+ },
+ )
+ }
+ OmicronZoneType::InternalDns {
+ dataset,
+ dns_address,
+ gz_address,
+ gz_address_index,
+ http_address,
+ } => BlueprintZoneType::InternalDns(
+ blueprint_zone_type::InternalDns {
+ dataset,
+ http_address,
+ dns_address,
+ gz_address,
+ gz_address_index,
+ },
+ ),
+ OmicronZoneType::InternalNtp {
+ address,
+ dns_servers,
+ domain,
+ ntp_servers,
+ } => BlueprintZoneType::InternalNtp(
+ blueprint_zone_type::InternalNtp {
+ address,
+ ntp_servers,
+ dns_servers,
+ domain,
+ },
+ ),
+ OmicronZoneType::Nexus {
+ external_dns_servers,
+ external_ip,
+ external_tls,
+ internal_address,
+ nic,
+ } => {
+ let external_ip_id = external_ip_id.ok_or(
+ InvalidOmicronZoneType::ExternalIpIdRequired { kind },
+ )?;
+ BlueprintZoneType::Nexus(blueprint_zone_type::Nexus {
+ internal_address,
+ external_ip: OmicronZoneExternalFloatingIp {
+ id: external_ip_id,
+ ip: external_ip,
+ },
+ nic,
+ external_tls,
+ external_dns_servers,
+ })
+ }
+ OmicronZoneType::Oximeter { address } => {
+ BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter {
+ address,
+ })
+ }
+ };
+ Ok(BlueprintZoneConfig {
+ disposition,
+ id: OmicronZoneUuid::from_untyped_uuid(config.id),
+ underlay_address: config.underlay_address,
+ filesystem_pool: config.filesystem_pool,
+ zone_type,
+ })
+ }
+
 /// test blueprint_internal_dns_config(): trivial case of an empty blueprint
 #[test]
 fn test_blueprint_internal_dns_empty() {
@@ -588,7 +802,7 @@ mod test {
 .zones
 .into_iter()
 .map(|config| ->
BlueprintZoneConfig { - BlueprintZoneConfig::from_omicron_zone_config( + deprecated_omicron_zone_config_to_blueprint_zone_config( config, BlueprintZoneDisposition::InService, // We don't get external IP IDs in inventory @@ -1245,16 +1459,10 @@ mod test { // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, + _ = realize_blueprint_and_expect( + &opctx, datastore, resolver, &blueprint, &overrides, ) - .await - .expect("failed to execute initial blueprint"); + .await; // DNS ought not to have changed. verify_dns_unchanged( @@ -1319,6 +1527,7 @@ mod test { service_nic_rows: &[], target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, target_nexus_zone_count: NEXUS_REDUNDANCY, + target_internal_dns_zone_count: INTERNAL_DNS_REDUNDANCY, target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, @@ -1385,16 +1594,14 @@ mod test { .await .expect("failed to set blueprint as target"); - crate::realize_blueprint_with_overrides( + _ = realize_blueprint_and_expect( &opctx, datastore, resolver, &blueprint2, - "test-suite", &overrides, ) - .await - .expect("failed to execute second blueprint"); + .await; // Now fetch DNS again. Both should have changed this time. let dns_latest_internal = datastore @@ -1459,16 +1666,14 @@ mod test { } // If we execute it again, we should see no more changes. - crate::realize_blueprint_with_overrides( + _ = realize_blueprint_and_expect( &opctx, datastore, resolver, &blueprint2, - "test-suite", &overrides, ) - .await - .expect("failed to execute second blueprint again"); + .await; verify_dns_unchanged( &opctx, datastore, @@ -1495,16 +1700,14 @@ mod test { // One more time, make sure that executing the blueprint does not do // anything. - crate::realize_blueprint_with_overrides( + _ = realize_blueprint_and_expect( &opctx, datastore, resolver, &blueprint2, - "test-suite", &overrides, ) - .await - .expect("failed to execute second blueprint again"); + .await; verify_dns_unchanged( &opctx, datastore, @@ -1589,16 +1792,10 @@ mod test { ); // If we execute the blueprint, DNS should not be changed. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, + _ = realize_blueprint_and_expect( + &opctx, datastore, resolver, &blueprint, &overrides, ) - .await - .expect("failed to execute blueprint"); + .await; let dns_latest_internal = datastore .dns_config_read(&opctx, DnsGroup::Internal) .await diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index bb525b1b8b3..fc2d9a8ae5e 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -24,6 +24,7 @@ use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; +use uuid::Uuid; mod cockroachdb; mod datasets; @@ -31,7 +32,10 @@ mod dns; mod omicron_physical_disks; mod omicron_zones; mod overridables; +mod sagas; mod sled_state; +#[cfg(test)] +mod test_utils; pub use dns::blueprint_external_dns_config; pub use dns::blueprint_internal_dns_config; @@ -68,43 +72,46 @@ impl From for Sled { } } +/// The result of calling [`realize_blueprint`] or +/// [`realize_blueprint_with_overrides`]. 
+#[derive(Debug)] +#[must_use = "the output of realize_blueprint should probably be used"] +pub struct RealizeBlueprintOutput { + /// Whether any sagas need to be reassigned to a new Nexus. + pub needs_saga_recovery: bool, +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. -pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, -) -> Result<(), Vec> -where - String: From, -{ + nexus_id: Uuid, +) -> Result> { realize_blueprint_with_overrides( opctx, datastore, resolver, blueprint, - nexus_label, + nexus_id, &Default::default(), ) .await } -pub async fn realize_blueprint_with_overrides( +pub async fn realize_blueprint_with_overrides( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, + nexus_id: Uuid, overrides: &Overridables, -) -> Result<(), Vec> -where - String: From, -{ +) -> Result> { let opctx = opctx.child(BTreeMap::from([( "comment".to_string(), blueprint.comment.clone(), @@ -136,7 +143,7 @@ where }) .collect(); - omicron_physical_disks::deploy_disks( + let deploy_disks_done = omicron_physical_disks::deploy_disks( &opctx, &sleds_by_id, &blueprint.blueprint_disks, @@ -182,7 +189,7 @@ where dns::deploy_dns( &opctx, datastore, - String::from(nexus_label), + nexus_id.to_string(), blueprint, &sleds_by_id, overrides, @@ -209,20 +216,50 @@ where ) .await?; - // This depends on the "deploy_disks" call earlier -- disk expungement is a - // statement of policy, but we need to be assured that the Sled Agent has - // stopped using that disk before we can mark its state as decommissioned. - omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) - .await?; + omicron_physical_disks::decommission_expunged_disks( + &opctx, + datastore, + deploy_disks_done, + ) + .await?; + + // From this point on, we'll assume that any errors that we encounter do + // *not* require stopping execution. We'll just accumulate them and return + // them all at the end. + // + // TODO We should probably do this with more of the errors above, too. + let mut errors = Vec::new(); + + // For any expunged Nexus zones, re-assign in-progress sagas to some other + // Nexus. If this fails for some reason, it doesn't affect anything else. + let sec_id = nexus_db_model::SecId(nexus_id); + let reassigned = sagas::reassign_sagas_from_expunged( + &opctx, datastore, blueprint, sec_id, + ) + .await + .context("failed to re-assign sagas"); + let needs_saga_recovery = match reassigned { + Ok(needs_recovery) => needs_recovery, + Err(error) => { + errors.push(error); + false + } + }; // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. 
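The accumulate-and-continue pattern introduced above, reduced to a self-contained sketch with toy step names (assumptions: `anyhow` errors and two trailing fallible steps, mirroring the saga re-assignment and CockroachDB settings steps):

```rust
use anyhow::{anyhow, Context};

#[must_use = "callers should check needs_saga_recovery"]
struct Output {
    needs_saga_recovery: bool,
}

fn reassign_sagas() -> anyhow::Result<bool> {
    Ok(true) // pretend some sagas were re-assigned
}

fn ensure_cockroach_settings() -> anyhow::Result<()> {
    Err(anyhow!("cluster upgrade in progress"))
}

// Later steps still run when an earlier one fails; every failure is
// reported to the caller instead of only the first.
fn finish_execution() -> Result<Output, Vec<anyhow::Error>> {
    let mut errors = Vec::new();

    let needs_saga_recovery =
        match reassign_sagas().context("failed to re-assign sagas") {
            Ok(flag) => flag,
            Err(error) => {
                errors.push(error);
                false
            }
        };

    if let Err(error) = ensure_cockroach_settings() {
        errors.push(error);
    }

    if errors.is_empty() {
        Ok(Output { needs_saga_recovery })
    } else {
        Err(errors)
    }
}

fn main() {
    // One step failed, so the whole attempt reports an error...
    let result = finish_execution();
    assert_eq!(result.err().map(|errors| errors.len()), Some(1));
    // ...but the saga re-assignment step above still ran.
}
```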
- cockroachdb::ensure_settings(&opctx, datastore, blueprint) - .await - .map_err(|err| vec![err])?; + if let Err(error) = + cockroachdb::ensure_settings(&opctx, datastore, blueprint).await + { + errors.push(error); + } - Ok(()) + if errors.is_empty() { + Ok(RealizeBlueprintOutput { needs_saga_recovery }) + } else { + Err(errors) + } } #[cfg(test)] @@ -235,6 +272,7 @@ mod tests { use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; + use omicron_common::api::external::Error; use std::collections::BTreeSet; use uuid::Uuid; @@ -305,10 +343,10 @@ mod tests { PhysicalDiskKind::U2, sled_id.into_untyped_uuid(), ); - datastore - .physical_disk_insert(&opctx, disk.clone()) - .await - .expect("failed to upsert physical disk"); + match datastore.physical_disk_insert(&opctx, disk.clone()).await { + Ok(_) | Err(Error::ObjectAlreadyExists { .. }) => (), + Err(e) => panic!("failed to upsert physical disk: {e}"), + } if pool_inserted.insert(pool_id) { let zpool = Zpool::new( diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index 7adc41213e0..d94bbe2e279 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -25,7 +25,7 @@ pub(crate) async fn deploy_disks( opctx: &OpContext, sleds_by_id: &BTreeMap, sled_configs: &BTreeMap, -) -> Result<(), Vec> { +) -> Result> { let errors: Vec<_> = stream::iter(sled_configs) .filter_map(|(sled_id, config)| async move { let log = opctx.log.new(o!( @@ -92,16 +92,26 @@ pub(crate) async fn deploy_disks( .await; if errors.is_empty() { - Ok(()) + Ok(DeployDisksDone {}) } else { Err(errors) } } -/// Decommissions all disks which are currently expunged +/// Typestate indicating that the deploy disks step was performed. +#[derive(Debug)] +#[must_use = "this should be passed into decommission_expunged_disks"] +pub(crate) struct DeployDisksDone {} + +/// Decommissions all disks which are currently expunged. pub(crate) async fn decommission_expunged_disks( opctx: &OpContext, datastore: &DataStore, + // This is taken as a parameter to ensure that this depends on a + // "deploy_disks" call made earlier. Disk expungement is a statement of + // policy, but we need to be assured that the Sled Agent has stopped using + // that disk before we can mark its state as decommissioned. + _deploy_disks_done: DeployDisksDone, ) -> Result<(), Vec> { datastore .physical_disk_decommission_all_expunged(&opctx) @@ -113,6 +123,7 @@ pub(crate) async fn decommission_expunged_disks( #[cfg(test)] mod test { use super::deploy_disks; + use super::DeployDisksDone; use crate::DataStore; use crate::Sled; @@ -124,7 +135,6 @@ mod test { use httptest::responders::status_code; use httptest::Expectation; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; @@ -142,6 +152,7 @@ mod test { use nexus_types::identity::Asset; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; + use omicron_common::api::internal::shared::DatasetKind; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -217,9 +228,13 @@ mod test { // Get a success result back when the blueprint has an empty set of // disks. 
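`decommission_expunged_disks` now takes a proof token: the only way to obtain a `DeployDisksDone` is to run `deploy_disks`, so the ordering that used to live in a comment is enforced by the compiler. A self-contained sketch of the pattern, with toy names:

```rust
mod steps {
    /// Proof that the deploy step ran. The private field prevents other
    /// modules from constructing this token out of thin air.
    #[must_use = "pass this token to decommission()"]
    pub struct DeployDone(());

    pub fn deploy() -> DeployDone {
        // ... push the disk configuration out to every sled-agent ...
        DeployDone(())
    }

    pub fn decommission(_proof: DeployDone) {
        // Safe: the token proves a deploy happened before this call.
    }
}

fn main() {
    let done = steps::deploy();
    steps::decommission(done);
    // `steps::decommission(steps::DeployDone(()))` would not compile here,
    // because the field of `DeployDone` is private to the `steps` module.
}
```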
let (_, blueprint) = create_blueprint(BTreeMap::new()); - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy no disks"); + // Use an explicit type here because not doing so can cause errors to + // be ignored (this behavior is genuinely terrible). Instead, ensure + // that the type has the right result. + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy no disks"); // Disks are updated in a particular order, but each request contains // the full set of disks that must be running. @@ -272,9 +287,10 @@ mod test { } // Execute it. - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy initial disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy initial disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -293,9 +309,10 @@ mod test { )), ); } - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy same disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy same disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -567,7 +584,15 @@ mod test { assert_eq!(d.disk_state, PhysicalDiskState::Active); assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); - super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + super::decommission_expunged_disks( + &opctx, + &datastore, + // This is an internal test, and we're testing decommissioning in + // isolation, so it's okay to create the typestate here. + DeployDisksDone {}, + ) + .await + .unwrap(); // After decommissioning, we see the expunged disk become // decommissioned. The other disk remains in-service. diff --git a/nexus/reconfigurator/execution/src/sagas.rs b/nexus/reconfigurator/execution/src/sagas.rs new file mode 100644 index 00000000000..458328ef008 --- /dev/null +++ b/nexus/reconfigurator/execution/src/sagas.rs @@ -0,0 +1,71 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Re-assign sagas from expunged Nexus zones + +use nexus_db_model::SecId; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::GenericUuid; +use slog::{debug, info, warn}; + +/// For each expunged Nexus zone, re-assign sagas owned by that Nexus to the +/// specified nexus (`nexus_id`). +pub(crate) async fn reassign_sagas_from_expunged( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + nexus_id: SecId, +) -> Result { + let log = &opctx.log; + + // Identify any Nexus zones that have been expunged and need to have sagas + // re-assigned. + // + // TODO: Currently, we take any expunged Nexus instances and attempt to + // assign all their sagas to ourselves. Per RFD 289, we can only re-assign + // sagas between two instances of Nexus that are at the same version. Right + // now this can't happen so there's nothing to do here to ensure that + // constraint. 
However, once we support allowing the control plane to be + // online _during_ an upgrade, there may be multiple different Nexus + // instances running at the same time. At that point, we will need to make + // sure that we only ever try to assign ourselves sagas from other Nexus + // instances that we know are running the same version as ourselves. + let nexus_zone_ids: Vec<_> = blueprint + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .filter_map(|(_, z)| { + z.zone_type + .is_nexus() + .then(|| nexus_db_model::SecId(z.id.into_untyped_uuid())) + }) + .collect(); + + debug!(log, "re-assign sagas: found Nexus instances"; + "nexus_zone_ids" => ?nexus_zone_ids); + + let result = + datastore.sagas_reassign_sec(opctx, &nexus_zone_ids, nexus_id).await; + + match result { + Ok(count) => { + info!(log, "re-assigned sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + "count" => count, + ); + + Ok(count != 0) + } + Err(error) => { + warn!(log, "failed to re-assign sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + &error, + ); + + Err(error) + } + } +} diff --git a/nexus/reconfigurator/execution/src/test_utils.rs b/nexus/reconfigurator/execution/src/test_utils.rs new file mode 100644 index 00000000000..0d6675c7dd4 --- /dev/null +++ b/nexus/reconfigurator/execution/src/test_utils.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Test utilities for reconfigurator execution. + +use internal_dns::resolver::Resolver; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_types::deployment::Blueprint; +use uuid::Uuid; + +use crate::{overridables::Overridables, RealizeBlueprintOutput}; + +pub(crate) async fn realize_blueprint_and_expect( + opctx: &OpContext, + datastore: &DataStore, + resolver: &Resolver, + blueprint: &Blueprint, + overrides: &Overridables, +) -> RealizeBlueprintOutput { + let output = crate::realize_blueprint_with_overrides( + opctx, + datastore, + resolver, + blueprint, + Uuid::new_v4(), + overrides, + ) + .await + // We expect here rather than in the caller because we want to assert that + // the result is a `RealizeBlueprintOutput`. Because the latter is + // `must_use`, the caller may assign it to `_` and miss the `expect` call. 
+ .expect("failed to execute blueprint"); + + eprintln!("realize_blueprint output: {:#?}", output); + output +} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 2d8a7c9598a..c7eb5bddad9 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -37,13 +37,17 @@ use nexus_types::external_api::views::SledState; use omicron_common::address::get_internal_dns_server_addresses; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; +use omicron_common::address::ReservedRackSubnet; use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES; +use omicron_common::address::DNS_HTTP_PORT; +use omicron_common::address::DNS_PORT; use omicron_common::address::NTP_PORT; use omicron_common::address::SLED_RESERVED_ADDRESSES; use omicron_common::api::external::Generation; use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::policy::MAX_INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::ExternalIpKind; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneKind; @@ -73,6 +77,7 @@ use typed_rng::UuidRng; use super::external_networking::BuilderExternalNetworking; use super::external_networking::ExternalNetworkingChoice; use super::external_networking::ExternalSnatNetworkingChoice; +use super::internal_dns::DnsSubnetAllocator; use super::zones::is_already_expunged; use super::zones::BuilderZoneState; use super::zones::BuilderZonesConfig; @@ -106,6 +111,12 @@ pub enum Error { }, #[error("programming error in planner")] Planner(#[source] anyhow::Error), + #[error("no reserved subnets available for DNS")] + NoAvailableDnsSubnets, + #[error( + "can only have {MAX_INTERNAL_DNS_REDUNDANCY} internal DNS servers" + )] + TooManyDnsServers, } /// Describes whether an idempotent "ensure" operation resulted in action taken @@ -197,6 +208,7 @@ pub struct BlueprintBuilder<'a> { input: &'a PlanningInput, sled_ip_allocators: BTreeMap, external_networking: BuilderExternalNetworking<'a>, + internal_dns_subnets: DnsSubnetAllocator, // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. 
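The comment in `test_utils.rs` just above calls out a real hazard: `_ = expr;` silences `#[must_use]`, so a test written as `_ = realize_blueprint_with_overrides(...).await;` would discard an `Err` without any warning. A toy demonstration of the hazard and of the helper's defense:

```rust
#[must_use]
struct Output;

fn fallible() -> Result<Output, Vec<String>> {
    Err(vec!["boom".to_string()])
}

// The helper pattern: unwrap inside and return only the #[must_use] value,
// so callers cannot skip the error check by discarding the result.
fn fallible_and_expect() -> Output {
    fallible().expect("failed to execute")
}

fn main() {
    _ = fallible(); // compiles cleanly; the Err is silently dropped
    let _output = fallible_and_expect(); // panics here, as a test should
}
```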
@@ -291,6 +303,8 @@ impl<'a> BlueprintBuilder<'a> { let external_networking = BuilderExternalNetworking::new(parent_blueprint, input)?; + let internal_dns_subnets = + DnsSubnetAllocator::new(parent_blueprint, input)?; // Prefer the sled state from our parent blueprint for sleds // that were in it; there may be new sleds in `input`, in which @@ -323,6 +337,7 @@ impl<'a> BlueprintBuilder<'a> { input, sled_ip_allocators: BTreeMap::new(), external_networking, + internal_dns_subnets, zones: BlueprintZonesBuilder::new(parent_blueprint), disks: BlueprintDisksBuilder::new(parent_blueprint), sled_state, @@ -619,6 +634,69 @@ impl<'a> BlueprintBuilder<'a> { Ok(EnsureMultiple::Changed { added, removed }) } + fn sled_add_zone_internal_dns( + &mut self, + sled_id: SledUuid, + gz_address_index: u32, + ) -> Result { + let sled_subnet = self.sled_resources(sled_id)?.subnet; + let rack_subnet = ReservedRackSubnet::from_subnet(sled_subnet); + let dns_subnet = self.internal_dns_subnets.alloc(rack_subnet)?; + let address = dns_subnet.dns_address(); + let zpool = self.sled_select_zpool(sled_id, ZoneKind::InternalDns)?; + let zone_type = + BlueprintZoneType::InternalDns(blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { pool_name: zpool.clone() }, + dns_address: SocketAddrV6::new(address, DNS_PORT, 0, 0), + http_address: SocketAddrV6::new(address, DNS_HTTP_PORT, 0, 0), + gz_address: dns_subnet.gz_address(), + gz_address_index, + }); + + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: self.rng.zone_rng.next(), + underlay_address: address, + filesystem_pool: Some(zpool), + zone_type, + }; + + self.sled_add_zone(sled_id, zone)?; + Ok(Ensure::Added) + } + + pub fn sled_ensure_zone_multiple_internal_dns( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many internal DNS zones do we need to add? + let count = + self.sled_num_running_zones_of_kind(sled_id, ZoneKind::InternalDns); + let to_add = match desired_zone_count.checked_sub(count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing an internal DNS zone not yet supported \ + (sled {sled_id} has {count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + + for i in count..desired_zone_count { + self.sled_add_zone_internal_dns( + sled_id, + i.try_into().map_err(|_| { + Error::Planner(anyhow!("zone index overflow")) + })?, + )?; + } + + Ok(EnsureMultiple::Changed { added: to_add, removed: 0 }) + } + pub fn sled_ensure_zone_ntp( &mut self, sled_id: SledUuid, @@ -636,14 +714,18 @@ impl<'a> BlueprintBuilder<'a> { let sled_subnet = sled_info.subnet; let ip = self.sled_alloc_ip(sled_id)?; let ntp_address = SocketAddrV6::new(ip, NTP_PORT, 0, 0); + // Construct the list of internal DNS servers. // // It'd be tempting to get this list from the other internal NTP - // servers but there may not be any of those. We could also - // construct this list manually from the set of internal DNS servers - // actually deployed. Instead, we take the same approach as RSS: - // these are at known, fixed addresses relative to the AZ subnet - // (which itself is a known-prefix parent subnet of the sled subnet). + // servers, but there may not be any of those. 
We could also + // construct it manually from the set of internal DNS servers + // actually deployed, or ask the DNS subnet allocator; but those + // would both require that all the internal DNS zones be added + // before any NTP zones, a constraint we don't currently enforce. + // Instead, we take the same approach as RSS: they are at known, + // fixed addresses relative to the AZ subnet (which itself is a + // known-prefix parent subnet of the sled subnet). let dns_servers = get_internal_dns_server_addresses(sled_subnet.net().prefix()); @@ -1139,13 +1221,13 @@ impl<'a> BlueprintBuilder<'a> { allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } - // Selects a zpools for this zone type. - // - // This zpool may be used for either durable storage or transient - // storage - the usage is up to the caller. - // - // If `zone_kind` already exists on `sled_id`, it is prevented - // from using the same zpool as exisitng zones with the same kind. + /// Selects a zpool for this zone type. + /// + /// This zpool may be used for either durable storage or transient + /// storage - the usage is up to the caller. + /// + /// If `zone_kind` already exists on `sled_id`, it is prevented + /// from using the same zpool as existing zones with the same kind. fn sled_select_zpool( &self, sled_id: SledUuid, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs index 93c845add5c..4594ff9fed6 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/external_networking.rs @@ -103,8 +103,8 @@ impl<'a> BuilderExternalNetworking<'a> { ExternalIpAllocator::new(input.service_ip_pool_ranges()); let mut used_macs: HashSet = HashSet::new(); - for (_, z) in - parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) + for (_, z) in parent_blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) { let zone_type = &z.zone_type; match zone_type { diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs new file mode 100644 index 00000000000..61b4ec64dea --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_builder/internal_dns.rs @@ -0,0 +1,183 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::Error; +use nexus_types::deployment::blueprint_zone_type::InternalDns; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::PlanningInput; +use omicron_common::address::DnsSubnet; +use omicron_common::address::ReservedRackSubnet; +use omicron_common::policy::MAX_INTERNAL_DNS_REDUNDANCY; +use std::collections::BTreeSet; + +/// Internal DNS zones are not allocated an address in the sled's subnet. +/// Instead, they get a /64 subnet of the "reserved" rack subnet (so that +/// it's routable with IPv6), and use the first address in that. There may +/// be at most `MAX_INTERNAL_DNS_REDUNDANCY` subnets (and so servers) +/// allocated. This structure tracks which subnets are currently allocated. 
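To make the address scheme concrete: with the rack subnet that appears throughout the expected-output files later in this diff, the first three DNS subnets and server addresses work out as shown below. This is a sketch in plain `std::net` arithmetic; the real code goes through the `ReservedRackSubnet` and `DnsSubnet` types:

```rust
use std::net::Ipv6Addr;

// Each internal DNS zone gets its own /64 under the reserved rack subnet
// and listens on the first host address within it.
fn dns_server_addr(rack_subnet: Ipv6Addr, which: u16) -> Ipv6Addr {
    let mut segments = rack_subnet.segments();
    segments[3] = which; // select the Nth /64 under the rack subnet
    segments[7] = 1; // first host address in that /64
    Ipv6Addr::from(segments)
}

fn main() {
    // Rack subnet borrowed from the expected-output files in this diff.
    let rack: Ipv6Addr = "fd00:1122:3344::".parse().unwrap();
    for (n, expected) in [
        (1, "fd00:1122:3344:1::1"),
        (2, "fd00:1122:3344:2::1"),
        (3, "fd00:1122:3344:3::1"),
    ] {
        assert_eq!(dns_server_addr(rack, n), expected.parse::<Ipv6Addr>().unwrap());
    }
}
```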
+#[derive(Debug)]
+pub struct DnsSubnetAllocator {
+ in_use: BTreeSet<DnsSubnet>,
+}
+
+impl DnsSubnetAllocator {
+ pub fn new<'a>(
+ parent_blueprint: &'a Blueprint,
+ input: &'a PlanningInput,
+ ) -> Result<Self, Error> {
+ let in_use = parent_blueprint
+ .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
+ .filter_map(|(_sled_id, zone_config)| match zone_config.zone_type {
+ BlueprintZoneType::InternalDns(InternalDns {
+ dns_address,
+ ..
+ }) => Some(DnsSubnet::from_addr(*dns_address.ip())),
+ _ => None,
+ })
+ .collect::<BTreeSet<_>>();
+
+ let redundancy = input.target_internal_dns_zone_count();
+ if redundancy > MAX_INTERNAL_DNS_REDUNDANCY {
+ return Err(Error::TooManyDnsServers);
+ }
+
+ Ok(Self { in_use })
+ }
+
+ /// Allocate the first available DNS subnet, falling back to the first
+ /// subnet of the given rack subnet if nothing is allocated yet. The
+ /// fallback is needed because we can't necessarily guess the correct
+ /// reserved rack subnet from the blueprint alone (there might not be any
+ /// internal DNS zones in the parent blueprint, though that would itself
+ /// be odd), but the caller can derive it at runtime from the sled address.
+ pub fn alloc(
+ &mut self,
+ rack_subnet: ReservedRackSubnet,
+ ) -> Result<DnsSubnet, Error> {
+ let new = if let Some(first) = self.in_use.first() {
+ // Take the first available DNS subnet. We currently generate
+ // all `MAX_INTERNAL_DNS_REDUNDANCY` subnets and subtract any
+ // that are in use; this is fine as long as that constant is small.
+ let subnets = BTreeSet::from_iter(
+ ReservedRackSubnet::from_subnet(first.subnet())
+ .get_dns_subnets(),
+ );
+ let mut avail = subnets.difference(&self.in_use);
+ if let Some(first) = avail.next() {
+ *first
+ } else {
+ return Err(Error::NoAvailableDnsSubnets);
+ }
+ } else {
+ rack_subnet.get_dns_subnet(1)
+ };
+ self.in_use.insert(new);
+ Ok(new)
+ }
+
+ #[cfg(test)]
+ fn first(&self) -> Option<DnsSubnet> {
+ self.in_use.first().copied()
+ }
+
+ #[cfg(test)]
+ fn pop_first(&mut self) -> Option<DnsSubnet> {
+ self.in_use.pop_first()
+ }
+
+ #[cfg(test)]
+ fn last(&self) -> Option<DnsSubnet> {
+ self.in_use.last().cloned()
+ }
+
+ #[cfg(test)]
+ fn len(&self) -> usize {
+ self.in_use.len()
+ }
+}
+
+#[cfg(test)]
+pub mod test {
+ use super::*;
+ use crate::blueprint_builder::test::verify_blueprint;
+ use crate::example::ExampleSystem;
+ use omicron_common::policy::{
+ INTERNAL_DNS_REDUNDANCY, MAX_INTERNAL_DNS_REDUNDANCY,
+ };
+ use omicron_test_utils::dev::test_setup_log;
+
+ #[test]
+ fn test_dns_subnet_allocator() {
+ static TEST_NAME: &str = "test_dns_subnet_allocator";
+ let logctx = test_setup_log(TEST_NAME);
+
+ // Use our example system to create a blueprint and input.
+ let example =
+ ExampleSystem::new(&logctx.log, TEST_NAME, INTERNAL_DNS_REDUNDANCY);
+ let blueprint1 = &example.blueprint;
+ verify_blueprint(blueprint1);
+
+ // Create an allocator.
+ let mut allocator = DnsSubnetAllocator::new(blueprint1, &example.input)
+ .expect("can't create allocator");
+
+ // Save the first & last allocated subnets.
+ let first = allocator.first().expect("should be a first subnet");
+ let last = allocator.last().expect("should be a last subnet");
+ assert!(last > first, "first should come before last");
+
+ // Derive the reserved rack subnet.
+ let rack_subnet = first.rack_subnet();
+ assert_eq!(
+ rack_subnet,
+ last.rack_subnet(),
+ "first & last DNS subnets should be in the same rack subnet"
+ );
+
+ // Allocate two new subnets.
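The allocation strategy in `alloc` above (enumerate every legal subnet, subtract the in-use set, take the smallest hole) as a self-contained toy over subnet indexes; the assertions that follow exercise the same packing behavior:

```rust
use std::collections::BTreeSet;

// Toy model of DnsSubnetAllocator::alloc over indexes 1..=max.
fn alloc(in_use: &mut BTreeSet<u8>, max: u8) -> Option<u8> {
    let all: BTreeSet<u8> = (1..=max).collect();
    let next = all.difference(in_use).next().copied()?;
    in_use.insert(next);
    Some(next)
}

fn main() {
    let mut in_use = BTreeSet::from([1, 3]);
    assert_eq!(alloc(&mut in_use, 5), Some(2)); // packs into the first gap
    assert_eq!(alloc(&mut in_use, 5), Some(4));
    assert_eq!(alloc(&mut in_use, 5), Some(5));
    assert_eq!(alloc(&mut in_use, 5), None); // exhausted
}
```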
+ assert_eq!(MAX_INTERNAL_DNS_REDUNDANCY - INTERNAL_DNS_REDUNDANCY, 2);
+ assert_eq!(
+ allocator.len(),
+ INTERNAL_DNS_REDUNDANCY,
+ "should be {INTERNAL_DNS_REDUNDANCY} subnets allocated"
+ );
+ let new1 =
+ allocator.alloc(rack_subnet).expect("failed to allocate a subnet");
+ let new2 = allocator
+ .alloc(rack_subnet)
+ .expect("failed to allocate another subnet");
+ assert!(
+ new1 > last,
+ "newly allocated subnets should be after initial ones"
+ );
+ assert!(new2 > new1, "allocated subnets out of order");
+ assert_ne!(new1, new2, "allocated duplicate subnets");
+ assert_eq!(
+ allocator.len(),
+ MAX_INTERNAL_DNS_REDUNDANCY,
+ "should be {MAX_INTERNAL_DNS_REDUNDANCY} subnets allocated"
+ );
+ allocator.alloc(rack_subnet).expect_err("no subnets available");
+
+ // Test packing.
+ let first = allocator.pop_first().expect("should be a first subnet");
+ let second = allocator.pop_first().expect("should be a second subnet");
+ assert!(first < second, "first should be before second");
+ assert_eq!(
+ allocator.alloc(rack_subnet).expect("allocation failed"),
+ first,
+ "should get first subnet"
+ );
+ assert_eq!(
+ allocator.alloc(rack_subnet).expect("allocation failed"),
+ second,
+ "should get second subnet"
+ );
+ allocator.alloc(rack_subnet).expect_err("no subnets available");
+
+ // Done!
+ logctx.cleanup_successful();
+ }
+}
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs
index 99d3b417726..9c6e51f1ded 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs
@@ -6,6 +6,7 @@
 mod builder;
 mod external_networking;
+mod internal_dns;
 mod zones;
 pub use builder::*;
diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs
index e52fe3fc4bf..e0da9428ee6 100644
--- a/nexus/reconfigurator/planning/src/example.rs
+++ b/nexus/reconfigurator/planning/src/example.rs
@@ -9,13 +9,10 @@
 use crate::system::SledBuilder;
 use crate::system::SystemDescription;
 use nexus_types::deployment::Blueprint;
 use nexus_types::deployment::BlueprintZoneFilter;
-use nexus_types::deployment::OmicronZoneNic;
 use nexus_types::deployment::PlanningInput;
 use nexus_types::deployment::SledFilter;
 use nexus_types::inventory::Collection;
-use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::SledKind;
-use omicron_uuid_kinds::VnicUuid;
 use typed_rng::TypedUuidRng;
 pub struct ExampleSystem {
@@ -79,6 +76,9 @@ impl ExampleSystem {
 vec![],
 )
 .unwrap();
+ let _ = builder
+ .sled_ensure_zone_multiple_internal_dns(sled_id, 1)
+ .unwrap();
 let _ = builder.sled_ensure_disks(sled_id, sled_resources).unwrap();
 for pool_name in sled_resources.zpools.keys() {
 let _ = builder
@@ -92,37 +92,15 @@ impl ExampleSystem {
 system.to_collection_builder().expect("failed to build collection");
 builder.set_rng_seed((test_name, "ExampleSystem collection"));
- for sled_id in blueprint.sleds() {
- let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else {
- continue;
- };
- for zone in zones.zones.iter() {
- let service_id = zone.id;
- if let Some((external_ip, nic)) =
- zone.zone_type.external_networking()
- {
- input_builder
- .add_omicron_zone_external_ip(service_id, external_ip)
- .expect("failed to add Omicron zone external IP");
- input_builder
- .add_omicron_zone_nic(
- service_id,
- OmicronZoneNic {
- // TODO-cleanup use `TypedUuid` everywhere
- id: VnicUuid::from_untyped_uuid(nic.id),
- mac: nic.mac,
- ip: nic.ip,
- slot:
nic.slot, - primary: nic.primary, - }, - ) - .expect("failed to add Omicron zone NIC"); - } - } + input_builder + .update_network_resources_from_blueprint(&blueprint) + .expect("failed to add network resources from blueprint"); + + for (sled_id, zones) in &blueprint.blueprint_zones { builder .found_sled_omicron_zones( "fake sled agent", - sled_id, + *sled_id, zones.to_omicron_zones_config( BlueprintZoneFilter::ShouldBeRunning, ), diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 3bd1b8757e7..8dcad21df18 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -212,10 +212,7 @@ impl<'a> Planner<'a> { fn do_plan_add(&mut self) -> Result<(), Error> { // Internal DNS is a prerequisite for bringing up all other zones. At // this point, we assume that internal DNS (as a service) is already - // functioning. At some point, this function will have to grow the - // ability to determine whether more internal DNS zones need to be - // added and where they should go. And the blueprint builder will need - // to grow the ability to provision one. + // functioning. // After we make our initial pass through the sleds below to check for // zones every sled should have (NTP, Crucible), we'll start making @@ -356,6 +353,7 @@ impl<'a> Planner<'a> { for zone_kind in [ DiscretionaryOmicronZone::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb, + DiscretionaryOmicronZone::InternalDns, DiscretionaryOmicronZone::Nexus, ] { let num_zones_to_add = self.num_additional_zones_needed(zone_kind); @@ -434,6 +432,9 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::CockroachDb => { self.input.target_cockroachdb_zone_count() } + DiscretionaryOmicronZone::InternalDns => { + self.input.target_internal_dns_zone_count() + } DiscretionaryOmicronZone::Nexus => { self.input.target_nexus_zone_count() } @@ -516,6 +517,12 @@ impl<'a> Planner<'a> { new_total_zone_count, )? } + DiscretionaryOmicronZone::InternalDns => { + self.blueprint.sled_ensure_zone_multiple_internal_dns( + sled_id, + new_total_zone_count, + )? + } DiscretionaryOmicronZone::Nexus => { self.blueprint.sled_ensure_zone_multiple_nexus( sled_id, @@ -745,6 +752,7 @@ mod test { use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; + use omicron_common::policy::MAX_INTERNAL_DNS_REDUNDANCY; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -1029,6 +1037,11 @@ mod test { // one sled we have. let mut builder = input.into_builder(); builder.policy_mut().target_nexus_zone_count = 5; + + // But we don't want it to add any more internal DNS zones, + // which it would by default (because we have only one sled). + builder.policy_mut().target_internal_dns_zone_count = 1; + let input = builder.build(); let blueprint2 = Planner::new_based_on( logctx.log.clone(), @@ -1141,6 +1154,194 @@ mod test { logctx.cleanup_successful(); } + /// Check that the planner will spread additional internal DNS zones out across + /// sleds as it adds them + #[test] + fn test_spread_internal_dns_zones_across_sleds() { + static TEST_NAME: &str = + "planner_spread_internal_dns_zones_across_sleds"; + let logctx = test_setup_log(TEST_NAME); + + // Use our example system as a starting point. 
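The planner's "ensure N zones of a kind" arithmetic, used by `sled_ensure_zone_multiple_internal_dns` above and exercised by the tests below, boils down to a `checked_sub` (a toy sketch with invented names; removal is deliberately unsupported):

```rust
// Toy sketch: how many zones of a kind to add on a sled.
fn zones_to_add(desired: usize, running: usize) -> Result<usize, String> {
    match desired.checked_sub(running) {
        Some(0) => Ok(0), // already at the target; nothing to do
        Some(n) => Ok(n), // add n more zones
        None => Err(format!(
            "removing zones not yet supported (have {running}, want {desired})"
        )),
    }
}

fn main() {
    assert_eq!(zones_to_add(3, 1), Ok(2));
    assert_eq!(zones_to_add(1, 1), Ok(0));
    assert!(zones_to_add(1, 2).is_err());
}
```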
+ let (collection, input, blueprint1) =
+ example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS);
+
+ // This blueprint should have exactly 3 internal DNS zones: one on each sled.
+ assert_eq!(blueprint1.blueprint_zones.len(), 3);
+ for sled_config in blueprint1.blueprint_zones.values() {
+ assert_eq!(
+ sled_config
+ .zones
+ .iter()
+ .filter(|z| z.zone_type.is_internal_dns())
+ .count(),
+ 1
+ );
+ }
+
+ // Try to run the planner with a high number of internal DNS zones;
+ // it will fail because the target is > MAX_INTERNAL_DNS_REDUNDANCY.
+ let mut builder = input.clone().into_builder();
+ builder.policy_mut().target_internal_dns_zone_count = 14;
+ assert!(
+ Planner::new_based_on(
+ logctx.log.clone(),
+ &blueprint1,
+ &builder.build(),
+ "test_blueprint2",
+ &collection,
+ )
+ .is_err(),
+ "too many DNS zones"
+ );
+
+ // Try again with a reasonable number.
+ let mut builder = input.into_builder();
+ builder.policy_mut().target_internal_dns_zone_count =
+ MAX_INTERNAL_DNS_REDUNDANCY;
+ let blueprint2 = Planner::new_based_on(
+ logctx.log.clone(),
+ &blueprint1,
+ &builder.build(),
+ "test_blueprint2",
+ &collection,
+ )
+ .expect("failed to create planner")
+ .with_rng_seed((TEST_NAME, "bp2"))
+ .plan()
+ .expect("failed to plan");
+
+ let diff = blueprint2.diff_since_blueprint(&blueprint1);
+ println!(
+ "1 -> 2 (added additional internal DNS zones):\n{}",
+ diff.display()
+ );
+ assert_eq!(diff.sleds_added.len(), 0);
+ assert_eq!(diff.sleds_removed.len(), 0);
+ assert_eq!(diff.sleds_modified.len(), 2);
+
+ // 2 sleds should each get 1 additional internal DNS zone.
+ let mut total_new_zones = 0;
+ for sled_id in diff.sleds_modified {
+ assert!(!diff.zones.removed.contains_key(&sled_id));
+ assert!(!diff.zones.modified.contains_key(&sled_id));
+ if let Some(zones_added) = &diff.zones.added.get(&sled_id) {
+ let zones = &zones_added.zones;
+ match zones.len() {
+ n @ 1 => {
+ total_new_zones += n;
+ }
+ n => {
+ panic!("unexpected number of zones added to {sled_id}: {n}")
+ }
+ }
+ for zone in zones {
+ assert_eq!(
+ zone.kind(),
+ ZoneKind::InternalDns,
+ "unexpectedly added a non-internal-DNS zone: {zone:?}"
+ );
+ }
+ }
+ }
+ assert_eq!(total_new_zones, 2);
+
+ logctx.cleanup_successful();
+ }
+
+ /// Check that the planner will reuse external IPs that were previously
+ /// assigned to expunged zones
+ #[test]
+ fn test_reuse_external_ips_from_expunged_zones() {
+ static TEST_NAME: &str =
+ "planner_reuse_external_ips_from_expunged_zones";
+ let logctx = test_setup_log(TEST_NAME);
+
+ // Use our example system as a starting point.
+ let (collection, input, blueprint1) =
+ example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS);
+
+ // Expunge the first sled we see, which will result in a Nexus external
+ // IP no longer being associated with a running zone, and a new Nexus
+ // zone being added to one of the two remaining sleds.
+ let mut builder = input.into_builder();
+ let (sled_id, details) =
+ builder.sleds_mut().iter_mut().next().expect("no sleds");
+ let sled_id = *sled_id;
+ details.policy = SledPolicy::Expunged;
+ let input = builder.build();
+ let blueprint2 = Planner::new_based_on(
+ logctx.log.clone(),
+ &blueprint1,
+ &input,
+ "test_blueprint2",
+ &collection,
+ )
+ .expect("failed to create planner")
+ .with_rng_seed((TEST_NAME, "bp2"))
+ .plan()
+ .expect("failed to plan");
+
+ let diff = blueprint2.diff_since_blueprint(&blueprint1);
+ println!("1 -> 2 (expunged sled):\n{}", diff.display());
+
+ // The expunged sled should have an expunged Nexus zone.
+ let zone = blueprint2.blueprint_zones[&sled_id] + .zones + .iter() + .find(|zone| matches!(zone.zone_type, BlueprintZoneType::Nexus(_))) + .expect("no nexus zone found"); + assert_eq!(zone.disposition, BlueprintZoneDisposition::Expunged); + + // Set the target Nexus zone count to one that will completely exhaust + // the service IP pool. This will force reuse of the IP that was + // allocated to the expunged Nexus zone. + let mut builder = input.into_builder(); + builder.update_network_resources_from_blueprint(&blueprint2).unwrap(); + assert_eq!(builder.policy_mut().service_ip_pool_ranges.len(), 1); + builder.policy_mut().target_nexus_zone_count = + builder.policy_mut().service_ip_pool_ranges[0] + .len() + .try_into() + .unwrap(); + let input = builder.build(); + let blueprint3 = Planner::new_based_on( + logctx.log.clone(), + &blueprint2, + &input, + "test_blueprint3", + &collection, + ) + .expect("failed to create planner") + .with_rng_seed((TEST_NAME, "bp3")) + .plan() + .expect("failed to plan"); + + let diff = blueprint3.diff_since_blueprint(&blueprint2); + println!("2 -> 3 (maximum Nexus):\n{}", diff.display()); + + // Planning succeeded, but let's prove that we reused the IP address! + let expunged_ip = zone.zone_type.external_networking().unwrap().0.ip(); + let new_zone = blueprint3 + .blueprint_zones + .values() + .flat_map(|c| &c.zones) + .find(|zone| { + zone.disposition == BlueprintZoneDisposition::InService + && zone + .zone_type + .external_networking() + .map_or(false, |(ip, _)| expunged_ip == ip.ip()) + }) + .expect("couldn't find that the external IP was reused"); + println!( + "zone {} reused external IP {} from expunged zone {}", + new_zone.id, expunged_ip, zone.id + ); + + logctx.cleanup_successful(); + } + #[test] fn test_crucible_allocation_skips_nonprovisionable_disks() { static TEST_NAME: &str = @@ -1153,9 +1354,10 @@ mod test { let mut builder = input.into_builder(); - // Avoid churning on the quantity of Nexus zones - we're okay staying at - // one. + // Avoid churning on the quantity of Nexus and internal DNS zones - + // we're okay staying at one each. builder.policy_mut().target_nexus_zone_count = 1; + builder.policy_mut().target_internal_dns_zone_count = 1; // Make generated disk ids deterministic let mut disk_rng = @@ -1236,9 +1438,10 @@ mod test { let mut builder = input.into_builder(); - // Aside: Avoid churning on the quantity of Nexus zones - we're okay - // staying at one. + // Avoid churning on the quantity of Nexus and internal DNS zones - + // we're okay staying at one each. builder.policy_mut().target_nexus_zone_count = 1; + builder.policy_mut().target_internal_dns_zone_count = 1; // The example system should be assigning crucible zones to each // in-service disk. When we expunge one of these disks, the planner @@ -1494,6 +1697,10 @@ mod test { }; println!("1 -> 2: decommissioned {decommissioned_sled_id}"); + // Because we marked zones as expunged, we need to update the networking + // config in the planning input. + builder.update_network_resources_from_blueprint(&blueprint1).unwrap(); + // Now run the planner with a high number of target Nexus zones. The // number (9) is chosen such that: // @@ -1767,6 +1974,7 @@ mod test { // Remove the now-decommissioned sled from the planning input. 
let mut builder = input.into_builder(); + builder.update_network_resources_from_blueprint(&blueprint2).unwrap(); builder.sleds_mut().remove(&expunged_sled_id); let input = builder.build(); diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index 2fb60e66f8e..6f3bac0ecc2 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -16,6 +16,7 @@ use std::mem; pub(crate) enum DiscretionaryOmicronZone { BoundaryNtp, CockroachDb, + InternalDns, Nexus, // TODO expand this enum as we start to place more services } @@ -27,6 +28,7 @@ impl DiscretionaryOmicronZone { match zone_type { BlueprintZoneType::BoundaryNtp(_) => Some(Self::BoundaryNtp), BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb), + BlueprintZoneType::InternalDns(_) => Some(Self::InternalDns), BlueprintZoneType::Nexus(_) => Some(Self::Nexus), // Zones that we should place but don't yet. BlueprintZoneType::Clickhouse(_) @@ -34,7 +36,6 @@ impl DiscretionaryOmicronZone { | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) - | BlueprintZoneType::InternalDns(_) | BlueprintZoneType::Oximeter(_) => None, // Zones that get special handling for placement (all sleds get // them, although internal NTP has some interactions with boundary @@ -50,6 +51,7 @@ impl From for ZoneKind { match zone { DiscretionaryOmicronZone::BoundaryNtp => Self::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb, + DiscretionaryOmicronZone::InternalDns => Self::InternalDns, DiscretionaryOmicronZone::Nexus => Self::Nexus, } } diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 9faf14bdc08..d3b2daca63f 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -33,13 +33,14 @@ use nexus_types::inventory::SpType; use omicron_common::address::get_sled_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; +use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -81,6 +82,7 @@ pub struct SystemDescription { available_scrimlet_slots: BTreeSet, target_boundary_ntp_zone_count: usize, target_nexus_zone_count: usize, + target_internal_dns_zone_count: usize, target_cockroachdb_zone_count: usize, target_cockroachdb_cluster_version: CockroachDbClusterVersion, service_ip_pool_ranges: Vec, @@ -130,6 +132,7 @@ impl SystemDescription { // Policy defaults let target_nexus_zone_count = NEXUS_REDUNDANCY; + let target_internal_dns_zone_count = INTERNAL_DNS_REDUNDANCY; // TODO-cleanup These are wrong, but we don't currently set up any // boundary NTP or CRDB nodes in our fake system, so this prevents @@ -156,6 +159,7 @@ impl SystemDescription { available_scrimlet_slots, target_boundary_ntp_zone_count, target_nexus_zone_count, + target_internal_dns_zone_count, target_cockroachdb_zone_count, 
target_cockroachdb_cluster_version, service_ip_pool_ranges, @@ -325,9 +329,11 @@ impl SystemDescription { service_ip_pool_ranges: self.service_ip_pool_ranges.clone(), target_boundary_ntp_zone_count: self.target_boundary_ntp_zone_count, target_nexus_zone_count: self.target_nexus_zone_count, + target_internal_dns_zone_count: self.target_internal_dns_zone_count, target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt index 01b7ceb46b7..08acad5a459 100644 --- a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt +++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt @@ -24,16 +24,17 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:103::23 - crucible 4644ea0c-0ec3-41be-a356-660308e1c3fc in service fd00:1122:3344:103::2c - crucible 55f4d117-0b9d-4256-a2c0-f46d3ed5fff9 in service fd00:1122:3344:103::25 - crucible 5c6a4628-8831-483b-995f-79b9126c4d04 in service fd00:1122:3344:103::28 - crucible 6a01210c-45ed-41a5-9230-8e05ecf5dd8f in service fd00:1122:3344:103::29 - crucible 7004cab9-dfc0-43ba-92d3-58d4ced66025 in service fd00:1122:3344:103::24 - crucible 79552859-fbd3-43bb-a9d3-6baba25558f8 in service fd00:1122:3344:103::26 - crucible 90696819-9b53-485a-9c65-ca63602e843e in service fd00:1122:3344:103::27 - crucible c99525b3-3680-4df6-9214-2ee3e1020e8b in service fd00:1122:3344:103::2a - crucible f42959d3-9eef-4e3b-b404-6177ce3ec7a1 in service fd00:1122:3344:103::2b + crucible 38b047ea-e3de-4859-b8e0-70cac5871446 in service fd00:1122:3344:103::2c + crucible 4644ea0c-0ec3-41be-a356-660308e1c3fc in service fd00:1122:3344:103::2b + crucible 55f4d117-0b9d-4256-a2c0-f46d3ed5fff9 in service fd00:1122:3344:103::24 + crucible 5c6a4628-8831-483b-995f-79b9126c4d04 in service fd00:1122:3344:103::27 + crucible 6a01210c-45ed-41a5-9230-8e05ecf5dd8f in service fd00:1122:3344:103::28 + crucible 7004cab9-dfc0-43ba-92d3-58d4ced66025 in service fd00:1122:3344:103::23 + crucible 79552859-fbd3-43bb-a9d3-6baba25558f8 in service fd00:1122:3344:103::25 + crucible 90696819-9b53-485a-9c65-ca63602e843e in service fd00:1122:3344:103::26 + crucible c99525b3-3680-4df6-9214-2ee3e1020e8b in service fd00:1122:3344:103::29 + crucible f42959d3-9eef-4e3b-b404-6177ce3ec7a1 in service fd00:1122:3344:103::2a + internal_dns 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:1::1 internal_ntp c81c9d4a-36d7-4796-9151-f564d3735152 in service fd00:1122:3344:103::21 nexus b2573120-9c91-4ed7-8b4f-a7bfe8dbc807 in service fd00:1122:3344:103::22 @@ -60,18 +61,19 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::2c - crucible 
5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:101::25 - crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::29 - crucible 95c3b6d1-2592-4252-b5c1-5d0faf3ce9c9 in service fd00:1122:3344:101::24 - crucible a5a0b7a9-37c9-4dbd-8393-ec7748ada3b0 in service fd00:1122:3344:101::2b - crucible a9a6a974-8953-4783-b815-da46884f2c02 in service fd00:1122:3344:101::23 - crucible aa25add8-60b0-4ace-ac60-15adcdd32d50 in service fd00:1122:3344:101::2a - crucible b6f2dd1e-7f98-4a68-9df2-b33c69d1f7ea in service fd00:1122:3344:101::27 - crucible dc22d470-dc46-436b-9750-25c8d7d369e2 in service fd00:1122:3344:101::26 - crucible f7e434f9-6d4a-476b-a9e2-48d6ee28a08e in service fd00:1122:3344:101::28 - internal_ntp 38b047ea-e3de-4859-b8e0-70cac5871446 in service fd00:1122:3344:101::21 - nexus fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:101::22 + crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::2a + crucible 29278a22-1ba1-4117-bfdb-39fcb9ae7fd1 in service fd00:1122:3344:101::2c + crucible 5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:101::23 + crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::27 + crucible a5a0b7a9-37c9-4dbd-8393-ec7748ada3b0 in service fd00:1122:3344:101::29 + crucible aa25add8-60b0-4ace-ac60-15adcdd32d50 in service fd00:1122:3344:101::28 + crucible aac3ab51-9e2b-4605-9bf6-e3eb3681c2b5 in service fd00:1122:3344:101::2b + crucible b6f2dd1e-7f98-4a68-9df2-b33c69d1f7ea in service fd00:1122:3344:101::25 + crucible dc22d470-dc46-436b-9750-25c8d7d369e2 in service fd00:1122:3344:101::24 + crucible f7e434f9-6d4a-476b-a9e2-48d6ee28a08e in service fd00:1122:3344:101::26 + internal_dns 95c3b6d1-2592-4252-b5c1-5d0faf3ce9c9 in service fd00:1122:3344:2::1 + internal_ntp fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:101::21 + nexus a9a6a974-8953-4783-b815-da46884f2c02 in service fd00:1122:3344:101::22 sled be7f4375-2a6b-457f-b1a4-3074a715e5fe: @@ -96,18 +98,19 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::2c - crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::29 - crucible 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::24 - crucible 65d03287-e43f-45f4-902e-0a5e4638f31a in service fd00:1122:3344:102::25 - crucible 6a5901b1-f9d7-425c-8ecb-a786c900f217 in service fd00:1122:3344:102::27 - crucible 9b722fea-a186-4bc3-bc37-ce7f6de6a796 in service fd00:1122:3344:102::23 - crucible b3583b5f-4a62-4471-9be7-41e61578de4c in service fd00:1122:3344:102::2a - crucible bac92034-b9e6-4e8b-9ffb-dbba9caec88d in service fd00:1122:3344:102::28 - crucible d9653001-f671-4905-a410-6a7abc358318 in service fd00:1122:3344:102::2b - crucible edaca77e-5806-446a-b00c-125962cd551d in service fd00:1122:3344:102::26 - internal_ntp aac3ab51-9e2b-4605-9bf6-e3eb3681c2b5 in service fd00:1122:3344:102::21 - nexus 29278a22-1ba1-4117-bfdb-39fcb9ae7fd1 in service fd00:1122:3344:102::22 + crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::29 + crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::26 + crucible 6a5901b1-f9d7-425c-8ecb-a786c900f217 in service fd00:1122:3344:102::24 + crucible 
b3583b5f-4a62-4471-9be7-41e61578de4c in service fd00:1122:3344:102::27 + crucible b97bdef5-ed14-4e11-9d3b-3379c18ea694 in service fd00:1122:3344:102::2c + crucible bac92034-b9e6-4e8b-9ffb-dbba9caec88d in service fd00:1122:3344:102::25 + crucible c240ec8c-cec5-4117-944d-faeb5672d568 in service fd00:1122:3344:102::2b + crucible cf766535-9b6f-4263-a83a-86f45f7b005b in service fd00:1122:3344:102::2a + crucible d9653001-f671-4905-a410-6a7abc358318 in service fd00:1122:3344:102::28 + crucible edaca77e-5806-446a-b00c-125962cd551d in service fd00:1122:3344:102::23 + internal_dns 65d03287-e43f-45f4-902e-0a5e4638f31a in service fd00:1122:3344:3::1 + internal_ntp 9b722fea-a186-4bc3-bc37-ce7f6de6a796 in service fd00:1122:3344:102::21 + nexus 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::22 COCKROACHDB SETTINGS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt index 3b14db49c70..4ceb76ba39d 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt @@ -25,16 +25,17 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 - crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a - crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 - crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2b - crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::25 - crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2c - crucible 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::29 - crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::28 - crucible b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:103::23 - crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::26 + crucible 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:103::2c + crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::23 + crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::29 + crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::26 + crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2a + crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::24 + crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2b + crucible 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::28 + crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::27 + crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::25 + internal_dns b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:1::1 internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 nexus cc816cfe-3869-4dde-b596-397d41198628 in service fd00:1122:3344:103::22 @@ -61,18 +62,19 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ------------------------------------------------------------------------------------------ zone type zone id 
disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::27 - crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 - crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 - crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::29 - crucible 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::23 - crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::2b - crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2c - crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::28 - crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::25 - crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::2a - internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 - nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 + crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::25 + crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::24 + crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::27 + crucible 47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:101::2b + crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::29 + crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2a + crucible 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:101::2c + crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::26 + crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::23 + crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::28 + internal_dns 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:2::1 + internal_ntp c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::21 + nexus 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::22 sled 590e3034-d946-4166-b0e5-2d0034197a07: @@ -97,18 +99,19 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a - crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 - crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b - crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::26 - crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::2c - crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::28 - crucible af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::23 - crucible d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:102::25 - crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::27 - crucible edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::24 - internal_ntp 47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:102::21 - nexus 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:102::22 + crucible 0565e7e4-f13a-4123-8928-d715f83e36aa in service 
fd00:1122:3344:102::2b + crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::27 + crucible 1cc3f503-2001-4d85-80e5-c7c40d2e3b10 in service fd00:1122:3344:102::2a + crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::26 + crucible 62058f4c-c747-4e21-a8dc-2fd4a160c98c in service fd00:1122:3344:102::2c + crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::28 + crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::23 + crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::29 + crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::25 + crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::24 + internal_dns d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:3::1 + internal_ntp af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::21 + nexus edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::22 ADDED SLEDS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt index b252a21d7d1..dde82c189fa 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt @@ -25,16 +25,17 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 - crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a - crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 - crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2b - crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::25 - crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2c - crucible 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::29 - crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::28 - crucible b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:103::23 - crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::26 + crucible 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:103::2c + crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::23 + crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::29 + crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::26 + crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2a + crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::24 + crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2b + crucible 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::28 + crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::27 + crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::25 + internal_dns b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:1::1 internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 nexus cc816cfe-3869-4dde-b596-397d41198628 in service 
fd00:1122:3344:103::22 @@ -61,18 +62,19 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::27 - crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 - crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 - crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::29 - crucible 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::23 - crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::2b - crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2c - crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::28 - crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::25 - crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::2a - internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 - nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 + crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::25 + crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::24 + crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::27 + crucible 47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:101::2b + crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::29 + crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2a + crucible 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:101::2c + crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::26 + crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::23 + crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::28 + internal_dns 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:2::1 + internal_ntp c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::21 + nexus 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::22 sled 590e3034-d946-4166-b0e5-2d0034197a07: @@ -97,18 +99,19 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a - crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 - crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b - crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::26 - crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::2c - crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::28 - crucible af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::23 - crucible d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:102::25 - crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::27 - crucible edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::24 - internal_ntp 
47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:102::21 - nexus 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:102::22 + crucible 0565e7e4-f13a-4123-8928-d715f83e36aa in service fd00:1122:3344:102::2b + crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::27 + crucible 1cc3f503-2001-4d85-80e5-c7c40d2e3b10 in service fd00:1122:3344:102::2a + crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::26 + crucible 62058f4c-c747-4e21-a8dc-2fd4a160c98c in service fd00:1122:3344:102::2c + crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::28 + crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::23 + crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::29 + crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::25 + crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::24 + internal_dns d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:3::1 + internal_ntp af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::21 + nexus edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::22 MODIFIED SLEDS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt index 556ca094e18..f114e342415 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -1,44 +1,6 @@ from: blueprint 516e80a3-b362-4fac-bd3c-4559717120dd to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e - UNCHANGED SLEDS: - - sled d67ce8f0-a691-4010-b414-420d82e80527: - - physical disks at generation 1: - ---------------------------------------------------------------------- - vendor model serial - ---------------------------------------------------------------------- - fake-vendor fake-model serial-1e2ec79e-9c11-4133-ac77-e0b994a507d5 - fake-vendor fake-model serial-440ae69d-5e2e-4539-91d0-e2930bdd7203 - fake-vendor fake-model serial-4e91d4a3-bb6c-44bb-bd4e-bf8913c1ba2b - fake-vendor fake-model serial-67de3a80-29cb-4066-b743-e285a2ca1f4e - fake-vendor fake-model serial-9139b70f-c1d3-475d-8f02-7c9acba52b2b - fake-vendor fake-model serial-95fbb110-5272-4646-ab50-21b31b7cde23 - fake-vendor fake-model serial-9bf35cd7-4938-4c34-8189-288b3195cb64 - fake-vendor fake-model serial-9d833141-18a1-4f24-8a34-6076c026aa87 - fake-vendor fake-model serial-a279461f-a7b9-413f-a79f-cb4dab4c3fce - fake-vendor fake-model serial-ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59 - - - omicron zones at generation 2: - ------------------------------------------------------------------------------------------ - zone type zone id disposition underlay IP - ------------------------------------------------------------------------------------------ - crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::27 - crucible 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:101::24 - crucible 2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::23 - crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::25 - crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::2a - crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::28 - crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2c - crucible 
8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::29 - crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::2b - crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::26 - internal_ntp ad76d200-5675-444b-b19c-684689ff421f in service fd00:1122:3344:101::21 - nexus e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::22 - - MODIFIED SLEDS: sled a1b477db-b629-48eb-911d-1ccdafca75b9: @@ -63,25 +25,27 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e ------------------------------------------------------------------------------------------- zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------- -* crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 - in service fd00:1122:3344:103::27 +* crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 - in service fd00:1122:3344:103::26 + └─ + expunged +* crucible 2307bbed-02ba-493b-89e3-46585c74c8fc - in service fd00:1122:3344:103::27 └─ + expunged -* crucible 2307bbed-02ba-493b-89e3-46585c74c8fc - in service fd00:1122:3344:103::28 +* crucible 603e629d-2599-400e-b879-4134d4cc426e - in service fd00:1122:3344:103::2b └─ + expunged -* crucible 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f - in service fd00:1122:3344:103::23 +* crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 - in service fd00:1122:3344:103::29 └─ + expunged -* crucible 603e629d-2599-400e-b879-4134d4cc426e - in service fd00:1122:3344:103::2c +* crucible ad76d200-5675-444b-b19c-684689ff421f - in service fd00:1122:3344:103::2c └─ + expunged -* crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 - in service fd00:1122:3344:103::2a +* crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c - in service fd00:1122:3344:103::28 └─ + expunged -* crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c - in service fd00:1122:3344:103::29 +* crucible e29998e7-9ed2-46b6-bb70-4118159fe07f - in service fd00:1122:3344:103::25 └─ + expunged -* crucible e29998e7-9ed2-46b6-bb70-4118159fe07f - in service fd00:1122:3344:103::26 +* crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d - in service fd00:1122:3344:103::2a └─ + expunged -* crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d - in service fd00:1122:3344:103::2b +* crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 - in service fd00:1122:3344:103::24 └─ + expunged -* crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 - in service fd00:1122:3344:103::25 +* crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 - in service fd00:1122:3344:103::23 └─ + expunged -* crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 - in service fd00:1122:3344:103::24 +* internal_dns 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f - in service fd00:1122:3344:1::1 └─ + expunged * internal_ntp c62b87b6-b98d-4d22-ba4f-cee4499e2ba8 - in service fd00:1122:3344:103::21 └─ + expunged @@ -89,6 +53,44 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e └─ + expunged + sled d67ce8f0-a691-4010-b414-420d82e80527: + + physical disks at generation 1: + ---------------------------------------------------------------------- + vendor model serial + ---------------------------------------------------------------------- + fake-vendor fake-model serial-1e2ec79e-9c11-4133-ac77-e0b994a507d5 + fake-vendor fake-model serial-440ae69d-5e2e-4539-91d0-e2930bdd7203 + fake-vendor fake-model serial-4e91d4a3-bb6c-44bb-bd4e-bf8913c1ba2b + fake-vendor fake-model serial-67de3a80-29cb-4066-b743-e285a2ca1f4e + fake-vendor fake-model serial-9139b70f-c1d3-475d-8f02-7c9acba52b2b + fake-vendor fake-model serial-95fbb110-5272-4646-ab50-21b31b7cde23 + 
fake-vendor fake-model serial-9bf35cd7-4938-4c34-8189-288b3195cb64 + fake-vendor fake-model serial-9d833141-18a1-4f24-8a34-6076c026aa87 + fake-vendor fake-model serial-a279461f-a7b9-413f-a79f-cb4dab4c3fce + fake-vendor fake-model serial-ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59 + + + omicron zones generation 2 -> 3: + ------------------------------------------------------------------------------------------ + zone type zone id disposition underlay IP + ------------------------------------------------------------------------------------------ + crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::25 + crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::23 + crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::28 + crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::26 + crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2a + crucible 8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::27 + crucible 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:101::2c + crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::29 + crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::24 + crucible f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:101::2b + internal_dns 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:2::1 + internal_ntp e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::21 + nexus 2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::22 ++ nexus ff9ce09c-afbf-425b-bbfa-3d8fb254f98e in service fd00:1122:3344:101::2d + + sled fefcf4cf-f7e7-46b3-b629-058526ce440e: physical disks at generation 1: @@ -111,19 +113,20 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::2a - crucible 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::23 - crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::28 - crucible a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::24 - crucible a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:102::25 - crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::29 - crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::2b - crucible c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::26 - crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::2c - crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::27 - internal_ntp f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:102::21 - nexus 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:102::22 -+ nexus c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:102::2d + crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::27 + crucible 2bf9ee97-90e1-48a7-bb06-a35cec63b7fe in service fd00:1122:3344:102::2b + crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::25 + crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::26 + crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::28 + crucible 
c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::23 + crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::29 + crucible e3bfcb1e-3708-45e7-a45a-2a2cab7ad829 in service fd00:1122:3344:102::2c + crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::24 + crucible eb034526-1767-4cc4-8225-ec962265710b in service fd00:1122:3344:102::2a + internal_dns a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:3::1 + internal_ntp 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::21 + nexus a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::22 ++ internal_dns c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:4::1 COCKROACHDB SETTINGS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt index 6954d4e12b3..5e48bdd646a 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt @@ -19,22 +19,24 @@ parent: 516e80a3-b362-4fac-bd3c-4559717120dd fake-vendor fake-model serial-ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59 - omicron zones at generation 2: + omicron zones at generation 3: ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::27 - crucible 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:101::24 - crucible 2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::23 - crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::25 - crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::2a - crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::28 - crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2c - crucible 8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::29 - crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::2b - crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::26 - internal_ntp ad76d200-5675-444b-b19c-684689ff421f in service fd00:1122:3344:101::21 - nexus e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::22 + crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::25 + crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::23 + crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::28 + crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::26 + crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2a + crucible 8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::27 + crucible 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:101::2c + crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::29 + crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::24 + crucible f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:101::2b + internal_dns 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:2::1 + internal_ntp e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::21 + nexus 
2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::22 + nexus ff9ce09c-afbf-425b-bbfa-3d8fb254f98e in service fd00:1122:3344:101::2d @@ -60,19 +62,20 @@ parent: 516e80a3-b362-4fac-bd3c-4559717120dd ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::2a - crucible 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::23 - crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::28 - crucible a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::24 - crucible a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:102::25 - crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::29 - crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::2b - crucible c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::26 - crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::2c - crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::27 - internal_ntp f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:102::21 - nexus 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:102::22 - nexus c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:102::2d + crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::27 + crucible 2bf9ee97-90e1-48a7-bb06-a35cec63b7fe in service fd00:1122:3344:102::2b + crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::25 + crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::26 + crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::28 + crucible c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::23 + crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::29 + crucible e3bfcb1e-3708-45e7-a45a-2a2cab7ad829 in service fd00:1122:3344:102::2c + crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::24 + crucible eb034526-1767-4cc4-8225-ec962265710b in service fd00:1122:3344:102::2a + internal_dns a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:3::1 + internal_dns c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:4::1 + internal_ntp 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::21 + nexus a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::22 @@ -82,16 +85,17 @@ WARNING: Zones exist without physical disks! 
------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 expunged fd00:1122:3344:103::27 - crucible 2307bbed-02ba-493b-89e3-46585c74c8fc expunged fd00:1122:3344:103::28 - crucible 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f expunged fd00:1122:3344:103::23 - crucible 603e629d-2599-400e-b879-4134d4cc426e expunged fd00:1122:3344:103::2c - crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 expunged fd00:1122:3344:103::2a - crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c expunged fd00:1122:3344:103::29 - crucible e29998e7-9ed2-46b6-bb70-4118159fe07f expunged fd00:1122:3344:103::26 - crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d expunged fd00:1122:3344:103::2b - crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 expunged fd00:1122:3344:103::25 - crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 expunged fd00:1122:3344:103::24 + crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 expunged fd00:1122:3344:103::26 + crucible 2307bbed-02ba-493b-89e3-46585c74c8fc expunged fd00:1122:3344:103::27 + crucible 603e629d-2599-400e-b879-4134d4cc426e expunged fd00:1122:3344:103::2b + crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 expunged fd00:1122:3344:103::29 + crucible ad76d200-5675-444b-b19c-684689ff421f expunged fd00:1122:3344:103::2c + crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c expunged fd00:1122:3344:103::28 + crucible e29998e7-9ed2-46b6-bb70-4118159fe07f expunged fd00:1122:3344:103::25 + crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d expunged fd00:1122:3344:103::2a + crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 expunged fd00:1122:3344:103::24 + crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 expunged fd00:1122:3344:103::23 + internal_dns 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f expunged fd00:1122:3344:1::1 internal_ntp c62b87b6-b98d-4d22-ba4f-cee4499e2ba8 expunged fd00:1122:3344:103::21 nexus 6a70a233-1900-43c0-9c00-aa9d1f7adfbc expunged fd00:1122:3344:103::22 @@ -104,7 +108,7 @@ WARNING: Zones exist without physical disks! 
METADATA: created by::::::::::: test_blueprint2 created at::::::::::: 1970-01-01T00:00:00.000Z - comment:::::::::::::: sled a1b477db-b629-48eb-911d-1ccdafca75b9: expunged 12 zones because: sled policy is expunged + comment:::::::::::::: sled a1b477db-b629-48eb-911d-1ccdafca75b9: expunged 13 zones because: sled policy is expunged internal DNS version: 1 external DNS version: 1 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index d3f667170ce..2199ce79e7f 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -25,16 +25,17 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 - crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c - crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 - crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 - crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 - crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 - crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 - crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a - crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b - crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 + crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::25 + crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2b + crucible 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::26 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::24 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::27 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::23 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::29 + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2a + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::28 + internal_dns 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:1::1 internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 @@ -63,29 +64,31 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------- zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------- -* crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 - in service fd00:1122:3344:103::2c +* crucible 01d58626-e1b0-480f-96be-ac784863c7dc - in service fd00:1122:3344:103::2c └─ + expunged -* crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea - in service fd00:1122:3344:103::25 +* crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 - in service 
fd00:1122:3344:103::2a └─ + expunged -* crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f - in service fd00:1122:3344:103::27 +* crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea - in service fd00:1122:3344:103::23 └─ + expunged -* crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 - in service fd00:1122:3344:103::28 +* crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f - in service fd00:1122:3344:103::25 └─ + expunged -* crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb - in service fd00:1122:3344:103::24 +* crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 - in service fd00:1122:3344:103::26 └─ + expunged -* crucible 67622d61-2df4-414d-aa0e-d1277265f405 - in service fd00:1122:3344:103::23 +* crucible b91b271d-8d80-4f49-99a0-34006ae86063 - in service fd00:1122:3344:103::28 └─ + expunged -* crucible b91b271d-8d80-4f49-99a0-34006ae86063 - in service fd00:1122:3344:103::2a +* crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 - in service fd00:1122:3344:103::24 └─ + expunged -* crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 - in service fd00:1122:3344:103::26 +* crucible e39d7c9e-182b-48af-af87-58079d723583 - in service fd00:1122:3344:103::27 └─ + expunged -* crucible e39d7c9e-182b-48af-af87-58079d723583 - in service fd00:1122:3344:103::29 +* crucible f3f2e4f3-0985-4ef6-8336-ce479382d05d - in service fd00:1122:3344:103::2b └─ + expunged -* crucible f69f92a1-5007-4bb0-a85b-604dc217154b - in service fd00:1122:3344:103::2b +* crucible f69f92a1-5007-4bb0-a85b-604dc217154b - in service fd00:1122:3344:103::29 └─ + expunged -* internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 - in service fd00:1122:3344:103::21 +* internal_dns 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb - in service fd00:1122:3344:2::1 └─ + expunged -* nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb - in service fd00:1122:3344:103::22 +* internal_ntp 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb - in service fd00:1122:3344:103::21 + └─ + expunged +* nexus 67622d61-2df4-414d-aa0e-d1277265f405 - in service fd00:1122:3344:103::22 └─ + expunged @@ -111,18 +114,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c - crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 - crucible 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 - crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 - crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 - crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b - crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 - crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 - crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 - crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a - internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 - nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::29 + crucible 57b96d5c-b71e-43e4-8869-7d514003d00d expunged fd00:1122:3344:102::2a + crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::26 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged 
fd00:1122:3344:102::28 + crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::23 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::24 + crucible b4947d31-f70e-4ee0-8817-0ca6cea9b16b expunged fd00:1122:3344:102::2b + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::25 + crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::27 + crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a expunged fd00:1122:3344:102::2c + internal_dns 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:3::1 + internal_ntp 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::21 + nexus 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::22 sled 75bc286f-2b4b-482c-9431-59272af529da: @@ -147,18 +151,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 - crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c - crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 - crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a - crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 - crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 - crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 - crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 - crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 - crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b - internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 - nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 + crucible 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:104::2a + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::28 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::24 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:104::2b + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::25 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::27 + crucible f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:104::29 + internal_dns 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:4::1 + internal_ntp b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::21 + nexus 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::22 + nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d + nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e + nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f @@ -186,18 +191,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone 
id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 - crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 - crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 - crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 - crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 - crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a - crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c - crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 - crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 - crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b - internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 - nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 + crucible 414830dc-c8c1-4748-9e9e-bc3a6435a93c in service fd00:1122:3344:101::2b + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::24 + crucible 772cbcbd-58be-4158-be85-be744871fa22 in service fd00:1122:3344:101::28 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::25 + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::27 + crucible be75764a-491b-4aec-992e-1c39e25de975 in service fd00:1122:3344:101::29 + crucible be920398-024a-4655-8c49-69b5ac48dfff in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::23 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::26 + crucible e001fea0-6594-4ece-97e3-6198c293e931 in service fd00:1122:3344:101::2a + internal_dns 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:5::1 + internal_ntp c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::21 + nexus 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::22 + nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e + nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d + nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 837cc565531..7ff2d585e1e 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -25,21 +25,22 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 - crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c - crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 - crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a - crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 - crucible a732c489-d29a-4f75-b900-5966385943af in service 
fd00:1122:3344:104::29 - crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 - crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 - crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 - crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b - internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + crucible 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:104::2a + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::28 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::24 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:104::2b + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::25 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::27 + crucible f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:104::29 + internal_dns 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:4::1 + internal_ntp b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::21 + nexus 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::22 nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f - nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 sled affab35f-600a-4109-8ea0-34a067a4e0bc: @@ -64,19 +65,20 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 - crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 - crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 - crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 - crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 - crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a - crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c - crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 - crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 - crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b - internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 - nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 + crucible 414830dc-c8c1-4748-9e9e-bc3a6435a93c in service fd00:1122:3344:101::2b + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::24 + crucible 772cbcbd-58be-4158-be85-be744871fa22 in service fd00:1122:3344:101::28 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::25 + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::27 + 
crucible be75764a-491b-4aec-992e-1c39e25de975 in service fd00:1122:3344:101::29 + crucible be920398-024a-4655-8c49-69b5ac48dfff in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::23 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::26 + crucible e001fea0-6594-4ece-97e3-6198c293e931 in service fd00:1122:3344:101::2a + internal_dns 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:5::1 + internal_ntp c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::21 nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e + nexus 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::22 nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f @@ -89,18 +91,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ -- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c -- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 -- crucible 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 -- crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 -- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 -- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b -- crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 -- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 -- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 -- crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a -- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 -- nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 +- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::29 +- crucible 57b96d5c-b71e-43e4-8869-7d514003d00d expunged fd00:1122:3344:102::2a +- crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::26 +- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::28 +- crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::23 +- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::24 +- crucible b4947d31-f70e-4ee0-8817-0ca6cea9b16b expunged fd00:1122:3344:102::2b +- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::25 +- crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::27 +- crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a expunged fd00:1122:3344:102::2c +- internal_dns 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:3::1 +- internal_ntp 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::21 +- nexus 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::22 MODIFIED SLEDS: @@ -124,20 +127,21 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 omicron zones at generation 2: - ---------------------------------------------------------------------------------------- - zone type zone id disposition underlay IP - 
---------------------------------------------------------------------------------------- - crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 - crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 - crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 - crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 - crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 - crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a - crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b - crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 -- crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c -* crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be - in service fd00:1122:3344:105::26 - └─ + quiesced + ------------------------------------------------------------------------------------------- + zone type zone id disposition underlay IP + ------------------------------------------------------------------------------------------- + crucible 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::26 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::24 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::27 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::23 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::29 + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2a + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::28 + internal_dns 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:1::1 +- crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2b +* crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be - in service fd00:1122:3344:105::25 + └─ + quiesced sled 48d95fef-bc9f-4f50-9a53-1e075836291d: @@ -146,18 +150,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ -- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged fd00:1122:3344:103::2c -- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::25 -- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::27 -- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::28 -- crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:103::24 -- crucible 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::23 -- crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::2a -- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::26 -- crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::29 -- crucible f69f92a1-5007-4bb0-a85b-604dc217154b expunged fd00:1122:3344:103::2b -- internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 expunged fd00:1122:3344:103::21 -- nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::22 +- crucible 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:103::2c +- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged 
fd00:1122:3344:103::2a +- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::23 +- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::25 +- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::26 +- crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::28 +- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::24 +- crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::27 +- crucible f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:103::2b +- crucible f69f92a1-5007-4bb0-a85b-604dc217154b expunged fd00:1122:3344:103::29 +- internal_dns 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:2::1 +- internal_ntp 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::21 +- nexus 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::22 ERRORS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 5a2ed5a28a8..13054383886 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -23,16 +23,17 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 - crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c - crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 - crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 - crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 - crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 - crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 - crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a - crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b - crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 + crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::25 + crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2b + crucible 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::26 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::24 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::27 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::23 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::29 + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2a + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::28 + internal_dns 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:1::1 internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 @@ -60,21 +61,22 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b 
------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 - crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c - crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 - crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a - crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 - crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 - crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 - crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 - crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 - crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b - internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + crucible 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:104::2a + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::28 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::24 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:104::2b + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::25 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::27 + crucible f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:104::29 + internal_dns 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:4::1 + internal_ntp b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::21 + nexus 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::22 nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f - nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 @@ -100,19 +102,20 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 - crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 - crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 - crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 - crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 - crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a - crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c - crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 - crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 - crucible 
d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b - internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 - nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 + crucible 414830dc-c8c1-4748-9e9e-bc3a6435a93c in service fd00:1122:3344:101::2b + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::24 + crucible 772cbcbd-58be-4158-be85-be744871fa22 in service fd00:1122:3344:101::28 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::25 + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::27 + crucible be75764a-491b-4aec-992e-1c39e25de975 in service fd00:1122:3344:101::29 + crucible be920398-024a-4655-8c49-69b5ac48dfff in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::23 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::26 + crucible e001fea0-6594-4ece-97e3-6198c293e931 in service fd00:1122:3344:101::2a + internal_dns 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:5::1 + internal_ntp c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::21 nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e + nexus 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::22 nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f @@ -124,18 +127,19 @@ WARNING: Zones exist without physical disks! ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged fd00:1122:3344:103::2c - crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::25 - crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::27 - crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::28 - crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:103::24 - crucible 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::23 - crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::2a - crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::26 - crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::29 - crucible f69f92a1-5007-4bb0-a85b-604dc217154b expunged fd00:1122:3344:103::2b - internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 expunged fd00:1122:3344:103::21 - nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::22 + crucible 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:103::2c + crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged fd00:1122:3344:103::2a + crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::23 + crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::25 + crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::26 + crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::28 + crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::24 + crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::27 + crucible f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:103::2b + crucible f69f92a1-5007-4bb0-a85b-604dc217154b 
expunged fd00:1122:3344:103::29 + internal_dns 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:2::1 + internal_ntp 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::21 + nexus 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::22 @@ -146,18 +150,19 @@ WARNING: Zones exist without physical disks! ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ - crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c - crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 - crucible 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 - crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 - crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 - crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b - crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 - crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 - crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 - crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a - internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 - nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::29 + crucible 57b96d5c-b71e-43e4-8869-7d514003d00d expunged fd00:1122:3344:102::2a + crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::26 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::28 + crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::23 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::24 + crucible b4947d31-f70e-4ee0-8817-0ca6cea9b16b expunged fd00:1122:3344:102::2b + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::25 + crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::27 + crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a expunged fd00:1122:3344:102::2c + internal_dns 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:3::1 + internal_ntp 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::21 + nexus 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::22 @@ -168,7 +173,7 @@ WARNING: Zones exist without physical disks! 
METADATA: created by::::::::::: test_blueprint2 created at::::::::::: 1970-01-01T00:00:00.000Z - comment:::::::::::::: sled 48d95fef-bc9f-4f50-9a53-1e075836291d: expunged 12 zones because: sled policy is expunged + comment:::::::::::::: sled 48d95fef-bc9f-4f50-9a53-1e075836291d: expunged 13 zones because: sled policy is expunged internal DNS version: 1 external DNS version: 1 diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index e0ba0f10bae..7fa22b8441a 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,13 +33,15 @@ use nexus_types::identity::Resource; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; +use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; use omicron_common::disk::DiskIdentity; +use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -63,6 +65,7 @@ pub struct PlanningInputFromDb<'a> { pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface], pub target_boundary_ntp_zone_count: usize, pub target_nexus_zone_count: usize, + pub target_internal_dns_zone_count: usize, pub target_cockroachdb_zone_count: usize, pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, pub internal_dns_version: nexus_db_model::Generation, @@ -72,6 +75,75 @@ pub struct PlanningInputFromDb<'a> { } impl PlanningInputFromDb<'_> { + pub async fn assemble( + opctx: &OpContext, + datastore: &DataStore, + ) -> Result { + opctx.check_complex_operations_allowed()?; + let sled_rows = datastore + .sled_list_all_batched(opctx, SledFilter::Commissioned) + .await + .internal_context("fetching all sleds")?; + let zpool_rows = datastore + .zpool_list_all_external_batched(opctx) + .await + .internal_context("fetching all external zpool rows")?; + let ip_pool_range_rows = { + let (authz_service_ip_pool, _) = datastore + .ip_pools_service_lookup(opctx) + .await + .internal_context("fetching IP services pool")?; + datastore + .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) + .await + .internal_context("listing services IP pool ranges")? + }; + let external_ip_rows = datastore + .external_ip_list_service_all_batched(opctx) + .await + .internal_context("fetching service external IPs")?; + let service_nic_rows = datastore + .service_network_interfaces_all_list_batched(opctx) + .await + .internal_context("fetching service NICs")?; + let internal_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .internal_context("fetching internal DNS version")? + .version; + let external_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::External) + .await + .internal_context("fetching external DNS version")? 
+ .version; + let cockroachdb_settings = datastore + .cockroachdb_settings(opctx) + .await + .internal_context("fetching cockroachdb settings")?; + + let planning_input = PlanningInputFromDb { + sled_rows: &sled_rows, + zpool_rows: &zpool_rows, + ip_pool_range_rows: &ip_pool_range_rows, + target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, + target_nexus_zone_count: NEXUS_REDUNDANCY, + target_internal_dns_zone_count: INTERNAL_DNS_REDUNDANCY, + target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, + target_cockroachdb_cluster_version: + CockroachDbClusterVersion::POLICY, + external_ip_rows: &external_ip_rows, + service_nic_rows: &service_nic_rows, + log: &opctx.log, + internal_dns_version, + external_dns_version, + cockroachdb_settings: &cockroachdb_settings, + } + .build() + .internal_context("assembling planning_input")?; + + Ok(planning_input) + } + pub fn build(&self) -> Result { let service_ip_pool_ranges = self.ip_pool_range_rows.iter().map(IpRange::from).collect(); @@ -79,9 +151,11 @@ impl PlanningInputFromDb<'_> { service_ip_pool_ranges, target_boundary_ntp_zone_count: self.target_boundary_ntp_zone_count, target_nexus_zone_count: self.target_nexus_zone_count, + target_internal_dns_zone_count: self.target_internal_dns_zone_count, target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, @@ -194,65 +268,8 @@ pub async fn reconfigurator_state_load( datastore: &DataStore, ) -> Result { opctx.check_complex_operations_allowed()?; - let sled_rows = datastore - .sled_list_all_batched(opctx, SledFilter::Commissioned) - .await - .context("listing sleds")?; - let zpool_rows = datastore - .zpool_list_all_external_batched(opctx) - .await - .context("listing zpools")?; - let ip_pool_range_rows = { - let (authz_service_ip_pool, _) = datastore - .ip_pools_service_lookup(opctx) - .await - .context("fetching IP services pool")?; - datastore - .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) - .await - .context("listing services IP pool ranges")? - }; - let external_ip_rows = datastore - .external_ip_list_service_all_batched(opctx) - .await - .context("fetching service external IPs")?; - let service_nic_rows = datastore - .service_network_interfaces_all_list_batched(opctx) - .await - .context("fetching service NICs")?; - let internal_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::Internal) - .await - .context("fetching internal DNS version")? - .version; - let external_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::External) - .await - .context("fetching external DNS version")? 
- .version; - let cockroachdb_settings = datastore - .cockroachdb_settings(opctx) - .await - .context("fetching cockroachdb settings")?; - - let planning_input = PlanningInputFromDb { - sled_rows: &sled_rows, - zpool_rows: &zpool_rows, - ip_pool_range_rows: &ip_pool_range_rows, - target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, - target_nexus_zone_count: NEXUS_REDUNDANCY, - target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, - target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, - external_ip_rows: &external_ip_rows, - service_nic_rows: &service_nic_rows, - log: &opctx.log, - internal_dns_version, - external_dns_version, - cockroachdb_settings: &cockroachdb_settings, - } - .build() - .context("assembling planning_input")?; - + let planning_input = + PlanningInputFromDb::assemble(opctx, datastore).await?; let collection_ids = datastore .inventory_collections() .await diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 850e63443a1..fedb74b81be 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -108,6 +108,10 @@ use super::tasks::phantom_disks; use super::tasks::physical_disk_adoption; use super::tasks::region_replacement; use super::tasks::region_replacement_driver; +use super::tasks::region_snapshot_replacement_finish::*; +use super::tasks::region_snapshot_replacement_garbage_collect::*; +use super::tasks::region_snapshot_replacement_start::*; +use super::tasks::region_snapshot_replacement_step::*; use super::tasks::saga_recovery; use super::tasks::service_firewall_rules; use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; @@ -161,6 +165,10 @@ pub struct BackgroundTasks { pub task_vpc_route_manager: Activator, pub task_saga_recovery: Activator, pub task_lookup_region_port: Activator, + pub task_region_snapshot_replacement_start: Activator, + pub task_region_snapshot_replacement_garbage_collection: Activator, + pub task_region_snapshot_replacement_step: Activator, + pub task_region_snapshot_replacement_finish: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. These background tasks are implementation details as far as @@ -242,6 +250,11 @@ impl BackgroundTasksInitializer { task_vpc_route_manager: Activator::new(), task_saga_recovery: Activator::new(), task_lookup_region_port: Activator::new(), + task_region_snapshot_replacement_start: Activator::new(), + task_region_snapshot_replacement_garbage_collection: Activator::new( + ), + task_region_snapshot_replacement_step: Activator::new(), + task_region_snapshot_replacement_finish: Activator::new(), task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), @@ -303,6 +316,10 @@ impl BackgroundTasksInitializer { task_vpc_route_manager, task_saga_recovery, task_lookup_region_port, + task_region_snapshot_replacement_start, + task_region_snapshot_replacement_garbage_collection, + task_region_snapshot_replacement_step, + task_region_snapshot_replacement_finish, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. 
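The hunk above wires four new `task_region_snapshot_replacement_*` activators into `BackgroundTasks`. As its closing comment says, each `Activator` is created before its task exists and only does anything once `Driver::register()` binds it to a `TaskDefinition` further down; this is what lets one task hold a handle that wakes another. A minimal sketch of the idea, assuming tokio; `ActivatorSketch` and `drive` are illustrative stand-ins, not the real Nexus types, which carry more bookkeeping:

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Notify;

/// Illustrative stand-in for an activator: a cloneable handle that can
/// wake its background task ahead of the task's normal period.
#[derive(Clone)]
pub struct ActivatorSketch(Arc<Notify>);

impl ActivatorSketch {
    pub fn new() -> Self {
        ActivatorSketch(Arc::new(Notify::new()))
    }

    /// Request an immediate activation, e.g. because another task just
    /// produced work for this one.
    pub fn activate(&self) {
        self.0.notify_one();
    }
}

/// Run one task body once per `period`, or sooner when activated.
pub async fn drive<F, Fut>(
    activator: ActivatorSketch,
    period: Duration,
    mut body: F,
) where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = ()>,
{
    loop {
        body().await;
        // Sleep until the period elapses or someone pokes the activator,
        // whichever comes first.
        tokio::select! {
            _ = tokio::time::sleep(period) => {}
            _ = activator.0.notified() => {}
        }
    }
}
```

This create-first, register-later ordering is also why `task_saga_recovery.clone()` can be handed to the blueprint executor in the next hunk before the saga recovery task itself has been registered.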
@@ -439,7 +456,8 @@ impl BackgroundTasksInitializer { datastore.clone(), resolver.clone(), rx_blueprint.clone(), - nexus_id.to_string(), + nexus_id, + task_saga_recovery.clone(), ); let rx_blueprint_exec = blueprint_executor.watcher(); driver.register(TaskDefinition { @@ -721,13 +739,73 @@ impl BackgroundTasksInitializer { description: "fill in missing ports for region records", period: config.lookup_region_port.period_secs, task_impl: Box::new(lookup_region_port::LookupRegionPort::new( - datastore, + datastore.clone(), )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_lookup_region_port, }); + driver.register(TaskDefinition { + name: "region_snapshot_replacement_start", + description: + "detect if region snapshots need replacement and begin the \ + process", + period: config.region_snapshot_replacement_start.period_secs, + task_impl: Box::new(RegionSnapshotReplacementDetector::new( + datastore.clone(), + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_start, + }); + + driver.register(TaskDefinition { + name: "region_snapshot_replacement_garbage_collection", + description: + "clean up all region snapshot replacement step volumes", + period: config + .region_snapshot_replacement_garbage_collection + .period_secs, + task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new( + datastore.clone(), + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_garbage_collection, + }); + + driver.register(TaskDefinition { + name: "region_snapshot_replacement_step", + description: + "detect what volumes were affected by a region snapshot \ + replacement, and run the step saga for them", + period: config.region_snapshot_replacement_step.period_secs, + task_impl: Box::new(RegionSnapshotReplacementFindAffected::new( + datastore.clone(), + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_step, + }); + + driver.register(TaskDefinition { + name: "region_snapshot_replacement_finish", + description: + "complete a region snapshot replacement if all the steps are \ + done", + period: config.region_snapshot_replacement_finish.period_secs, + task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new( + datastore, + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_finish, + }); + driver } } diff --git a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index a81080ec752..ca6e7e42713 100644 --- a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -28,8 +28,8 @@ //! remains alive and continues to own its virtual provisioning resources. //! //! Cleanup of instance resources when an instance's *active* VMM is destroyed -//! is handled elsewhere, by `notify_instance_updated` and (eventually) the -//! `instance-update` saga. +//! is handled elsewhere, by `process_vmm_update` and the `instance-update` +//! saga. use crate::app::background::BackgroundTask; use anyhow::Context; diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index ee780812ae7..2b1e3eedca6 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,16 +4,18 @@ //! 
Background task for realizing a plan blueprint -use crate::app::background::BackgroundTask; +use crate::app::background::{Activator, BackgroundTask}; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_reconfigurator_execution::RealizeBlueprintOutput; use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; use std::sync::Arc; use tokio::sync::watch; +use uuid::Uuid; /// Background task that takes a [`Blueprint`] and realizes the change to /// the state of the system based on the `Blueprint`. @@ -21,8 +23,9 @@ pub struct BlueprintExecutor { datastore: Arc, resolver: Resolver, rx_blueprint: watch::Receiver>>, - nexus_label: String, + nexus_id: Uuid, tx: watch::Sender, + saga_recovery: Activator, } impl BlueprintExecutor { @@ -32,10 +35,18 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, - nexus_label: String, + nexus_id: Uuid, + saga_recovery: Activator, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); - BlueprintExecutor { datastore, resolver, rx_blueprint, nexus_label, tx } + BlueprintExecutor { + datastore, + resolver, + rx_blueprint, + nexus_id, + tx, + saga_recovery, + } } pub fn watcher(&self) -> watch::Receiver { @@ -72,7 +83,7 @@ impl BlueprintExecutor { "target_id" => %blueprint.id); return json!({ "target_id": blueprint.id.to_string(), - "error": "blueprint disabled" + "enabled": false, }); } @@ -81,7 +92,7 @@ impl BlueprintExecutor { &self.datastore, &self.resolver, blueprint, - &self.nexus_label, + self.nexus_id, ) .await; @@ -90,12 +101,26 @@ impl BlueprintExecutor { // Return the result as a `serde_json::Value` match result { - Ok(()) => json!({}), + Ok(RealizeBlueprintOutput { needs_saga_recovery }) => { + // If executing the blueprint requires activating the saga + // recovery background task, do that now. + if needs_saga_recovery { + info!(&opctx.log, "activating saga recovery task"); + self.saga_recovery.activate(); + } + + json!({ + "target_id": blueprint.id.to_string(), + "enabled": true, + "needs_saga_recovery": needs_saga_recovery, + }) + } Err(errors) => { let errors: Vec<_> = errors.into_iter().map(|e| format!("{:#}", e)).collect(); json!({ "target_id": blueprint.id.to_string(), + "enabled": true, "errors": errors }) } @@ -115,7 +140,7 @@ impl BackgroundTask for BlueprintExecutor { #[cfg(test)] mod test { use super::BlueprintExecutor; - use crate::app::background::BackgroundTask; + use crate::app::background::{Activator, BackgroundTask}; use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; @@ -261,7 +286,8 @@ mod test { datastore.clone(), resolver.clone(), blueprint_rx, - String::from("test-suite"), + Uuid::new_v4(), + Activator::new(), ); // Now we're ready. 
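The assertions in the hunks that follow pin down the executor's reworked status JSON: a disabled target now reports `"enabled": false` rather than an `"error"` string, and a successful run reports its `target_id`, `"enabled": true`, and whether saga recovery needs to be activated. A small consumer-side sketch of those three shapes; `summarize_status` is a hypothetical helper, with the field names taken from the assertions below:

```rust
use serde_json::Value;

/// Hypothetical consumer-side summary of the executor's status JSON.
fn summarize_status(status: &Value) -> String {
    // Missing keys index as Value::Null, so the `as_*` accessors below
    // simply return None for shapes that omit a field.
    let target = status["target_id"].as_str().unwrap_or("<unknown>");
    match status["enabled"].as_bool() {
        // A target blueprint exists, but execution is disabled.
        Some(false) => format!("{target}: execution disabled"),
        Some(true) => {
            if let Some(errors) = status["errors"].as_array() {
                format!("{target}: {} execution error(s)", errors.len())
            } else if status["needs_saga_recovery"] == true {
                format!("{target}: executed; saga recovery activated")
            } else {
                format!("{target}: executed cleanly")
            }
        }
        None => format!("{target}: unrecognized status shape"),
    }
}
```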
@@ -284,10 +310,18 @@ mod test { ) .await, ); + let blueprint_id = blueprint.1.id; blueprint_tx.send(Some(blueprint)).unwrap(); let value = task.activate(&opctx).await; println!("activating with no zones: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint_id, + "enabled": true, + "needs_saga_recovery": false, + }) + ); // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and @@ -375,7 +409,14 @@ mod test { // Activate the task to trigger zone configuration on the sled-agents let value = task.activate(&opctx).await; println!("activating two sled agents: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint.1.id.to_string(), + "enabled": true, + "needs_saga_recovery": false, + }) + ); s1.verify_and_clear(); s2.verify_and_clear(); @@ -390,7 +431,7 @@ mod test { assert_eq!( value, json!({ - "error": "blueprint disabled", + "enabled": false, "target_id": blueprint.1.id.to_string() }) ); diff --git a/nexus/src/app/background/tasks/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs index 31bc00441d4..70fcf713bce 100644 --- a/nexus/src/app/background/tasks/blueprint_load.rs +++ b/nexus/src/app/background/tasks/blueprint_load.rs @@ -78,6 +78,7 @@ impl BackgroundTask for TargetBlueprintLoader { }; // Decide what to do with the new blueprint + let enabled = new_bp_target.enabled; let Some((old_bp_target, old_blueprint)) = self.last.as_deref() else { // We've found a target blueprint for the first time. @@ -97,6 +98,7 @@ impl BackgroundTask for TargetBlueprintLoader { "time_created": time_created, "time_found": chrono::Utc::now(), "status": "first target blueprint", + "enabled": enabled, }); }; @@ -116,7 +118,8 @@ impl BackgroundTask for TargetBlueprintLoader { "target_id": target_id, "time_created": time_created, "time_found": chrono::Utc::now(), - "status": "target blueprint updated" + "status": "target blueprint updated", + "enabled": enabled, }) } else { // The new target id matches the old target id @@ -159,6 +162,7 @@ impl BackgroundTask for TargetBlueprintLoader { "time_created": time_created, "time_found": chrono::Utc::now(), "status": format!("target blueprint {status}"), + "enabled": enabled, }) } else { // We found a new target blueprint that exactly @@ -173,7 +177,8 @@ impl BackgroundTask for TargetBlueprintLoader { json!({ "target_id": target_id, "time_created": time_created, - "status": "target blueprint unchanged" + "status": "target blueprint unchanged", + "enabled": enabled, }) } } diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index 602f3f85e86..6e49ddc7f05 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -179,13 +179,13 @@ mod tests { use diesel::ExpressionMethods; use diesel::QueryDsl; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::Region; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; + use omicron_common::api::internal::shared::DatasetKind; use omicron_uuid_kinds::{ DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, }; diff --git a/nexus/src/app/background/tasks/instance_watcher.rs 
b/nexus/src/app/background/tasks/instance_watcher.rs index f63c21105e2..ae78392ea35 100644 --- a/nexus/src/app/background/tasks/instance_watcher.rs +++ b/nexus/src/app/background/tasks/instance_watcher.rs @@ -19,9 +19,9 @@ use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::external::InstanceState; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::InstanceUuid; +use omicron_uuid_kinds::PropolisUuid; use oximeter::types::ProducerRegistry; use sled_agent_client::Client as SledAgentClient; use std::borrow::Cow; @@ -81,12 +81,12 @@ impl InstanceWatcher { let client = client.clone(); async move { - slog::trace!(opctx.log, "checking on instance..."); - let rsp = client - .instance_get_state(&InstanceUuid::from_untyped_uuid( - target.instance_id, - )) - .await; + let vmm_id = PropolisUuid::from_untyped_uuid(target.vmm_id); + slog::trace!( + opctx.log, "checking on VMM"; "propolis_id" => %vmm_id + ); + + let rsp = client.vmm_get_state(&vmm_id).await; let mut check = Check { target, outcome: Default::default(), @@ -151,7 +151,7 @@ impl InstanceWatcher { } }; - let new_runtime_state: SledInstanceState = state.into(); + let new_runtime_state: SledVmmState = state.into(); check.outcome = CheckOutcome::Success(new_runtime_state.vmm_state.state.into()); debug!( @@ -159,10 +159,10 @@ impl InstanceWatcher { "updating instance state"; "state" => ?new_runtime_state.vmm_state.state, ); - match crate::app::instance::notify_instance_updated( + match crate::app::instance::process_vmm_update( &datastore, &opctx, - InstanceUuid::from_untyped_uuid(target.instance_id), + PropolisUuid::from_untyped_uuid(target.vmm_id), &new_runtime_state, ) .await @@ -176,7 +176,7 @@ impl InstanceWatcher { _ => Err(Incomplete::UpdateFailed), }; } - Ok(Some(saga)) => { + Ok(Some((_, saga))) => { check.update_saga_queued = true; if let Err(e) = sagas.saga_start(saga).await { warn!(opctx.log, "update saga failed"; "error" => ?e); diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs index fbfc5c5af28..df501fe6b1c 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -53,7 +53,6 @@ impl BackgroundTask for LookupRegionPort { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - info!(&log, "lookup region port task started"); let mut status = LookupRegionPortStatus::default(); @@ -147,8 +146,6 @@ impl BackgroundTask for LookupRegionPort { } } - info!(&log, "lookup region port task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index fe041a6daad..6cbba0a07b8 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -25,6 +25,10 @@ pub mod phantom_disks; pub mod physical_disk_adoption; pub mod region_replacement; pub mod region_replacement_driver; +pub mod region_snapshot_replacement_finish; +pub mod region_snapshot_replacement_garbage_collect; +pub mod region_snapshot_replacement_start; +pub mod region_snapshot_replacement_step; pub mod saga_recovery; pub mod service_firewall_rules; pub mod sync_service_zone_nat; diff --git a/nexus/src/app/background/tasks/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs index 4b0d8bec386..7f3fceab1c0 
100644 --- a/nexus/src/app/background/tasks/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -43,7 +43,6 @@ impl BackgroundTask for PhantomDiskDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "phantom disk task started"); let phantom_disks = match self.datastore.find_phantom_disks().await { @@ -83,14 +82,13 @@ impl BackgroundTask for PhantomDiskDetector { } else { info!( &log, - "phandom disk {} un-deleted andset to faulted ok", + "phantom disk {} un-deleted and set to faulted ok", disk.id(), ); phantom_disk_deleted_ok += 1; } } - warn!(&log, "phantom disk task done"); json!({ "phantom_disk_deleted_ok": phantom_disk_deleted_ok, "phantom_disk_deleted_err": phantom_disk_deleted_err, diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index f3b9e8ac625..b1eceed0b64 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -96,8 +96,6 @@ impl BackgroundTask for PhysicalDiskAdoption { } let mut disks_added = 0; - let log = &opctx.log; - warn!(&log, "physical disk adoption task started"); let collection_id = *self.rx_inventory_collection.borrow(); let Some(collection_id) = collection_id else { @@ -171,7 +169,6 @@ impl BackgroundTask for PhysicalDiskAdoption { ); } - warn!(&log, "physical disk adoption task done"); json!({ "physical_disks_added": disks_added, }) diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f852f217343..ba0e7f86fb4 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -61,7 +61,6 @@ impl BackgroundTask for RegionReplacementDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "region replacement task started"); let mut ok = 0; let mut err = 0; @@ -182,8 +181,6 @@ impl BackgroundTask for RegionReplacementDetector { } } - warn!(&log, "region replacement task done"); - json!({ "region_replacement_started_ok": ok, "region_replacement_started_err": err, diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 284ed2c368e..02db86eab39 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -227,16 +227,11 @@ impl BackgroundTask for RegionReplacementDriver { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region replacement driver task started"); - let mut status = RegionReplacementDriverStatus::default(); self.drive_running_replacements_forward(opctx, &mut status).await; self.complete_done_replacements(opctx, &mut status).await; - info!(&log, "region replacement driver task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs new file mode 100644 index 00000000000..134995d848b --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs @@ -0,0 +1,332 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//!
Background task for detecting when a region snapshot replacement has all its +//! steps done, and finishing it. +//! +//! Once all related region snapshot replacement steps are done, the region +//! snapshot replacement can be completed. + +use crate::app::background::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementFinishDetector { + datastore: Arc, +} + +impl RegionSnapshotReplacementFinishDetector { + pub fn new(datastore: Arc) -> Self { + RegionSnapshotReplacementFinishDetector { datastore } + } + + async fn transition_requests_to_done( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementFinishStatus, + ) { + let log = &opctx.log; + + // Find all region snapshot replacement requests in state "Running" + let requests = match self + .datastore + .get_running_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "query for region snapshot replacement requests \ + failed: {e}", + ); + error!(&log, "{s}"); + status.errors.push(s); + + return; + } + }; + + for request in requests { + // Count associated region snapshot replacement steps that are not + // completed. + let count = match self + .datastore + .in_progress_region_snapshot_replacement_steps( + opctx, request.id, + ) + .await + { + Ok(count) => count, + + Err(e) => { + let s = format!( + "counting incomplete region snapshot replacement \ + steps failed: {e}", + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + if count == 0 { + // If the region snapshot has been deleted, then the snapshot + // replacement is done: the reference number went to zero and it + // was deleted, therefore there aren't any volumes left that + // reference it! + match self + .datastore + .region_snapshot_get( + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ) + .await + { + Ok(Some(_)) => { + info!( + &log, + "region snapshot still exists"; + "request.old_dataset_id" => %request.old_dataset_id, + "request.old_region_id" => %request.old_region_id, + "request.old_snapshot_id" => %request.old_snapshot_id, + ); + continue; + } + + Ok(None) => { + // gone! 
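Both halves of the finish condition hold at this exact point in the loop: no replacement steps are in progress (`count == 0`), and the old region snapshot row is gone because its reference count reached zero. Condensed into an illustrative helper (names are not the real Nexus types):

```rust
// Condensed finish condition for a region snapshot replacement request:
// complete it only when no steps remain in progress AND the old region
// snapshot row has been deleted. Illustrative only.
fn ready_to_complete(in_progress_steps: usize, old_snapshot_exists: bool) -> bool {
    in_progress_steps == 0 && !old_snapshot_exists
}

// ready_to_complete(0, false) -> true:  transition the request to Complete
// ready_to_complete(0, true)  -> false: the snapshot is still referenced
// ready_to_complete(2, false) -> false: step sagas are still running
```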
+ } + + Err(e) => { + let s = format!( + "error querying for region snapshot {} {} {}: {e}", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + // Transition region snapshot replacement to Complete + match self + .datastore + .set_region_snapshot_replacement_complete(opctx, request.id) + .await + { + Ok(()) => { + let s = format!("set request {} to done", request.id); + info!(&log, "{s}"); + status.records_set_to_done.push(s); + } + + Err(e) => { + let s = format!( + "marking snapshot replacement as done failed: {e}" + ); + error!(&log, "{s}"); + status.errors.push(s); + } + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementFinishDetector { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut status = RegionSnapshotReplacementFinishStatus::default(); + + self.transition_requests_to_done(opctx, &mut status).await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementStep; + use nexus_db_model::RegionSnapshotReplacementStepState; + use nexus_test_utils_macros::nexus_test; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_done_region_snapshot_replacement_causes_finish( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let mut task = + RegionSnapshotReplacementFinishDetector::new(datastore.clone()); + + // Noop test + let result: RegionSnapshotReplacementFinishStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementFinishStatus::default()); + + // Add a region snapshot replacement request for a fake region snapshot. 
+ + let dataset_id = Uuid::new_v4(); + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + + // Do not add the fake region snapshot to the database, as it should + // have been deleted by the time the request transitions to "Running" + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + + let request_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Transition that to Allocating -> ReplacementDone -> DeletingOldVolume + // -> Running + + let operating_saga_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_allocating( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + let new_region_id = Uuid::new_v4(); + let old_snapshot_volume_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_replacement_done( + &opctx, + request_id, + operating_saga_id, + new_region_id, + old_snapshot_volume_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_running( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + // Insert a few steps, not all finished yet + + let operating_saga_id = Uuid::new_v4(); + + let mut step_1 = + RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + step_1.replacement_state = RegionSnapshotReplacementStepState::Complete; + step_1.operating_saga_id = Some(operating_saga_id); + let step_1_id = step_1.id; + + let mut step_2 = + RegionSnapshotReplacementStep::new(request_id, Uuid::new_v4()); + step_2.replacement_state = RegionSnapshotReplacementStepState::Complete; + step_2.operating_saga_id = Some(operating_saga_id); + let step_2_id = step_2.id; + + datastore + .insert_region_snapshot_replacement_step(&opctx, step_1) + .await + .unwrap(); + datastore + .insert_region_snapshot_replacement_step(&opctx, step_2) + .await + .unwrap(); + + // Activate the task, it should do nothing yet + + let result: RegionSnapshotReplacementFinishStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementFinishStatus::default()); + + // Transition one record to Complete, the task should still do nothing + + datastore + .set_region_snapshot_replacement_step_volume_deleted( + &opctx, step_1_id, + ) + .await + .unwrap(); + + let result: RegionSnapshotReplacementFinishStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementFinishStatus::default()); + + // Transition the other record to Complete + + datastore + .set_region_snapshot_replacement_step_volume_deleted( + &opctx, step_2_id, + ) + .await + .unwrap(); + + // Activate the task - it should pick the request up, change the state, + // and try to run the region snapshot replacement finish saga + let result: RegionSnapshotReplacementFinishStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result, + RegionSnapshotReplacementFinishStatus { + records_set_to_done: vec![format!( + "set request {request_id} to done" + )], + errors: vec![], + }, + ); + } +} diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 00000000000..77dc87c0602 --- 
/dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,254 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for deleting volumes that stash a replaced region snapshot + +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_garbage_collect::*; +use crate::app::sagas::NexusSaga; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementGarbageCollect { + datastore: Arc, + sagas: Arc, +} + +impl RegionSnapshotReplacementGarbageCollect { + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementGarbageCollect { datastore, sagas } + } + + async fn send_garbage_collect_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id + else { + // This state is illegal! + let s = format!( + "request {} old snapshot volume id is None!", + request.id, + ); + + return Err(omicron_common::api::external::Error::internal_error( + &s, + )); + }; + + let params = + sagas::region_snapshot_replacement_garbage_collect::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + old_snapshot_volume_id, + request, + }; + + let saga_dag = + SagaRegionSnapshotReplacementGarbageCollect::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + async fn clean_up_region_snapshot_replacement_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementGarbageCollectStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .get_replacement_done_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!("querying for requests to collect failed! 
{e}"); + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = + self.send_garbage_collect_request(opctx, request.clone()).await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement garbage collect request \ + ok for {request_id}" + ); + + info!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.garbage_collect_requested.push(s); + } + + Err(e) => { + let s = format!( + "sending region snapshot replacement garbage collect \ + request failed: {e}", + ); + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut status = + RegionSnapshotReplacementGarbageCollectStatus::default(); + + self.clean_up_region_snapshot_replacement_volumes( + opctx, + &mut status, + ) + .await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::init::test::NoopStartSaga; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_test_utils_macros::nexus_test; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_task( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementGarbageCollect::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementGarbageCollectStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!( + result, + RegionSnapshotReplacementGarbageCollectStatus::default() + ); + assert_eq!(starter.count_reset(), 0); + + // Add two region snapshot requests that need garbage collection + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_1_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_2_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Activate the task - it should pick up the two requests + + let result: RegionSnapshotReplacementGarbageCollectStatus = + 
serde_json::from_value(task.activate(&opctx).await).unwrap(); + + for error in &result.errors { + eprintln!("{error}"); + } + + assert_eq!(result.garbage_collect_requested.len(), 2); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_1_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_2_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } +} diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs new file mode 100644 index 00000000000..1fdc17690da --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -0,0 +1,507 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting region snapshots that need replacing and +//! beginning that process +//! +//! This task's responsibility is to create region snapshot replacement requests +//! when physical disks are expunged, and trigger the region snapshot +//! replacement start saga for any requests that are in state "Requested". See +//! the documentation in that saga's docstring for more information. + +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_start::*; +use crate::app::sagas::NexusSaga; +use crate::app::RegionAllocationStrategy; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementDetector { + datastore: Arc, + sagas: Arc, +} + +impl RegionSnapshotReplacementDetector { + pub fn new(datastore: Arc, sagas: Arc) -> Self { + RegionSnapshotReplacementDetector { datastore, sagas } + } + + async fn send_start_request( + &self, + serialized_authn: authn::saga::Serialized, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let params = sagas::region_snapshot_replacement_start::Params { + serialized_authn, + request, + allocation_strategy: + RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, + }; + + let saga_dag = SagaRegionSnapshotReplacementStart::prepare(¶ms)?; + self.sagas.saga_start(saga_dag).await + } + + /// Find region snapshots on expunged physical disks and create region + /// snapshot replacement requests for them. 
+ async fn create_requests_for_region_snapshots_on_expunged_disks( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStartStatus, + ) { + let log = &opctx.log; + + // Find region snapshots on expunged physical disks + let region_snapshots_to_be_replaced = match self + .datastore + .find_region_snapshots_on_expunged_physical_disks(opctx) + .await + { + Ok(region_snapshots) => region_snapshots, + + Err(e) => { + let s = format!( + "find_region_snapshots_on_expunged_physical_disks \ + failed: {e}", + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for region_snapshot in region_snapshots_to_be_replaced { + // If no request exists yet, create one. + let existing_request = match self + .datastore + .lookup_region_snapshot_replacement_request( + opctx, + ®ion_snapshot, + ) + .await + { + Ok(existing_request) => existing_request, + + Err(e) => { + let s = + format!("error looking up replacement request: {e}"); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.errors.push(s); + continue; + } + }; + + if existing_request.is_none() { + match self + .datastore + .create_region_snapshot_replacement_request( + opctx, + ®ion_snapshot, + ) + .await + { + Ok(request_id) => { + let s = format!( + "created region snapshot replacement request \ + {request_id}" + ); + + info!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.requests_created_ok.push(s); + } + + Err(e) => { + let s = + format!("error creating replacement request: {e}"); + + error!( + &log, + "{s}"; + "snapshot_id" => %region_snapshot.snapshot_id, + "region_id" => %region_snapshot.region_id, + "dataset_id" => %region_snapshot.dataset_id, + ); + status.errors.push(s); + } + } + } + } + } + + /// For each region snapshot replacement request in state "Requested", run + /// the start saga. 
+ async fn start_requested_region_snapshot_replacements( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStartStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .get_requested_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "query for region snapshot replacement requests failed: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = self + .send_start_request( + authn::saga::Serialized::for_opctx(opctx), + request.clone(), + ) + .await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement start invoked ok for \ + {request_id}" + ); + + info!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.start_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "invoking region snapshot replacement start for \ + {request_id} failed: {e}", + ); + + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementDetector { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let mut status = RegionSnapshotReplacementStartStatus::default(); + + self.create_requests_for_region_snapshots_on_expunged_disks( + opctx, + &mut status, + ) + .await; + + self.start_requested_region_snapshot_replacements( + opctx, + &mut status, + ) + .await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::init::test::NoopStartSaga; + use crate::app::MIN_DISK_SIZE_BYTES; + use chrono::Utc; + use nexus_db_model::BlockSize; + use nexus_db_model::Generation; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::RegionSnapshot; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::Snapshot; + use nexus_db_model::SnapshotIdentity; + use nexus_db_model::SnapshotState; + use nexus_db_queries::authz; + use nexus_db_queries::db::lookup::LookupPath; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external; + use omicron_uuid_kinds::GenericUuid; + use std::collections::BTreeMap; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + type DiskTest<'a> = + nexus_test_utils::resource_helpers::DiskTest<'a, crate::Server>; + + #[nexus_test(server = crate::Server)] + async fn test_add_region_snapshot_replacement_causes_start( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add a region snapshot 
replacement request for a fake region snapshot + + let request = RegionSnapshotReplacement::new( + Uuid::new_v4(), // dataset id + Uuid::new_v4(), // region id + Uuid::new_v4(), // snapshot id + ); + + let request_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the + // region snapshot replacement start saga + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result, + RegionSnapshotReplacementStartStatus { + requests_created_ok: vec![], + start_invoked_ok: vec![format!( + "region snapshot replacement start invoked ok for \ + {request_id}" + )], + errors: vec![], + }, + ); + + assert_eq!(starter.count_reset(), 1); + } + + #[nexus_test(server = crate::Server)] + async fn test_expunge_disk_causes_region_snapshot_replacement_start( + cptestctx: &ControlPlaneTestContext, + ) { + let disk_test = DiskTest::new(cptestctx).await; + + let client = &cptestctx.external_client; + let project = create_project(&client, "testing").await; + let project_id = project.identity.id; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementDetector::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStartStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add three region snapshots for each dataset + + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + let mut dataset_to_zpool: BTreeMap = + BTreeMap::default(); + + for zpool in disk_test.zpools() { + for dataset in &zpool.datasets { + dataset_to_zpool + .insert(zpool.id.to_string(), dataset.id.to_string()); + + datastore + .region_snapshot_create(RegionSnapshot::new( + dataset.id, + region_id, + snapshot_id, + String::from("[fd00:1122:3344::101]:12345"), + )) + .await + .unwrap(); + } + } + + // Create the fake snapshot + + let (.., authz_project) = LookupPath::new(&opctx, &datastore) + .project_id(project_id) + .lookup_for(authz::Action::CreateChild) + .await + .unwrap(); + + datastore + .project_ensure_snapshot( + &opctx, + &authz_project, + Snapshot { + identity: SnapshotIdentity { + id: snapshot_id, + name: external::Name::try_from("snapshot".to_string()) + .unwrap() + .into(), + description: "snapshot".into(), + + time_created: Utc::now(), + time_modified: Utc::now(), + time_deleted: None, + }, + + project_id, + disk_id: Uuid::new_v4(), + volume_id: Uuid::new_v4(), + destination_volume_id: Uuid::new_v4(), + + gen: Generation::new(), + state: SnapshotState::Creating, + block_size: BlockSize::AdvancedFormat, + + size: external::ByteCount::try_from(MIN_DISK_SIZE_BYTES) + .unwrap() + .into(), + }, + ) + .await + .unwrap(); + + // Expunge one of the physical disks + + let first_zpool = + disk_test.zpools().next().expect("Expected at least one zpool"); + + let (_, db_zpool) = LookupPath::new(&opctx, datastore) + .zpool_id(first_zpool.id.into_untyped_uuid()) + .fetch() + .await + .unwrap(); + + datastore + .physical_disk_update_policy( + &opctx, + db_zpool.physical_disk_id, + 
PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // snapshot replacement start saga for the region snapshot on that + // expunged disk + + let result: RegionSnapshotReplacementStartStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + eprintln!("{:?}", &result); + + assert_eq!(result.requests_created_ok.len(), 1); + assert_eq!(result.start_invoked_ok.len(), 1); + assert!(result.errors.is_empty()); + + // The last part of the message is the region snapshot replacement + // request id + let request_created_uuid: Uuid = result.requests_created_ok[0] + .split(" ") + .last() + .unwrap() + .parse() + .unwrap(); + let request_started_uuid: Uuid = result.start_invoked_ok[0] + .split(" ") + .last() + .unwrap() + .parse() + .unwrap(); + + assert_eq!(request_created_uuid, request_started_uuid); + + assert_eq!(starter.count_reset(), 1); + + let request = datastore + .get_region_snapshot_replacement_request_by_id( + &opctx, + request_created_uuid, + ) + .await + .unwrap(); + + assert_eq!(request.old_snapshot_id, snapshot_id); + assert_eq!(request.old_region_id, region_id); + + let dataset_id = + dataset_to_zpool.get(&first_zpool.id.to_string()).unwrap(); + assert_eq!(&request.old_dataset_id.to_string(), dataset_id); + } +} diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs new file mode 100644 index 00000000000..cd13a56642a --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs @@ -0,0 +1,764 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting volumes affected by a region snapshot +//! replacement, creating records for those, and triggering the "step" saga for +//! them. +//! +//! After the region snapshot replacement start saga finishes, the snapshot's +//! volume is no longer in a degraded state: the requested read-only region was +//! cloned to a new region, and the reference was replaced in the construction +//! request. Any disk that is now created using the snapshot as a source will +//! work without issues. +//! +//! The problem now is volumes that still reference the replaced read-only +//! region, and any Upstairs constructed from a VCR that references that region. +//! This task's responsibility is to find all volumes that reference the +//! replaced read-only region, create a record for them, and trigger the region +//! snapshot replacement step saga. This is a much less involved process than +//! region replacement: no continuous monitoring and driving is required. See +//! the "region snapshot replacement step" saga's docstring for more +//! information. 
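+//!
+//! As an illustration (shapes and the example address taken from the tests
+//! below), an affected volume is one whose construction request still lists
+//! the replaced snapshot's address as a read-only target:
+//!
+//! ```text
+//! VolumeConstructionRequest::Volume {
+//!     read_only_parent: VolumeConstructionRequest::Region {
+//!         opts: CrucibleOpts {
+//!             target: ["[fd00:1122:3344::101]:9876"], // replaced address
+//!             read_only: true,
+//!             ..
+//!         },
+//!         ..
+//!     },
+//!     ..
+//! }
+//! ```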
+
+use crate::app::authn;
+use crate::app::background::BackgroundTask;
+use crate::app::saga::StartSaga;
+use crate::app::sagas;
+use crate::app::sagas::region_snapshot_replacement_step::*;
+use crate::app::sagas::region_snapshot_replacement_step_garbage_collect::*;
+use crate::app::sagas::NexusSaga;
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use nexus_db_model::RegionSnapshotReplacementStep;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use nexus_types::identity::Asset;
+use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
+use serde_json::json;
+use std::sync::Arc;
+
+pub struct RegionSnapshotReplacementFindAffected {
+    datastore: Arc<DataStore>,
+    sagas: Arc<dyn StartSaga>,
+}
+
+impl RegionSnapshotReplacementFindAffected {
+    pub fn new(datastore: Arc<DataStore>, sagas: Arc<dyn StartSaga>) -> Self {
+        RegionSnapshotReplacementFindAffected { datastore, sagas }
+    }
+
+    async fn send_start_request(
+        &self,
+        opctx: &OpContext,
+        request: RegionSnapshotReplacementStep,
+    ) -> Result<(), omicron_common::api::external::Error> {
+        let params = sagas::region_snapshot_replacement_step::Params {
+            serialized_authn: authn::saga::Serialized::for_opctx(opctx),
+            request,
+        };
+
+        let saga_dag = SagaRegionSnapshotReplacementStep::prepare(&params)?;
+        self.sagas.saga_start(saga_dag).await
+    }
+
+    async fn send_garbage_collect_request(
+        &self,
+        opctx: &OpContext,
+        request: RegionSnapshotReplacementStep,
+    ) -> Result<(), omicron_common::api::external::Error> {
+        let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id
+        else {
+            // This state is illegal!
+            let s = format!(
+                "request {} old snapshot volume id is None!",
+                request.id,
+            );
+
+            return Err(omicron_common::api::external::Error::internal_error(
+                &s,
+            ));
+        };
+
+        let params =
+            sagas::region_snapshot_replacement_step_garbage_collect::Params {
+                serialized_authn: authn::saga::Serialized::for_opctx(opctx),
+                old_snapshot_volume_id,
+                request,
+            };
+
+        let saga_dag =
+            SagaRegionSnapshotReplacementStepGarbageCollect::prepare(&params)?;
+        self.sagas.saga_start(saga_dag).await
+    }
+
+    async fn clean_up_region_snapshot_replacement_step_volumes(
+        &self,
+        opctx: &OpContext,
+        status: &mut RegionSnapshotReplacementStepStatus,
+    ) {
+        let log = &opctx.log;
+
+        let requests = match self
+            .datastore
+            .region_snapshot_replacement_steps_requiring_garbage_collection(
+                opctx,
+            )
+            .await
+        {
+            Ok(requests) => requests,
+
+            Err(e) => {
+                let s = format!("querying for steps to collect failed!
{e}"); + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = + self.send_garbage_collect_request(opctx, request.clone()).await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement step garbage \ + collect request ok for {request_id}" + ); + + info!( + &log, + "{s}"; + "request.volume_id" => %request.volume_id, + "request.old_snapshot_volume_id" => ?request.old_snapshot_volume_id, + ); + status.step_garbage_collect_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "sending region snapshot replacement step garbage \ + collect request failed: {e}", + ); + error!( + &log, + "{s}"; + "request.volume_id" => %request.volume_id, + "request.old_snapshot_volume_id" => ?request.old_snapshot_volume_id, + ); + status.errors.push(s); + } + } + } + } + + // Any request in state Running means that the target replacement has + // occurred already, meaning the region snapshot being replaced is not + // present as a target in the snapshot's volume construction request + // anymore. Any future usage of that snapshot (as a source for a disk or + // otherwise) will get a volume construction request that references the + // replacement read-only region. + // + // "step" records are created here for each volume found that still + // references the replaced region snapshot, most likely having been created + // by copying the snapshot's volume construction request before the target + // replacement occurred. These volumes also need to have target replacement + // performed, and this is captured in this "step" record. + async fn create_step_records_for_affected_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStepStatus, + ) { + let log = &opctx.log; + + // Find all region snapshot replacement requests in state "Running" + let requests = match self + .datastore + .get_running_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "get_running_region_snapshot_replacements failed: {e}", + ); + + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + // Find all volumes that reference the replaced snapshot + let region_snapshot = match self + .datastore + .region_snapshot_get( + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ) + .await + { + Ok(Some(region_snapshot)) => region_snapshot, + + Ok(None) => { + let s = format!( + "region snapshot {} {} {} not found!", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + + Err(e) => { + let s = format!( + "error querying for region snapshot {} {} {}: {e}", + request.old_dataset_id, + request.old_region_id, + request.old_snapshot_id, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + let snapshot_addr = match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + let s = format!( + "region snapshot addr {} could not be parsed: {e}", + region_snapshot.snapshot_addr, + ); + error!(&log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + let volumes = match self + .datastore + .find_volumes_referencing_socket_addr(&opctx, snapshot_addr) + .await + { + Ok(volumes) => volumes, + + Err(e) => { + let s = format!("error finding referenced volumes: {e}"); + error!( + log, + "{s}"; + "request id" => ?request.id, + ); + 
status.errors.push(s); + + continue; + } + }; + + for volume in volumes { + // Any volume referencing the old socket addr needs to be + // replaced. Create a "step" record for this. + // + // Note: this function returns a conflict error if there already + // exists a step record referencing this volume ID because a + // volume repair record is also created using that volume ID, + // and only one of those can exist for a given volume at a time. + // + // Also note: this function returns a conflict error if another + // step record references this volume id in the "old snapshot + // volume id" column - this is ok! Region snapshot replacement + // step records are created for some volume id, and a null old + // snapshot volume id: + // + // volume_id: references snapshot_addr + // old_snapshot_volume_id: null + // + // The region snapshot replacement step saga will create a + // volume to stash the reference to snapshot_addr, and then call + // `volume_replace_snapshot`. This will swap snapshot_addr + // reference into the old snapshot volume for later deletion: + // + // volume_id: does _not_ reference snapshot_addr anymore + // old_snapshot_volume_id: now references snapshot_addr + // + // If `find_volumes_referencing_socket_addr` is executed before + // that volume is deleted, it will return the old snapshot + // volume id above, and then this for loop tries to make a + // region snapshot replacement step record for it! + // + // Allowing a region snapshot replacement step record to be + // created in this case would mean that (depending on when the + // functions execute), an indefinite amount of work would be + // created, continually "moving" the snapshot_addr from + // temporary volume to temporary volume. + + match self + .datastore + .create_region_snapshot_replacement_step( + opctx, + request.id, + volume.id(), + ) + .await + { + Ok(step_request_id) => { + let s = format!("created {step_request_id}"); + info!( + log, + "{s}"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + status.step_records_created_ok.push(s); + } + + Err(e) => { + let s = format!("error creating step request: {e}"); + error!( + log, + "{s}"; + "request id" => ?request.id, + "volume id" => ?volume.id(), + ); + status.errors.push(s); + } + } + } + } + } + + async fn invoke_step_saga_for_affected_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementStepStatus, + ) { + let log = &opctx.log; + + // Once all region snapshot replacement step records have been created, + // trigger sagas as appropriate. 
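+        // For context, the step records handled here were created in state
+        // "Requested" by the previous function. After the step saga has
+        // swapped the volume's reference to the old snapshot, the record
+        // ends up "Complete" with old_snapshot_volume_id populated, which is
+        // what clean_up_region_snapshot_replacement_step_volumes() garbage
+        // collects on the next activation. (A sketch of the lifecycle; only
+        // the states visible in this file are listed.)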
+ + let step_requests = match self + .datastore + .get_requested_region_snapshot_replacement_steps(opctx) + .await + { + Ok(step_requests) => step_requests, + + Err(e) => { + let s = format!( + "query for requested region snapshot replacement step \ + requests failed: {e}" + ); + error!(&log, "{s}"); + status.errors.push(s); + + return; + } + }; + + for request in step_requests { + let request_id = request.id; + + match self.send_start_request(opctx, request.clone()).await { + Ok(()) => { + let s = format!( + "region snapshot replacement step saga invoked ok for \ + {request_id}" + ); + + info!( + &log, + "{s}"; + "request.request_id" => %request.request_id, + "request.volume_id" => %request.volume_id, + ); + status.step_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "invoking region snapshot replacement step saga for \ + {request_id} failed: {e}" + ); + + error!( + &log, + "{s}"; + "request.request_id" => %request.request_id, + "request.volume_id" => %request.volume_id, + ); + status.errors.push(s); + } + }; + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementFindAffected { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut status = RegionSnapshotReplacementStepStatus::default(); + + // Importantly, clean old steps up before finding affected volumes! + // Otherwise, will continue to find the snapshot in volumes to + // delete, and will continue to see conflicts in next function. + self.clean_up_region_snapshot_replacement_step_volumes( + opctx, + &mut status, + ) + .await; + + self.create_step_records_for_affected_volumes(opctx, &mut status) + .await; + + self.invoke_step_saga_for_affected_volumes(opctx, &mut status) + .await; + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::init::test::NoopStartSaga; + use nexus_db_model::RegionSnapshot; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementStep; + use nexus_db_model::RegionSnapshotReplacementStepState; + use nexus_db_model::Volume; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + async fn add_fake_volume_for_snapshot_addr( + datastore: &DataStore, + snapshot_addr: String, + ) -> Uuid { + let new_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 0, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![snapshot_addr], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + let volume = Volume::new(new_volume_id, volume_data); + + datastore.volume_create(volume).await.unwrap(); + + new_volume_id + } + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_step_task( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + 
cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementFindAffected::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStepStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Add a region snapshot replacement request for a fake region snapshot. + + let dataset_id = Uuid::new_v4(); + let region_id = Uuid::new_v4(); + let snapshot_id = Uuid::new_v4(); + let snapshot_addr = String::from("[fd00:1122:3344::101]:9876"); + + let fake_region_snapshot = RegionSnapshot::new( + dataset_id, + region_id, + snapshot_id, + snapshot_addr.clone(), + ); + + datastore.region_snapshot_create(fake_region_snapshot).await.unwrap(); + + let request = + RegionSnapshotReplacement::new(dataset_id, region_id, snapshot_id); + + let request_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Transition that to Allocating -> ReplacementDone -> DeletingOldVolume + // -> Running + + let operating_saga_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_allocating( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + let new_region_id = Uuid::new_v4(); + let old_snapshot_volume_id = Uuid::new_v4(); + + datastore + .set_region_snapshot_replacement_replacement_done( + &opctx, + request_id, + operating_saga_id, + new_region_id, + old_snapshot_volume_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + datastore + .set_region_snapshot_replacement_running( + &opctx, + request_id, + operating_saga_id, + ) + .await + .unwrap(); + + // Add some fake volumes that reference the region snapshot being + // replaced + + let new_volume_1_id = add_fake_volume_for_snapshot_addr( + &datastore, + snapshot_addr.clone(), + ) + .await; + let new_volume_2_id = add_fake_volume_for_snapshot_addr( + &datastore, + snapshot_addr.clone(), + ) + .await; + + // Add some fake volumes that do not + + let other_volume_1_id = add_fake_volume_for_snapshot_addr( + &datastore, + String::from("[fd00:1122:3344::101]:1000"), + ) + .await; + + let other_volume_2_id = add_fake_volume_for_snapshot_addr( + &datastore, + String::from("[fd12:5544:3344::912]:3901"), + ) + .await; + + // Activate the task - it should pick the running request up and try to + // run the region snapshot replacement step saga for the volumes + + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + let requested_region_snapshot_replacement_steps = datastore + .get_requested_region_snapshot_replacement_steps(&opctx) + .await + .unwrap(); + + assert_eq!(requested_region_snapshot_replacement_steps.len(), 2); + + for step in &requested_region_snapshot_replacement_steps { + let s: String = format!("created {}", step.id); + assert!(result.step_records_created_ok.contains(&s)); + + let s: String = format!( + "region snapshot replacement step saga invoked ok for {}", + step.id + ); + assert!(result.step_invoked_ok.contains(&s)); + + if step.volume_id == new_volume_1_id + || step.volume_id == new_volume_2_id + { + // ok! 
+ } else if step.volume_id == other_volume_1_id + || step.volume_id == other_volume_2_id + { + // error! + assert!(false); + } else { + // error! + assert!(false); + } + } + + // No garbage collection would be invoked yet, as the step records are + // not in state Complete + assert!(result.step_garbage_collect_invoked_ok.is_empty()); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_step_task_gc( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementFindAffected::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!(result, RegionSnapshotReplacementStepStatus::default()); + assert_eq!(starter.count_reset(), 0); + + // Now, add some Complete records and make sure the garbage collection + // saga is invoked. + + datastore + .insert_region_snapshot_replacement_step(&opctx, { + let mut record = RegionSnapshotReplacementStep::new( + Uuid::new_v4(), + Uuid::new_v4(), + ); + + record.replacement_state = + RegionSnapshotReplacementStepState::Complete; + record.old_snapshot_volume_id = Some(Uuid::new_v4()); + + record + }) + .await + .unwrap(); + + datastore + .insert_region_snapshot_replacement_step(&opctx, { + let mut record = RegionSnapshotReplacementStep::new( + Uuid::new_v4(), + Uuid::new_v4(), + ); + + record.replacement_state = + RegionSnapshotReplacementStepState::Complete; + record.old_snapshot_volume_id = Some(Uuid::new_v4()); + + record + }) + .await + .unwrap(); + + // Activate the task - it should pick the complete steps up and try to + // run the region snapshot replacement step garbage collect saga + + let result: RegionSnapshotReplacementStepStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + let region_snapshot_replacement_steps_requiring_gc = datastore + .region_snapshot_replacement_steps_requiring_garbage_collection( + &opctx, + ) + .await + .unwrap(); + + assert_eq!(region_snapshot_replacement_steps_requiring_gc.len(), 2); + + eprintln!("{:?}", result); + + for step in ®ion_snapshot_replacement_steps_requiring_gc { + let s: String = format!( + "region snapshot replacement step garbage collect request ok \ + for {}", + step.id + ); + assert!(result.step_garbage_collect_invoked_ok.contains(&s)); + } + + assert!(result.step_records_created_ok.is_empty()); + + assert!(result.step_invoked_ok.is_empty()); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } +} diff --git a/nexus/src/app/background/tasks/saga_recovery.rs b/nexus/src/app/background/tasks/saga_recovery.rs index 7b0fe1b331b..42069ac4ed5 100644 --- a/nexus/src/app/background/tasks/saga_recovery.rs +++ b/nexus/src/app/background/tasks/saga_recovery.rs @@ -517,7 +517,7 @@ mod test { ) -> (dev::db::CockroachInstance, Arc) { let db = test_setup_database(&log).await; let cfg = nexus_db_queries::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(log, &cfg)); + let pool = Arc::new(db::Pool::new_single_host(log, &cfg)); let db_datastore = Arc::new( db::DataStore::new(&log, Arc::clone(&pool), None).await.unwrap(), ); diff 
--git a/nexus/src/app/background/tasks/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs index 20a12d1127a..f86bb1a782f 100644 --- a/nexus/src/app/background/tasks/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -51,8 +51,9 @@ use omicron_common::{ use serde_json::json; use sled_agent_client::types::{ BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig, - EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV2, - RackNetworkConfigV2, RouteConfig as SledRouteConfig, UplinkAddressConfig, + EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, + LldpAdminStatus, LldpPortConfig, PortConfigV2, RackNetworkConfigV2, + RouteConfig as SledRouteConfig, UplinkAddressConfig, }; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -564,7 +565,7 @@ impl BackgroundTask for SwitchPortSettingsManager { if !bgp_announce_prefixes.contains_key(&bgp_config.bgp_announce_set_id) { let announcements = match self .datastore - .bgp_announce_list( + .bgp_announcement_list( opctx, ¶ms::BgpAnnounceSetSelector { name_or_id: bgp_config @@ -977,6 +978,7 @@ impl BackgroundTask for SwitchPortSettingsManager { destination: r.dst.into(), nexthop: r.gw.ip(), vlan_id: r.vid.map(|x| x.0), + local_pref: r.local_pref.map(|x| x.0), }) .collect(), switch: *location, @@ -992,7 +994,23 @@ impl BackgroundTask for SwitchPortSettingsManager { .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), - }; + lldp: info + .link_lldp + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 + .map(|c| LldpPortConfig { + status: match c.enabled { + true => LldpAdminStatus::Enabled, + false=> LldpAdminStatus::Disabled, + }, + port_id: c.link_name.clone(), + port_description: c.link_description.clone(), + chassis_id: c.chassis_id.clone(), + system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + management_addrs:c.management_ip.map(|a| vec![a.ip()]), + }) + } + ; for peer in port_config.bgp_peers.iter_mut() { peer.communities = match self @@ -1411,6 +1429,29 @@ fn uplinks( let PortSettingsChange::Apply(config) = change else { continue; }; + + let lldp = if config.link_lldp.is_empty() { + None + } else { + let x = &config.link_lldp[0]; + Some(LldpPortConfig { + status: if x.enabled { + LldpAdminStatus::Enabled + } else { + LldpAdminStatus::Disabled + }, + port_id: x.link_name.clone(), + port_description: x.link_description.clone(), + chassis_id: x.chassis_id.clone(), + system_name: x.system_name.clone(), + system_description: x.system_description.clone(), + management_addrs: x.management_ip.map(|a| { + let ip: oxnet::IpNet = a.into(); + vec![ip.addr()] + }), + }) + }; + let config = HostPortConfig { port: port.port_name.clone(), addrs: config @@ -1421,6 +1462,7 @@ fn uplinks( vlan_id: a.vlan_id.map(|v| v.into()), }) .collect(), + lldp, }; match uplinks.entry(*location) { @@ -1455,7 +1497,8 @@ fn build_sled_agent_clients( sled_agent_clients } -type SwitchStaticRoutes = HashSet<(Ipv4Addr, Prefix4, Option)>; +type SwitchStaticRoutes = + HashSet<(Ipv4Addr, Prefix4, Option, Option)>; fn static_routes_to_del( current_static_routes: HashMap, @@ -1471,10 +1514,11 @@ fn static_routes_to_del( // if it's on the switch but not desired (in our db), it should be removed let stale_routes = routes_on_switch .difference(routes_wanted) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: 
*prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1488,10 +1532,11 @@ fn static_routes_to_del( // if no desired routes are present, all routes on this switch should be deleted let stale_routes = routes_on_switch .iter() - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1538,10 +1583,11 @@ fn static_routes_to_add( }; let missing_routes = routes_wanted .difference(routes_on_switch) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1590,7 +1636,12 @@ fn static_routes_in_db( } IpAddr::V6(_) => continue, }; - routes.insert((nexthop, prefix, route.vid.map(|x| x.0))); + routes.insert(( + nexthop, + prefix, + route.vid.map(|x| x.0), + route.local_pref.map(|x| x.0), + )); } match routes_from_db.entry(*location) { @@ -1768,44 +1819,46 @@ async fn static_routes_on_switch<'a>( let mut routes_on_switch = HashMap::new(); for (location, client) in mgd_clients { - let static_routes: SwitchStaticRoutes = - match client.static_list_v4_routes().await { - Ok(routes) => { - let mut flattened = HashSet::new(); - for (destination, paths) in routes.iter() { - let Ok(dst) = destination.parse() else { - error!( - log, - "failed to parse static route destination: \ + let static_routes: SwitchStaticRoutes = match client + .static_list_v4_routes() + .await + { + Ok(routes) => { + let mut flattened = HashSet::new(); + for (destination, paths) in routes.iter() { + let Ok(dst) = destination.parse() else { + error!( + log, + "failed to parse static route destination: \ {destination}" - ); - continue; + ); + continue; + }; + for p in paths.iter() { + let nh = match p.nexthop { + IpAddr::V4(addr) => addr, + IpAddr::V6(addr) => { + error!( + log, + "ipv6 nexthops not supported: {addr}" + ); + continue; + } }; - for p in paths.iter() { - let nh = match p.nexthop { - IpAddr::V4(addr) => addr, - IpAddr::V6(addr) => { - error!( - log, - "ipv6 nexthops not supported: {addr}" - ); - continue; - } - }; - flattened.insert((nh, dst, p.vlan_id)); - } + flattened.insert((nh, dst, p.vlan_id, p.local_pref)); } - flattened } - Err(_) => { - error!( - &log, - "unable to retrieve routes from switch"; - "switch_location" => ?location, - ); - continue; - } - }; + flattened + } + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; routes_on_switch.insert(*location, static_routes); } routes_on_switch diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index 118011500af..31a0faa6634 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -9,19 +9,20 @@ use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use nexus_db_queries::context::OpContext; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - self, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, CreateResult, - DeleteResult, ListResultVec, LookupResult, NameOrId, SwitchBgpHistory, + self, BgpExported, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, + CreateResult, DeleteResult, ListResultVec, LookupResult, NameOrId, + SwitchBgpHistory, }; use std::net::IpAddr; impl super::Nexus { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: 
&OpContext, config: ¶ms::BgpConfigCreate, ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - let result = self.db_datastore.bgp_config_set(opctx, config).await?; + let result = self.db_datastore.bgp_config_create(opctx, config).await?; Ok(result) } @@ -68,13 +69,13 @@ impl super::Nexus { Ok(result) } - pub async fn bgp_announce_list( + pub async fn bgp_announce_set_list( &self, opctx: &OpContext, - sel: ¶ms::BgpAnnounceSetSelector, - ) -> ListResultVec { + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - self.db_datastore.bgp_announce_list(opctx, sel).await + self.db_datastore.bgp_announce_set_list(opctx, pagparams).await } pub async fn bgp_delete_announce_set( @@ -88,6 +89,15 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_announcement_list( + &self, + opctx: &OpContext, + sel: ¶ms::BgpAnnounceSetSelector, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + self.db_datastore.bgp_announcement_list(opctx, sel).await + } + pub async fn bgp_peer_status( &self, opctx: &OpContext, @@ -145,6 +155,74 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_exported( + &self, + opctx: &OpContext, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let mut result = BgpExported::default(); + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? { + let router_info = match client.read_routers().await { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get routers from {switch}: {e}" + ); + continue; + } + }; + for r in &router_info { + let asn = r.asn; + + let exported = match client + .get_exported(&mg_admin_client::types::AsnSelector { asn }) + .await + { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get exports for asn {asn} from {switch}: {e}" + ); + continue; + } + }; + for (addr, exports) in exported { + let mut xps = Vec::new(); + for ex in exports.iter() { + let net = match ex { + mg_admin_client::types::Prefix::V4(v4) => { + oxnet::Ipv4Net::new_unchecked( + v4.value, v4.length, + ) + } + mg_admin_client::types::Prefix::V6(v6) => { + let v6 = oxnet::IpNet::V6( + oxnet::Ipv6Net::new_unchecked( + v6.value, v6.length, + ), + ); + warn!( + self.log, + "{v6}: ipv6 exports not supported yet" + ); + continue; + } + }; + xps.push(net); + } + result.exports.insert(addr.to_string(), xps); + } + } + } + Ok(result) + } + pub async fn bgp_message_history( &self, opctx: &OpContext, diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index b8fca26c146..86de3283556 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -150,7 +150,7 @@ impl super::Nexus { } /// Call out to Crucible agent and perform region creation. Optionally, - /// supply a read-only source to invoke a clone. + /// supply a read-only source's repair address to invoke a clone. pub async fn ensure_region_in_dataset( &self, log: &Logger, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index e9095cc9911..79e7a93e6da 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -4,7 +4,6 @@ //! 
Configuration of the deployment system -use nexus_db_model::DnsGroup; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_reconfigurator_planning::planner::Planner; @@ -13,13 +12,8 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; -use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -132,61 +126,8 @@ impl super::Nexus { ) -> Result { let creator = self.id.to_string(); let datastore = self.datastore(); - - let sled_rows = datastore - .sled_list_all_batched(opctx, SledFilter::Commissioned) - .await?; - let zpool_rows = - datastore.zpool_list_all_external_batched(opctx).await?; - let ip_pool_range_rows = { - let (authz_service_ip_pool, _) = - datastore.ip_pools_service_lookup(opctx).await?; - datastore - .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) - .await? - }; - let external_ip_rows = - datastore.external_ip_list_service_all_batched(opctx).await?; - let service_nic_rows = datastore - .service_network_interfaces_all_list_batched(opctx) - .await?; - - let internal_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::Internal) - .await - .internal_context( - "fetching internal DNS version for blueprint planning", - )? - .version; - let external_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::External) - .await - .internal_context( - "fetching external DNS version for blueprint planning", - )? - .version; - let cockroachdb_settings = - datastore.cockroachdb_settings(opctx).await.internal_context( - "fetching cockroachdb settings for blueprint planning", - )?; - - let planning_input = PlanningInputFromDb { - sled_rows: &sled_rows, - zpool_rows: &zpool_rows, - ip_pool_range_rows: &ip_pool_range_rows, - external_ip_rows: &external_ip_rows, - service_nic_rows: &service_nic_rows, - target_boundary_ntp_zone_count: BOUNDARY_NTP_REDUNDANCY, - target_nexus_zone_count: NEXUS_REDUNDANCY, - target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, - target_cockroachdb_cluster_version: - CockroachDbClusterVersion::POLICY, - log: &opctx.log, - internal_dns_version, - external_dns_version, - cockroachdb_settings: &cockroachdb_settings, - } - .build()?; + let planning_input = + PlanningInputFromDb::assemble(opctx, datastore).await?; // The choice of which inventory collection to use here is not // necessarily trivial. 
Inventory collections may be incomplete due to diff --git a/nexus/src/app/external_dns.rs b/nexus/src/app/external_dns.rs index c6a8d833c20..4732146ce2e 100644 --- a/nexus/src/app/external_dns.rs +++ b/nexus/src/app/external_dns.rs @@ -5,15 +5,15 @@ use std::net::IpAddr; use std::net::SocketAddr; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::DNS_PORT; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; -/// Wrapper around trust-dns-resolver to provide name resolution +/// Wrapper around hickory-resolver to provide name resolution /// using a given set of DNS servers for use with reqwest. pub struct Resolver(TokioAsyncResolver); @@ -26,18 +26,17 @@ impl Resolver { socket_addr: SocketAddr::new(*addr, DNS_PORT), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; // Do as many requests in parallel as we have configured servers opts.num_concurrent_reqs = dns_servers.len(); - Resolver( - TokioAsyncResolver::tokio(rc, opts) - .expect("creating resovler shouldn't fail"), - ) + Resolver(TokioAsyncResolver::tokio(rc, opts)) } } @@ -48,7 +47,7 @@ impl reqwest::dns::Resolve for Resolver { let ips = resolver.lookup_ip(name.as_str()).await?; let addrs = ips .into_iter() - // trust-dns-resolver returns `IpAddr`s but reqwest wants + // hickory-resolver returns `IpAddr`s but reqwest wants // `SocketAddr`s (useful if you have a custom resolver that // returns a scoped IPv6 address). The port provided here // is ignored in favour of the scheme default (http/80, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 344d2688f7c..b715b6bbd3d 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -60,7 +60,7 @@ use propolis_client::support::WebSocketStream; use sagas::instance_common::ExternalIpAttach; use sled_agent_client::types::InstanceMigrationTargetParams; use sled_agent_client::types::InstanceProperties; -use sled_agent_client::types::InstancePutStateBody; +use sled_agent_client::types::VmmPutStateBody; use std::matches; use std::net::SocketAddr; use std::sync::Arc; @@ -154,7 +154,7 @@ pub(crate) enum InstanceStateChangeRequest { } impl From - for sled_agent_client::types::InstanceStateRequested + for sled_agent_client::types::VmmStateRequested { fn from(value: InstanceStateChangeRequest) -> Self { match value { @@ -176,7 +176,7 @@ enum InstanceStateChangeRequestAction { /// Request the appropriate state change from the sled with the specified /// UUID. - SendToSled(SledUuid), + SendToSled { sled_id: SledUuid, propolis_id: PropolisUuid }, } /// What is the higher level operation that is calling @@ -191,6 +191,14 @@ enum InstanceStartDisposition { AlreadyStarted, } +/// The set of API resources needed when ensuring that an instance is registered +/// on a sled. 
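+///
+/// A sketch of how a caller might assemble this, using the same `LookupPath`
+/// pattern as `instance_migrate` elsewhere in this change (hypothetical
+/// usage, not part of this PR):
+///
+/// ```text
+/// let (authz_silo, authz_project, authz_instance) =
+///     LookupPath::new(&opctx, datastore)
+///         .instance_id(id.into_untyped_uuid())
+///         .lookup_for(authz::Action::Modify)
+///         .await?;
+/// InstanceEnsureRegisteredApiResources {
+///     authz_silo,
+///     authz_project,
+///     authz_instance,
+/// }
+/// ```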
+pub(crate) struct InstanceEnsureRegisteredApiResources { + pub(crate) authz_silo: nexus_auth::authz::Silo, + pub(crate) authz_project: nexus_auth::authz::Project, + pub(crate) authz_instance: nexus_auth::authz::Instance, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -473,14 +481,16 @@ impl super::Nexus { Ok(()) } - pub(crate) async fn project_instance_migrate( + pub(crate) async fn instance_migrate( self: &Arc, opctx: &OpContext, - instance_lookup: &lookup::Instance<'_>, - params: params::InstanceMigrate, + id: InstanceUuid, + params: nexus_types::internal_api::params::InstanceMigrateRequest, ) -> UpdateResult { - let (.., authz_instance) = - instance_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(authz::Action::Modify) + .await?; let state = self .db_datastore @@ -543,7 +553,6 @@ impl super::Nexus { if let Err(e) = self .instance_request_state( opctx, - &authz_instance, state.instance(), state.vmm(), InstanceStateChangeRequest::Reboot, @@ -622,7 +631,6 @@ impl super::Nexus { if let Err(e) = self .instance_request_state( opctx, - &authz_instance, state.instance(), state.vmm(), InstanceStateChangeRequest::Stop, @@ -654,21 +662,18 @@ impl super::Nexus { /// this sled, this operation rudely terminates it. pub(crate) async fn instance_ensure_unregistered( &self, - opctx: &OpContext, - authz_instance: &authz::Instance, + propolis_id: &PropolisUuid, sled_id: &SledUuid, - ) -> Result, InstanceStateChangeError> - { - opctx.authorize(authz::Action::Modify, authz_instance).await?; + ) -> Result, InstanceStateChangeError> { let sa = self.sled_client(&sled_id).await?; - sa.instance_unregister(&InstanceUuid::from_untyped_uuid( - authz_instance.id(), - )) - .await - .map(|res| res.into_inner().updated_runtime.map(Into::into)) - .map_err(|e| { - InstanceStateChangeError::SledAgent(SledAgentInstancePutError(e)) - }) + sa.vmm_unregister(propolis_id) + .await + .map(|res| res.into_inner().updated_runtime.map(Into::into)) + .map_err(|e| { + InstanceStateChangeError::SledAgent(SledAgentInstancePutError( + e, + )) + }) } /// Determines the action to take on an instance's active VMM given a @@ -702,8 +707,11 @@ impl super::Nexus { // Requests that operate on active instances have to be directed to the // instance's current sled agent. If there is none, the request needs to // be handled specially based on its type. 
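+        // Sketch of the resulting dispatch (variant and method names are the
+        // ones introduced in this change):
+        //
+        //   AlreadyDone
+        //       => nothing to send to any sled
+        //   SendToSled { sled_id, propolis_id }
+        //       => look up the sled agent client for `sled_id` and call
+        //          `vmm_put_state(&propolis_id, ...)`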
- let sled_id = if let Some(vmm) = vmm_state { - SledUuid::from_untyped_uuid(vmm.sled_id) + let (sled_id, propolis_id) = if let Some(vmm) = vmm_state { + ( + SledUuid::from_untyped_uuid(vmm.sled_id), + PropolisUuid::from_untyped_uuid(vmm.id), + ) } else { match effective_state { // If there's no active sled because the instance is stopped, @@ -804,7 +812,10 @@ impl super::Nexus { }; if allowed { - Ok(InstanceStateChangeRequestAction::SendToSled(sled_id)) + Ok(InstanceStateChangeRequestAction::SendToSled { + sled_id, + propolis_id, + }) } else { Err(Error::invalid_request(format!( "instance state cannot be changed from state \"{}\"", @@ -816,26 +827,25 @@ impl super::Nexus { pub(crate) async fn instance_request_state( &self, opctx: &OpContext, - authz_instance: &authz::Instance, prev_instance_state: &db::model::Instance, prev_vmm_state: &Option, requested: InstanceStateChangeRequest, ) -> Result<(), InstanceStateChangeError> { - opctx.authorize(authz::Action::Modify, authz_instance).await?; - let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id()); - match self.select_runtime_change_action( prev_instance_state, prev_vmm_state, &requested, )? { InstanceStateChangeRequestAction::AlreadyDone => Ok(()), - InstanceStateChangeRequestAction::SendToSled(sled_id) => { + InstanceStateChangeRequestAction::SendToSled { + sled_id, + propolis_id, + } => { let sa = self.sled_client(&sled_id).await?; let instance_put_result = sa - .instance_put_state( - &instance_id, - &InstancePutStateBody { state: requested.into() }, + .vmm_put_state( + &propolis_id, + &VmmPutStateBody { state: requested.into() }, ) .await .map(|res| res.into_inner().updated_runtime.map(Into::into)) @@ -852,7 +862,7 @@ impl super::Nexus { // Ok(None) here, in which case, there's nothing to write back. match instance_put_result { Ok(Some(ref state)) => self - .notify_instance_updated(opctx, instance_id, state) + .notify_vmm_updated(opctx, propolis_id, state) .await .map_err(Into::into), Ok(None) => Ok(()), @@ -867,7 +877,11 @@ impl super::Nexus { pub(crate) async fn instance_ensure_registered( &self, opctx: &OpContext, - authz_instance: &authz::Instance, + InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }: &InstanceEnsureRegisteredApiResources, db_instance: &db::model::Instance, propolis_id: &PropolisUuid, initial_vmm: &db::model::Vmm, @@ -1067,23 +1081,9 @@ impl super::Nexus { let ssh_keys: Vec = ssh_keys.map(|ssh_key| ssh_key.public_key).collect(); - // Construct instance metadata used to track its statistics. - // - // This requires another fetch on the silo and project, to extract their - // IDs. - let (.., db_project) = self - .project_lookup( - opctx, - params::ProjectSelector { - project: NameOrId::Id(db_instance.project_id), - }, - )? - .fetch() - .await?; - let (_, db_silo) = self.current_silo_lookup(opctx)?.fetch().await?; let metadata = sled_agent_client::types::InstanceMetadata { - silo_id: db_silo.id(), - project_id: db_project.id(), + silo_id: authz_silo.id(), + project_id: authz_project.id(), }; // Ask the sled agent to begin the state change. 
Then update the @@ -1120,13 +1120,13 @@ impl super::Nexus { .sled_client(&SledUuid::from_untyped_uuid(initial_vmm.sled_id)) .await?; let instance_register_result = sa - .instance_register( - &instance_id, + .vmm_register( + propolis_id, &sled_agent_client::types::InstanceEnsureBody { hardware: instance_hardware, instance_runtime: db_instance.runtime().clone().into(), vmm_runtime: initial_vmm.clone().into(), - propolis_id: *propolis_id, + instance_id, propolis_addr: SocketAddr::new( initial_vmm.propolis_ip.ip(), initial_vmm.propolis_port.into(), @@ -1141,8 +1141,7 @@ impl super::Nexus { match instance_register_result { Ok(state) => { - self.notify_instance_updated(opctx, instance_id, &state) - .await?; + self.notify_vmm_updated(opctx, *propolis_id, &state).await?; } Err(e) => { if e.instance_unhealthy() { @@ -1321,19 +1320,22 @@ impl super::Nexus { /// Invoked by a sled agent to publish an updated runtime state for an /// Instance. - pub(crate) async fn notify_instance_updated( + pub(crate) async fn notify_vmm_updated( &self, opctx: &OpContext, - instance_id: InstanceUuid, - new_runtime_state: &nexus::SledInstanceState, + propolis_id: PropolisUuid, + new_runtime_state: &nexus::SledVmmState, ) -> Result<(), Error> { - let saga = notify_instance_updated( + let Some((instance_id, saga)) = process_vmm_update( &self.db_datastore, opctx, - instance_id, + propolis_id, new_runtime_state, ) - .await?; + .await? + else { + return Ok(()); + }; // We don't need to wait for the instance update saga to run to // completion to return OK to the sled-agent --- all it needs to care @@ -1344,53 +1346,51 @@ impl super::Nexus { // one is eventually executed. // // Therefore, just spawn the update saga in a new task, and return. - if let Some(saga) = saga { - info!(opctx.log, "starting update saga for {instance_id}"; - "instance_id" => %instance_id, - "vmm_state" => ?new_runtime_state.vmm_state, - "migration_state" => ?new_runtime_state.migrations(), - ); - let sagas = self.sagas.clone(); - let task_instance_updater = - self.background_tasks.task_instance_updater.clone(); - let log = opctx.log.clone(); - tokio::spawn(async move { - // TODO(eliza): maybe we should use the lower level saga API so - // we can see if the saga failed due to the lock being held and - // retry it immediately? - let running_saga = async move { - let runnable_saga = sagas.saga_prepare(saga).await?; - runnable_saga.start().await - } - .await; - let result = match running_saga { - Err(error) => { - error!(&log, "failed to start update saga for {instance_id}"; - "instance_id" => %instance_id, - "error" => %error, - ); - // If we couldn't start the update saga for this - // instance, kick the instance-updater background task - // to try and start it again in a timely manner. - task_instance_updater.activate(); - return; - } - Ok(saga) => { - saga.wait_until_stopped().await.into_omicron_result() - } - }; - if let Err(error) = result { - error!(&log, "update saga for {instance_id} failed"; + info!(opctx.log, "starting update saga for {instance_id}"; + "instance_id" => %instance_id, + "vmm_state" => ?new_runtime_state.vmm_state, + "migration_state" => ?new_runtime_state.migrations(), + ); + let sagas = self.sagas.clone(); + let task_instance_updater = + self.background_tasks.task_instance_updater.clone(); + let log = opctx.log.clone(); + tokio::spawn(async move { + // TODO(eliza): maybe we should use the lower level saga API so + // we can see if the saga failed due to the lock being held and + // retry it immediately? 
+ let running_saga = async move { + let runnable_saga = sagas.saga_prepare(saga).await?; + runnable_saga.start().await + } + .await; + let result = match running_saga { + Err(error) => { + error!(&log, "failed to start update saga for {instance_id}"; "instance_id" => %instance_id, "error" => %error, ); - // If we couldn't complete the update saga for this + // If we couldn't start the update saga for this // instance, kick the instance-updater background task // to try and start it again in a timely manner. task_instance_updater.activate(); + return; } - }); - } + Ok(saga) => { + saga.wait_until_stopped().await.into_omicron_result() + } + }; + if let Err(error) = result { + error!(&log, "update saga for {instance_id} failed"; + "instance_id" => %instance_id, + "error" => %error, + ); + // If we couldn't complete the update saga for this + // instance, kick the instance-updater background task + // to try and start it again in a timely manner. + task_instance_updater.activate(); + } + }); Ok(()) } @@ -1830,21 +1830,27 @@ impl super::Nexus { } } -/// Invoked by a sled agent to publish an updated runtime state for an -/// Instance, returning an update saga for that instance (if one must be -/// executed). -pub(crate) async fn notify_instance_updated( +/// Writes the VMM and migration state supplied in `new_runtime_state` to the +/// database (provided that it's newer than what's already there). +/// +/// # Return value +/// +/// - `Ok(Some(instance_id, saga))` if the new VMM state obsoletes the current +/// instance state. The caller should execute the returned instance update +/// saga to reconcile the instance to the new VMM state. +/// - `Ok(None)` if the new state was successfully published but does not +/// require an instance update. +/// - `Err` if an error occurred. +pub(crate) async fn process_vmm_update( datastore: &DataStore, opctx: &OpContext, - instance_id: InstanceUuid, - new_runtime_state: &nexus::SledInstanceState, -) -> Result, Error> { + propolis_id: PropolisUuid, + new_runtime_state: &nexus::SledVmmState, +) -> Result, Error> { use sagas::instance_update; let migrations = new_runtime_state.migrations(); - let propolis_id = new_runtime_state.propolis_id; info!(opctx.log, "received new VMM runtime state from sled agent"; - "instance_id" => %instance_id, "propolis_id" => %propolis_id, "vmm_state" => ?new_runtime_state.vmm_state, "migration_state" => ?migrations, @@ -1864,21 +1870,34 @@ pub(crate) async fn notify_instance_updated( // prepare and return it. if instance_update::update_saga_needed( &opctx.log, - instance_id, + propolis_id, new_runtime_state, &result, ) { + let instance_id = + InstanceUuid::from_untyped_uuid(result.found_vmm.instance_id); + let (.., authz_instance) = LookupPath::new(&opctx, datastore) .instance_id(instance_id.into_untyped_uuid()) .lookup_for(authz::Action::Modify) .await?; - let saga = instance_update::SagaInstanceUpdate::prepare( + + match instance_update::SagaInstanceUpdate::prepare( &instance_update::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), authz_instance, }, - )?; - Ok(Some(saga)) + ) { + Ok(saga) => Ok(Some((instance_id, saga))), + Err(e) => { + error!(opctx.log, "failed to prepare instance update saga"; + "error" => ?e, + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); + + Err(e) + } + } } else { Ok(None) } diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 3728a3bdc15..4dc7309e767 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -4,7 +4,6 @@ //! 
Metrics -use crate::external_api::http_entrypoints::SystemMetricName; use crate::external_api::params::ResourceMetrics; use dropshot::PaginationParams; use nexus_db_queries::authz; @@ -12,10 +11,10 @@ use nexus_db_queries::{ context::OpContext, db::{fixed_data::FLEET_ID, lookup}, }; +use nexus_external_api::TimeseriesSchemaPaginationParams; +use nexus_types::external_api::params::SystemMetricName; use omicron_common::api::external::{Error, InternalContext}; -use oximeter_db::{ - oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, -}; +use oximeter_db::{Measurement, TimeseriesSchema}; use std::num::NonZeroU32; impl super::Nexus { @@ -138,7 +137,7 @@ impl super::Nexus { &self, opctx: &OpContext, query: impl AsRef, - ) -> Result, Error> { + ) -> Result, Error> { // Must be a fleet user to list timeseries schema. // // TODO-security: We need to figure out how to implement proper security diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 9039d1b8fa0..0c7ec3a0166 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -12,7 +12,7 @@ use internal_dns::ServiceName; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; use omicron_common::api::external::Error; use omicron_common::api::external::{DataPageParams, ListResultVec}; use omicron_common::api::internal::nexus::{self, ProducerEndpoint}; @@ -65,7 +65,7 @@ impl LazyTimeseriesClient { ClientSource::FromIp { address } => *address, ClientSource::FromDns { resolver } => SocketAddr::new( resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + CLICKHOUSE_HTTP_PORT, ), }; diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 13b30fd47ad..835541c2eae 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::BgpAnnounceSetCreate; use nexus_types::external_api::params::BgpAnnouncementCreate; use nexus_types::external_api::params::BgpConfigCreate; use nexus_types::external_api::params::LinkConfigCreate; -use nexus_types::external_api::params::LldpServiceConfigCreate; +use nexus_types::external_api::params::LldpLinkConfigCreate; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; @@ -61,6 +61,7 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use oxnet::IpNet; @@ -146,7 +147,7 @@ impl super::Nexus { dataset.dataset_id, dataset.zpool_id, Some(dataset.request.address), - dataset.request.kind.into(), + dataset.request.kind, ) }) .collect(); @@ -241,22 +242,44 @@ impl super::Nexus { .internal_context( "fetching cockroachdb settings for rack initialization", )?; - self.datastore() - .cockroachdb_setting_set_string( - opctx, - cockroachdb_settings.state_fingerprint.clone(), - "cluster.preserve_downgrade_option", - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(), - ) - .await - .internal_context( - "setting `cluster.preserve_downgrade_option` \ - for rack initialization", - )?; + blueprint.cockroachdb_setting_preserve_downgrade = + if 
cockroachdb_settings.preserve_downgrade.is_empty() { + // Set the option to the current policy in both the database and + // the blueprint. + self.datastore() + .cockroachdb_setting_set_string( + opctx, + cockroachdb_settings.state_fingerprint.clone(), + "cluster.preserve_downgrade_option", + CockroachDbClusterVersion::NEWLY_INITIALIZED + .to_string(), + ) + .await + .internal_context( + "setting `cluster.preserve_downgrade_option` \ + for rack initialization", + )?; + CockroachDbClusterVersion::NEWLY_INITIALIZED + } else { + // `cluster.preserve_downgrade_option` is set, so fill in the + // blueprint with the current value. This branch should never + // be hit during normal rack initialization; it's here for + // eventual test cases where `cluster.preserve_downgrade_option` + // is set by a test harness prior to rack initialization. + CockroachDbClusterVersion::from_str( + &cockroachdb_settings.preserve_downgrade, + ) + .map_err(|_| { + Error::internal_error(&format!( + "database has `cluster.preserve_downgrade_option` \ + set to invalid version {}", + cockroachdb_settings.preserve_downgrade + )) + })? + } + .into(); blueprint.cockroachdb_fingerprint = cockroachdb_settings.state_fingerprint; - blueprint.cockroachdb_setting_preserve_downgrade = - CockroachDbClusterVersion::NEWLY_INITIALIZED.into(); // Administrators of the Recovery Silo are automatically made // administrators of the Fleet. @@ -487,7 +510,7 @@ impl super::Nexus { match self .db_datastore - .bgp_config_set( + .bgp_config_create( &opctx, &BgpConfigCreate { identity: IdentityMetadataCreateParams { @@ -570,6 +593,7 @@ impl super::Nexus { dst: r.destination, gw: r.nexthop, vid: r.vlan_id, + local_pref: r.local_pref, }) .collect(); @@ -608,15 +632,30 @@ impl super::Nexus { .bgp_peers .insert("phy0".to_string(), BgpPeerConfig { peers }); - let link = LinkConfigCreate { - mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 - lldp: LldpServiceConfigCreate { + let lldp = match &uplink_config.lldp { + None => LldpLinkConfigCreate { enabled: false, - lldp_config: None, + ..Default::default() }, + Some(l) => LldpLinkConfigCreate { + enabled: l.status == LldpAdminStatus::Enabled, + link_name: l.port_id.clone(), + link_description: l.port_description.clone(), + chassis_id: l.chassis_id.clone(), + system_name: l.system_name.clone(), + system_description: l.system_description.clone(), + management_ip: match &l.management_addrs { + Some(a) if !a.is_empty() => Some(a[0]), + _ => None, + }, + }, + }; + let link = LinkConfigCreate { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 fec: uplink_config.uplink_port_fec.into(), speed: uplink_config.uplink_port_speed.into(), autoneg: uplink_config.autoneg, + lldp, }; port_settings_params.links.insert("phy".to_string(), link); diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 5bc69946ad8..975df7fc3bf 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -469,6 +469,10 @@ impl super::Nexus { // We don't need the handle that runnable_saga.start() returns because // we're not going to wait for the saga to finish here. 
let _ = runnable_saga.start().await?; + + let mut demo_sagas = self.demo_sagas()?; + demo_sagas.preregister(demo_saga_id); + Ok(DemoSaga { saga_id, demo_saga_id }) } diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 592463f5bbc..d37370506cb 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -15,6 +15,7 @@ use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::Error; use omicron_common::retry_until_known_result; +use slog::Logger; use std::net::SocketAddrV6; // Common Pantry operations @@ -107,3 +108,33 @@ pub(crate) async fn call_pantry_detach_for_disk( Ok(()) } + +pub(crate) fn find_only_new_region( + log: &Logger, + existing_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, + new_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, +) -> Option<(db::model::Dataset, db::model::Region)> { + // Only filter on whether or not a Region is in the existing list! Datasets + // can change values (like size_used) if this saga interleaves with other + // saga runs of the same type. + let mut dataset_and_region: Vec<(db::model::Dataset, db::model::Region)> = + new_datasets_and_regions + .into_iter() + .filter(|(_, r)| { + !existing_datasets_and_regions.iter().any(|(_, er)| er == r) + }) + .collect(); + + if dataset_and_region.len() != 1 { + error!( + log, + "find_only_new_region saw dataset_and_region len {}: {:?}", + dataset_and_region.len(), + dataset_and_region, + ); + + None + } else { + dataset_and_region.pop() + } +} diff --git a/nexus/src/app/sagas/demo.rs b/nexus/src/app/sagas/demo.rs index 4a8eda8b806..d76a48688d7 100644 --- a/nexus/src/app/sagas/demo.rs +++ b/nexus/src/app/sagas/demo.rs @@ -21,56 +21,66 @@ use super::NexusActionContext; use super::{ActionRegistry, NexusSaga, SagaInitError}; use crate::app::sagas::declare_saga_actions; -use anyhow::ensure; +use anyhow::Context; use omicron_common::api::external::Error; use omicron_uuid_kinds::DemoSagaUuid; use serde::Deserialize; use serde::Serialize; use slog::info; use std::collections::BTreeMap; +use std::future::Future; +use std::sync::Arc; use steno::ActionError; -use tokio::sync::oneshot; +use tokio::sync::Semaphore; -/// Set of demo sagas that have been marked completed +/// Rendezvous point for demo sagas /// -/// Nexus maintains one of these at the top level. Individual demo sagas wait -/// until their id shows up here, then remove it and proceed. +/// This is where: +/// +/// - demo sagas wait for a completion message +/// - completion messages are recorded for demo sagas that haven't started +/// waiting yet +/// +/// Nexus maintains one of these structures at the top level. pub struct CompletingDemoSagas { - ids: BTreeMap>, + sagas: BTreeMap>, } impl CompletingDemoSagas { pub fn new() -> CompletingDemoSagas { - CompletingDemoSagas { ids: BTreeMap::new() } + CompletingDemoSagas { sagas: BTreeMap::new() } } - pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { - self.ids - .remove(&id) - .ok_or_else(|| { - Error::non_resourcetype_not_found(format!( - "demo saga with id {:?}", - id - )) - })? 
- .send(()) - .map_err(|_| { - Error::internal_error( - "saga stopped listening (Nexus shutting down?)", - ) - }) + pub fn preregister(&mut self, id: DemoSagaUuid) { + assert!(self.sagas.insert(id, Arc::new(Semaphore::new(0))).is_none()); } pub fn subscribe( &mut self, id: DemoSagaUuid, - ) -> Result, anyhow::Error> { - let (tx, rx) = oneshot::channel(); - ensure!( - self.ids.insert(id, tx).is_none(), - "multiple subscriptions for the same demo saga" - ); - Ok(rx) + ) -> impl Future> { + let sem = + self.sagas.entry(id).or_insert_with(|| Arc::new(Semaphore::new(0))); + let sem_clone = sem.clone(); + async move { + sem_clone + .acquire() + .await + // We don't need the Semaphore permit once we've acquired it. + .map(|_| ()) + .context("acquiring demo saga semaphore") + } + } + + pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { + let sem = self.sagas.get_mut(&id).ok_or_else(|| { + Error::non_resourcetype_not_found(format!( + "demo saga with demo saga id {:?}", + id + )) + })?; + sem.add_permits(1); + Ok(()) } } @@ -115,21 +125,87 @@ async fn demo_wait(sagactx: NexusActionContext) -> Result<(), ActionError> { .nexus() .demo_sagas() .map_err(ActionError::action_failed)?; - demo_sagas.subscribe(demo_id).map_err(|e| { - ActionError::action_failed(Error::internal_error(&format!( - "demo saga subscribe failed: {:#}", - e - ))) - })? + demo_sagas.subscribe(demo_id) }; match rx.await { Ok(_) => { info!(log, "demo saga: completing"; "id" => %demo_id); + Ok(()) } - Err(_) => { - info!(log, "demo saga: waiting failed (Nexus shutting down?)"; - "id" => %demo_id); + Err(error) => { + warn!(log, "demo saga: waiting failed (Nexus shutting down?)"; + "id" => %demo_id, + "error" => #?error, + ); + Err(ActionError::action_failed(Error::internal_error(&format!( + "demo saga wait failed: {:#}", + error + )))) } } - Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use assert_matches::assert_matches; + + #[tokio::test] + async fn test_demo_saga_rendezvous() { + let mut hub = CompletingDemoSagas::new(); + + // The most straightforward sequence is: + // - create (preregister) demo saga + // - demo saga starts and waits for completion (subscribe) + // - complete demo saga + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible that the completion request arrives before the + // saga started waiting. In that case, the sequence is: + // + // - create (preregister) demo saga + // - complete demo saga + // - demo saga starts and waits for completion (subscribe) + // + // This should work, too, with no errors. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible to have no preregistration at all. This happens + // if the demo saga was recovered. 
That's fine, too, but then it will + // only work if the completion arrives after the saga starts waiting. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // If there's no preregistration and we get a completion request, then + // that request should fail. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let error = hub.complete(demo_saga_id).unwrap_err(); + assert_matches!(error, Error::NotFound { .. }); + println!("demo saga: {demo_saga_id} complete error: {:#}", error); + } } diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 6e431aaca70..049673d2ee5 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -25,6 +25,12 @@ use super::NexusActionContext; /// The port propolis-server listens on inside the propolis zone. const DEFAULT_PROPOLIS_PORT: u16 = 12400; +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(super) struct VmmAndSledIds { + pub(super) vmm_id: PropolisUuid, + pub(super) sled_id: SledUuid, +} + /// Reserves resources for a new VMM whose instance has `ncpus` guest logical /// processors and `guest_memory` bytes of guest RAM. The selected sled is /// random within the set of sleds allowed by the supplied `constraints`. @@ -213,12 +219,12 @@ pub async fn instance_ip_move_state( /// the Attaching or Detaching state so that concurrent attempts to start the /// instance will notice that the IP state is in flux and ask the caller to /// retry. -pub async fn instance_ip_get_instance_state( +pub(super) async fn instance_ip_get_instance_state( sagactx: &NexusActionContext, serialized_authn: &authn::saga::Serialized, authz_instance: &authz::Instance, verb: &str, -) -> Result, ActionError> { +) -> Result, ActionError> { // XXX: we can get instance state (but not sled ID) in same transaction // as attach (but not detach) wth current design. We need to re-query // for sled ID anyhow, so keep consistent between attach/detach. @@ -236,7 +242,11 @@ pub async fn instance_ip_get_instance_state( inst_and_vmm.vmm().as_ref().map(|vmm| vmm.runtime.state); let found_instance_state = inst_and_vmm.instance().runtime_state.nexus_state; - let mut sled_id = inst_and_vmm.sled_id(); + let mut propolis_and_sled_id = + inst_and_vmm.vmm().as_ref().map(|vmm| VmmAndSledIds { + vmm_id: PropolisUuid::from_untyped_uuid(vmm.id), + sled_id: SledUuid::from_untyped_uuid(vmm.sled_id), + }); slog::debug!( osagactx.log(), "evaluating instance state for IP attach/detach"; @@ -257,7 +267,7 @@ pub async fn instance_ip_get_instance_state( match (found_instance_state, found_vmm_state) { // If there's no VMM, the instance is definitely not on any sled. 
(InstanceState::NoVmm, _) | (_, Some(VmmState::SagaUnwound)) => { - sled_id = None; + propolis_and_sled_id = None; } // If the instance is running normally or rebooting, it's resident on @@ -340,7 +350,7 @@ pub async fn instance_ip_get_instance_state( } } - Ok(sled_id) + Ok(propolis_and_sled_id) } /// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a @@ -441,18 +451,19 @@ pub async fn instance_ip_remove_nat( /// Inform the OPTE port for a running instance that it should start /// sending/receiving traffic on a given IP address. /// -/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly -/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). -pub async fn instance_ip_add_opte( +/// This call is a no-op if the instance is not active (`propolis_and_sled` is +/// `None`) or the calling saga is explicitly set to be inactive in the event of +/// a double attach/detach (`!target_ip.do_saga`). +pub(super) async fn instance_ip_add_opte( sagactx: &NexusActionContext, - authz_instance: &authz::Instance, - sled_uuid: Option, + vmm_and_sled: Option, target_ip: ModifyStateForExternalIp, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); // No physical sled? Don't inform OPTE. - let Some(sled_uuid) = sled_uuid else { + let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) = vmm_and_sled + else { return Ok(()); }; @@ -470,17 +481,14 @@ pub async fn instance_ip_add_opte( osagactx .nexus() - .sled_client(&sled_uuid) + .sled_client(&sled_id) .await .map_err(|_| { ActionError::action_failed(Error::unavail( "sled agent client went away mid-attach/detach", )) })? - .instance_put_external_ip( - &InstanceUuid::from_untyped_uuid(authz_instance.id()), - &sled_agent_body, - ) + .vmm_put_external_ip(&propolis_id, &sled_agent_body) .await .map_err(|e| { ActionError::action_failed(match e { @@ -499,18 +507,20 @@ pub async fn instance_ip_add_opte( /// Inform the OPTE port for a running instance that it should cease /// sending/receiving traffic on a given IP address. /// -/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly -/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). -pub async fn instance_ip_remove_opte( +/// This call is a no-op if the instance is not active (`propolis_and_sled` is +/// `None`) or the calling saga is explicitly set to be inactive in the event of +/// a double attach/detach (`!target_ip.do_saga`). +pub(super) async fn instance_ip_remove_opte( sagactx: &NexusActionContext, - authz_instance: &authz::Instance, - sled_uuid: Option, + propolis_and_sled: Option, target_ip: ModifyStateForExternalIp, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); // No physical sled? Don't inform OPTE. - let Some(sled_uuid) = sled_uuid else { + let Some(VmmAndSledIds { vmm_id: propolis_id, sled_id }) = + propolis_and_sled + else { return Ok(()); }; @@ -528,17 +538,14 @@ pub async fn instance_ip_remove_opte( osagactx .nexus() - .sled_client(&sled_uuid) + .sled_client(&sled_id) .await .map_err(|_| { ActionError::action_failed(Error::unavail( "sled agent client went away mid-attach/detach", )) })? 
- .instance_delete_external_ip( - &InstanceUuid::from_untyped_uuid(authz_instance.id()), - &sled_agent_body, - ) + .vmm_delete_external_ip(&propolis_id, &sled_agent_body) .await .map_err(|e| { ActionError::action_failed(match e { diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index d19230892fa..0b6d8cc0f8a 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1220,8 +1220,7 @@ pub mod test { } async fn no_instances_or_disks_on_sled(sled_agent: &SledAgent) -> bool { - sled_agent.instance_count().await == 0 - && sled_agent.disk_count().await == 0 + sled_agent.vmm_count().await == 0 && sled_agent.disk_count().await == 0 } pub(crate) async fn verify_clean_slate( diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index a14054cf662..e6fb8654ea0 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -5,7 +5,7 @@ use super::instance_common::{ instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, instance_ip_move_state, instance_ip_remove_opte, ExternalIpAttach, - ModifyStateForExternalIp, + ModifyStateForExternalIp, VmmAndSledIds, }; use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; @@ -13,7 +13,7 @@ use crate::app::{authn, authz}; use nexus_db_model::{IpAttachState, Ipv4NatEntry}; use nexus_types::external_api::views; use omicron_common::api::external::Error; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -161,7 +161,7 @@ async fn siia_begin_attach_ip_undo( async fn siia_get_instance_state( sagactx: NexusActionContext, -) -> Result, ActionError> { +) -> Result, ActionError> { let params = sagactx.saga_params::()?; instance_ip_get_instance_state( &sagactx, @@ -177,7 +177,10 @@ async fn siia_nat( sagactx: NexusActionContext, ) -> Result, ActionError> { let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let sled_id = sagactx + .lookup::>("instance_state")? + .map(|ids| ids.sled_id); + let target_ip = sagactx.lookup::("target_ip")?; instance_ip_add_nat( &sagactx, @@ -245,28 +248,18 @@ async fn siia_nat_undo( async fn siia_update_opte( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let ids = sagactx.lookup::>("instance_state")?; let target_ip = sagactx.lookup::("target_ip")?; - instance_ip_add_opte(&sagactx, ¶ms.authz_instance, sled_id, target_ip) - .await + instance_ip_add_opte(&sagactx, ids, target_ip).await } async fn siia_update_opte_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); - let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let ids = sagactx.lookup::>("instance_state")?; let target_ip = sagactx.lookup::("target_ip")?; - if let Err(e) = instance_ip_remove_opte( - &sagactx, - ¶ms.authz_instance, - sled_id, - target_ip, - ) - .await - { + if let Err(e) = instance_ip_remove_opte(&sagactx, ids, target_ip).await { error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}"); } Ok(()) @@ -436,8 +429,14 @@ pub(crate) mod test { } // Sled agent has a record of the new external IPs. + let VmmAndSledIds { vmm_id, .. 
} = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips .iter() .any(|v| matches!(v, InstanceExternalIpBody::Floating(_)))); @@ -458,7 +457,7 @@ pub(crate) mod test { pub(crate) async fn verify_clean_slate( cptestctx: &ControlPlaneTestContext, - instance_id: Uuid, + instance_id: InstanceUuid, ) { use nexus_db_queries::db::schema::external_ip::dsl; @@ -471,7 +470,7 @@ pub(crate) mod test { assert!(dsl::external_ip .filter(dsl::kind.eq(IpKind::Floating)) .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::parent_id.eq(instance_id.into_untyped_uuid())) .filter(dsl::state.ne(IpAttachState::Detached)) .select(ExternalIp::as_select()) .first_async::(&*conn) @@ -492,8 +491,14 @@ pub(crate) mod test { .is_none()); // No IP bindings remain on sled-agent. + let VmmAndSledIds { vmm_id, .. } = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips.is_empty()); } @@ -512,9 +517,10 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); crate::app::sagas::test_helpers::instance_simulate( cptestctx, - &InstanceUuid::from_untyped_uuid(instance.identity.id), + &instance_id, ) .await; @@ -522,7 +528,7 @@ pub(crate) mod test { test_helpers::action_failure_can_unwind::( nexus, || Box::pin(new_test_params(&opctx, datastore, use_float) ), - || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + || Box::pin(verify_clean_slate(&cptestctx, instance_id)), log, ) .await; @@ -544,9 +550,10 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); crate::app::sagas::test_helpers::instance_simulate( cptestctx, - &InstanceUuid::from_untyped_uuid(instance.identity.id), + &instance_id, ) .await; @@ -558,7 +565,7 @@ pub(crate) mod test { >( nexus, || Box::pin(new_test_params(&opctx, datastore, use_float)), - || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + || Box::pin(verify_clean_slate(&cptestctx, instance_id)), log, ) .await; diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index a5b51ce3759..d9da9fc05cf 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -5,7 +5,7 @@ use super::instance_common::{ instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte, - ModifyStateForExternalIp, + ModifyStateForExternalIp, VmmAndSledIds, }; use super::{ActionRegistry, NexusActionContext, NexusSaga}; use crate::app::sagas::declare_saga_actions; @@ -15,7 +15,7 @@ use nexus_db_model::IpAttachState; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::external_api::views; use omicron_common::api::external::NameOrId; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use ref_cast::RefCast; use 
serde::Deserialize; use serde::Serialize; @@ -155,7 +155,7 @@ async fn siid_begin_detach_ip_undo( async fn siid_get_instance_state( sagactx: NexusActionContext, -) -> Result, ActionError> { +) -> Result, ActionError> { let params = sagactx.saga_params::()?; instance_ip_get_instance_state( &sagactx, @@ -168,7 +168,9 @@ async fn siid_get_instance_state( async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> { let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let sled_id = sagactx + .lookup::>("instance_state")? + .map(|ids| ids.sled_id); let target_ip = sagactx.lookup::("target_ip")?; instance_ip_remove_nat( &sagactx, @@ -184,7 +186,9 @@ async fn siid_nat_undo( ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let sled_id = sagactx + .lookup::>("instance_state")? + .map(|ids| ids.sled_id); let target_ip = sagactx.lookup::("target_ip")?; if let Err(e) = instance_ip_add_nat( &sagactx, @@ -204,33 +208,18 @@ async fn siid_nat_undo( async fn siid_update_opte( sagactx: NexusActionContext, ) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let ids = sagactx.lookup::>("instance_state")?; let target_ip = sagactx.lookup::("target_ip")?; - instance_ip_remove_opte( - &sagactx, - ¶ms.authz_instance, - sled_id, - target_ip, - ) - .await + instance_ip_remove_opte(&sagactx, ids, target_ip).await } async fn siid_update_opte_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let log = sagactx.user_data().log(); - let params = sagactx.saga_params::()?; - let sled_id = sagactx.lookup::>("instance_state")?; + let ids = sagactx.lookup::>("instance_state")?; let target_ip = sagactx.lookup::("target_ip")?; - if let Err(e) = instance_ip_add_opte( - &sagactx, - ¶ms.authz_instance, - sled_id, - target_ip, - ) - .await - { + if let Err(e) = instance_ip_add_opte(&sagactx, ids, target_ip).await { error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}"); } Ok(()) @@ -410,8 +399,14 @@ pub(crate) mod test { } // Sled agent has removed its records of the external IPs. + let VmmAndSledIds { vmm_id, .. } = + crate::app::sagas::test_helpers::instance_fetch_vmm_and_sled_ids( + cptestctx, + &instance_id, + ) + .await; let mut eips = sled_agent.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(vmm_id).or_default(); assert!(my_eips.is_empty()); // DB only has record for SNAT. 
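The `Option<VmmAndSledIds>`-based no-op contract documented on the attach/detach helpers above ("no active VMM means no sled to notify") can be sketched in isolation. Everything below is a simplified, std-only stand-in for the saga types and the per-VMM sled-agent call:

```rust
/// Simplified stand-in for the saga's `VmmAndSledIds`.
struct VmmAndSledIds {
    vmm_id: u64,
    sled_id: u64,
}

/// Sketch of the guard used by `instance_ip_add_opte` and
/// `instance_ip_remove_opte`: if the instance has no active VMM, there is no
/// OPTE port to update, so the call succeeds without doing anything.
fn update_opte(ids: Option<VmmAndSledIds>) -> Result<(), String> {
    let Some(VmmAndSledIds { vmm_id, sled_id }) = ids else {
        // Instance is not resident on any sled: nothing to do.
        return Ok(());
    };
    // In the real saga this is a per-VMM sled-agent call such as
    // `vmm_put_external_ip`; here we just log it.
    println!("updating OPTE for vmm {vmm_id} on sled {sled_id}");
    Ok(())
}

fn main() {
    assert!(update_opte(None).is_ok()); // the no-op path
    assert!(update_opte(Some(VmmAndSledIds { vmm_id: 1, sled_id: 2 })).is_ok());
}
```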
diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index bb4bf282e41..24d11fcae27 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,15 +4,15 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::instance::{ - InstanceRegisterReason, InstanceStateChangeError, - InstanceStateChangeRequest, + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, InstanceStateChangeRequest, }; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, }; -use crate::external_api::params; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use serde::Deserialize; use serde::Serialize; @@ -30,7 +30,7 @@ pub struct Params { pub serialized_authn: authn::saga::Serialized, pub instance: db::model::Instance, pub src_vmm: db::model::Vmm, - pub migrate_params: params::InstanceMigrate, + pub migrate_params: InstanceMigrateRequest, } // The migration saga is similar to the instance start saga: get a destination @@ -401,11 +401,12 @@ async fn sim_ensure_destination_propolis( "dst_propolis_id" => %vmm.id, "dst_vmm_state" => ?vmm); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; let src_propolis_id = PropolisUuid::from_untyped_uuid(params.src_vmm.id); let dst_propolis_id = PropolisUuid::from_untyped_uuid(vmm.id); @@ -413,7 +414,11 @@ async fn sim_ensure_destination_propolis( .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &dst_propolis_id, &vmm, @@ -432,20 +437,10 @@ async fn sim_ensure_destination_propolis_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - + let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; let dst_sled_id = sagactx.lookup::("dst_sled_id")?; let db_instance = sagactx.lookup::("set_migration_ids")?; - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; info!(osagactx.log(), "unregistering destination vmm for migration unwind"; "instance_id" => %db_instance.id(), @@ -460,7 +455,7 @@ async fn sim_ensure_destination_propolis_undo( // needed. 
match osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &dst_sled_id) + .instance_ensure_unregistered(&dst_propolis_id, &dst_sled_id) .await { Ok(_) => Ok(()), @@ -495,12 +490,6 @@ async fn sim_instance_migrate( let src_propolis_id = db_instance.runtime().propolis_id.unwrap(); let dst_vmm = sagactx.lookup::("dst_vmm_record")?; - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - info!(osagactx.log(), "initiating migration from destination sled"; "instance_id" => %db_instance.id(), "dst_vmm_record" => ?dst_vmm, @@ -524,7 +513,6 @@ async fn sim_instance_migrate( .nexus() .instance_request_state( &opctx, - &authz_instance, &db_instance, &Some(dst_vmm), InstanceStateChangeRequest::Migrate( @@ -565,6 +553,7 @@ async fn sim_instance_migrate( mod tests { use super::*; use crate::app::sagas::test_helpers; + use crate::external_api::params; use dropshot::test_util::ClientTestContext; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_project, object_create, @@ -637,7 +626,7 @@ mod tests { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; @@ -706,7 +695,7 @@ mod tests { ), instance: old_instance.clone(), src_vmm: old_vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 9e4e010eeab..b6b78bd43cc 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,8 +10,10 @@ use super::{ instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, }; -use crate::app::instance::InstanceRegisterReason; -use crate::app::instance::InstanceStateChangeError; +use crate::app::instance::{ + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, +}; use crate::app::sagas::declare_saga_actions; use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; @@ -502,17 +504,22 @@ async fn sis_ensure_registered( "instance_id" => %instance_id, "sled_id" => %sled_id); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(instance_id) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; osagactx .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &propolis_id, &vmm_record, @@ -531,6 +538,7 @@ async fn sis_ensure_registered_undo( let params = sagactx.saga_params::()?; let datastore = osagactx.datastore(); let instance_id = InstanceUuid::from_untyped_uuid(params.db_instance.id()); + let propolis_id = sagactx.lookup::("propolis_id")?; let sled_id = sagactx.lookup::("sled_id")?; let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -539,11 +547,12 @@ async fn sis_ensure_registered_undo( info!(osagactx.log(), 
"start saga: unregistering instance from sled"; "instance_id" => %instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id); // Fetch the latest record so that this callee can drive the instance into // a Failed state if the unregister call fails. - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) + let (.., db_instance) = LookupPath::new(&opctx, &datastore) .instance_id(instance_id.into_untyped_uuid()) .fetch() .await @@ -556,7 +565,7 @@ async fn sis_ensure_registered_undo( // returned. if let Err(e) = osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &sled_id) + .instance_ensure_unregistered(&propolis_id, &sled_id) .await { error!(osagactx.log(), @@ -637,7 +646,6 @@ async fn sis_ensure_running( ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; - let datastore = osagactx.datastore(); let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, @@ -652,17 +660,10 @@ async fn sis_ensure_running( "instance_id" => %instance_id, "sled_id" => %sled_id); - let (.., authz_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id.into_untyped_uuid()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; - match osagactx .nexus() .instance_request_state( &opctx, - &authz_instance, &db_instance, &Some(db_vmm), crate::app::instance::InstanceStateChangeRequest::Run, diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 71abe63bbd1..4c4c4deff21 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -30,10 +30,9 @@ //! Nexus' `cpapi_instances_put` internal API endpoint, when a Nexus' //! `instance-watcher` background task *pulls* instance states from sled-agents //! periodically, or as the return value of an API call from Nexus to a -//! sled-agent. When a Nexus receives a new [`SledInstanceState`] from a -//! sled-agent through any of these mechanisms, the Nexus will write any changed -//! state to the `vmm` and/or `migration` tables directly on behalf of the -//! sled-agent. +//! sled-agent. When a Nexus receives a new [`SledVmmState`] from a sled-agent +//! through any of these mechanisms, the Nexus will write any changed state to +//! the `vmm` and/or `migration` tables directly on behalf of the sled-agent. //! //! Although Nexus is technically the party responsible for the database query //! that writes VMM and migration state updates received from sled-agent, it is @@ -236,9 +235,9 @@ //! updates is perhaps the simplest one: _avoiding unnecessary update sagas_. //! The `cpapi_instances_put` API endpoint and instance-watcher background tasks //! handle changes to VMM and migration states by calling the -//! [`notify_instance_updated`] method, which writes the new states to the -//! database and (potentially) starts an update saga. Naively, this method would -//! *always* start an update saga, but remember that --- as we discussed +//! [`process_vmm_update`] method, which writes the new states to the database +//! and (potentially) starts an update saga. Naively, this method would *always* +//! start an update saga, but remember that --- as we discussed //! [above](#background) --- many VMM/migration state changes don't actually //! require modifying the instance record. For example, if an instance's VMM //! transitions from [`VmmState::Starting`] to [`VmmState::Running`], that @@ -271,7 +270,7 @@ //! 
delayed. To improve the timeliness of update sagas, we will also explicitly //! activate the background task at any point where we know that an update saga //! *should* run but we were not able to run it. If an update saga cannot be -//! started, whether by [`notify_instance_updated`], a `start-instance-update` +//! started, whether by [`notify_vmm_updated`], a `start-instance-update` //! saga attempting to start its real saga, or an `instance-update` saga //! chaining into a new one as its last action, the `instance-watcher` //! background task is activated. Similarly, when a `start-instance-update` saga @@ -326,7 +325,8 @@ //! crate::app::db::datastore::DataStore::instance_updater_inherit_lock //! [instance_updater_unlock]: //! crate::app::db::datastore::DataStore::instance_updater_unlock -//! [`notify_instance_updated`]: crate::app::Nexus::notify_instance_updated +//! [`notify_vmm_updated`]: crate::app::Nexus::notify_vmm_updated +//! [`process_vmm_update`]: crate::app::instance::process_vmm_update //! //! [dist-locking]: //! https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html @@ -362,7 +362,7 @@ use nexus_db_queries::{authn, authz}; use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; @@ -388,8 +388,8 @@ pub(crate) use self::start::{Params, SagaInstanceUpdate}; mod destroyed; /// Returns `true` if an `instance-update` saga should be executed as a result -/// of writing the provided [`SledInstanceState`] to the database with the -/// provided [`VmmStateUpdateResult`]. +/// of writing the provided [`SledVmmState`] to the database with the provided +/// [`VmmStateUpdateResult`]. /// /// We determine this only after actually updating the database records, /// because we don't know whether a particular VMM or migration state is @@ -407,8 +407,8 @@ mod destroyed; /// VMM/migration states. 
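A toy distillation of the rule this doc comment describes: the saga decision consumes the *result* of the database write, not the incoming state alone, under the assumption that only a state which was actually persisted can obsolete the instance record. None of these types are the real `SledVmmState` or `VmmStateUpdateResult`:

```rust
#[derive(Clone, Copy)]
enum VmmState {
    Running,
    Destroyed,
    Failed,
}

/// Toy analogue of `VmmStateUpdateResult`: what the database write changed.
struct UpdateResult {
    vmm_updated: bool,
    migration_terminated: bool,
}

fn update_saga_needed(new_state: VmmState, result: &UpdateResult) -> bool {
    // A stale state that updated nothing can never require a saga; a fresh
    // write requires one only when it reached a terminal state.
    let vmm_needs_update = result.vmm_updated
        && matches!(new_state, VmmState::Destroyed | VmmState::Failed);
    vmm_needs_update || result.migration_terminated
}

fn main() {
    let fresh = UpdateResult { vmm_updated: true, migration_terminated: false };
    let stale = UpdateResult { vmm_updated: false, migration_terminated: false };
    assert!(update_saga_needed(VmmState::Destroyed, &fresh));
    assert!(update_saga_needed(VmmState::Failed, &fresh));
    assert!(!update_saga_needed(VmmState::Destroyed, &stale));
    assert!(!update_saga_needed(VmmState::Running, &fresh));
}
```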
pub fn update_saga_needed( log: &slog::Logger, - instance_id: InstanceUuid, - state: &SledInstanceState, + propolis_id: PropolisUuid, + state: &SledVmmState, result: &VmmStateUpdateResult, ) -> bool { // Currently, an instance-update saga is required if (and only if): @@ -443,8 +443,7 @@ pub fn update_saga_needed( debug!(log, "new VMM runtime state from sled agent requires an \ instance-update saga"; - "instance_id" => %instance_id, - "propolis_id" => %state.propolis_id, + "propolis_id" => %propolis_id, "vmm_needs_update" => vmm_needs_update, "migration_in_needs_update" => migration_in_needs_update, "migration_out_needs_update" => migration_out_needs_update, @@ -1403,6 +1402,7 @@ mod test { create_default_ip_pool, create_project, object_create, }; use nexus_test_utils_macros::nexus_test; + use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::internal::nexus::{ MigrationRuntimeState, MigrationState, Migrations, }; @@ -2358,7 +2358,7 @@ mod test { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index b944fb4d2b3..bd3ae62996c 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -39,6 +39,10 @@ pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; pub mod region_replacement_start; +pub mod region_snapshot_replacement_garbage_collect; +pub mod region_snapshot_replacement_start; +pub mod region_snapshot_replacement_step; +pub mod region_snapshot_replacement_step_garbage_collect; pub mod snapshot_create; pub mod snapshot_delete; pub mod test_saga; @@ -190,6 +194,18 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); #[cfg(test)] ::register_actions(&mut registry); diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index d4d455f927e..1bc14914686 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -26,12 +26,13 @@ //! ``` //! //! The first thing this saga does is set itself as the "operating saga" for the -//! request, and change the state to "Allocating". Then, it performs the following -//! steps: +//! request, and change the state to "Allocating". Then, it performs the +//! following steps: //! //! 1. Allocate a new region //! -//! 2. For the affected Volume, swap the region being replaced with the new region. +//! 2. For the affected Volume, swap the region being replaced with the new +//! region. //! //! 3. Create a fake volume that can be later deleted with the region being //! replaced. 
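Since `find_only_new_region` is now shared by both replacement sagas (see the `common_storage` hunk earlier in this patch), a self-contained sketch of the idea may help: diff the post-allocation list against the pre-allocation snapshot and insist on exactly one new entry. Plain values stand in for the `(Dataset, Region)` pairs, and only region equality is considered, mirroring the comment in the real helper:

```rust
/// Standalone sketch of `find_only_new_region`: return the single entry
/// present in `new` but not in `existing`, or `None` if that is ambiguous.
fn find_only_new<T: PartialEq>(existing: &[T], new: Vec<T>) -> Option<T> {
    let mut only_new: Vec<T> =
        new.into_iter().filter(|r| !existing.contains(r)).collect();
    if only_new.len() == 1 {
        only_new.pop()
    } else {
        // Zero or multiple candidates: the caller treats this as an error.
        None
    }
}

fn main() {
    assert_eq!(find_only_new(&[1, 2, 3], vec![1, 2, 3, 4]), Some(4));
    // Two new regions appeared: ambiguous, so no answer.
    assert_eq!(find_only_new(&[1], vec![1, 2, 3]), None);
}
```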
@@ -48,6 +49,7 @@ use super::{ ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::sagas::common_storage::find_only_new_region; use crate::app::sagas::declare_saga_actions; use crate::app::RegionAllocationStrategy; use crate::app::{authn, db}; @@ -57,7 +59,6 @@ use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::CrucibleOpts; use sled_agent_client::types::VolumeConstructionRequest; -use slog::Logger; use std::net::SocketAddrV6; use steno::ActionError; use steno::Node; @@ -285,36 +286,6 @@ async fn srrs_alloc_new_region( Ok(datasets_and_regions) } -fn find_only_new_region( - log: &Logger, - existing_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, - new_datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, -) -> Option<(db::model::Dataset, db::model::Region)> { - // Only filter on whether or not a Region is in the existing list! Datasets - // can change values (like size_used) if this saga interleaves with other - // saga runs of the same type. - let mut dataset_and_region: Vec<(db::model::Dataset, db::model::Region)> = - new_datasets_and_regions - .into_iter() - .filter(|(_, r)| { - !existing_datasets_and_regions.iter().any(|(_, er)| er == r) - }) - .collect(); - - if dataset_and_region.len() != 1 { - error!( - log, - "find_only_new_region saw dataset_and_region len {}: {:?}", - dataset_and_region.len(), - dataset_and_region, - ); - - None - } else { - dataset_and_region.pop() - } -} - async fn srrs_alloc_new_region_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { @@ -776,7 +747,6 @@ pub(crate) mod test { }; use chrono::Utc; use nexus_db_model::Dataset; - use nexus_db_model::DatasetKind; use nexus_db_model::Region; use nexus_db_model::RegionReplacement; use nexus_db_model::RegionReplacementState; @@ -787,6 +757,7 @@ pub(crate) mod test { use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils_macros::nexus_test; use nexus_types::identity::Asset; + use omicron_common::api::internal::shared::DatasetKind; use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; diff --git a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 00000000000..e3c5143a680 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,326 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Clean up the volume that stashes the target replaced during the region +//! snapshot replacement start saga. After that's done, change the region +//! snapshot replacement state to Running. This saga handles the following +//! region snapshot replacement request state transitions: +//! +//! ```text +//! ReplacementDone <-- +//! | +//! | | +//! v | +//! | +//! DeletingOldVolume -- +//! +//! | +//! v +//! +//! Running +//! ``` +//! +//! See the documentation for the "region snapshot replacement step" saga for +//! the next step(s) in the process. 
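The diagram above amounts to a small state machine. Here is an illustrative (non-Nexus) encoding of the forward transitions this saga performs; unwinding moves `DeletingOldVolume` back to `ReplacementDone`:

```rust
/// Illustrative encoding of the request states this saga touches. The names
/// mirror the diagram; everything else is simplified.
#[derive(Clone, Copy, Debug)]
enum ReplacementState {
    ReplacementDone,
    DeletingOldVolume,
    Running,
}

/// Forward transitions performed by the garbage-collect saga.
fn next(state: ReplacementState) -> Option<ReplacementState> {
    use ReplacementState::*;
    match state {
        ReplacementDone => Some(DeletingOldVolume),
        DeletingOldVolume => Some(Running),
        Running => None, // terminal for this saga
    }
}

fn main() {
    let mut state = ReplacementState::ReplacementDone;
    while let Some(n) = next(state) {
        println!("{state:?} -> {n:?}");
        state = n;
    }
}
```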
+ +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement garbage collect saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the snapshot that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub old_snapshot_volume_id: Uuid, + pub request: db::model::RegionSnapshotReplacement, +} + +// region snapshot replacement garbage collect saga: actions + +declare_saga_actions! { + region_snapshot_replacement_garbage_collect; + SET_SAGA_ID -> "unused_1" { + + rsrgs_set_saga_id + - rsrgs_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_2" { + + rsrgs_update_request_record + } +} + +// region snapshot replacement garbage collect saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementGarbageCollect; +impl NexusSaga for SagaRegionSnapshotReplacementGarbageCollect { + const NAME: &'static str = "region-snapshot-replacement-garbage-collect"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_garbage_collect_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.old_snapshot_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement garbage collect saga: action implementations + +async fn rsrgs_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "deleting old volume" + // state to block out other sagas that will be triggered for the same + // request. 
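The "block out other sagas" comment just above describes a claim taken through the request record itself: the state move and the operating saga id are written together, so a second saga finds the record busy while a replay by the same saga stays idempotent. A hedged in-memory sketch, with `u64` standing in for the saga id and a struct for the database row:

```rust
#[derive(Clone, Copy)]
enum State {
    ReplacementDone,
    DeletingOldVolume,
}

struct Request {
    state: State,
    operating_saga_id: Option<u64>,
}

/// Claim the request for `saga_id`. Only one saga can hold the claim; a
/// replay of the same saga node is accepted so the action stays idempotent.
fn claim(req: &mut Request, saga_id: u64) -> Result<(), &'static str> {
    match (req.state, req.operating_saga_id) {
        (State::ReplacementDone, None) => {
            req.state = State::DeletingOldVolume;
            req.operating_saga_id = Some(saga_id);
            Ok(())
        }
        (State::DeletingOldVolume, Some(id)) if id == saga_id => Ok(()),
        _ => Err("request is claimed by another saga"),
    }
}

fn main() {
    let mut req =
        Request { state: State::ReplacementDone, operating_saga_id: None };
    assert!(claim(&mut req, 1).is_ok());
    assert!(claim(&mut req, 1).is_ok()); // idempotent replay
    assert!(claim(&mut req, 2).is_err()); // locked out
}
```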
+ osagactx + .datastore() + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrgs_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn rsrgs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Now that the snapshot volume has been deleted, update the replacement + // request record to 'Running'. There is no undo step for this, it should + // succeed idempotently. + + datastore + .set_region_snapshot_replacement_running( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::app::sagas::region_snapshot_replacement_garbage_collect::{ + Params, SagaRegionSnapshotReplacementGarbageCollect, + }; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_snapshot_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_snapshot_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_snapshot_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_snapshot_volume_id, volume_data)) + .await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + + datastore + 
.insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request.clone(), + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Run the saga + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + old_snapshot_volume_id, + request: request.clone(), + }; + + let _output = nexus + .sagas + .saga_execute::(params) + .await + .unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementState::Running + ); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_snapshot_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs new file mode 100644 index 00000000000..941899d862d --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -0,0 +1,1134 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! In the same way that read/write regions need to be replaced when a physical +//! disk is expunged, read-only regions need to be replaced too: Volumes are in +//! a similarly degraded state when the read-only Downstairs have gone away, and +//! remain in this degraded state until a new Region replaces the one that is +//! gone. +//! +//! It's this saga's responsibility to start that replacement process. This saga +//! handles the following region snapshot replacement request state transitions: +//! +//! ```text +//! Requested <-- +//! | +//! | | +//! v | +//! | +//! Allocating -- +//! +//! | +//! v +//! +//! ReplacementDone +//! ``` +//! +//! The first thing this saga does is set itself as the "operating saga" for the +//! request, and change the state to "Allocating". Then, it performs the +//! following steps: +//! +//! 1. Allocate a new region +//! +//! 2. Create a blank volume that can be later deleted to stash the snapshot +//! being replaced. This is populated in the `volume_replace_snapshot` +//! transaction so that `volume_references` for the corresponding region +//! snapshot remains accurate. +//! +//! 3. For the affected Volume, swap the snapshot being replaced with the new +//! region. +//! +//! 4. Update the region snapshot replacement request by clearing the operating +//! saga id and changing the state to "ReplacementDone". +//! +//! Any unwind will place the state back into Requested. +//! +//! See the documentation for the "region snapshot replacement garbage collect" +//! saga for the next step in the process. 
+ +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::db::datastore::ExistingTarget; +use crate::app::db::datastore::RegionAllocationFor; +use crate::app::db::datastore::RegionAllocationParameters; +use crate::app::db::datastore::ReplacementTarget; +use crate::app::db::datastore::VolumeToDelete; +use crate::app::db::datastore::VolumeWithTarget; +use crate::app::db::lookup::LookupPath; +use crate::app::sagas::common_storage::find_only_new_region; +use crate::app::sagas::declare_saga_actions; +use crate::app::RegionAllocationStrategy; +use crate::app::{authn, db}; +use nexus_types::identity::Asset; +use nexus_types::identity::Resource; +use omicron_common::api::external::Error; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::types::CrucibleOpts; +use sled_agent_client::types::VolumeConstructionRequest; +use std::net::SocketAddrV6; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement start saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionSnapshotReplacement, + pub allocation_strategy: RegionAllocationStrategy, +} + +// region snapshot replacement start saga: actions + +declare_saga_actions! { + region_snapshot_replacement_start; + SET_SAGA_ID -> "unused_1" { + + rsrss_set_saga_id + - rsrss_set_saga_id_undo + } + GET_ALLOC_REGION_PARAMS -> "alloc_region_params" { + + rsrss_get_alloc_region_params + } + ALLOC_NEW_REGION -> "new_datasets_and_regions" { + + rsrss_alloc_new_region + - rsrss_alloc_new_region_undo + } + FIND_NEW_REGION -> "new_dataset_and_region" { + + rsrss_find_new_region + } + NEW_REGION_ENSURE -> "ensured_dataset_and_region" { + + rsrss_new_region_ensure + - rsrss_new_region_ensure_undo + } + GET_OLD_SNAPSHOT_VOLUME_ID -> "old_snapshot_volume_id" { + + rsrss_get_old_snapshot_volume_id + } + CREATE_FAKE_VOLUME -> "unused_2" { + + rsrss_create_fake_volume + - rsrss_create_fake_volume_undo + } + REPLACE_SNAPSHOT_IN_VOLUME -> "unused_3" { + + rsrss_replace_snapshot_in_volume + - rsrss_replace_snapshot_in_volume_undo + } + UPDATE_REQUEST_RECORD -> "unused_4" { + + rsrss_update_request_record + } +} + +// region snapshot replacement start saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStart; +impl NexusSaga for SagaRegionSnapshotReplacementStart { + const NAME: &'static str = "region-snapshot-replacement-start"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_start_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(Node::action( + "new_volume_id", + "GenerateNewVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + builder.append(get_alloc_region_params_action()); + builder.append(alloc_new_region_action()); + builder.append(find_new_region_action()); + builder.append(new_region_ensure_action()); + builder.append(get_old_snapshot_volume_id_action()); + builder.append(create_fake_volume_action()); + builder.append(replace_snapshot_in_volume_action()); + builder.append(update_request_record_action()); + + Ok(builder.build()?) 
+ } +} + +// region snapshot replacement start saga: action implementations + +async fn rsrss_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "allocating" state to + // block out other sagas that will be triggered for the same request. This + // avoids Nexus allocating a bunch of replacement read-only regions only to + // unwind all but one. + osagactx + .datastore() + .set_region_snapshot_replacement_allocating( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_allocating( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +#[derive(Debug, Deserialize, Serialize)] +struct AllocRegionParams { + block_size: u64, + blocks_per_extent: u64, + extent_count: u64, + current_allocated_regions: Vec<(db::model::Dataset, db::model::Region)>, + snapshot_id: Uuid, + snapshot_volume_id: Uuid, +} + +async fn rsrss_get_alloc_region_params( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Look up the existing snapshot + let (.., db_snapshot) = LookupPath::new(&opctx, &osagactx.datastore()) + .snapshot_id(params.request.old_snapshot_id) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + // Find the region to replace + let db_region = osagactx + .datastore() + .get_region(params.request.old_region_id) + .await + .map_err(ActionError::action_failed)?; + + let current_allocated_regions = osagactx + .datastore() + .get_allocated_regions(db_snapshot.volume_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(AllocRegionParams { + block_size: db_region.block_size().to_bytes(), + blocks_per_extent: db_region.blocks_per_extent(), + extent_count: db_region.extent_count(), + current_allocated_regions, + snapshot_id: db_snapshot.id(), + snapshot_volume_id: db_snapshot.volume_id, + }) +} + +async fn rsrss_alloc_new_region( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + // Request an additional region for this snapshot volume. It's important + // _not_ to delete the existing snapshot first, as (if it's still there) + // then the Crucible agent could reuse the allocated port and cause trouble. 
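+    //
+    // Worked example of the accounting below (numbers hypothetical): if
+    // `current_allocated_regions` holds the 3 read-only regions backing the
+    // snapshot volume, the call below asks the allocator for a total
+    // redundancy of 3 + 1 = 4, and the single new region is the difference.
+    // Phrasing the request as a total, rather than "allocate one more",
+    // means a re-executed node asks for the same total instead of stacking
+    // extra regions.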
+ let datasets_and_regions = osagactx + .datastore() + .arbitrary_region_allocate( + &opctx, + RegionAllocationFor::SnapshotVolume { + volume_id: alloc_region_params.snapshot_volume_id, + snapshot_id: alloc_region_params.snapshot_id, + }, + RegionAllocationParameters::FromRaw { + block_size: alloc_region_params.block_size, + blocks_per_extent: alloc_region_params.blocks_per_extent, + extent_count: alloc_region_params.extent_count, + }, + ¶ms.allocation_strategy, + alloc_region_params.current_allocated_regions.len() + 1, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(datasets_and_regions) +} + +async fn rsrss_alloc_new_region_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + let maybe_dataset_and_region = find_only_new_region( + log, + alloc_region_params.current_allocated_regions, + sagactx.lookup::>( + "new_datasets_and_regions", + )?, + ); + + // It should be guaranteed that if rsrss_alloc_new_region succeeded then it + // would have bumped the region redundancy, so we should see something here. + // Guard against the case anyway. + if let Some(dataset_and_region) = maybe_dataset_and_region { + let (_, region) = dataset_and_region; + osagactx + .datastore() + .regions_hard_delete(log, vec![region.id()]) + .await?; + } else { + warn!(&log, "maybe_dataset_and_region is None!"); + } + + Ok(()) +} + +async fn rsrss_find_new_region( + sagactx: NexusActionContext, +) -> Result<(db::model::Dataset, db::model::Region), ActionError> { + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + let alloc_region_params = + sagactx.lookup::("alloc_region_params")?; + + let maybe_dataset_and_region = find_only_new_region( + log, + alloc_region_params.current_allocated_regions, + sagactx.lookup::>( + "new_datasets_and_regions", + )?, + ); + + let Some(dataset_and_region) = maybe_dataset_and_region else { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "expected dataset and region, saw {:?}!", + maybe_dataset_and_region, + ), + ))); + }; + + Ok(dataset_and_region) +} + +async fn rsrss_new_region_ensure( + sagactx: NexusActionContext, +) -> Result< + (nexus_db_model::Dataset, crucible_agent_client::types::Region), + ActionError, +> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let log = osagactx.log(); + + // With a list of datasets and regions to ensure, other sagas need to have a + // separate no-op forward step for the undo action to ensure that the undo + // step occurs in the case that the ensure partially fails. Here this is not + // required, there's only one dataset and region. + let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ))); + }; + + let (new_dataset, new_region) = new_dataset_and_region; + + // Currently, the repair port is set using a fixed offset above the + // downstairs port. 
Once this goes away, Nexus will require a way to query + // for the repair port! + + let mut source_repair_addr: SocketAddrV6 = + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + return Err(ActionError::action_failed(format!( + "error parsing region_snapshot.snapshot_addr: {e}" + ))); + } + }; + + source_repair_addr.set_port( + source_repair_addr.port() + crucible_common::REPAIR_PORT_OFFSET, + ); + + let ensured_region = osagactx + .nexus() + .ensure_region_in_dataset( + log, + &new_dataset, + &new_region, + Some(source_repair_addr.to_string()), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok((new_dataset, ensured_region)) +} + +async fn rsrss_new_region_ensure_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + warn!(log, "rsrss_new_region_ensure_undo: Deleting crucible regions"); + + let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + osagactx + .nexus() + .delete_crucible_regions(log, vec![new_dataset_and_region]) + .await?; + + Ok(()) +} + +async fn rsrss_get_old_snapshot_volume_id( + sagactx: NexusActionContext, +) -> Result { + // Save the snapshot's original volume ID, because we'll be altering it and + // need the original + + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., db_snapshot) = LookupPath::new(&opctx, &osagactx.datastore()) + .snapshot_id(params.request.old_snapshot_id) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + Ok(db_snapshot.volume_id) +} + +async fn rsrss_create_fake_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // Create a fake volume record for the old snapshot target. This will be + // deleted after snapshot replacement has finished. It can be completely + // blank here, it will be replaced by `volume_replace_snapshot`. + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: new_volume_id, + target: vec![], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) + })?; + + let volume = db::model::Volume::new(new_volume_id, volume_data); + + osagactx + .datastore() + .volume_create(volume) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_create_fake_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + // Delete the fake volume. 
+ + let new_volume_id = sagactx.lookup::("new_volume_id")?; + osagactx.datastore().volume_hard_delete(new_volume_id).await?; + + Ok(()) +} + +#[derive(Debug)] +struct ReplaceParams { + old_volume_id: Uuid, + old_snapshot_address: SocketAddrV6, + new_region_address: SocketAddrV6, + new_volume_id: Uuid, +} + +async fn get_replace_params( + sagactx: &NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + params.request.old_dataset_id, + params.request.old_region_id, + params.request.old_snapshot_id, + ))); + }; + + let old_snapshot_address: SocketAddrV6 = + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + return Err(ActionError::action_failed(format!( + "parsing {} as SocketAddrV6 failed: {e}", + region_snapshot.snapshot_addr, + ))); + } + }; + + let (new_dataset, ensured_region) = sagactx.lookup::<( + db::model::Dataset, + crucible_agent_client::types::Region, + )>( + "ensured_dataset_and_region", + )?; + + let Some(new_dataset_address) = new_dataset.address() else { + return Err(ActionError::action_failed(format!( + "dataset {} does not have an address!", + new_dataset.id(), + ))); + }; + + let new_region_address = SocketAddrV6::new( + *new_dataset_address.ip(), + ensured_region.port_number, + 0, + 0, + ); + + let old_volume_id = sagactx.lookup::("old_snapshot_volume_id")?; + + // Return the replacement parameters for the forward action case - the undo + // will swap the existing and replacement target + Ok(ReplaceParams { + old_volume_id, + old_snapshot_address, + new_region_address, + new_volume_id, + }) +} + +async fn rsrss_replace_snapshot_in_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + let replacement_params = get_replace_params(&sagactx).await?; + + info!( + log, + "replacing {} with {} in volume {}", + replacement_params.old_snapshot_address, + replacement_params.new_region_address, + replacement_params.old_volume_id, + ); + + // `volume_replace_snapshot` will swap the old snapshot for the new region. + // No repair or reconcilation needs to occur after this. + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(replacement_params.old_volume_id), + ExistingTarget(replacement_params.old_snapshot_address), + ReplacementTarget(replacement_params.new_region_address), + VolumeToDelete(replacement_params.new_volume_id), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_replace_snapshot_in_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // Undo the forward action's volume_replace_snapshot call by swapping the + // existing target and replacement target parameters. + + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + + let replacement_params = get_replace_params(&sagactx).await?; + + // Note the old and new are _not_ swapped in this log message! 
The intention + // is that someone reviewing the logs could search for "replacing UUID with + // UUID in volume UUID" and get (in the case of no re-execution) two + // results. + info!( + log, + "undo: replacing {} with {} in volume {}", + replacement_params.old_snapshot_address, + replacement_params.new_region_address, + replacement_params.old_volume_id, + ); + + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(replacement_params.old_volume_id), + ExistingTarget(replacement_params.new_region_address), + ReplacementTarget(replacement_params.old_snapshot_address), + VolumeToDelete(replacement_params.new_volume_id), + ) + .await?; + + Ok(()) +} + +async fn rsrss_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + let new_dataset_and_region = sagactx + .lookup::<(db::model::Dataset, db::model::Region)>( + "new_dataset_and_region", + )?; + + let new_region_id = new_dataset_and_region.1.id(); + + let old_region_volume_id = sagactx.lookup::("new_volume_id")?; + + // Now that the region has been ensured and the construction request has + // been updated, update the replacement request record to 'ReplacementDone' + // and clear the operating saga id. There is no undo step for this, it + // should succeed idempotently. + datastore + .set_region_snapshot_replacement_replacement_done( + &opctx, + params.request.id, + saga_id, + new_region_id, + old_region_volume_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::{ + app::db::lookup::LookupPath, app::db::DataStore, + app::saga::create_saga_dag, + app::sagas::region_snapshot_replacement_start::*, + app::sagas::test_helpers::test_opctx, app::RegionAllocationStrategy, + }; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::create_disk; + use nexus_test_utils::resource_helpers::create_project; + use nexus_test_utils::resource_helpers::create_snapshot; + use nexus_test_utils::resource_helpers::DiskTest; + use nexus_test_utils_macros::nexus_test; + use nexus_types::external_api::views; + use nexus_types::identity::Asset; + use sled_agent_client::types::VolumeConstructionRequest; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const DISK_NAME: &str = "my-disk"; + const SNAPSHOT_NAME: &str = "my-snap"; + const PROJECT_NAME: &str = "springfield-squidport"; + + async fn prepare_for_test( + cptestctx: &ControlPlaneTestContext, + ) -> PrepareResult { + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + assert_eq!(region_allocations(&datastore).await, 0); + + let mut disk_test = DiskTest::new(cptestctx).await; + disk_test.add_zpool_with_dataset(cptestctx.first_sled()).await; + + assert_eq!(region_allocations(&datastore).await, 0); + + let _project_id = + create_project(&client, PROJECT_NAME).await.identity.id; + + assert_eq!(region_allocations(&datastore).await, 0); + + // Create a disk + let disk = 
create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + assert_eq!(region_allocations(&datastore).await, 3); + + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + // Create a snapshot + let snapshot = + create_snapshot(&client, PROJECT_NAME, DISK_NAME, SNAPSHOT_NAME) + .await; + + assert_eq!(region_allocations(&datastore).await, 6); + + let snapshot_id = snapshot.identity.id; + let (.., db_snapshot) = LookupPath::new(&opctx, &datastore) + .snapshot_id(snapshot_id) + .fetch() + .await + .unwrap_or_else(|_| { + panic!("test snapshot {:?} should exist", snapshot_id) + }); + + PrepareResult { db_disk, snapshot, db_snapshot } + } + + struct PrepareResult { + db_disk: nexus_db_model::Disk, + snapshot: views::Snapshot, + db_snapshot: nexus_db_model::Snapshot, + } + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_start_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + // Assert disk has three allocated regions + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + // Assert the snapshot has zero allocated regions + let snapshot_id = snapshot.identity.id; + + let snapshot_allocated_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + assert_eq!(snapshot_allocated_regions.len(), 0); + + // Replace one of the snapshot's targets + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + // Manually insert the region snapshot replacement request + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Run the region snapshot replacement start saga + let dag = + create_saga_dag::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + }) + .unwrap(); + + let runnable_saga = nexus.sagas.saga_prepare(dag).await.unwrap(); + + // Actually run the saga + runnable_saga.run_to_completion().await.unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementState::ReplacementDone + ); + assert!(result.new_region_id.is_some()); + assert!(result.operating_saga_id.is_none()); + + // Validate number of regions for disk didn't change + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + // Validate that the snapshot now has one allocated region + let snapshot_allocated_datasets_and_regions = datastore + .get_allocated_regions(db_snapshot.volume_id) + .await + .unwrap(); + + assert_eq!(snapshot_allocated_datasets_and_regions.len(), 1); + + let (_, snapshot_allocated_region) = + 
&snapshot_allocated_datasets_and_regions[0]; + + // Validate that the snapshot's volume contains this newly allocated + // region + + let new_region_addr = datastore + .region_addr(snapshot_allocated_region.id()) + .await + .unwrap() + .unwrap(); + + let volumes = datastore + .find_volumes_referencing_socket_addr( + &opctx, + new_region_addr.into(), + ) + .await + .unwrap(); + + assert_eq!(volumes.len(), 1); + assert_eq!(volumes[0].id(), db_snapshot.volume_id); + } + + fn new_test_params( + opctx: &OpContext, + request: &RegionSnapshotReplacement, + ) -> Params { + Params { + serialized_authn: Serialized::for_opctx(opctx), + request: request.clone(), + allocation_strategy: RegionAllocationStrategy::Random { + seed: None, + }, + } + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + request: &RegionSnapshotReplacement, + affected_volume_original: &Volume, + ) { + let datastore = cptestctx.server.server_context().nexus.datastore(); + + crate::app::sagas::test_helpers::assert_no_failed_undo_steps( + &cptestctx.logctx.log, + datastore, + ) + .await; + + // For these tests, six provisioned regions exist: three for the + // original disk, and three for the (currently unused) snapshot + // destination volume + assert_eq!(region_allocations(&datastore).await, 6); + assert_region_snapshot_replacement_request_untouched( + cptestctx, &datastore, &request, + ) + .await; + assert_volume_untouched(&datastore, &affected_volume_original).await; + } + + async fn region_allocations(datastore: &DataStore) -> usize { + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncRunQueryDsl; + use async_bb8_diesel::AsyncSimpleConnection; + use diesel::QueryDsl; + use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; + use nexus_db_queries::db::schema::region::dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + conn.transaction_async(|conn| async move { + // Selecting all regions requires a full table scan + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); + + dsl::region + .count() + .get_result_async(&conn) + .await + .map(|x: i64| x as usize) + }) + .await + .unwrap() + } + + async fn assert_region_snapshot_replacement_request_untouched( + cptestctx: &ControlPlaneTestContext, + datastore: &DataStore, + request: &RegionSnapshotReplacement, + ) { + let opctx = test_opctx(cptestctx); + let db_request = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!(db_request.new_region_id, None); + assert_eq!( + db_request.replacement_state, + RegionSnapshotReplacementState::Requested + ); + assert_eq!(db_request.operating_saga_id, None); + } + + async fn assert_volume_untouched( + datastore: &DataStore, + affected_volume_original: &Volume, + ) { + let affected_volume = datastore + .volume_get(affected_volume_original.id()) + .await + .unwrap() + .unwrap(); + + let actual: VolumeConstructionRequest = + serde_json::from_str(&affected_volume.data()).unwrap(); + + let expected: VolumeConstructionRequest = + serde_json::from_str(&affected_volume_original.data()).unwrap(); + + assert_eq!(actual, expected); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot } = + prepare_for_test(cptestctx).await; + + let log = &cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = 
nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + let affected_volume_original = + datastore.volume_get(db_snapshot.volume_id).await.unwrap().unwrap(); + + verify_clean_slate(&cptestctx, &request, &affected_volume_original) + .await; + + crate::app::sagas::test_helpers::action_failure_can_unwind_idempotently::< + SagaRegionSnapshotReplacementStart, + _, + _ + >( + nexus, + || Box::pin(async { new_test_params(&opctx, &request) }), + || Box::pin(async { + verify_clean_slate( + &cptestctx, + &request, + &affected_volume_original, + ).await; + }), + log + ).await; + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let PrepareResult { db_disk, snapshot, db_snapshot: _ } = + prepare_for_test(cptestctx).await; + + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = test_opctx(cptestctx); + + let disk_allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(disk_allocated_regions.len(), 3); + + let region: &nexus_db_model::Region = &disk_allocated_regions[0].1; + let snapshot_id = snapshot.identity.id; + + let region_snapshot = datastore + .region_snapshot_get(region.dataset_id(), region.id(), snapshot_id) + .await + .unwrap() + .unwrap(); + + let request = + RegionSnapshotReplacement::for_region_snapshot(®ion_snapshot); + + datastore + .insert_region_snapshot_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Build the saga DAG with the provided test parameters + let params = new_test_params(&opctx, &request); + let dag = create_saga_dag::(params) + .unwrap(); + crate::app::sagas::test_helpers::actions_succeed_idempotently( + nexus, dag, + ) + .await; + } +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step.rs b/nexus/src/app/sagas/region_snapshot_replacement_step.rs new file mode 100644 index 00000000000..600bb155bff --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_step.rs @@ -0,0 +1,603 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Region snapshot replacement is distinct from region replacement: replacing +//! parts of a volume's read-only parent (and all the layers under it) is easier +//! because this does _not_ incur a live repair or reconciliation. Each part of +//! a read-only region set contains the same data that will never be modified. +//! +//! A region snapshot replacement request starts off in the "Requested" state, +//! just like a region replacement request. A background task will search for +//! region snapshot replacement requests in this state and trigger the "region +//! snapshot replacement start" saga. This will allocate a new region to replace +//! 
the requested one, and modify the snapshot VCR accordingly. If any disks are
+//! then created using that snapshot as a source, they will have the
+//! replacement and will not need a replace request.
+//!
+//! However, any past use of that snapshot as a source means that the Volume
+//! created from that will have a copy of the unmodified snapshot Volume as a
+//! read-only parent. Any construction of the Volume will be referencing the
+//! replaced region snapshot (which could be gone if it is expunged). It is
+//! this saga's responsibility to update all Volumes that reference the region
+//! snapshot being replaced, and send a replacement request to any Upstairs
+//! that were constructed.
+//!
+//! Some difficulty comes from the requirement to notify existing Upstairs
+//! that reference the replaced read-only part, but even this is not as
+//! difficult as region replacement: Nexus does not have to continually
+//! monitor and drive either live repair or reconciliation, just ensure that
+//! the read-only replacement occurs. Read-only replacements should be
+//! basically instantaneous.
+//!
+//! A replace request only needs to be done once per Upstairs that has the old
+//! reference. This is done as a "region snapshot replacement step", and once
+//! all those are done, the region snapshot replacement request can be
+//! "completed".
+//!
+//! Region snapshot replacement steps need to be written into the database and
+//! have an associated state and operating saga id for the same reason that
+//! region snapshot replacement requests do: multiple background tasks will
+//! invoke multiple sagas, and there needs to be some exclusive access.
+//!
+//! See the documentation for the "region snapshot replacement step garbage
+//! collect" saga for the next step in the process.
+
+use super::{
+    ActionRegistry, NexusActionContext, NexusSaga, SagaInitError,
+    ACTION_GENERATE_ID,
+};
+use crate::app::db::datastore::ExistingTarget;
+use crate::app::db::datastore::ReplacementTarget;
+use crate::app::db::datastore::VolumeToDelete;
+use crate::app::db::datastore::VolumeWithTarget;
+use crate::app::db::lookup::LookupPath;
+use crate::app::sagas::declare_saga_actions;
+use crate::app::{authn, authz, db};
+use nexus_db_model::VmmState;
+use nexus_types::identity::Resource;
+use omicron_common::api::external::Error;
+use propolis_client::types::ReplaceResult;
+use serde::Deserialize;
+use serde::Serialize;
+use sled_agent_client::types::CrucibleOpts;
+use sled_agent_client::types::VolumeConstructionRequest;
+use std::net::SocketAddrV6;
+use steno::ActionError;
+use steno::Node;
+use uuid::Uuid;
+
+// region snapshot replacement step saga: input parameters
+
+#[derive(Debug, Deserialize, Serialize)]
+pub(crate) struct Params {
+    pub serialized_authn: authn::saga::Serialized,
+    pub request: db::model::RegionSnapshotReplacementStep,
+}
+
+// region snapshot replacement step saga: actions
+
+declare_saga_actions!
{ + region_snapshot_replacement_step; + SET_SAGA_ID -> "unused_1" { + + rsrss_set_saga_id + - rsrss_set_saga_id_undo + } + CREATE_REPLACE_PARAMS -> "replace_params" { + + rsrss_create_replace_params + } + CREATE_FAKE_VOLUME -> "unused_2" { + + rssrs_create_fake_volume + - rssrs_create_fake_volume_undo + } + REPLACE_SNAPSHOT_IN_VOLUME -> "unused_3" { + + rsrss_replace_snapshot_in_volume + - rsrss_replace_snapshot_in_volume_undo + } + NOTIFY_UPSTAIRS -> "unused_4" { + + rsrss_notify_upstairs + } + UPDATE_REQUEST_RECORD -> "unused_5" { + + rsrss_update_request_record + } +} + +// region snapshot replacement step saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStep; +impl NexusSaga for SagaRegionSnapshotReplacementStep { + const NAME: &'static str = "region-snapshot-replacement-step"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_step_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(Node::action( + "new_volume_id", + "GenerateNewVolumeId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + builder.append(create_replace_params_action()); + builder.append(create_fake_volume_action()); + builder.append(replace_snapshot_in_volume_action()); + builder.append(notify_upstairs_action()); + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement step saga: action implementations + +async fn rsrss_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "running" state to + // block out other sagas that will be triggered for the same request. 
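+    //
+    // Conceptually this is a guarded update (schema names illustrative, not
+    // the real table definition):
+    //
+    //   UPDATE region_snapshot_replacement_step
+    //   SET replacement_state = 'running', operating_saga_id = <saga_id>
+    //   WHERE id = <request_id>
+    //     AND replacement_state = 'requested'
+    //     AND operating_saga_id IS NULL;
+    //
+    // Two sagas racing on the same step record cannot both match the WHERE
+    // clause, so only one becomes the operating saga.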
+ + osagactx + .datastore() + .set_region_snapshot_replacement_step_running( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_step_running( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +#[derive(Debug, Serialize, Deserialize)] +struct ReplaceParams { + old_snapshot_address: SocketAddrV6, + new_region_address: SocketAddrV6, +} + +async fn rsrss_create_replace_params( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // look up region snapshot replace request by id + + let region_snapshot_replace_request = osagactx + .datastore() + .get_region_snapshot_replacement_request_by_id( + &opctx, + params.request.request_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let region_snapshot = osagactx + .datastore() + .region_snapshot_get( + region_snapshot_replace_request.old_dataset_id, + region_snapshot_replace_request.old_region_id, + region_snapshot_replace_request.old_snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + + let Some(region_snapshot) = region_snapshot else { + return Err(ActionError::action_failed(format!( + "region snapshot {} {} {} deleted!", + region_snapshot_replace_request.old_dataset_id, + region_snapshot_replace_request.old_region_id, + region_snapshot_replace_request.old_snapshot_id, + ))); + }; + + let old_snapshot_address: SocketAddrV6 = + match region_snapshot.snapshot_addr.parse() { + Ok(addr) => addr, + + Err(e) => { + return Err(ActionError::action_failed(format!( + "parsing {} as SocketAddrV6 failed: {e}", + region_snapshot.snapshot_addr, + ))); + } + }; + + let Some(new_region_id) = region_snapshot_replace_request.new_region_id + else { + return Err(ActionError::action_failed(format!( + "request {} does not have a new_region_id!", + region_snapshot_replace_request.id, + ))); + }; + + let new_region_address = osagactx + .nexus() + .region_addr(&log, new_region_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(ReplaceParams { old_snapshot_address, new_region_address }) +} + +async fn rssrs_create_fake_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // Create a fake volume record for the old snapshot target. This will be + // deleted after region snapshot replacement step saga has finished, and the + // region replacement snapshot gc step has run. It can be completely blank + // here, it will be replaced by `volume_replace_snapshot`. 
+ + let volume_construction_request = VolumeConstructionRequest::Volume { + id: new_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: new_volume_id, + target: vec![], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&volume_construction_request) + .map_err(|e| { + ActionError::action_failed(Error::internal_error(&e.to_string())) + })?; + + let volume = db::model::Volume::new(new_volume_id, volume_data); + + osagactx + .datastore() + .volume_create(volume) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rssrs_create_fake_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + + // Delete the fake volume. + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + osagactx.datastore().volume_hard_delete(new_volume_id).await?; + + Ok(()) +} + +async fn rsrss_replace_snapshot_in_volume( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let replace_params = sagactx.lookup::("replace_params")?; + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // `volume_replace_snapshot` will swap the old snapshot for the new region. + // No repair or reconcilation needs to occur after this. + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(params.request.volume_id), + ExistingTarget(replace_params.old_snapshot_address), + ReplacementTarget(replace_params.new_region_address), + VolumeToDelete(new_volume_id), + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrss_replace_snapshot_in_volume_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let replace_params = sagactx.lookup::("replace_params")?; + + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + osagactx + .datastore() + .volume_replace_snapshot( + VolumeWithTarget(params.request.volume_id), + ExistingTarget(replace_params.new_region_address), + ReplacementTarget(replace_params.old_snapshot_address), + VolumeToDelete(new_volume_id), + ) + .await?; + + Ok(()) +} + +async fn rsrss_notify_upstairs( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let log = sagactx.user_data().log(); + + // Make an effort to notify a Propolis if one was booted for this volume. + // This is best effort: if there is a failure, this saga will unwind and be + // triggered again for the same request. If there is no Propolis booted for + // this volume, then there's nothing to be done: any future Propolis will + // receive the updated Volume. + // + // Unlike for region replacement, there's no step required here if there + // isn't an active Propolis: any Upstairs created after the snapshot_addr + // is replaced will reference the cloned data. + + let Some(disk) = osagactx + .datastore() + .disk_for_volume_id(params.request.volume_id) + .await + .map_err(ActionError::action_failed)? 
+ else { + return Ok(()); + }; + + let Some(instance_id) = disk.runtime().attach_instance_id else { + return Ok(()); + }; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let instance_and_vmm = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + let Some(vmm) = instance_and_vmm.vmm() else { + return Ok(()); + }; + + let state = vmm.runtime.state; + + info!( + log, + "volume associated with disk attached to instance with vmm in \ + state {state}"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + match &state { + VmmState::Running | VmmState::Rebooting => { + // Propolis server is ok to receive the volume replacement request. + } + + VmmState::Starting + | VmmState::Stopping + | VmmState::Stopped + | VmmState::Migrating + | VmmState::Failed + | VmmState::Destroyed + | VmmState::SagaUnwound => { + // Propolis server is not ok to receive volume replacement requests + // - unwind so that this saga can run again. + return Err(ActionError::action_failed(format!( + "vmm {} propolis not in a state to receive request", + vmm.id, + ))); + } + } + + let new_volume_vcr = match osagactx + .datastore() + .volume_get(params.request.volume_id) + .await + .map_err(ActionError::action_failed)? + { + Some(volume) => volume.data().to_string(), + + None => { + return Err(ActionError::action_failed(Error::internal_error( + "new volume is gone!", + ))); + } + }; + + let instance_lookup = + LookupPath::new(&opctx, &osagactx.datastore()).instance_id(instance_id); + + let (vmm, client) = osagactx + .nexus() + .propolis_client_for_instance( + &opctx, + &instance_lookup, + authz::Action::Modify, + ) + .await + .map_err(ActionError::action_failed)?; + + info!( + log, + "sending replacement request for disk volume to propolis"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + let result = client + .instance_issue_crucible_vcr_request() + .id(disk.id()) + .body(propolis_client::types::InstanceVcrReplace { + name: disk.name().to_string(), + vcr_json: new_volume_vcr, + }) + .send() + .await + .map_err(|e| match e { + propolis_client::Error::ErrorResponse(rv) => { + ActionError::action_failed(rv.message.clone()) + } + + _ => ActionError::action_failed(format!( + "unexpected failure during \ + `instance_issue_crucible_vcr_request`: {e}", + )), + })?; + + let replace_result = result.into_inner(); + + info!( + log, + "saw replace result {replace_result:?}"; + "request id" => %params.request.id, + "volume id" => %params.request.volume_id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + match &replace_result { + ReplaceResult::Started => { + // This saga's call just started the replacement + } + + ReplaceResult::StartedAlready => { + // A previous run of this saga (or saga node) started the + // replacement + } + + ReplaceResult::CompletedAlready => { + // It's done! We see this if the same propolis that received the + // original replace request started and finished the replacement. 
+ } + + ReplaceResult::VcrMatches => { + // This propolis booted with the updated VCR + } + + ReplaceResult::Missing => { + // The volume does not contain the region to be replaced. This is an + // error! + return Err(ActionError::action_failed(String::from( + "saw ReplaceResult::Missing", + ))); + } + } + + Ok(()) +} + +async fn rsrss_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + let new_volume_id = sagactx.lookup::("new_volume_id")?; + + // Update the request record to 'Completed' and clear the operating saga id. + // There is no undo step for this, it should succeed idempotently. + datastore + .set_region_snapshot_replacement_step_complete( + &opctx, + params.request.id, + saga_id, + new_volume_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs new file mode 100644 index 00000000000..93335b6125f --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_step_garbage_collect.rs @@ -0,0 +1,233 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Delete the volume that stashes the target replaced during a region snapshot +//! replacement step saga. After that's done, change the region snapshot +//! replacement step's state to "VolumeDeleted". + +use super::{ActionRegistry, NexusActionContext, NexusSaga, SagaInitError}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement step garbage collect saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the snapshot that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub old_snapshot_volume_id: Uuid, + pub request: db::model::RegionSnapshotReplacementStep, +} + +// region snapshot replacement step garbage collect saga: actions + +declare_saga_actions! 
{ + region_snapshot_replacement_step_garbage_collect; + UPDATE_REQUEST_RECORD -> "unused_1" { + + srsgs_update_request_record + } +} + +// region snapshot replacement step garbage collect saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementStepGarbageCollect; +impl NexusSaga for SagaRegionSnapshotReplacementStepGarbageCollect { + const NAME: &'static str = + "region-snapshot-replacement-step-garbage-collect"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_step_garbage_collect_register_actions( + registry, + ); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.old_snapshot_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region snapshot replacement step garbage collect saga: action implementations + +async fn srsgs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Now that the region snapshot step volume has been deleted, update the + // replacement request record to 'VolumeDeleted'. There is no undo step for + // this, it should succeed idempotently. 
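+    //
+    // "Succeed idempotently" here means a re-executed node that finds the
+    // record already in 'VolumeDeleted' treats that as success, rather than
+    // failing because the 'Complete' precondition no longer holds.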
+ + datastore + .set_region_snapshot_replacement_step_volume_deleted( + &opctx, + params.request.id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::app::sagas::region_snapshot_replacement_step_garbage_collect::*; + use nexus_db_model::RegionSnapshotReplacementStep; + use nexus_db_model::RegionSnapshotReplacementStepState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_step_garbage_collect_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_snapshot_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_snapshot_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_snapshot_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_snapshot_volume_id, volume_data)) + .await + .unwrap(); + + let mut request = + RegionSnapshotReplacementStep::new(Uuid::new_v4(), Uuid::new_v4()); + request.replacement_state = + RegionSnapshotReplacementStepState::Complete; + request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + + datastore + .insert_region_snapshot_replacement_step(&opctx, request.clone()) + .await + .unwrap(); + + // Run the saga + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + old_snapshot_volume_id, + request: request.clone(), + }; + + let _output = nexus + .sagas + .saga_execute::( + params, + ) + .await + .unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_step_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementStepState::VolumeDeleted + ); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_snapshot_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index eeb14091b25..540ab90e283 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -106,11 +106,12 @@ use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external; use omicron_common::api::external::Error; use omicron_common::retry_until_known_result; +use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; use rand::{rngs::StdRng, RngCore, SeedableRng}; use serde::Deserialize; use serde::Serialize; use 
sled_agent_client::types::CrucibleOpts; -use sled_agent_client::types::InstanceIssueDiskSnapshotRequestBody; +use sled_agent_client::types::VmmIssueDiskSnapshotRequestBody; use sled_agent_client::types::VolumeConstructionRequest; use slog::info; use std::collections::BTreeMap; @@ -826,39 +827,43 @@ async fn ssc_send_snapshot_request_to_sled_agent( .await .map_err(ActionError::action_failed)?; - let sled_id = osagactx + let instance_and_vmm = osagactx .datastore() .instance_fetch_with_vmm(&opctx, &authz_instance) .await - .map_err(ActionError::action_failed)? - .sled_id(); + .map_err(ActionError::action_failed)?; + + let vmm = instance_and_vmm.vmm(); // If this instance does not currently have a sled, we can't continue this // saga - the user will have to reissue the snapshot request and it will get // run on a Pantry. - let Some(sled_id) = sled_id else { + let Some((propolis_id, sled_id)) = + vmm.as_ref().map(|vmm| (vmm.id, vmm.sled_id)) + else { return Err(ActionError::action_failed(Error::unavail( - "sled id is None!", + "instance no longer has an active VMM!", ))); }; info!(log, "asking for disk snapshot from Propolis via sled agent"; "disk_id" => %params.disk_id, "instance_id" => %attach_instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sled_agent_client = osagactx .nexus() - .sled_client(&sled_id) + .sled_client(&SledUuid::from_untyped_uuid(sled_id)) .await .map_err(ActionError::action_failed)?; retry_until_known_result(log, || async { sled_agent_client - .instance_issue_disk_snapshot_request( - &attach_instance_id, + .vmm_issue_disk_snapshot_request( + &PropolisUuid::from_untyped_uuid(propolis_id), ¶ms.disk_id, - &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, + &VmmIssueDiskSnapshotRequestBody { snapshot_id }, ) .await }) @@ -2151,12 +2156,15 @@ mod test { .await .unwrap(); - let sled_id = instance_state - .sled_id() - .expect("starting instance should have a sled"); + let vmm_state = instance_state + .vmm() + .as_ref() + .expect("starting instance should have a vmm"); + let propolis_id = PropolisUuid::from_untyped_uuid(vmm_state.id); + let sled_id = SledUuid::from_untyped_uuid(vmm_state.sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.vmm_finish_transition(propolis_id).await; - sa.instance_finish_transition(instance.identity.id).await; let instance_state = nexus .datastore() .instance_fetch_with_vmm(&opctx, &authz_instance) diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index b9388a1116a..1572ba43306 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -5,11 +5,8 @@ //! Helper functions for writing saga undo tests and working with instances in //! saga tests. 
-use super::NexusSaga; -use crate::{ - app::{saga::create_saga_dag, test_interfaces::TestInterfaces as _}, - Nexus, -}; +use super::{instance_common::VmmAndSledIds, NexusSaga}; +use crate::{app::saga::create_saga_dag, Nexus}; use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection}; use camino::Utf8Path; use diesel::{ @@ -137,13 +134,14 @@ pub(crate) async fn instance_simulate( info!(&cptestctx.logctx.log, "Poking simulated instance"; "instance_id" => %instance_id); let nexus = &cptestctx.server.server_context().nexus; + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, instance_id).await; let sa = nexus - .instance_sled_by_id(instance_id) + .sled_client(&sled_id) .await - .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(instance_id.into_untyped_uuid()).await; + sa.vmm_finish_transition(vmm_id).await; } pub(crate) async fn instance_single_step_on_sled( @@ -158,12 +156,14 @@ pub(crate) async fn instance_single_step_on_sled( "sled_id" => %sled_id, ); let nexus = &cptestctx.server.server_context().nexus; + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, instance_id).await; let sa = nexus - .sled_client(sled_id) + .sled_client(&sled_id) .await - .expect("sled must exist to simulate a state change"); + .expect("instance must be on a sled to simulate a state change"); - sa.instance_single_step(instance_id.into_untyped_uuid()).await; + sa.vmm_single_step(vmm_id).await; } pub(crate) async fn instance_simulate_by_name( @@ -186,12 +186,14 @@ pub(crate) async fn instance_simulate_by_name( let instance_lookup = nexus.instance_lookup(&opctx, instance_selector).unwrap(); let (.., instance) = instance_lookup.fetch().await.unwrap(); + let instance_id = InstanceUuid::from_untyped_uuid(instance.id()); + let VmmAndSledIds { vmm_id, sled_id } = + instance_fetch_vmm_and_sled_ids(cptestctx, &instance_id).await; let sa = nexus - .instance_sled_by_id(&InstanceUuid::from_untyped_uuid(instance.id())) + .sled_client(&sled_id) .await - .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(instance.id()).await; + sa.vmm_finish_transition(vmm_id).await; } pub async fn instance_fetch( @@ -218,6 +220,21 @@ pub async fn instance_fetch( db_state } +pub(super) async fn instance_fetch_vmm_and_sled_ids( + cptestctx: &ControlPlaneTestContext, + instance_id: &InstanceUuid, +) -> VmmAndSledIds { + let instance_and_vmm = instance_fetch(cptestctx, *instance_id).await; + let vmm = instance_and_vmm + .vmm() + .as_ref() + .expect("can only fetch VMM and sled IDs for an active instance"); + + let vmm_id = PropolisUuid::from_untyped_uuid(vmm.id); + let sled_id = SledUuid::from_untyped_uuid(vmm.sled_id); + VmmAndSledIds { vmm_id, sled_id } +} + pub async fn instance_fetch_all( cptestctx: &ControlPlaneTestContext, instance_id: InstanceUuid, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 261045670e3..9c21ca73a17 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -12,7 +12,6 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; -use nexus_db_queries::db::model::DatasetKind; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::SledFilter; @@ -23,6 +22,7 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use 
omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupResult;
+use omicron_common::api::internal::shared::DatasetKind;
 use omicron_uuid_kinds::{GenericUuid, SledUuid};
 use sled_agent_client::Client as SledAgentClient;
 use std::net::SocketAddrV6;
@@ -292,13 +292,12 @@ impl super::Nexus {
 
     // Datasets (contained within zpools)
 
-    /// Upserts a dataset into the database, updating it if it already exists.
-    pub(crate) async fn upsert_dataset(
+    /// Upserts a crucible dataset into the database, updating it if it already exists.
+    pub(crate) async fn upsert_crucible_dataset(
         &self,
         id: Uuid,
         zpool_id: Uuid,
         address: SocketAddrV6,
-        kind: DatasetKind,
     ) -> Result<(), Error> {
         info!(
             self.log,
@@ -307,6 +306,7 @@ impl super::Nexus {
             "dataset_id" => id.to_string(),
             "address" => address.to_string()
         );
+        let kind = DatasetKind::Crucible;
         let dataset =
             db::model::Dataset::new(id, zpool_id, Some(address), kind);
         self.db_datastore.dataset_upsert(dataset).await?;
diff --git a/nexus/src/app/snapshot.rs b/nexus/src/app/snapshot.rs
index 040c9fc0823..57b8edd1f03 100644
--- a/nexus/src/app/snapshot.rs
+++ b/nexus/src/app/snapshot.rs
@@ -109,7 +109,7 @@ impl super::Nexus {
 
             // If a Propolis _may_ exist, send the snapshot request there,
             // otherwise use the pantry.
-            !instance_state.vmm().is_some()
+            instance_state.vmm().is_none()
         } else {
             // This disk is not attached to an instance, use the pantry.
             true
diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs
index 9726a59d331..b616531f533 100644
--- a/nexus/src/app/switch_port.rs
+++ b/nexus/src/app/switch_port.rs
@@ -30,6 +30,7 @@ impl super::Nexus {
         params: params::SwitchPortSettingsCreate,
     ) -> CreateResult<SwitchPortSettings> {
         opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
+        Self::switch_port_settings_validate(&params)?;
 
         //TODO race conditions on exists check versus update/create.
// Normally I would use a DB lock here, but not sure what @@ -54,6 +55,36 @@ impl super::Nexus { } } + // TODO: more validation wanted + fn switch_port_settings_validate( + params: ¶ms::SwitchPortSettingsCreate, + ) -> CreateResult<()> { + for x in params.bgp_peers.values() { + for p in x.peers.iter() { + if let Some(ref key) = p.md5_auth_key { + if key.len() > 80 { + return Err(Error::invalid_value( + "md5_auth_key", + format!("md5 auth key for {} is longer than 80 characters", p.addr) + )); + } + for c in key.chars() { + if !c.is_ascii() || c.is_ascii_control() { + return Err(Error::invalid_value( + "md5_auth_key", + format!( + "md5 auth key for {} must be printable ascii", + p.addr + ), + )); + } + } + } + } + } + Ok(()) + } + pub async fn switch_port_settings_create( self: &Arc, opctx: &OpContext, diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index adfafa523d0..9852225e8cc 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -6,8 +6,7 @@ use async_trait::async_trait; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::Error; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::{InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use sled_agent_client::Client as SledAgentClient; use std::sync::Arc; use uuid::Uuid; @@ -19,25 +18,47 @@ pub use super::update::SpUpdater; pub use super::update::UpdateProgress; pub use gateway_client::types::SpType; +/// The information needed to talk to a sled agent about an instance that is +/// active on that sled. +pub struct InstanceSledAgentInfo { + /// The ID of the Propolis job to send to sled agent. + pub propolis_id: PropolisUuid, + + /// The ID of the sled where the Propolis job is running. + pub sled_id: SledUuid, + + /// A client for talking to the Propolis's host sled. + pub sled_client: Arc, + + /// The ID of the instance's migration target Propolis, if it has one. + pub dst_propolis_id: Option, +} + /// Exposes additional [`super::Nexus`] interfaces for use by the test suite #[async_trait] pub trait TestInterfaces { /// Access the Rack ID of the currently executing Nexus. fn rack_id(&self) -> Uuid; - /// Returns the SledAgentClient for an Instance from its id. We may also - /// want to split this up into instance_lookup_by_id() and instance_sled(), - /// but after all it's a test suite special to begin with. - async fn instance_sled_by_id( + /// Attempts to obtain the Propolis ID and sled agent information for an + /// instance. + /// + /// # Arguments + /// + /// - `id`: The ID of the instance of interest. + /// - `opctx`: An optional operation context to use for authorization + /// checks. If `None`, this routine supplies the default test opctx. + /// + /// # Return value + /// + /// - `Ok(Some(info))` if the instance has an active Propolis. + /// - `Ok(None)` if the instance has no active Propolis. + /// - `Err` if an error occurred. + async fn active_instance_info( &self, id: &InstanceUuid, - ) -> Result>, Error>; - - async fn instance_sled_by_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result>, Error>; + opctx: Option<&OpContext>, + ) -> Result, Error>; /// Returns the SledAgentClient for the sled running an instance to which a /// disk is attached. @@ -46,18 +67,6 @@ pub trait TestInterfaces { id: &Uuid, ) -> Result>, Error>; - /// Returns the supplied instance's current active sled ID. 
- async fn instance_sled_id( - &self, - instance_id: &InstanceUuid, - ) -> Result, Error>; - - async fn instance_sled_id_with_opctx( - &self, - instance_id: &InstanceUuid, - opctx: &OpContext, - ) -> Result, Error>; - async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result; fn set_samael_max_issue_delay(&self, max_issue_delay: chrono::Duration); @@ -69,30 +78,49 @@ impl TestInterfaces for super::Nexus { self.rack_id } - async fn instance_sled_by_id( + async fn active_instance_info( &self, id: &InstanceUuid, - ) -> Result>, Error> { - let opctx = OpContext::for_tests( - self.log.new(o!()), - Arc::clone(&self.db_datastore) - as Arc, - ); + opctx: Option<&OpContext>, + ) -> Result, Error> { + let local_opctx; + let opctx = match opctx { + Some(o) => o, + None => { + local_opctx = OpContext::for_tests( + self.log.new(o!()), + Arc::clone(&self.db_datastore) + as Arc, + ); + &local_opctx + } + }; - self.instance_sled_by_id_with_opctx(id, &opctx).await - } + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(nexus_db_queries::authz::Action::Read) + .await?; - async fn instance_sled_by_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result>, Error> { - let sled_id = self.instance_sled_id_with_opctx(id, opctx).await?; - if let Some(sled_id) = sled_id { - Ok(Some(self.sled_client(&sled_id).await?)) - } else { - Ok(None) - } + let state = self + .datastore() + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + + let Some(vmm) = state.vmm() else { + return Ok(None); + }; + + let sled_id = SledUuid::from_untyped_uuid(vmm.sled_id); + Ok(Some(InstanceSledAgentInfo { + propolis_id: PropolisUuid::from_untyped_uuid(vmm.id), + sled_id, + sled_client: self.sled_client(&sled_id).await?, + dst_propolis_id: state + .instance() + .runtime() + .dst_propolis_id + .map(PropolisUuid::from_untyped_uuid), + })) } async fn disk_sled_by_id( @@ -112,37 +140,11 @@ impl TestInterfaces for super::Nexus { let instance_id = InstanceUuid::from_untyped_uuid( db_disk.runtime().attach_instance_id.unwrap(), ); - self.instance_sled_by_id(&instance_id).await - } - - async fn instance_sled_id( - &self, - id: &InstanceUuid, - ) -> Result, Error> { - let opctx = OpContext::for_tests( - self.log.new(o!()), - Arc::clone(&self.db_datastore) - as Arc, - ); - - self.instance_sled_id_with_opctx(id, &opctx).await - } - - async fn instance_sled_id_with_opctx( - &self, - id: &InstanceUuid, - opctx: &OpContext, - ) -> Result, Error> { - let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(id.into_untyped_uuid()) - .lookup_for(nexus_db_queries::authz::Action::Read) - .await?; Ok(self - .datastore() - .instance_fetch_with_vmm(opctx, &authz_instance) + .active_instance_info(&instance_id, Some(&opctx)) .await? 
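+            // active_instance_info bundles the Propolis ID, sled ID, and
+            // sled client for an instance's active VMM; disk_sled_by_id
+            // only needs the client.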
-            .sled_id())
+            .map(|info| info.sled_client))
     }
 
     async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result<bool, Error> {
diff --git a/nexus/src/bin/nexus.rs b/nexus/src/bin/nexus.rs
index 33870b39e31..01e4bfc3af2 100644
--- a/nexus/src/bin/nexus.rs
+++ b/nexus/src/bin/nexus.rs
@@ -16,20 +16,11 @@ use clap::Parser;
 use nexus_config::NexusConfig;
 use omicron_common::cmd::fatal;
 use omicron_common::cmd::CmdError;
-use omicron_nexus::run_openapi_external;
 use omicron_nexus::run_server;
 
 #[derive(Debug, Parser)]
 #[clap(name = "nexus", about = "See README.adoc for more information")]
 struct Args {
-    #[clap(
-        short = 'O',
-        long = "openapi",
-        help = "Print the external OpenAPI Spec document and exit",
-        action
-    )]
-    openapi: bool,
-
     #[clap(name = "CONFIG_FILE_PATH", action)]
     config_file_path: Option<Utf8PathBuf>,
 }
@@ -44,23 +35,19 @@ async fn main() {
 async fn do_run() -> Result<(), CmdError> {
     let args = Args::parse();
 
-    if args.openapi {
-        run_openapi_external().map_err(|err| CmdError::Failure(anyhow!(err)))
-    } else {
-        let config_path = match args.config_file_path {
-            Some(path) => path,
-            None => {
-                use clap::CommandFactory;
-
-                eprintln!("{}", Args::command().render_help());
-                return Err(CmdError::Usage(
-                    "CONFIG_FILE_PATH is required".to_string(),
-                ));
-            }
-        };
-        let config = NexusConfig::from_file(config_path)
-            .map_err(|e| CmdError::Failure(anyhow!(e)))?;
-
-        run_server(&config).await.map_err(|err| CmdError::Failure(anyhow!(err)))
-    }
+    let config_path = match args.config_file_path {
+        Some(path) => path,
+        None => {
+            use clap::CommandFactory;
+
+            eprintln!("{}", Args::command().render_help());
+            return Err(CmdError::Usage(
+                "CONFIG_FILE_PATH is required".to_string(),
+            ));
+        }
+    };
+    let config = NexusConfig::from_file(config_path)
+        .map_err(|e| CmdError::Failure(anyhow!(e)))?;
+
+    run_server(&config).await.map_err(|err| CmdError::Failure(anyhow!(err)))
 }
diff --git a/nexus/src/bin/schema-updater.rs b/nexus/src/bin/schema-updater.rs
index 7fe1ed84a47..4a43698f00c 100644
--- a/nexus/src/bin/schema-updater.rs
+++ b/nexus/src/bin/schema-updater.rs
@@ -71,7 +71,7 @@ async fn main() -> anyhow::Result<()> {
     let log = Logger::root(drain, slog::o!("unit" => "schema_updater"));
 
     let crdb_cfg = db::Config { url: args.url };
-    let pool = Arc::new(db::Pool::new(&log, &crdb_cfg));
+    let pool = Arc::new(db::Pool::new_single_host(&log, &crdb_cfg));
     let schema_config = SchemaConfig { schema_dir: args.schema_directory };
 
     let all_versions = AllSchemaVersions::load(&schema_config.schema_dir)?;
diff --git a/nexus/src/context.rs b/nexus/src/context.rs
index 95d69e0c880..9620a3937a4 100644
--- a/nexus/src/context.rs
+++ b/nexus/src/context.rs
@@ -11,9 +11,7 @@ use authn::external::token::HttpAuthnToken;
 use authn::external::HttpAuthnScheme;
 use camino::Utf8PathBuf;
 use chrono::Duration;
-use internal_dns::ServiceName;
 use nexus_config::NexusConfig;
-use nexus_config::PostgresConfigWithUrl;
 use nexus_config::SchemeName;
 use nexus_db_queries::authn::external::session_cookie::SessionStore;
 use nexus_db_queries::authn::ConsoleSessionWithSiloId;
@@ -25,7 +23,6 @@ use oximeter::types::ProducerRegistry;
 use oximeter_instruments::http::{HttpService, LatencyTracker};
 use slog::Logger;
 use std::env;
-use std::str::FromStr;
 use std::sync::Arc;
 use uuid::Uuid;
 
@@ -149,12 +146,14 @@ impl ServerContext {
                 name: name.to_string().into(),
                 id: config.deployment.id,
             };
-            const START_LATENCY_DECADE: i16 = -6;
-            const END_LATENCY_DECADE: i16 = 3;
-            LatencyTracker::with_latency_decades(
+            // Start at 1 microsecond == 1e3 nanoseconds.
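+            // (The powers are base-10 exponents of the bin edges in
+            // nanoseconds, so the tracker spans 1 µs through 1000 s.)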
+ const LATENCY_START_POWER: u16 = 3; + // End at 1000s == (1e9 * 1e3) == 1e12 nanoseconds. + const LATENCY_END_POWER: u16 = 12; + LatencyTracker::with_log_linear_bins( target, - START_LATENCY_DECADE, - END_LATENCY_DECADE, + LATENCY_START_POWER, + LATENCY_END_POWER, ) .unwrap() }; @@ -210,7 +209,7 @@ impl ServerContext { // nexus in dev for everyone // Set up DNS Client - let resolver = match config.deployment.internal_dns { + let (resolver, dns_addrs) = match config.deployment.internal_dns { nexus_config::InternalDns::FromSubnet { subnet } => { let az_subnet = Ipv6Subnet::::new(subnet.net().addr()); @@ -219,11 +218,21 @@ impl ServerContext { "Setting up resolver using DNS servers for subnet: {:?}", az_subnet ); - internal_dns::resolver::Resolver::new_from_subnet( - log.new(o!("component" => "DnsResolver")), - az_subnet, + let resolver = + internal_dns::resolver::Resolver::new_from_subnet( + log.new(o!("component" => "DnsResolver")), + az_subnet, + ) + .map_err(|e| { + format!("Failed to create DNS resolver: {}", e) + })?; + + ( + resolver, + internal_dns::resolver::Resolver::servers_from_subnet( + az_subnet, + ), ) - .map_err(|e| format!("Failed to create DNS resolver: {}", e))? } nexus_config::InternalDns::FromAddress { address } => { info!( @@ -231,56 +240,33 @@ impl ServerContext { "Setting up resolver using DNS address: {:?}", address ); - internal_dns::resolver::Resolver::new_from_addrs( - log.new(o!("component" => "DnsResolver")), - &[address], - ) - .map_err(|e| format!("Failed to create DNS resolver: {}", e))? + let resolver = + internal_dns::resolver::Resolver::new_from_addrs( + log.new(o!("component" => "DnsResolver")), + &[address], + ) + .map_err(|e| { + format!("Failed to create DNS resolver: {}", e) + })?; + + (resolver, vec![address]) } }; - // Set up DB pool - let url = match &config.deployment.database { - nexus_config::Database::FromUrl { url } => url.clone(), + let pool = match &config.deployment.database { + nexus_config::Database::FromUrl { url } => { + info!(log, "Setting up qorb pool from a single host"; "url" => #?url); + db::Pool::new_single_host( + &log, + &db::Config { url: url.clone() }, + ) + } nexus_config::Database::FromDns => { - info!(log, "Accessing DB url from DNS"); - // It's been requested but unfortunately not supported to - // directly connect using SRV based lookup. - // TODO-robustness: the set of cockroachdb hosts we'll use will - // be fixed to whatever we got back from DNS at Nexus start. - // This means a new cockroachdb instance won't picked up until - // Nexus restarts. - let addrs = loop { - match resolver - .lookup_all_socket_v6(ServiceName::Cockroach) - .await - { - Ok(addrs) => break addrs, - Err(e) => { - warn!( - log, - "Failed to lookup cockroach addresses: {e}" - ); - tokio::time::sleep(std::time::Duration::from_secs( - 1, - )) - .await; - } - } - }; - let addrs_str = addrs - .iter() - .map(ToString::to_string) - .collect::>() - .join(","); - info!(log, "DB addresses: {}", addrs_str); - PostgresConfigWithUrl::from_str(&format!( - "postgresql://root@{addrs_str}/omicron?sslmode=disable", - )) - .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? 
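+                // The qorb pool resolves CockroachDB nodes from DNS on an
+                // ongoing basis, so nodes added after Nexus starts can be
+                // picked up without a restart.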
+ info!(log, "Setting up qorb pool from DNS"; "dns_addrs" => #?dns_addrs); + db::Pool::new(&log, dns_addrs) } }; - let pool = db::Pool::new(&log, &db::Config { url }); + let nexus = Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index fb0a47bbea8..4ea8290bf96 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -25,37 +25,33 @@ use crate::context::ApiContext; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::{ - endpoint, http_response_found, http_response_see_other, HttpError, - HttpResponseFound, HttpResponseHeaders, HttpResponseSeeOther, - HttpResponseUpdatedNoContent, Path, Query, RequestContext, + http_response_found, http_response_see_other, HttpError, HttpResponseFound, + HttpResponseHeaders, HttpResponseSeeOther, HttpResponseUpdatedNoContent, + Path, Query, RequestContext, }; -use http::{header, HeaderName, HeaderValue, Response, StatusCode, Uri}; +use http::{header, HeaderName, HeaderValue, Response, StatusCode}; use hyper::Body; use nexus_db_model::AuthenticationMode; use nexus_db_queries::authn::silos::IdentityProviderType; use nexus_db_queries::context::OpContext; use nexus_db_queries::{ - authn::external::{ - cookies::Cookies, - session_cookie::{ - clear_session_cookie_header_value, session_cookie_header_value, - SessionStore, SESSION_COOKIE_COOKIE_NAME, - }, + authn::external::session_cookie::{ + clear_session_cookie_header_value, session_cookie_header_value, + SessionStore, SESSION_COOKIE_COOKIE_NAME, }, db::identity::Asset, }; -use nexus_types::external_api::params; +use nexus_types::authn::cookies::Cookies; +use nexus_types::external_api::params::{self, RelativeUri}; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{DataPageParams, Error, NameOrId}; use once_cell::sync::Lazy; -use parse_display::Display; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_urlencoded; use std::collections::HashMap; use std::num::NonZeroU32; -use std::str::FromStr; use tokio::fs::File; use tokio_util::codec::{BytesCodec, FramedRead}; @@ -196,12 +192,6 @@ use tokio_util::codec::{BytesCodec, FramedRead}; // // /logout/{silo_name}/{provider_name} -#[derive(Deserialize, JsonSchema)] -pub struct LoginToProviderPathParam { - pub silo_name: nexus_db_queries::db::model::Name, - pub provider_name: nexus_db_queries::db::model::Name, -} - #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct RelayState { pub redirect_uri: Option, @@ -230,36 +220,18 @@ impl RelayState { } } -/// SAML login console page (just a link to the IdP) -#[endpoint { - method = GET, - path = "/login/{silo_name}/saml/{provider_name}", - tags = ["login"], - unpublished = true, -}] pub(crate) async fn login_saml_begin( rqctx: RequestContext, - _path_params: Path, - _query_params: Query, + _path_params: Path, + _query_params: Query, ) -> Result, HttpError> { serve_console_index(rqctx).await } -/// Get a redirect straight to the IdP -/// -/// Console uses this to avoid having to ask the API anything about the IdP. It -/// already knows the IdP name from the path, so it can just link to this path -/// and rely on Nexus to redirect to the actual IdP. 
-#[endpoint { - method = GET, - path = "/login/{silo_name}/saml/{provider_name}/redirect", - tags = ["login"], - unpublished = true, -}] pub(crate) async fn login_saml_redirect( rqctx: RequestContext, - path_params: Path, - query_params: Query, + path_params: Path, + query_params: Query, ) -> Result { let apictx = rqctx.context(); let handler = async { @@ -274,8 +246,8 @@ pub(crate) async fn login_saml_redirect( .datastore() .identity_provider_lookup( &opctx, - &path_params.silo_name, - &path_params.provider_name, + &path_params.silo_name.into(), + &path_params.provider_name.into(), ) .await?; @@ -310,15 +282,9 @@ pub(crate) async fn login_saml_redirect( .await } -/// Authenticate a user via SAML -#[endpoint { - method = POST, - path = "/login/{silo_name}/saml/{provider_name}", - tags = ["login"], -}] pub(crate) async fn login_saml( rqctx: RequestContext, - path_params: Path, + path_params: Path, body_bytes: dropshot::UntypedBody, ) -> Result { let apictx = rqctx.context(); @@ -335,8 +301,8 @@ pub(crate) async fn login_saml( .datastore() .identity_provider_lookup( &opctx, - &path_params.silo_name, - &path_params.provider_name, + &path_params.silo_name.into(), + &path_params.provider_name.into(), ) .await?; @@ -397,21 +363,10 @@ pub(crate) async fn login_saml( .await } -#[derive(Deserialize, JsonSchema)] -pub struct LoginPathParam { - pub silo_name: nexus_db_queries::db::model::Name, -} - -#[endpoint { - method = GET, - path = "/login/{silo_name}/local", - tags = ["login"], - unpublished = true, -}] pub(crate) async fn login_local_begin( rqctx: RequestContext, - _path_params: Path, - _query_params: Query, + _path_params: Path, + _query_params: Query, ) -> Result, HttpError> { // TODO: figure out why instrumenting doesn't work // let apictx = rqctx.context(); @@ -420,15 +375,9 @@ pub(crate) async fn login_local_begin( serve_console_index(rqctx).await } -/// Authenticate a user via username and password -#[endpoint { - method = POST, - path = "/v1/login/{silo_name}/local", - tags = ["login"], -}] pub(crate) async fn login_local( rqctx: RequestContext, - path_params: Path, + path_params: Path, credentials: dropshot::TypedBody, ) -> Result, HttpError> { let apictx = rqctx.context(); @@ -487,13 +436,6 @@ async fn create_session( Ok(session) } -/// Log user out of web console by deleting session on client and server -#[endpoint { - // important for security that this be a POST despite the empty req body - method = POST, - path = "/v1/logout", - tags = ["hidden"], -}] pub(crate) async fn logout( rqctx: RequestContext, cookies: Cookies, @@ -541,53 +483,6 @@ pub(crate) async fn logout( .await } -#[derive(Deserialize, JsonSchema)] -pub struct RestPathParam { - path: Vec, -} - -/// This is meant as a security feature. We want to ensure we never redirect to -/// a URI on a different host. 
-#[derive(Serialize, Deserialize, Debug, JsonSchema, Clone, Display)] -#[serde(try_from = "String")] -#[display("{0}")] -pub struct RelativeUri(String); - -impl FromStr for RelativeUri { - type Err = String; - - fn from_str(s: &str) -> Result { - Self::try_from(s.to_string()) - } -} - -impl TryFrom for RelativeUri { - type Error = String; - - fn try_from(uri: Uri) -> Result { - if uri.host().is_none() && uri.scheme().is_none() { - Ok(Self(uri.to_string())) - } else { - Err(format!("\"{}\" is not a relative URI", uri)) - } - } -} - -impl TryFrom for RelativeUri { - type Error = String; - - fn try_from(s: String) -> Result { - s.parse::() - .map_err(|_| format!("\"{}\" is not a relative URI", s)) - .and_then(|uri| Self::try_from(uri)) - } -} - -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct LoginUrlQuery { - redirect_uri: Option, -} - /// Generate URI to the appropriate login form for this Silo. Optional /// `redirect_uri` represents the URL to send the user back to after successful /// login, and is included in `state` query param if present @@ -644,7 +539,7 @@ async fn get_login_url( // Stick redirect_url into the state param and URL encode it so it can be // used as a query string. We assume it's not already encoded. - let query_data = LoginUrlQuery { redirect_uri }; + let query_data = params::LoginUrlQuery { redirect_uri }; Ok(match serde_urlencoded::to_string(query_data) { // only put the ? in front if there's something there @@ -654,15 +549,9 @@ async fn get_login_url( }) } -/// Redirect to a login page for the current Silo (if that can be determined) -#[endpoint { - method = GET, - path = "/login", - unpublished = true, -}] pub(crate) async fn login_begin( rqctx: RequestContext, - query_params: Query, + query_params: Query, ) -> Result { let apictx = rqctx.context(); let handler = async { @@ -696,7 +585,11 @@ pub(crate) async fn console_index_or_login_redirect( .request .uri() .path_and_query() - .map(|p| RelativeUri(p.to_string())); + .map(|p| p.to_string().parse::()) + .transpose() + .map_err(|e| { + HttpError::for_internal_error(format!("parsing URI: {}", e)) + })?; let login_url = get_login_url(&rqctx, redirect_uri).await?; Ok(Response::builder() @@ -711,8 +604,7 @@ pub(crate) async fn console_index_or_login_redirect( // to manually define more specific routes. macro_rules! console_page { - ($name:ident, $path:literal) => { - #[endpoint { method = GET, path = $path, unpublished = true, }] + ($name:ident) => { pub(crate) async fn $name( rqctx: RequestContext, ) -> Result, HttpError> { @@ -723,26 +615,25 @@ macro_rules! console_page { // only difference is the _path_params arg macro_rules! 
console_page_wildcard { - ($name:ident, $path:literal) => { - #[endpoint { method = GET, path = $path, unpublished = true, }] + ($name:ident) => { pub(crate) async fn $name( rqctx: RequestContext, - _path_params: Path, + _path_params: Path, ) -> Result, HttpError> { console_index_or_login_redirect(rqctx).await } }; } -console_page_wildcard!(console_projects, "/projects/{path:.*}"); -console_page_wildcard!(console_settings_page, "/settings/{path:.*}"); -console_page_wildcard!(console_system_page, "/system/{path:.*}"); -console_page_wildcard!(console_lookup, "/lookup/{path:.*}"); -console_page!(console_root, "/"); -console_page!(console_projects_new, "/projects-new"); -console_page!(console_silo_images, "/images"); -console_page!(console_silo_utilization, "/utilization"); -console_page!(console_silo_access, "/access"); +console_page_wildcard!(console_projects); +console_page_wildcard!(console_settings_page); +console_page_wildcard!(console_system_page); +console_page_wildcard!(console_lookup); +console_page!(console_root); +console_page!(console_projects_new); +console_page!(console_silo_images); +console_page!(console_silo_utilization); +console_page!(console_silo_access); /// Check if `gzip` is listed in the request's `Accept-Encoding` header. fn accept_gz(header_value: &str) -> bool { @@ -870,15 +761,10 @@ async fn serve_static( /// /// Note that Dropshot protects us from directory traversal attacks (e.g. /// `/assets/../../../etc/passwd`). This is tested in the `console_api` -/// integration tests. -#[endpoint { - method = GET, - path = "/assets/{path:.*}", - unpublished = true, -}] +/// integration tests pub(crate) async fn asset( rqctx: RequestContext, - path_params: Path, + path_params: Path, ) -> Result, HttpError> { // asset URLs contain hashes, so cache for 1 year const CACHE_CONTROL: HeaderValue = diff --git a/nexus/src/external_api/device_auth.rs b/nexus/src/external_api/device_auth.rs index 883dbf4e198..87ccbd97523 100644 --- a/nexus/src/external_api/device_auth.rs +++ b/nexus/src/external_api/device_auth.rs @@ -14,16 +14,14 @@ use super::views::DeviceAccessTokenGrant; use crate::app::external_endpoints::authority_for_request; use crate::ApiContext; use dropshot::{ - endpoint, HttpError, HttpResponseUpdatedNoContent, RequestContext, - TypedBody, + HttpError, HttpResponseUpdatedNoContent, RequestContext, TypedBody, }; use http::{header, Response, StatusCode}; use hyper::Body; use nexus_db_queries::db::model::DeviceAccessToken; +use nexus_types::external_api::params; use omicron_common::api::external::InternalContext; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; +use serde::Serialize; // Token granting à la RFC 8628 (OAuth 2.0 Device Authorization Grant) @@ -46,25 +44,9 @@ where .body(body.into())?) } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct DeviceAuthRequest { - pub client_id: Uuid, -} - -/// Start an OAuth 2.0 Device Authorization Grant -/// -/// This endpoint is designed to be accessed from an *unauthenticated* -/// API client. It generates and records a `device_code` and `user_code` -/// which must be verified and confirmed prior to a token being granted. 
-#[endpoint { - method = POST, - path = "/device/auth", - content_type = "application/x-www-form-urlencoded", - tags = ["hidden"], // "token" -}] pub(crate) async fn device_auth_request( rqctx: RequestContext, - params: TypedBody, + params: TypedBody, ) -> Result, HttpError> { let apictx = rqctx.context(); let nexus = &apictx.context.nexus; @@ -99,53 +81,21 @@ pub(crate) async fn device_auth_request( .await } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct DeviceAuthVerify { - pub user_code: String, -} - -/// Verify an OAuth 2.0 Device Authorization Grant -/// -/// This endpoint should be accessed in a full user agent (e.g., -/// a browser). If the user is not logged in, we redirect them to -/// the login page and use the `state` parameter to get them back -/// here on completion. If they are logged in, serve up the console -/// verification page so they can verify the user code. -#[endpoint { - method = GET, - path = "/device/verify", - unpublished = true, -}] pub(crate) async fn device_auth_verify( rqctx: RequestContext, ) -> Result, HttpError> { console_index_or_login_redirect(rqctx).await } -#[endpoint { - method = GET, - path = "/device/success", - unpublished = true, -}] pub(crate) async fn device_auth_success( rqctx: RequestContext, ) -> Result, HttpError> { console_index_or_login_redirect(rqctx).await } -/// Confirm an OAuth 2.0 Device Authorization Grant -/// -/// This endpoint is designed to be accessed by the user agent (browser), -/// not the client requesting the token. So we do not actually return the -/// token here; it will be returned in response to the poll on `/device/token`. -#[endpoint { - method = POST, - path = "/device/confirm", - tags = ["hidden"], // "token" -}] pub(crate) async fn device_auth_confirm( rqctx: RequestContext, - params: TypedBody, + params: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.context.nexus; @@ -171,13 +121,6 @@ pub(crate) async fn device_auth_confirm( .await } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct DeviceAccessTokenRequest { - pub grant_type: String, - pub device_code: String, - pub client_id: Uuid, -} - #[derive(Debug)] pub enum DeviceAccessTokenResponse { Granted(DeviceAccessToken), @@ -186,23 +129,12 @@ pub enum DeviceAccessTokenResponse { Denied, } -/// Request a device access token -/// -/// This endpoint should be polled by the client until the user code -/// is verified and the grant is confirmed. 
-#[endpoint { - method = POST, - path = "/device/token", - content_type = "application/x-www-form-urlencoded", - tags = ["hidden"], // "token" -}] pub(crate) async fn device_access_token( rqctx: RequestContext, - params: TypedBody, + params: params::DeviceAccessTokenRequest, ) -> Result, HttpError> { let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let params = params.into_inner(); let handler = async { // RFC 8628 §3.4 if params.grant_type != "urn:ietf:params:oauth:grant-type:device_code" { diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a87bdd834d9..a297eaa533c 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -14,8 +14,8 @@ use super::{ }, }; use crate::{context::ApiContext, external_api::shared}; -use dropshot::HttpResponseAccepted; -use dropshot::HttpResponseCreated; +use dropshot::EmptyScanParams; +use dropshot::HttpError; use dropshot::HttpResponseDeleted; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; @@ -27,12 +27,12 @@ use dropshot::RequestContext; use dropshot::ResultsPage; use dropshot::TypedBody; use dropshot::WhichPage; -use dropshot::{ - channel, endpoint, WebsocketChannelResult, WebsocketConnection, -}; use dropshot::{ApiDescription, StreamingBody}; -use dropshot::{ApiDescriptionRegisterError, HttpError}; -use dropshot::{ApiEndpoint, EmptyScanParams}; +use dropshot::{HttpResponseAccepted, HttpResponseFound, HttpResponseSeeOther}; +use dropshot::{HttpResponseCreated, HttpResponseHeaders}; +use dropshot::{WebsocketChannelResult, WebsocketConnection}; +use http::Response; +use hyper::Body; use ipnetwork::IpNetwork; use nexus_db_queries::authz; use nexus_db_queries::db; @@ -40,7 +40,15 @@ use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; -use nexus_types::external_api::shared::{BfdStatus, ProbeInfo}; +use nexus_external_api::*; +use nexus_types::{ + authn::cookies::Cookies, + external_api::{ + params::SystemMetricsPathParam, + shared::{BfdStatus, ProbeInfo}, + }, +}; +use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; @@ -55,9 +63,11 @@ use omicron_common::api::external::http_pagination::ScanParams; use omicron_common::api::external::AddressLot; use omicron_common::api::external::AddressLotBlock; use omicron_common::api::external::AddressLotCreateResponse; +use omicron_common::api::external::AggregateBgpMessageHistory; use omicron_common::api::external::BgpAnnounceSet; use omicron_common::api::external::BgpAnnouncement; use omicron_common::api::external::BgpConfig; +use omicron_common::api::external::BgpExported; use omicron_common::api::external::BgpImportedRouteIpv4; use omicron_common::api::external::BgpPeerStatus; use omicron_common::api::external::DataPageParams; @@ -78,7060 +88,5810 @@ use omicron_common::api::external::TufRepoGetResponse; use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; -use omicron_common::api::external::{ - http_pagination::data_page_params_for, AggregateBgpMessageHistory, -}; use 
omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; -use parse_display::Display; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::{ CloseFrame, Role as WebSocketRole, }; use propolis_client::support::WebSocketStream; use ref_cast::RefCast; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; -use std::net::IpAddr; -use uuid::Uuid; type NexusApiDescription = ApiDescription; /// Returns a description of the external nexus API pub(crate) fn external_api() -> NexusApiDescription { - fn register_endpoints( - api: &mut NexusApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(ping)?; - - api.register(system_policy_view)?; - api.register(system_policy_update)?; - - api.register(policy_view)?; - api.register(policy_update)?; - - api.register(project_list)?; - api.register(project_create)?; - api.register(project_view)?; - api.register(project_delete)?; - api.register(project_update)?; - api.register(project_policy_view)?; - api.register(project_policy_update)?; - api.register(project_ip_pool_list)?; - api.register(project_ip_pool_view)?; - - // Operator-Accessible IP Pools API - api.register(ip_pool_list)?; - api.register(ip_pool_create)?; - api.register(ip_pool_silo_list)?; - api.register(ip_pool_silo_link)?; - api.register(ip_pool_silo_unlink)?; - api.register(ip_pool_silo_update)?; - api.register(ip_pool_view)?; - api.register(ip_pool_delete)?; - api.register(ip_pool_update)?; - // Variants for internal services - api.register(ip_pool_service_view)?; - api.register(ip_pool_utilization_view)?; - - // Operator-Accessible IP Pool Range API - api.register(ip_pool_range_list)?; - api.register(ip_pool_range_add)?; - api.register(ip_pool_range_remove)?; - // Variants for internal services - api.register(ip_pool_service_range_list)?; - api.register(ip_pool_service_range_add)?; - api.register(ip_pool_service_range_remove)?; - - api.register(floating_ip_list)?; - api.register(floating_ip_create)?; - api.register(floating_ip_view)?; - api.register(floating_ip_update)?; - api.register(floating_ip_delete)?; - api.register(floating_ip_attach)?; - api.register(floating_ip_detach)?; - - api.register(disk_list)?; - api.register(disk_create)?; - api.register(disk_view)?; - api.register(disk_delete)?; - api.register(disk_metrics_list)?; - - api.register(disk_bulk_write_import_start)?; - api.register(disk_bulk_write_import)?; - api.register(disk_bulk_write_import_stop)?; - api.register(disk_finalize_import)?; - - api.register(instance_list)?; - api.register(instance_view)?; - api.register(instance_create)?; - api.register(instance_delete)?; - api.register(instance_migrate)?; - api.register(instance_reboot)?; - api.register(instance_start)?; - api.register(instance_stop)?; - api.register(instance_disk_list)?; - api.register(instance_disk_attach)?; - api.register(instance_disk_detach)?; - api.register(instance_serial_console)?; - api.register(instance_serial_console_stream)?; - api.register(instance_ssh_public_key_list)?; - - api.register(image_list)?; - api.register(image_create)?; - api.register(image_view)?; - api.register(image_delete)?; - api.register(image_promote)?; - api.register(image_demote)?; - - api.register(snapshot_list)?; - api.register(snapshot_create)?; - api.register(snapshot_view)?; - api.register(snapshot_delete)?; - - api.register(vpc_list)?; - api.register(vpc_create)?; - api.register(vpc_view)?; - api.register(vpc_update)?; - 
api.register(vpc_delete)?; - - api.register(vpc_subnet_list)?; - api.register(vpc_subnet_view)?; - api.register(vpc_subnet_create)?; - api.register(vpc_subnet_delete)?; - api.register(vpc_subnet_update)?; - api.register(vpc_subnet_list_network_interfaces)?; - - api.register(instance_network_interface_create)?; - api.register(instance_network_interface_list)?; - api.register(instance_network_interface_view)?; - api.register(instance_network_interface_update)?; - api.register(instance_network_interface_delete)?; - - api.register(instance_external_ip_list)?; - api.register(instance_ephemeral_ip_attach)?; - api.register(instance_ephemeral_ip_detach)?; - - api.register(vpc_router_list)?; - api.register(vpc_router_view)?; - api.register(vpc_router_create)?; - api.register(vpc_router_delete)?; - api.register(vpc_router_update)?; - - api.register(vpc_router_route_list)?; - api.register(vpc_router_route_view)?; - api.register(vpc_router_route_create)?; - api.register(vpc_router_route_delete)?; - api.register(vpc_router_route_update)?; - - api.register(vpc_firewall_rules_view)?; - api.register(vpc_firewall_rules_update)?; - - api.register(rack_list)?; - api.register(rack_view)?; - api.register(sled_list)?; - api.register(sled_view)?; - api.register(sled_set_provision_policy)?; - api.register(sled_instance_list)?; - api.register(sled_physical_disk_list)?; - api.register(physical_disk_list)?; - api.register(physical_disk_view)?; - api.register(switch_list)?; - api.register(switch_view)?; - api.register(sled_list_uninitialized)?; - api.register(sled_add)?; - - api.register(user_builtin_list)?; - api.register(user_builtin_view)?; - - api.register(role_list)?; - api.register(role_view)?; - - api.register(current_user_view)?; - api.register(current_user_groups)?; - api.register(current_user_ssh_key_list)?; - api.register(current_user_ssh_key_view)?; - api.register(current_user_ssh_key_create)?; - api.register(current_user_ssh_key_delete)?; - - // Customer network integration - api.register(networking_address_lot_list)?; - api.register(networking_address_lot_create)?; - api.register(networking_address_lot_delete)?; - api.register(networking_address_lot_block_list)?; - - api.register(networking_loopback_address_create)?; - api.register(networking_loopback_address_delete)?; - api.register(networking_loopback_address_list)?; - - api.register(networking_switch_port_settings_list)?; - api.register(networking_switch_port_settings_view)?; - api.register(networking_switch_port_settings_create)?; - api.register(networking_switch_port_settings_delete)?; - - api.register(networking_switch_port_list)?; - api.register(networking_switch_port_status)?; - api.register(networking_switch_port_apply_settings)?; - api.register(networking_switch_port_clear_settings)?; - - api.register(networking_bgp_config_create)?; - api.register(networking_bgp_config_list)?; - api.register(networking_bgp_status)?; - api.register(networking_bgp_imported_routes_ipv4)?; - api.register(networking_bgp_config_delete)?; - api.register(networking_bgp_announce_set_update)?; - api.register(networking_bgp_announce_set_list)?; - api.register(networking_bgp_announce_set_delete)?; - api.register(networking_bgp_message_history)?; - - api.register(networking_bfd_enable)?; - api.register(networking_bfd_disable)?; - api.register(networking_bfd_status)?; - - api.register(networking_allow_list_view)?; - api.register(networking_allow_list_update)?; - - api.register(utilization_view)?; - - // Fleet-wide API operations - api.register(silo_list)?; - 
api.register(silo_create)?; - api.register(silo_view)?; - api.register(silo_delete)?; - api.register(silo_policy_view)?; - api.register(silo_policy_update)?; - api.register(silo_ip_pool_list)?; - - api.register(silo_utilization_view)?; - api.register(silo_utilization_list)?; - - api.register(system_quotas_list)?; - api.register(silo_quotas_view)?; - api.register(silo_quotas_update)?; - - api.register(silo_identity_provider_list)?; - - api.register(saml_identity_provider_create)?; - api.register(saml_identity_provider_view)?; - - api.register(local_idp_user_create)?; - api.register(local_idp_user_delete)?; - api.register(local_idp_user_set_password)?; - - api.register(certificate_list)?; - api.register(certificate_create)?; - api.register(certificate_view)?; - api.register(certificate_delete)?; - - api.register(system_metric)?; - api.register(silo_metric)?; - api.register(timeseries_schema_list)?; - api.register(timeseries_query)?; - - api.register(system_update_put_repository)?; - api.register(system_update_get_repository)?; - - api.register(user_list)?; - api.register(silo_user_list)?; - api.register(silo_user_view)?; - api.register(group_list)?; - api.register(group_view)?; - - // Console API operations - api.register(console_api::login_begin)?; - api.register(console_api::login_local_begin)?; - api.register(console_api::login_local)?; - api.register(console_api::login_saml_begin)?; - api.register(console_api::login_saml_redirect)?; - api.register(console_api::login_saml)?; - api.register(console_api::logout)?; - - api.register(console_api::console_lookup)?; - api.register(console_api::console_projects)?; - api.register(console_api::console_projects_new)?; - api.register(console_api::console_silo_images)?; - api.register(console_api::console_silo_utilization)?; - api.register(console_api::console_silo_access)?; - api.register(console_api::console_root)?; - api.register(console_api::console_settings_page)?; - api.register(console_api::console_system_page)?; - api.register(console_api::asset)?; - - api.register(device_auth::device_auth_request)?; - api.register(device_auth::device_auth_verify)?; - api.register(device_auth::device_auth_success)?; - api.register(device_auth::device_auth_confirm)?; - api.register(device_auth::device_access_token)?; - - Ok(()) - } - - fn register_experimental( - api: &mut NexusApiDescription, - endpoint: T, - ) -> Result<(), ApiDescriptionRegisterError> - where - T: Into>, + nexus_external_api_mod::api_description::() + .expect("registered entrypoints") +} + +enum NexusExternalApiImpl {} + +impl NexusExternalApi for NexusExternalApiImpl { + type Context = ApiContext; + + async fn system_policy_view( + rqctx: RequestContext, + ) -> Result>, HttpError> { - let mut ep: ApiEndpoint = endpoint.into(); - // only one tag is allowed - ep.tags = vec![String::from("hidden")]; - ep.path = String::from("/experimental") + &ep.path; - api.register(ep) + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let policy = nexus.fleet_fetch_policy(&opctx).await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn system_policy_update( + rqctx: RequestContext, + new_policy: TypedBody>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let new_policy = new_policy.into_inner(); + let nasgns = 
new_policy.role_assignments.len(); + // This should have been validated during parsing. + bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let policy = nexus.fleet_update_policy(&opctx, &new_policy).await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await } - fn register_experimental_endpoints( - api: &mut NexusApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - register_experimental(api, probe_list)?; - register_experimental(api, probe_view)?; - register_experimental(api, probe_create)?; - register_experimental(api, probe_delete)?; + async fn policy_view( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo: NameOrId = opctx + .authn + .silo_required() + .internal_context("loading current silo")? + .id() + .into(); + + let silo_lookup = nexus.silo_lookup(&opctx, silo)?; + let policy = nexus.silo_fetch_policy(&opctx, &silo_lookup).await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - Ok(()) + async fn policy_update( + rqctx: RequestContext, + new_policy: TypedBody>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let new_policy = new_policy.into_inner(); + let nasgns = new_policy.role_assignments.len(); + // This should have been validated during parsing. + bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo: NameOrId = opctx + .authn + .silo_required() + .internal_context("loading current silo")? 
+ .id() + .into(); + let silo_lookup = nexus.silo_lookup(&opctx, silo)?; + let policy = nexus + .silo_update_policy(&opctx, &silo_lookup, &new_policy) + .await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await } - let conf = serde_json::from_str(include_str!("./tag-config.json")).unwrap(); - let mut api = NexusApiDescription::new().tag_config(conf); + async fn utilization_view( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = nexus.current_silo_lookup(&opctx)?; + let utilization = + nexus.silo_utilization_view(&opctx, &silo_lookup).await?; + + Ok(HttpResponseOk(utilization.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); + async fn silo_utilization_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quotas = + nexus.silo_utilization_view(&opctx, &silo_lookup).await?; + + Ok(HttpResponseOk(quotas.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await } - if let Err(err) = register_experimental_endpoints(&mut api) { - panic!("failed to register experimental entrypoints: {}", err); + async fn silo_utilization_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pagparams, scan_params)?; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let utilization = nexus + .silo_utilization_list(&opctx, &paginated_by) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + utilization, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await } - api -} -// API ENDPOINT FUNCTION NAMING CONVENTIONS -// -// Generally, HTTP resources are grouped within some collection. For a -// relatively simple example: -// -// GET v1/projects (list the projects in the collection) -// POST v1/projects (create a project in the collection) -// GET v1/projects/{project} (look up a project in the collection) -// DELETE v1/projects/{project} (delete a project in the collection) -// PUT v1/projects/{project} (update a project in the collection) -// -// We pick a name for the function that implements a given API entrypoint -// based on how we expect it to appear in the CLI subcommand hierarchy. 
For -// example: -// -// GET v1/projects -> project_list() -// POST v1/projects -> project_create() -// GET v1/projects/{project} -> project_view() -// DELETE v1/projects/{project} -> project_delete() -// PUT v1/projects/{project} -> project_update() -// -// Note that the path typically uses the entity's plural form while the -// function name uses its singular. -// -// Operations beyond list, create, view, delete, and update should use a -// descriptive noun or verb, again bearing in mind that this will be -// transcribed into the CLI and SDKs: -// -// POST -> instance_reboot -// POST -> instance_stop -// GET -> instance_serial_console -// -// Note that these function names end up in generated OpenAPI spec as the -// operationId for each endpoint, and therefore represent a contract with -// clients. Client generators use operationId to name API methods, so changing -// a function name is a breaking change from a client perspective. - -/// Ping API -/// -/// Always responds with Ok if it responds at all. -#[endpoint { - method = GET, - path = "/v1/ping", - tags = ["system/status"], -}] -async fn ping( - _rqctx: RequestContext, -) -> Result, HttpError> { - Ok(HttpResponseOk(views::Ping { status: views::PingStatus::Ok })) -} + async fn system_quotas_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let quotas = nexus + .fleet_list_quotas(&opctx, &pagparams) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + quotas, + &|_, quota: &SiloQuotas| quota.silo_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch top-level IAM policy -#[endpoint { - method = GET, - path = "/v1/system/policy", - tags = ["policy"], -}] -async fn system_policy_view( - rqctx: RequestContext, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let policy = nexus.fleet_fetch_policy(&opctx).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_quotas_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus.silo_quotas_view(&opctx, &silo_lookup).await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update top-level IAM policy -#[endpoint { - method = PUT, - path = "/v1/system/policy", - tags = ["policy"], -}] -async fn system_policy_update( - rqctx: RequestContext, - new_policy: TypedBody>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let new_policy = new_policy.into_inner(); - let nasgns = new_policy.role_assignments.len(); - // This should have been validated during 
parsing. - bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let policy = nexus.fleet_update_policy(&opctx, &new_policy).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_quotas_update( + rqctx: RequestContext, + path_params: Path, + new_quota: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = + nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; + let quota = nexus + .silo_update_quota( + &opctx, + &silo_lookup, + &new_quota.into_inner(), + ) + .await?; + Ok(HttpResponseOk(quota.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch current silo's IAM policy -#[endpoint { - method = GET, - path = "/v1/policy", - tags = ["silos"], - }] -pub(crate) async fn policy_view( - rqctx: RequestContext, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo: NameOrId = opctx - .authn - .silo_required() - .internal_context("loading current silo")? - .id() - .into(); - - let silo_lookup = nexus.silo_lookup(&opctx, silo)?; - let policy = nexus.silo_fetch_policy(&opctx, &silo_lookup).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silos = nexus + .silos_list(&opctx, &paginated_by) + .await? + .into_iter() + .map(|p| p.try_into()) + .collect::, Error>>()?; + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + silos, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update current silo's IAM policy -#[endpoint { - method = PUT, - path = "/v1/policy", - tags = ["silos"], -}] -async fn policy_update( - rqctx: RequestContext, - new_policy: TypedBody>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let new_policy = new_policy.into_inner(); - let nasgns = new_policy.role_assignments.len(); - // This should have been validated during parsing. - bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo: NameOrId = opctx - .authn - .silo_required() - .internal_context("loading current silo")? 
- .id() - .into(); - let silo_lookup = nexus.silo_lookup(&opctx, silo)?; - let policy = - nexus.silo_update_policy(&opctx, &silo_lookup, &new_policy).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_create( + rqctx: RequestContext, + new_silo_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let silo = + nexus.silo_create(&opctx, new_silo_params.into_inner()).await?; + Ok(HttpResponseCreated(silo.try_into()?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch resource utilization for user's current silo -#[endpoint { - method = GET, - path = "/v1/utilization", - tags = ["silos"], -}] -async fn utilization_view( - rqctx: RequestContext, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo_lookup = nexus.current_silo_lookup(&opctx)?; - let utilization = - nexus.silo_utilization_view(&opctx, &silo_lookup).await?; - - Ok(HttpResponseOk(utilization.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let (.., silo) = silo_lookup.fetch().await?; + Ok(HttpResponseOk(silo.try_into()?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch current utilization for given silo -#[endpoint { - method = GET, - path = "/v1/system/utilization/silos/{silo}", - tags = ["system/silos"], -}] -async fn silo_utilization_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; + async fn silo_ip_pool_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let pools = nexus + .silo_ip_pool_list(&opctx, &silo_lookup, &paginated_by) + .await? 
+ .iter() + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + pools, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo_lookup = - nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; - let quotas = nexus.silo_utilization_view(&opctx, &silo_lookup).await?; - - Ok(HttpResponseOk(quotas.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} -/// List current utilization state for all silos -#[endpoint { - method = GET, - path = "/v1/system/utilization/silos", - tags = ["system/silos"], -}] -async fn silo_utilization_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; + async fn silo_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let params = path_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, params.silo)?; + nexus.silo_delete(&opctx, &silo_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pagparams, scan_params)?; + async fn silo_policy_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let policy = nexus.silo_fetch_policy(&opctx, &silo_lookup).await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let utilization = nexus - .silo_utilization_list(&opctx, &paginated_by) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - utilization, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_policy_update( + rqctx: RequestContext, + path_params: Path, + new_policy: TypedBody>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let new_policy = new_policy.into_inner(); + let nasgns = new_policy.role_assignments.len(); + // This should have been validated during parsing. 
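+            // (Backstop only: the same limit should already have been
+            // enforced when the request body was parsed, so tripping this
+            // check indicates a validation bug rather than bad user input.)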
+ bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let policy = nexus + .silo_update_policy(&opctx, &silo_lookup, &new_policy) + .await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Lists resource quotas for all silos -#[endpoint { - method = GET, - path = "/v1/system/silo-quotas", - tags = ["system/silos"], -}] -async fn system_quotas_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; + // Silo-specific user endpoints + + async fn silo_user_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanById::from_query(&query)?; + let silo_lookup = + nexus.silo_lookup(&opctx, scan_params.selector.silo.clone())?; + let users = nexus + .silo_list_users(&opctx, &silo_lookup, &pag_params) + .await? + .into_iter() + .map(|i| i.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + users, + &|_, user: &User| user.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; + async fn silo_user_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; + let user = nexus + .silo_user_fetch(&opctx, &silo_lookup, path.user_id) + .await?; + Ok(HttpResponseOk(user.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let quotas = nexus - .fleet_list_quotas(&opctx, &pagparams) - .await? 
-            .into_iter()
-            .map(|p| p.into())
-            .collect();
-
-        Ok(HttpResponseOk(ScanById::results_page(
-            &query,
-            quotas,
-            &|_, quota: &SiloQuotas| quota.silo_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    // Silo identity providers
+
+    async fn silo_identity_provider_list(
+        rqctx: RequestContext<ApiContext>,
+        query_params: Query<PaginatedByNameOrId<params::SiloSelector>>,
+    ) -> Result<HttpResponseOk<ResultsPage<views::IdentityProvider>>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let pag_params = data_page_params_for(&rqctx, &query)?;
+            let scan_params = ScanByNameOrId::from_query(&query)?;
+            let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+            let silo_lookup =
+                nexus.silo_lookup(&opctx, scan_params.selector.silo.clone())?;
+            let identity_providers = nexus
+                .identity_provider_list(&opctx, &silo_lookup, &paginated_by)
+                .await?
+                .into_iter()
+                .map(|x| x.into())
+                .collect();
+            Ok(HttpResponseOk(ScanByNameOrId::results_page(
+                &query,
+                identity_providers,
+                &marker_for_name_or_id,
+            )?))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
-/// Fetch resource quotas for silo
-#[endpoint {
-    method = GET,
-    path = "/v1/system/silos/{silo}/quotas",
-    tags = ["system/silos"],
-}]
-async fn silo_quotas_view(
-    rqctx: RequestContext<ApiContext>,
-    path_params: Path<params::SiloPath>,
-) -> Result<HttpResponseOk<SiloQuotas>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
+    // Silo SAML identity providers
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let silo_lookup =
-            nexus.silo_lookup(&opctx, path_params.into_inner().silo)?;
-        let quota = nexus.silo_quotas_view(&opctx, &silo_lookup).await?;
-        Ok(HttpResponseOk(quota.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn saml_identity_provider_create(
+        rqctx: RequestContext<ApiContext>,
+        query_params: Query<params::SiloSelector>,
+        new_provider: TypedBody<params::SamlIdentityProviderCreate>,
+    ) -> Result<HttpResponseCreated<views::SamlIdentityProvider>, HttpError>
+    {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?;
+            let provider = nexus
+                .saml_identity_provider_create(
+                    &opctx,
+                    &silo_lookup,
+                    new_provider.into_inner(),
+                )
+                .await?;
+            Ok(HttpResponseCreated(provider.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
-/// Update resource quotas for silo
-///
-/// If a quota value is not specified, it will remain unchanged.
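The "not specified means unchanged" contract in the doc comment above is carried by `Option` fields on the update body. A minimal sketch of how that behaves at the serde layer (`SiloQuotasUpdateSketch` is a hypothetical stand-in, not the real `params::SiloQuotasUpdate`):

```rust
// Hypothetical stand-in for the real update params: for Option fields,
// serde deserializes a missing JSON key to None, and the handler treats
// None as "leave the current value alone".
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct SiloQuotasUpdateSketch {
    cpus: Option<i64>,
    memory: Option<u64>,
    storage: Option<u64>,
}

fn main() {
    let body = r#"{ "cpus": 16 }"#;
    let update: SiloQuotasUpdateSketch =
        serde_json::from_str(body).unwrap();
    assert_eq!(update.cpus, Some(16));
    // Omitted fields come through as None, i.e. "unchanged".
    assert!(update.memory.is_none() && update.storage.is_none());
}
```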
-#[endpoint { - method = PUT, - path = "/v1/system/silos/{silo}/quotas", - tags = ["system/silos"], -}] -async fn silo_quotas_update( - rqctx: RequestContext, - path_params: Path, - new_quota: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; + async fn saml_identity_provider_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let saml_identity_provider_selector = + params::SamlIdentityProviderSelector { + silo: Some(query.silo), + saml_identity_provider: path.provider, + }; + let (.., provider) = nexus + .saml_identity_provider_lookup( + &opctx, + saml_identity_provider_selector, + )? + .fetch() + .await?; + Ok(HttpResponseOk(provider.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo_lookup = - nexus.silo_lookup(&opctx, path_params.into_inner().silo)?; - let quota = nexus - .silo_update_quota(&opctx, &silo_lookup, &new_quota.into_inner()) - .await?; - Ok(HttpResponseOk(quota.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // TODO: no DELETE for identity providers? + + // "Local" Identity Provider + + async fn local_idp_user_create( + rqctx: RequestContext, + query_params: Query, + new_user_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; + let user = nexus + .local_idp_create_user( + &opctx, + &silo_lookup, + new_user_params.into_inner(), + ) + .await?; + Ok(HttpResponseCreated(user.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List silos -/// -/// Lists silos that are discoverable based on the current permissions. -#[endpoint { - method = GET, - path = "/v1/system/silos", - tags = ["system/silos"], -}] -async fn silo_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silos = nexus - .silos_list(&opctx, &paginated_by) - .await? 
- .into_iter() - .map(|p| p.try_into()) - .collect::, Error>>()?; - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - silos, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn local_idp_user_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; + nexus + .local_idp_delete_user(&opctx, &silo_lookup, path.user_id) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create a silo -#[endpoint { - method = POST, - path = "/v1/system/silos", - tags = ["system/silos"], -}] -async fn silo_create( - rqctx: RequestContext, - new_silo_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let silo = - nexus.silo_create(&opctx, new_silo_params.into_inner()).await?; - Ok(HttpResponseCreated(silo.try_into()?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn local_idp_user_set_password( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + update: TypedBody, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; + nexus + .local_idp_user_set_password( + &opctx, + &silo_lookup, + path.user_id, + update.into_inner(), + ) + .await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch silo -/// -/// Fetch silo by name or ID. -#[endpoint { - method = GET, - path = "/v1/system/silos/{silo}", - tags = ["system/silos"], -}] -async fn silo_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + async fn project_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let projects = nexus + .project_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + projects, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn project_create( + rqctx: RequestContext, + new_project: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project = + nexus.project_create(&opctx, &new_project.into_inner()).await?; + Ok(HttpResponseCreated(project.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn project_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; let path = path_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; - let (.., silo) = silo_lookup.fetch().await?; - Ok(HttpResponseOk(silo.try_into()?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = + params::ProjectSelector { project: path.project }; + let (.., project) = + nexus.project_lookup(&opctx, project_selector)?.fetch().await?; + Ok(HttpResponseOk(project.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List IP pools linked to silo -/// -/// Linked IP pools are available to users in the specified silo. A silo can -/// have at most one default pool. IPs are allocated from the default pool when -/// users ask for one without specifying a pool. -#[endpoint { - method = GET, - path = "/v1/system/silos/{silo}/ip-pools", - tags = ["system/silos"], -}] -async fn silo_ip_pool_list( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + async fn project_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; let path = path_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = + params::ProjectSelector { project: path.project }; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus.project_delete(&opctx, &project_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - - let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; - let pools = nexus - .silo_ip_pool_list(&opctx, &silo_lookup, &paginated_by) - .await? 
- .iter() - .map(|(pool, silo_link)| views::SiloIpPool { - identity: pool.identity(), - is_default: silo_link.is_default, - }) - .collect(); - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - pools, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - -/// Delete a silo -/// -/// Delete a silo by name or ID. -#[endpoint { - method = DELETE, - path = "/v1/system/silos/{silo}", - tags = ["system/silos"], -}] -async fn silo_delete( - rqctx: RequestContext, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + // TODO-correctness: Is it valid for PUT to accept application/json that's a + // subset of what the resource actually represents? If not, is that a problem? + // (HTTP may require that this be idempotent.) If so, can we get around that + // having this be a slightly different content-type (e.g., + // "application/json-patch")? We should see what other APIs do. + async fn project_update( + rqctx: RequestContext, + path_params: Path, + updated_project: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let params = path_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, params.silo)?; - nexus.silo_delete(&opctx, &silo_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let path = path_params.into_inner(); + let updated_project = updated_project.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = + params::ProjectSelector { project: path.project }; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + let project = nexus + .project_update(&opctx, &project_lookup, &updated_project) + .await?; + Ok(HttpResponseOk(project.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch silo IAM policy -#[endpoint { - method = GET, - path = "/v1/system/silos/{silo}/policy", - tags = ["system/silos"], -}] -async fn silo_policy_view( - rqctx: RequestContext, - path_params: Path, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + async fn project_policy_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; let path = path_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; - let policy = nexus.silo_fetch_policy(&opctx, &silo_lookup).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = + params::ProjectSelector { project: path.project }; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + let policy = + nexus.project_fetch_policy(&opctx, &project_lookup).await?; + Ok(HttpResponseOk(policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update silo IAM policy -#[endpoint { - method = PUT, - path = 
"/v1/system/silos/{silo}/policy", - tags = ["system/silos"], -}] -async fn silo_policy_update( - rqctx: RequestContext, - path_params: Path, - new_policy: TypedBody>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let new_policy = new_policy.into_inner(); - let nasgns = new_policy.role_assignments.len(); - // This should have been validated during parsing. - bail_unless!(nasgns <= shared::MAX_ROLE_ASSIGNMENTS_PER_RESOURCE); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + async fn project_policy_update( + rqctx: RequestContext, + path_params: Path, + new_policy: TypedBody>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; let path = path_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; - let policy = - nexus.silo_update_policy(&opctx, &silo_lookup, &new_policy).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let new_policy = new_policy.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = + params::ProjectSelector { project: path.project }; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus + .project_update_policy(&opctx, &project_lookup, &new_policy) + .await?; + Ok(HttpResponseOk(new_policy)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Silo-specific user endpoints - -/// List built-in (system) users in silo -#[endpoint { - method = GET, - path = "/v1/system/users", - tags = ["system/silos"], -}] -async fn silo_user_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanById::from_query(&query)?; - let silo_lookup = - nexus.silo_lookup(&opctx, scan_params.selector.silo.clone())?; - let users = nexus - .silo_list_users(&opctx, &silo_lookup, &pag_params) - .await? - .into_iter() - .map(|i| i.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - users, - &|_, user: &User| user.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // IP Pools + + async fn project_ip_pool_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let pools = nexus + .current_silo_ip_pool_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + pools, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Path parameters for Silo User requests -#[derive(Deserialize, JsonSchema)] -struct UserParam { - /// The user's internal id - user_id: Uuid, -} + async fn project_ip_pool_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let pool_selector = path_params.into_inner().pool; + let (pool, silo_link) = + nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; + Ok(HttpResponseOk(views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch built-in (system) user -#[endpoint { - method = GET, - path = "/v1/system/users/{user_id}", - tags = ["system/silos"], -}] -async fn silo_user_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; - let user = - nexus.silo_user_fetch(&opctx, &silo_lookup, path.user_id).await?; - Ok(HttpResponseOk(user.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let pools = nexus + .ip_pools_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(IpPool::from) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + pools, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Silo identity providers - -/// List a silo's IdP's name -#[endpoint { - method = GET, - path = "/v1/system/identity-providers", - tags = ["system/silos"], -}] -async fn silo_identity_provider_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let silo_lookup = - nexus.silo_lookup(&opctx, scan_params.selector.silo.clone())?; - let identity_providers = nexus - .identity_provider_list(&opctx, &silo_lookup, &paginated_by) - .await? - .into_iter() - .map(|x| x.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - identity_providers, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_create( + rqctx: RequestContext, + pool_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let pool_params = pool_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let pool = nexus.ip_pool_create(&opctx, &pool_params).await?; + Ok(HttpResponseCreated(IpPool::from(pool))) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Silo SAML identity providers - -/// Create SAML IdP -#[endpoint { - method = POST, - path = "/v1/system/identity-providers/saml", - tags = ["system/silos"], -}] -async fn saml_identity_provider_create( - rqctx: RequestContext, - query_params: Query, - new_provider: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; - let provider = nexus - .saml_identity_provider_create( - &opctx, - &silo_lookup, - new_provider.into_inner(), - ) - .await?; - Ok(HttpResponseCreated(provider.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let pool_selector = path_params.into_inner().pool; + // We do not prevent the service pool from being fetched by name or ID + // like we do for update, delete, associate. 
+            let (.., pool) =
+                nexus.ip_pool_lookup(&opctx, &pool_selector)?.fetch().await?;
+            Ok(HttpResponseOk(IpPool::from(pool)))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
-/// Fetch SAML IdP
-#[endpoint {
-    method = GET,
-    path = "/v1/system/identity-providers/saml/{provider}",
-    tags = ["system/silos"],
-}]
-async fn saml_identity_provider_view(
-    rqctx: RequestContext<ApiContext>,
-    path_params: Path<params::ProviderPath>,
-    query_params: Query<params::SiloSelector>,
-) -> Result<HttpResponseOk<views::SamlIdentityProvider>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let saml_identity_provider_selector =
-            params::SamlIdentityProviderSelector {
-                silo: Some(query.silo),
-                saml_identity_provider: path.provider,
-            };
-        let (.., provider) = nexus
-            .saml_identity_provider_lookup(
-                &opctx,
-                saml_identity_provider_selector,
-            )?
-            .fetch()
-            .await?;
-        Ok(HttpResponseOk(provider.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn ip_pool_delete(
+        rqctx: RequestContext<ApiContext>,
+        path_params: Path<params::IpPoolPath>,
+    ) -> Result<HttpResponseDeleted, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
+            nexus.ip_pool_delete(&opctx, &pool_lookup).await?;
+            Ok(HttpResponseDeleted())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
-// TODO: no DELETE for identity providers?
-
-// "Local" Identity Provider
-
-/// Create user
-///
-/// Users can only be created in Silos with `provision_type` == `Fixed`.
-/// Otherwise, Silo users are just-in-time (JIT) provisioned when a user first
-/// logs in using an external Identity Provider.
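The rule in the doc comment above (explicit creation only for `Fixed` provisioning, never for JIT silos) is the kind of check `local_idp_create_user` has to make before touching the database. A sketch under assumed names (`UserProvisionTypeSketch` is illustrative, not the real nexus type):

```rust
// Illustrative only: mirrors the provisioning rule stated above.
enum UserProvisionTypeSketch {
    Fixed, // users are created explicitly through this endpoint
    Jit,   // users appear on first login via an external IdP
}

fn check_user_create_allowed(
    provision_type: &UserProvisionTypeSketch,
) -> Result<(), String> {
    match provision_type {
        UserProvisionTypeSketch::Fixed => Ok(()),
        UserProvisionTypeSketch::Jit => Err(
            "users can only be created in silos with fixed provisioning"
                .to_string(),
        ),
    }
}
```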
-#[endpoint {
-    method = POST,
-    path = "/v1/system/identity-providers/local/users",
-    tags = ["system/silos"],
-}]
-async fn local_idp_user_create(
-    rqctx: RequestContext<ApiContext>,
-    query_params: Query<params::SiloSelector>,
-    new_user_params: TypedBody<params::UserCreate>,
-) -> Result<HttpResponseCreated<User>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?;
-        let user = nexus
-            .local_idp_create_user(
-                &opctx,
-                &silo_lookup,
-                new_user_params.into_inner(),
-            )
-            .await?;
-        Ok(HttpResponseCreated(user.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Delete user
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/identity-providers/local/users/{user_id}",
-    tags = ["system/silos"],
-}]
-async fn local_idp_user_delete(
-    rqctx: RequestContext<ApiContext>,
-    path_params: Path<UserParam>,
-    query_params: Query<params::SiloSelector>,
-) -> Result<HttpResponseDeleted, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?;
-        nexus.local_idp_delete_user(&opctx, &silo_lookup, path.user_id).await?;
-        Ok(HttpResponseDeleted())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn ip_pool_update(
+        rqctx: RequestContext<ApiContext>,
+        path_params: Path<params::IpPoolPath>,
+        updates: TypedBody<params::IpPoolUpdate>,
+    ) -> Result<HttpResponseOk<views::IpPool>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let updates = updates.into_inner();
+            let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
+            let pool =
+                nexus.ip_pool_update(&opctx, &pool_lookup, &updates).await?;
+            Ok(HttpResponseOk(pool.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
-/// Set or invalidate user's password
-///
-/// Passwords can only be updated for users in Silos with identity mode
-/// `LocalOnly`.
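"Set or invalidate" implies the request body is a two-armed choice rather than a bare string. A sketch of that shape (variant names are assumptions, not necessarily the real `params::UserPassword`):

```rust
// Sketch: one arm supplies a new password, the other clears it so that
// password login is refused until a new one is set.
enum UserPasswordSketch {
    Password(String),
    LoginDisallowed,
}

fn describe(update: &UserPasswordSketch) -> &'static str {
    match update {
        UserPasswordSketch::Password(_) => "set a new password",
        UserPasswordSketch::LoginDisallowed => "invalidate the password",
    }
}
```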
-#[endpoint { - method = POST, - path = "/v1/system/identity-providers/local/users/{user_id}/set-password", - tags = ["system/silos"], -}] -async fn local_idp_user_set_password( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - update: TypedBody, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let silo_lookup = nexus.silo_lookup(&opctx, query.silo)?; - nexus - .local_idp_user_set_password( - &opctx, - &silo_lookup, - path.user_id, - update.into_inner(), - ) - .await?; - Ok(HttpResponseUpdatedNoContent()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_utilization_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let pool_selector = path_params.into_inner().pool; + // We do not prevent the service pool from being fetched by name or ID + // like we do for update, delete, associate. + let pool_lookup = nexus.ip_pool_lookup(&opctx, &pool_selector)?; + let utilization = + nexus.ip_pool_utilization_view(&opctx, &pool_lookup).await?; + Ok(HttpResponseOk(utilization.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List projects -#[endpoint { - method = GET, - path = "/v1/projects", - tags = ["projects"], -}] -async fn project_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let projects = nexus - .project_list(&opctx, &paginated_by) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - projects, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_silo_list( + rqctx: RequestContext, + path_params: Path, + // paginating by resource_id because they're unique per pool. most robust + // option would be to paginate by a composite key representing the (pool, + // resource_type, resource) + query_params: Query, + // TODO: this could just list views::Silo -- it's not like knowing silo_id + // and nothing else is particularly useful -- except we also want to say + // whether the pool is marked default on each silo. So one option would + // be to do the same as we did with SiloIpPool -- include is_default on + // whatever the thing is. Still... 
all we'd have to do to make this usable + // in both places would be to make it { ...IpPool, silo_id, silo_name, + // is_default } + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; -/// Create project -#[endpoint { - method = POST, - path = "/v1/projects", - tags = ["projects"], -}] -async fn project_create( - rqctx: RequestContext, - new_project: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project = - nexus.project_create(&opctx, &new_project.into_inner()).await?; - Ok(HttpResponseCreated(project.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; -/// Fetch project -#[endpoint { - method = GET, - path = "/v1/projects/{project}", - tags = ["projects"], -}] -async fn project_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_selector = - params::ProjectSelector { project: path.project }; - let (.., project) = - nexus.project_lookup(&opctx, project_selector)?.fetch().await?; - Ok(HttpResponseOk(project.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let path = path_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; -/// Delete project -#[endpoint { - method = DELETE, - path = "/v1/projects/{project}", - tags = ["projects"], -}] -async fn project_delete( - rqctx: RequestContext, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_selector = - params::ProjectSelector { project: path.project }; - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - nexus.project_delete(&opctx, &project_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let assocs = nexus + .ip_pool_silo_list(&opctx, &pool_lookup, &pag_params) + .await? + .into_iter() + .map(|assoc| assoc.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + assocs, + &|_, x: &views::IpPoolSiloLink| x.silo_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// TODO-correctness: Is it valid for PUT to accept application/json that's a -// subset of what the resource actually represents? If not, is that a problem? -// (HTTP may require that this be idempotent.) If so, can we get around that -// having this be a slightly different content-type (e.g., -// "application/json-patch")? We should see what other APIs do. 
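One way to answer the idempotency worry in the TODO above: as long as every field in the update body is an absolute value rather than a delta, applying the same body twice leaves the resource in the same state, which is what PUT requires. A quick illustration with stand-in types (not the real `params::ProjectUpdate`):

```rust
// Stand-in types: partial updates made of absolute values are idempotent.
#[derive(Clone, Debug, PartialEq)]
struct ProjectSketch {
    name: String,
    description: String,
}

struct ProjectUpdateSketch {
    name: Option<String>,
    description: Option<String>,
}

fn apply(p: &ProjectSketch, u: &ProjectUpdateSketch) -> ProjectSketch {
    ProjectSketch {
        name: u.name.clone().unwrap_or_else(|| p.name.clone()),
        description: u
            .description
            .clone()
            .unwrap_or_else(|| p.description.clone()),
    }
}

fn main() {
    let p = ProjectSketch { name: "demo".into(), description: "old".into() };
    let u = ProjectUpdateSketch { name: None, description: Some("new".into()) };
    let once = apply(&p, &u);
    let twice = apply(&once, &u);
    assert_eq!(once, twice); // second application is a no-op
}
```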
-/// Update a project -#[endpoint { - method = PUT, - path = "/v1/projects/{project}", - tags = ["projects"], -}] -async fn project_update( - rqctx: RequestContext, - path_params: Path, - updated_project: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let updated_project = updated_project.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_selector = - params::ProjectSelector { project: path.project }; - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - let project = nexus - .project_update(&opctx, &project_lookup, &updated_project) - .await?; - Ok(HttpResponseOk(project.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_silo_link( + rqctx: RequestContext, + path_params: Path, + resource_assoc: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let resource_assoc = resource_assoc.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let assoc = nexus + .ip_pool_link_silo(&opctx, &pool_lookup, &resource_assoc) + .await?; + Ok(HttpResponseCreated(assoc.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch project's IAM policy -#[endpoint { - method = GET, - path = "/v1/projects/{project}/policy", - tags = ["projects"], -}] -async fn project_policy_view( - rqctx: RequestContext, - path_params: Path, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_selector = - params::ProjectSelector { project: path.project }; - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - let policy = - nexus.project_fetch_policy(&opctx, &project_lookup).await?; - Ok(HttpResponseOk(policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_silo_unlink( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + nexus + .ip_pool_unlink_silo(&opctx, &pool_lookup, &silo_lookup) + .await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update project's IAM policy -#[endpoint { - method = PUT, - path = "/v1/projects/{project}/policy", - tags = ["projects"], -}] -async fn project_policy_update( - rqctx: RequestContext, - path_params: Path, - new_policy: TypedBody>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let new_policy = new_policy.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - 
let project_selector = - params::ProjectSelector { project: path.project }; - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - nexus - .project_update_policy(&opctx, &project_lookup, &new_policy) - .await?; - Ok(HttpResponseOk(new_policy)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_silo_update( + rqctx: RequestContext, + path_params: Path, + update: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let update = update.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let assoc = nexus + .ip_pool_silo_update( + &opctx, + &pool_lookup, + &silo_lookup, + &update, + ) + .await?; + Ok(HttpResponseOk(assoc.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// IP Pools - -/// List IP pools -#[endpoint { - method = GET, - path = "/v1/ip-pools", - tags = ["projects"], -}] -async fn project_ip_pool_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { + async fn ip_pool_service_view( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let pools = nexus - .current_silo_ip_pool_list(&opctx, &paginated_by) - .await? 
- .into_iter() - .map(|(pool, silo_link)| views::SiloIpPool { - identity: pool.identity(), - is_default: silo_link.is_default, - }) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - pools, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let pool = nexus.ip_pool_service_fetch(&opctx).await?; + Ok(HttpResponseOk(IpPool::from(pool))) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch IP pool -#[endpoint { - method = GET, - path = "/v1/ip-pools/{pool}", - tags = ["projects"], -}] -async fn project_ip_pool_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let pool_selector = path_params.into_inner().pool; - let (pool, silo_link) = - nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; - Ok(HttpResponseOk(views::SiloIpPool { - identity: pool.identity(), - is_default: silo_link.is_default, - })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_range_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let path = path_params.into_inner(); + let marker = match query.page { + WhichPage::First(_) => None, + WhichPage::Next(ref addr) => Some(addr), + }; + let pag_params = DataPageParams { + limit: rqctx.page_limit(&query)?, + direction: PaginationOrder::Ascending, + marker, + }; + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let ranges = nexus + .ip_pool_list_ranges(&opctx, &pool_lookup, &pag_params) + .await? + .into_iter() + .map(|range| range.into()) + .collect(); + Ok(HttpResponseOk(ResultsPage::new( + ranges, + &EmptyScanParams {}, + |range: &IpPoolRange, _| { + IpNetwork::from(range.range.first_address()) + }, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List IP pools -#[endpoint { - method = GET, - path = "/v1/system/ip-pools", - tags = ["system/networking"], -}] -async fn ip_pool_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let pools = nexus - .ip_pools_list(&opctx, &paginated_by) - .await? 
- .into_iter() - .map(IpPool::from) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - pools, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_range_add( + rqctx: RequestContext, + path_params: Path, + range_params: TypedBody, + ) -> Result, HttpError> { + let apictx = &rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let range = range_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + let out = + nexus.ip_pool_add_range(&opctx, &pool_lookup, &range).await?; + Ok(HttpResponseCreated(out.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create IP pool -#[endpoint { - method = POST, - path = "/v1/system/ip-pools", - tags = ["system/networking"], -}] -async fn ip_pool_create( - rqctx: RequestContext, - pool_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let pool_params = pool_params.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let pool = nexus.ip_pool_create(&opctx, &pool_params).await?; - Ok(HttpResponseCreated(IpPool::from(pool))) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn ip_pool_range_remove( + rqctx: RequestContext, + path_params: Path, + range_params: TypedBody, + ) -> Result { + let apictx = &rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let range = range_params.into_inner(); + let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?; + nexus.ip_pool_delete_range(&opctx, &pool_lookup, &range).await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch IP pool -#[endpoint { - method = GET, - path = "/v1/system/ip-pools/{pool}", - tags = ["system/networking"], -}] -async fn ip_pool_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let pool_selector = path_params.into_inner().pool; - // We do not prevent the service pool from being fetched by name or ID - // like we do for update, delete, associate. 
-        let (.., pool) =
-            nexus.ip_pool_lookup(&opctx, &pool_selector)?.fetch().await?;
-        Ok(HttpResponseOk(IpPool::from(pool)))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn ip_pool_service_range_list(
+        rqctx: RequestContext,
+        query_params: Query,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let marker = match query.page {
+                WhichPage::First(_) => None,
+                WhichPage::Next(ref addr) => Some(addr),
+            };
+            let pag_params = DataPageParams {
+                limit: rqctx.page_limit(&query)?,
+                direction: PaginationOrder::Ascending,
+                marker,
+            };
+            let ranges = nexus
+                .ip_pool_service_list_ranges(&opctx, &pag_params)
+                .await?
+                .into_iter()
+                .map(|range| range.into())
+                .collect();
+            Ok(HttpResponseOk(ResultsPage::new(
+                ranges,
+                &EmptyScanParams {},
+                |range: &IpPoolRange, _| {
+                    IpNetwork::from(range.range.first_address())
+                },
+            )?))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Delete IP pool
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/ip-pools/{pool}",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_delete(
-    rqctx: RequestContext,
-    path_params: Path,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        nexus.ip_pool_delete(&opctx, &pool_lookup).await?;
-        Ok(HttpResponseDeleted())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn ip_pool_service_range_add(
+        rqctx: RequestContext,
+        range_params: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = &rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let range = range_params.into_inner();
+            let out = nexus.ip_pool_service_add_range(&opctx, &range).await?;
+            Ok(HttpResponseCreated(out.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Update IP pool
-#[endpoint {
-    method = PUT,
-    path = "/v1/system/ip-pools/{pool}",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_update(
-    rqctx: RequestContext,
-    path_params: Path,
-    updates: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn ip_pool_service_range_remove(
+        rqctx: RequestContext,
+        range_params: TypedBody,
+    ) -> Result {
+        let apictx = &rqctx.context();
         let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let updates = updates.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let pool = nexus.ip_pool_update(&opctx, &pool_lookup, &updates).await?;
-        Ok(HttpResponseOk(pool.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let range = range_params.into_inner();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            nexus.ip_pool_service_delete_range(&opctx, &range).await?;
+            Ok(HttpResponseUpdatedNoContent())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch IP pool utilization
-#[endpoint {
-    method = GET,
-    path = "/v1/system/ip-pools/{pool}/utilization",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_utilization_view(
-    rqctx: RequestContext,
-    path_params: Path,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let pool_selector = path_params.into_inner().pool;
-        // We do not prevent the service pool from being fetched by name or ID
-        // like we do for update, delete, associate.
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &pool_selector)?;
-        let utilization =
-            nexus.ip_pool_utilization_view(&opctx, &pool_lookup).await?;
-        Ok(HttpResponseOk(utilization.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    // Floating IP Addresses
+
+    async fn floating_ip_list(
+        rqctx: RequestContext,
+        query_params: Query>,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let query = query_params.into_inner();
+            let pag_params = data_page_params_for(&rqctx, &query)?;
+            let scan_params = ScanByNameOrId::from_query(&query)?;
+            let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+            let project_lookup =
+                nexus.project_lookup(&opctx, scan_params.selector.clone())?;
+            let ips = nexus
+                .floating_ips_list(&opctx, &project_lookup, &paginated_by)
+                .await?;
+            Ok(HttpResponseOk(ScanByNameOrId::results_page(
+                &query,
+                ips,
+                &marker_for_name_or_id,
+            )?))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// List IP pool's linked silos
-#[endpoint {
-    method = GET,
-    path = "/v1/system/ip-pools/{pool}/silos",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_silo_list(
-    rqctx: RequestContext,
-    path_params: Path,
-    // paginating by resource_id because they're unique per pool. most robust
-    // option would be to paginate by a composite key representing the (pool,
-    // resource_type, resource)
-    query_params: Query,
-    // TODO: this could just list views::Silo -- it's not like knowing silo_id
-    // and nothing else is particularly useful -- except we also want to say
-    // whether the pool is marked default on each silo. So one option would
-    // be to do the same as we did with SiloIpPool -- include is_default on
-    // whatever the thing is. Still... all we'd have to do to make this usable
-    // in both places would be to make it { ...IpPool, silo_id, silo_name,
-    // is_default }
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
+    async fn floating_ip_create(
+        rqctx: RequestContext,
+        query_params: Query,
+        floating_params: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let nexus = &apictx.context.nexus;
+        let floating_params = floating_params.into_inner();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let project_lookup =
+                nexus.project_lookup(&opctx, query_params.into_inner())?;
+            let ip = nexus
+                .floating_ip_create(&opctx, &project_lookup, floating_params)
+                .await?;
+            Ok(HttpResponseCreated(ip))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-        let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
+    async fn floating_ip_update(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+        updated_floating_ip: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let updated_floating_ip_params = updated_floating_ip.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let floating_ip_selector = params::FloatingIpSelector {
+                project: query.project,
+                floating_ip: path.floating_ip,
+            };
+            let floating_ip_lookup =
+                nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
+            let floating_ip = nexus
+                .floating_ip_update(
+                    &opctx,
+                    floating_ip_lookup,
+                    updated_floating_ip_params,
+                )
+                .await?;
+            Ok(HttpResponseOk(floating_ip))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-        let path = path_params.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-
-        let assocs = nexus
-            .ip_pool_silo_list(&opctx, &pool_lookup, &pag_params)
-            .await?
-            .into_iter()
-            .map(|assoc| assoc.into())
-            .collect();
-
-        Ok(HttpResponseOk(ScanById::results_page(
-            &query,
-            assocs,
-            &|_, x: &views::IpPoolSiloLink| x.silo_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn floating_ip_delete(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let floating_ip_selector = params::FloatingIpSelector {
+                floating_ip: path.floating_ip,
+                project: query.project,
+            };
+            let fip_lookup =
+                nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;

-/// Link IP pool to silo
-///
-/// Users in linked silos can allocate external IPs from this pool for their
-/// instances. A silo can have at most one default pool. IPs are allocated from
-/// the default pool when users ask for one without specifying a pool.
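// NOTE (editor): the floating-IP handlers above all resolve their target the
// same way: a params::FloatingIpSelector is built from the path component
// plus the optional project qualifier in the query string, then turned into
// a lookup. A minimal sketch of that flow, using only names from the diff:
//
//     let floating_ip_selector = params::FloatingIpSelector {
//         project: query.project,
//         floating_ip: path.floating_ip,
//     };
//     let floating_ip_lookup =
//         nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
//
// Constructing the lookup is cheap; the later fetch/update/delete call on it
// is what actually performs the authorization check and the database work.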
-#[endpoint {
-    method = POST,
-    path = "/v1/system/ip-pools/{pool}/silos",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_silo_link(
-    rqctx: RequestContext,
-    path_params: Path,
-    resource_assoc: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let resource_assoc = resource_assoc.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let assoc = nexus
-            .ip_pool_link_silo(&opctx, &pool_lookup, &resource_assoc)
-            .await?;
-        Ok(HttpResponseCreated(assoc.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            nexus.floating_ip_delete(&opctx, fip_lookup).await?;
+            Ok(HttpResponseDeleted())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Unlink IP pool from silo
-///
-/// Will fail if there are any outstanding IPs allocated in the silo.
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/ip-pools/{pool}/silos/{silo}",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_silo_unlink(
-    rqctx: RequestContext,
-    path_params: Path,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?;
-        nexus.ip_pool_unlink_silo(&opctx, &pool_lookup, &silo_lookup).await?;
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn floating_ip_view(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let floating_ip_selector = params::FloatingIpSelector {
+                floating_ip: path.floating_ip,
+                project: query.project,
+            };
+            let (.., fip) = nexus
+                .floating_ip_lookup(&opctx, floating_ip_selector)?
+                .fetch()
+                .await?;
+            Ok(HttpResponseOk(fip.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Make IP pool default for silo
-///
-/// When a user asks for an IP (e.g., at instance create time) without
-/// specifying a pool, the IP comes from the default pool if a default is
-/// configured. When a pool is made the default for a silo, any existing default
-/// will remain linked to the silo, but will no longer be the default.
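// NOTE (editor): floating_ip_view above shows the read-side variant of the
// selector pattern: the lookup is immediately fetched, and the tuple of
// ancestor resources is discarded with `..`. Sketch (names from the diff):
//
//     let (.., fip) = nexus
//         .floating_ip_lookup(&opctx, floating_ip_selector)?
//         .fetch()
//         .await?;
//     Ok(HttpResponseOk(fip.into()))
//
// fetch() yields the chain of parent resources along with the leaf record;
// handlers that only need the leaf bind it with `(.., fip)`.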
-#[endpoint {
-    method = PUT,
-    path = "/v1/system/ip-pools/{pool}/silos/{silo}",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_silo_update(
-    rqctx: RequestContext,
-    path_params: Path,
-    update: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let update = update.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?;
-        let assoc = nexus
-            .ip_pool_silo_update(&opctx, &pool_lookup, &silo_lookup, &update)
-            .await?;
-        Ok(HttpResponseOk(assoc.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn floating_ip_attach(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+        target: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let floating_ip_selector = params::FloatingIpSelector {
+                floating_ip: path.floating_ip,
+                project: query.project,
+            };
+            let ip = nexus
+                .floating_ip_attach(
+                    &opctx,
+                    floating_ip_selector,
+                    target.into_inner(),
+                )
+                .await?;
+            Ok(HttpResponseAccepted(ip))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch Oxide service IP pool
-#[endpoint {
-    method = GET,
-    path = "/v1/system/ip-pools-service",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_service_view(
-    rqctx: RequestContext,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let pool = nexus.ip_pool_service_fetch(&opctx).await?;
-        Ok(HttpResponseOk(IpPool::from(pool)))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn floating_ip_detach(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let floating_ip_selector = params::FloatingIpSelector {
+                floating_ip: path.floating_ip,
+                project: query.project,
+            };
+            let fip_lookup =
+                nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
+            let ip = nexus.floating_ip_detach(&opctx, fip_lookup).await?;
+            Ok(HttpResponseAccepted(ip))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-type IpPoolRangePaginationParams = PaginationParams;
-
-/// List ranges for IP pool
-///
-/// Ranges are ordered by their first address.
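// NOTE (editor): the type alias above lost its parameters in transit;
// judging from the handler bodies it is presumably
// PaginationParams<EmptyScanParams, IpNetwork>, since the range-list
// handlers paginate with an IpNetwork marker and no scan parameters. The
// recurring pagination shape, as it appears in the bodies in this diff:
//
//     let marker = match query.page {
//         WhichPage::First(_) => None,
//         WhichPage::Next(ref addr) => Some(addr),
//     };
//     let pag_params = DataPageParams {
//         limit: rqctx.page_limit(&query)?,
//         direction: PaginationOrder::Ascending,
//         marker,
//     };
//
// and each page is keyed by the first address of the last range returned:
//
//     |range: &IpPoolRange, _| IpNetwork::from(range.range.first_address())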
-#[endpoint {
-    method = GET,
-    path = "/v1/system/ip-pools/{pool}/ranges",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_range_list(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let path = path_params.into_inner();
-        let marker = match query.page {
-            WhichPage::First(_) => None,
-            WhichPage::Next(ref addr) => Some(addr),
-        };
-        let pag_params = DataPageParams {
-            limit: rqctx.page_limit(&query)?,
-            direction: PaginationOrder::Ascending,
-            marker,
-        };
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let ranges = nexus
-            .ip_pool_list_ranges(&opctx, &pool_lookup, &pag_params)
-            .await?
-            .into_iter()
-            .map(|range| range.into())
-            .collect();
-        Ok(HttpResponseOk(ResultsPage::new(
-            ranges,
-            &EmptyScanParams {},
-            |range: &IpPoolRange, _| {
-                IpNetwork::from(range.range.first_address())
-            },
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    // Disks
+
+    async fn disk_list(
+        rqctx: RequestContext,
+        query_params: Query>,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let pag_params = data_page_params_for(&rqctx, &query)?;
+            let scan_params = ScanByNameOrId::from_query(&query)?;
+            let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+            let project_lookup =
+                nexus.project_lookup(&opctx, scan_params.selector.clone())?;
+            let disks = nexus
+                .disk_list(&opctx, &project_lookup, &paginated_by)
+                .await?
+                .into_iter()
+                .map(|disk| disk.into())
+                .collect();
+            Ok(HttpResponseOk(ScanByNameOrId::results_page(
+                &query,
+                disks,
+                &marker_for_name_or_id,
+            )?))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Add range to IP pool
-///
-/// IPv6 ranges are not allowed yet.
-#[endpoint {
-    method = POST,
-    path = "/v1/system/ip-pools/{pool}/ranges/add",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_range_add(
-    rqctx: RequestContext,
-    path_params: Path,
-    range_params: TypedBody,
-) -> Result, HttpError> {
-    let apictx = &rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let range = range_params.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        let out = nexus.ip_pool_add_range(&opctx, &pool_lookup, &range).await?;
-        Ok(HttpResponseCreated(out.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    // TODO-correctness See note about instance create. This should be async.
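// NOTE (editor): the overall shape of this diff: the `-` side deletes free
// functions carrying their own #[endpoint { ... }] attributes, and the `+`
// side re-adds the same bodies, indented one level, as methods of an API
// trait implementation. A minimal sketch of the two styles, assuming
// dropshot's trait-based API support; the trait and impl names here are
// illustrative, not taken from this hunk:
//
//     // before: a free function carrying its own route metadata
//     #[endpoint { method = GET, path = "/v1/disks", tags = ["disks"] }]
//     async fn disk_list(rqctx: RequestContext<ApiContext>, /* ... */) { /* ... */ }
//
//     // after: a method on an impl of the API trait; the route metadata
//     // lives on the trait definition, not on the impl
//     impl NexusExternalApi for NexusExternalApiImpl {
//         async fn disk_list(rqctx: RequestContext<Self::Context>, /* ... */) { /* ... */ }
//     }
//
// which is also consistent with the angle-bracketed type parameters having
// been stripped from the signatures in this rendering of the diff.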
+    async fn disk_create(
+        rqctx: RequestContext,
+        query_params: Query,
+        new_disk: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let params = new_disk.into_inner();
+            let project_lookup = nexus.project_lookup(&opctx, query)?;
+            let disk = nexus
+                .project_create_disk(&opctx, &project_lookup, &params)
+                .await?;
+            Ok(HttpResponseCreated(disk.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Remove range from IP pool
-#[endpoint {
-    method = POST,
-    path = "/v1/system/ip-pools/{pool}/ranges/remove",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_range_remove(
-    rqctx: RequestContext,
-    path_params: Path,
-    range_params: TypedBody,
-) -> Result {
-    let apictx = &rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let range = range_params.into_inner();
-        let pool_lookup = nexus.ip_pool_lookup(&opctx, &path.pool)?;
-        nexus.ip_pool_delete_range(&opctx, &pool_lookup, &range).await?;
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_view(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let (.., disk) =
+                nexus.disk_lookup(&opctx, disk_selector)?.fetch().await?;
+            Ok(HttpResponseOk(disk.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// List IP ranges for the Oxide service pool
-///
-/// Ranges are ordered by their first address.
-#[endpoint {
-    method = GET,
-    path = "/v1/system/ip-pools-service/ranges",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_service_range_list(
-    rqctx: RequestContext,
-    query_params: Query,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let marker = match query.page {
-            WhichPage::First(_) => None,
-            WhichPage::Next(ref addr) => Some(addr),
-        };
-        let pag_params = DataPageParams {
-            limit: rqctx.page_limit(&query)?,
-            direction: PaginationOrder::Ascending,
-            marker,
-        };
-        let ranges = nexus
-            .ip_pool_service_list_ranges(&opctx, &pag_params)
-            .await?
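// NOTE (editor): disk_view above is the canonical selector construction for
// project-scoped resources: a NameOrId from the path plus an optional
// project qualifier from the query string. Sketch (names from the diff):
//
//     let disk_selector = params::DiskSelector {
//         disk: path.disk,          // a name or a UUID
//         project: query.project,   // required when `disk` is a name
//     };
//     let (.., disk) =
//         nexus.disk_lookup(&opctx, disk_selector)?.fetch().await?;
//
// The "required when `disk` is a name" comment is an inference: a UUID is
// globally unique, while a name is only unique within its project, so the
// lookup can reject name-based selectors that lack a project.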
-            .into_iter()
-            .map(|range| range.into())
-            .collect();
-        Ok(HttpResponseOk(ResultsPage::new(
-            ranges,
-            &EmptyScanParams {},
-            |range: &IpPoolRange, _| {
-                IpNetwork::from(range.range.first_address())
-            },
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_delete(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
+            nexus.project_delete_disk(&opctx, &disk_lookup).await?;
+            Ok(HttpResponseDeleted())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Add IP range to Oxide service pool
-///
-/// IPv6 ranges are not allowed yet.
-#[endpoint {
-    method = POST,
-    path = "/v1/system/ip-pools-service/ranges/add",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_service_range_add(
-    rqctx: RequestContext,
-    range_params: TypedBody,
-) -> Result, HttpError> {
-    let apictx = &rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let range = range_params.into_inner();
-        let out = nexus.ip_pool_service_add_range(&opctx, &range).await?;
-        Ok(HttpResponseCreated(out.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_metrics_list(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query<
+            PaginationParams,
+        >,
+        selector_params: Query,
+    ) -> Result>, HttpError>
+    {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+
+            let selector = selector_params.into_inner();
+            let limit = rqctx.page_limit(&query)?;
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: selector.project,
+            };
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let (.., authz_disk) = nexus
+                .disk_lookup(&opctx, disk_selector)?
+                .lookup_for(authz::Action::Read)
+                .await?;

-/// Remove IP range from Oxide service pool
-#[endpoint {
-    method = POST,
-    path = "/v1/system/ip-pools-service/ranges/remove",
-    tags = ["system/networking"],
-}]
-async fn ip_pool_service_range_remove(
-    rqctx: RequestContext,
-    range_params: TypedBody,
-) -> Result {
-    let apictx = &rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let range = range_params.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus.ip_pool_service_delete_range(&opctx, &range).await?;
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            let result = nexus
+                .select_timeseries(
+                    &format!("crucible_upstairs:{}", path.metric),
+                    &[&format!("upstairs_uuid=={}", authz_disk.id())],
+                    query,
+                    limit,
+                )
+                .await?;

-// Floating IP Addresses
-
-/// List floating IPs
-#[endpoint {
-    method = GET,
-    path = "/v1/floating-ips",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_list(
-    rqctx: RequestContext,
-    query_params: Query>,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
-        let scan_params = ScanByNameOrId::from_query(&query)?;
-        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
-        let project_lookup =
-            nexus.project_lookup(&opctx, scan_params.selector.clone())?;
-        let ips = nexus
-            .floating_ips_list(&opctx, &project_lookup, &paginated_by)
-            .await?;
-        Ok(HttpResponseOk(ScanByNameOrId::results_page(
-            &query,
-            ips,
-            &marker_for_name_or_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            Ok(HttpResponseOk(result))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Create floating IP
-#[endpoint {
-    method = POST,
-    path = "/v1/floating-ips",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_create(
-    rqctx: RequestContext,
-    query_params: Query,
-    floating_params: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let floating_params = floating_params.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let project_lookup =
-            nexus.project_lookup(&opctx, query_params.into_inner())?;
-        let ip = nexus
-            .floating_ip_create(&opctx, &project_lookup, floating_params)
-            .await?;
-        Ok(HttpResponseCreated(ip))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_bulk_write_import_start(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;

-/// Update floating IP
-#[endpoint {
-    method = PUT,
-    path = "/v1/floating-ips/{floating_ip}",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_update(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-    updated_floating_ip: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let updated_floating_ip_params = updated_floating_ip.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let floating_ip_selector = params::FloatingIpSelector {
-            project: query.project,
-            floating_ip: path.floating_ip,
+            nexus.disk_manual_import_start(&opctx, &disk_lookup).await?;
+
+            Ok(HttpResponseUpdatedNoContent())
         };
-        let floating_ip_lookup =
-            nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
-        let floating_ip = nexus
-            .floating_ip_update(
-                &opctx,
-                floating_ip_lookup,
-                updated_floating_ip_params,
-            )
-            .await?;
-        Ok(HttpResponseOk(floating_ip))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Delete floating IP
-#[endpoint {
-    method = DELETE,
-    path = "/v1/floating-ips/{floating_ip}",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_delete(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let floating_ip_selector = params::FloatingIpSelector {
-            floating_ip: path.floating_ip,
-            project: query.project,
+    async fn disk_bulk_write_import(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+        import_params: TypedBody,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let params = import_params.into_inner();
+
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
+
+            nexus.disk_manual_import(&disk_lookup, params).await?;
+
+            Ok(HttpResponseUpdatedNoContent())
         };
-        let fip_lookup =
-            nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
-
-        nexus.floating_ip_delete(&opctx, fip_lookup).await?;
-        Ok(HttpResponseDeleted())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch floating IP
-#[endpoint {
-    method = GET,
-    path = "/v1/floating-ips/{floating_ip}",
-    tags = ["floating-ips"]
-}]
-async fn floating_ip_view(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let floating_ip_selector = params::FloatingIpSelector {
-            floating_ip: path.floating_ip,
-            project: query.project,
-        };
-        let (.., fip) = nexus
-            .floating_ip_lookup(&opctx, floating_ip_selector)?
-            .fetch()
-            .await?;
-        Ok(HttpResponseOk(fip.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_bulk_write_import_stop(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;

-/// Attach floating IP
-///
-/// Attach floating IP to an instance or other resource.
-#[endpoint {
-    method = POST,
-    path = "/v1/floating-ips/{floating_ip}/attach",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_attach(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-    target: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let floating_ip_selector = params::FloatingIpSelector {
-            floating_ip: path.floating_ip,
-            project: query.project,
-        };
-        let ip = nexus
-            .floating_ip_attach(
-                &opctx,
-                floating_ip_selector,
-                target.into_inner(),
-            )
-            .await?;
-        Ok(HttpResponseAccepted(ip))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            nexus.disk_manual_import_stop(&opctx, &disk_lookup).await?;

-/// Detach floating IP
-///
-// Detach floating IP from instance or other resource.
-#[endpoint {
-    method = POST,
-    path = "/v1/floating-ips/{floating_ip}/detach",
-    tags = ["floating-ips"],
-}]
-async fn floating_ip_detach(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let floating_ip_selector = params::FloatingIpSelector {
-            floating_ip: path.floating_ip,
-            project: query.project,
+            Ok(HttpResponseUpdatedNoContent())
         };
-        let fip_lookup =
-            nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
-        let ip = nexus.floating_ip_detach(&opctx, fip_lookup).await?;
-        Ok(HttpResponseAccepted(ip))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-// Disks
-
-/// List disks
-#[endpoint {
-    method = GET,
-    path = "/v1/disks",
-    tags = ["disks"],
-}]
-async fn disk_list(
-    rqctx: RequestContext,
-    query_params: Query>,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
-        let scan_params = ScanByNameOrId::from_query(&query)?;
-        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
-        let project_lookup =
-            nexus.project_lookup(&opctx, scan_params.selector.clone())?;
-        let disks = nexus
-            .disk_list(&opctx, &project_lookup, &paginated_by)
-            .await?
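// NOTE (editor): disk_metrics_list (both versions in this diff) resolves the
// disk only for authorization -- lookup_for(authz::Action::Read) -- and then
// queries the metrics store by constructing a timeseries name and a filter
// on the disk's ID:
//
//     let result = nexus
//         .select_timeseries(
//             &format!("crucible_upstairs:{}", path.metric),
//             &[&format!("upstairs_uuid=={}", authz_disk.id())],
//             query,
//             limit,
//         )
//         .await?;
//
// so the {metric} path parameter (one of the DiskMetricName variants below:
// activated, flush, read, read_bytes, write, write_bytes) selects the
// timeseries, and the authz_disk ID scopes it to this disk.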
-            .into_iter()
-            .map(|disk| disk.into())
-            .collect();
-        Ok(HttpResponseOk(ScanByNameOrId::results_page(
-            &query,
-            disks,
-            &marker_for_name_or_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-// TODO-correctness See note about instance create. This should be async.
-/// Create a disk
-#[endpoint {
-    method = POST,
-    path = "/v1/disks",
-    tags = ["disks"]
-}]
-async fn disk_create(
-    rqctx: RequestContext,
-    query_params: Query,
-    new_disk: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let params = new_disk.into_inner();
-        let project_lookup = nexus.project_lookup(&opctx, query)?;
-        let disk =
-            nexus.project_create_disk(&opctx, &project_lookup, &params).await?;
-        Ok(HttpResponseCreated(disk.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch disk
-#[endpoint {
-    method = GET,
-    path = "/v1/disks/{disk}",
-    tags = ["disks"]
-}]
-async fn disk_view(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let (.., disk) =
-            nexus.disk_lookup(&opctx, disk_selector)?.fetch().await?;
-        Ok(HttpResponseOk(disk.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn disk_finalize_import(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+        finalize_params: TypedBody,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let params = finalize_params.into_inner();
+            let disk_selector = params::DiskSelector {
+                disk: path.disk,
+                project: query.project,
+            };
+            let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;

-/// Delete disk
-#[endpoint {
-    method = DELETE,
-    path = "/v1/disks/{disk}",
-    tags = ["disks"],
-}]
-async fn disk_delete(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
-        nexus.project_delete_disk(&opctx, &disk_lookup).await?;
-        Ok(HttpResponseDeleted())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            nexus.disk_finalize_import(&opctx, &disk_lookup, &params).await?;

-#[derive(Display, Serialize, Deserialize, JsonSchema)]
-#[display(style = "snake_case")]
-#[serde(rename_all = "snake_case")]
-pub enum DiskMetricName {
-    Activated,
-    Flush,
-    Read,
-    ReadBytes,
-    Write,
-    WriteBytes,
-}
+            Ok(HttpResponseUpdatedNoContent())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-#[derive(Serialize, Deserialize, JsonSchema)]
-struct DiskMetricsPath {
-    disk: NameOrId,
-    metric: DiskMetricName,
-}
+    // Instances
+
+    async fn instance_list(
+        rqctx: RequestContext,
+        query_params: Query>,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let pag_params = data_page_params_for(&rqctx, &query)?;
+            let scan_params = ScanByNameOrId::from_query(&query)?;
+            let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let project_lookup =
+                nexus.project_lookup(&opctx, scan_params.selector.clone())?;
+            let instances = nexus
+                .instance_list(&opctx, &project_lookup, &paginated_by)
+                .await?
+                .into_iter()
+                .map(|i| i.into())
+                .collect();
+            Ok(HttpResponseOk(ScanByNameOrId::results_page(
+                &query,
+                instances,
+                &marker_for_name_or_id,
+            )?))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch disk metrics
-#[endpoint {
-    method = GET,
-    path = "/v1/disks/{disk}/metrics/{metric}",
-    tags = ["disks"],
-}]
-async fn disk_metrics_list(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query<
-        PaginationParams,
-    >,
-    selector_params: Query,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
+    async fn instance_create(
+        rqctx: RequestContext,
+        query_params: Query,
+        new_instance: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
-        let path = path_params.into_inner();
-        let query = query_params.into_inner();
-
-        let selector = selector_params.into_inner();
-        let limit = rqctx.page_limit(&query)?;
-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: selector.project };
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let (.., authz_disk) = nexus
-            .disk_lookup(&opctx, disk_selector)?
-            .lookup_for(authz::Action::Read)
-            .await?;
-
-        let result = nexus
-            .select_timeseries(
-                &format!("crucible_upstairs:{}", path.metric),
-                &[&format!("upstairs_uuid=={}", authz_disk.id())],
-                query,
-                limit,
-            )
-            .await?;
-
-        Ok(HttpResponseOk(result))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let project_selector = query_params.into_inner();
+        let new_instance_params = &new_instance.into_inner();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let project_lookup =
+                nexus.project_lookup(&opctx, project_selector)?;
+            let instance = nexus
+                .project_create_instance(
+                    &opctx,
+                    &project_lookup,
+                    &new_instance_params,
+                )
+                .await?;
+            Ok(HttpResponseCreated(instance.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Start importing blocks into disk
-///
-/// Start the process of importing blocks into a disk
-#[endpoint {
-    method = POST,
-    path = "/v1/disks/{disk}/bulk-write-start",
-    tags = ["disks"],
-}]
-async fn disk_bulk_write_import_start(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn instance_view(
+        rqctx: RequestContext,
+        query_params: Query,
+        path_params: Path,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let instance_selector = params::InstanceSelector {
+                project: query.project,
+                instance: path.instance,
+            };
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            let (.., authz_instance) =
+                instance_lookup.lookup_for(authz::Action::Read).await?;
+            let instance_and_vmm = nexus
+                .datastore()
+                .instance_fetch_with_vmm(&opctx, &authz_instance)
+                .await?;
+            Ok(HttpResponseOk(instance_and_vmm.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
-
-        nexus.disk_manual_import_start(&opctx, &disk_lookup).await?;
-
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Import blocks into disk
-#[endpoint {
-    method = POST,
-    path = "/v1/disks/{disk}/bulk-write",
-    tags = ["disks"],
-}]
-async fn disk_bulk_write_import(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-    import_params: TypedBody,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn instance_delete(
+        rqctx: RequestContext,
+        query_params: Query,
+        path_params: Path,
+    ) -> Result {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
-        let params = import_params.into_inner();
-
-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
-
-        nexus.disk_manual_import(&disk_lookup, params).await?;
-
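// NOTE (editor): the four bulk-import endpoints moved in this diff form one
// client-visible sequence, judging from the nexus calls in their bodies and
// the /v1/disks/{disk}/... paths in the removed attributes:
//
//     bulk-write-start -> nexus.disk_manual_import_start(&opctx, &disk_lookup)
//     bulk-write       -> nexus.disk_manual_import(&disk_lookup, params)   // per chunk
//     bulk-write-stop  -> nexus.disk_manual_import_stop(&opctx, &disk_lookup)
//     finalize         -> nexus.disk_finalize_import(&opctx, &disk_lookup, &params)
//
// Only finalize carries its own typed body beyond the write payload itself,
// consistent with finalize being the point at which the disk leaves import
// mode and becomes usable.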
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let instance_selector = params::InstanceSelector {
+            project: query.project,
+            instance: path.instance,
+        };
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            nexus.project_destroy_instance(&opctx, &instance_lookup).await?;
+            Ok(HttpResponseDeleted())
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Stop importing blocks into disk
-///
-/// Stop the process of importing blocks into a disk
-#[endpoint {
-    method = POST,
-    path = "/v1/disks/{disk}/bulk-write-stop",
-    tags = ["disks"],
-}]
-async fn disk_bulk_write_import_stop(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn instance_reboot(
+        rqctx: RequestContext,
+        query_params: Query,
+        path_params: Path,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
+        let instance_selector = params::InstanceSelector {
+            project: query.project,
+            instance: path.instance,
+        };
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            let instance =
+                nexus.instance_reboot(&opctx, &instance_lookup).await?;
+            Ok(HttpResponseAccepted(instance.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
-
-        nexus.disk_manual_import_stop(&opctx, &disk_lookup).await?;
-
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Confirm disk block import completion
-#[endpoint {
-    method = POST,
-    path = "/v1/disks/{disk}/finalize",
-    tags = ["disks"],
-}]
-async fn disk_finalize_import(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-    finalize_params: TypedBody,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn instance_start(
+        rqctx: RequestContext,
+        query_params: Query,
+        path_params: Path,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
-        let params = finalize_params.into_inner();
-        let disk_selector =
-            params::DiskSelector { disk: path.disk, project: query.project };
-        let disk_lookup = nexus.disk_lookup(&opctx, disk_selector)?;
-
-        nexus.disk_finalize_import(&opctx, &disk_lookup, &params).await?;
-
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-// Instances
-
-/// List instances
-#[endpoint {
-    method = GET,
-    path = "/v1/instances",
-    tags = ["instances"],
-}]
-async fn instance_list(
-    rqctx: RequestContext,
-    query_params: Query>,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
-        let scan_params = ScanByNameOrId::from_query(&query)?;
-        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let project_lookup =
-            nexus.project_lookup(&opctx, scan_params.selector.clone())?;
-        let instances = nexus
-            .instance_list(&opctx, &project_lookup, &paginated_by)
-            .await?
-            .into_iter()
-            .map(|i| i.into())
-            .collect();
-        Ok(HttpResponseOk(ScanByNameOrId::results_page(
-            &query,
-            instances,
-            &marker_for_name_or_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Create instance
-#[endpoint {
-    method = POST,
-    path = "/v1/instances",
-    tags = ["instances"],
-}]
-async fn instance_create(
-    rqctx: RequestContext,
-    query_params: Query,
-    new_instance: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let project_selector = query_params.into_inner();
-    let new_instance_params = &new_instance.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let project_lookup = nexus.project_lookup(&opctx, project_selector)?;
-        let instance = nexus
-            .project_create_instance(
-                &opctx,
-                &project_lookup,
-                &new_instance_params,
-            )
-            .await?;
-        Ok(HttpResponseCreated(instance.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Fetch instance
-#[endpoint {
-    method = GET,
-    path = "/v1/instances/{instance}",
-    tags = ["instances"],
-}]
-async fn instance_view(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
         let instance_selector = params::InstanceSelector {
             project: query.project,
             instance: path.instance,
         };
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let (.., authz_instance) =
-            instance_lookup.lookup_for(authz::Action::Read).await?;
-        let instance_and_vmm = nexus
-            .datastore()
-            .instance_fetch_with_vmm(&opctx, &authz_instance)
-            .await?;
-        Ok(HttpResponseOk(instance_and_vmm.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Delete instance
-#[endpoint {
-    method = DELETE,
-    path = "/v1/instances/{instance}",
-    tags = ["instances"],
-}]
-async fn instance_delete(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-) -> Result {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let instance_selector = params::InstanceSelector {
-        project: query.project,
-        instance: path.instance,
-    };
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        nexus.project_destroy_instance(&opctx, &instance_lookup).await?;
-        Ok(HttpResponseDeleted())
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-// TODO should this be in the public API?
-/// Migrate an instance
-#[endpoint {
-    method = POST,
-    path = "/v1/instances/{instance}/migrate",
-    tags = ["instances"],
-}]
-async fn instance_migrate(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-    migrate_params: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let migrate_instance_params = migrate_params.into_inner();
-    let instance_selector = params::InstanceSelector {
-        project: query.project,
-        instance: path.instance,
-    };
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let instance = nexus
-            .project_instance_migrate(
-                &opctx,
-                &instance_lookup,
-                migrate_instance_params,
-            )
-            .await?;
-        Ok(HttpResponseOk(instance.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Reboot an instance
-#[endpoint {
-    method = POST,
-    path = "/v1/instances/{instance}/reboot",
-    tags = ["instances"],
-}]
-async fn instance_reboot(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let instance_selector = params::InstanceSelector {
-        project: query.project,
-        instance: path.instance,
-    };
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let instance = nexus.instance_reboot(&opctx, &instance_lookup).await?;
-        Ok(HttpResponseAccepted(instance.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Boot instance
-#[endpoint {
-    method = POST,
-    path = "/v1/instances/{instance}/start",
-    tags = ["instances"],
-}]
-async fn instance_start(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let instance_selector = params::InstanceSelector {
-        project: query.project,
-        instance: path.instance,
-    };
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let instance = nexus.instance_start(&opctx, &instance_lookup).await?;
-        Ok(HttpResponseAccepted(instance.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
-
-/// Stop instance
-#[endpoint {
-    method = POST,
-    path = "/v1/instances/{instance}/stop",
-    tags = ["instances"],
-}]
-async fn instance_stop(
-    rqctx: RequestContext,
-    query_params: Query,
-    path_params: Path,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let instance_selector = params::InstanceSelector {
-        project: query.project,
-        instance: path.instance,
-    };
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let instance = nexus.instance_stop(&opctx, &instance_lookup).await?;
-        Ok(HttpResponseAccepted(instance.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            let instance =
+                nexus.instance_start(&opctx, &instance_lookup).await?;
+            Ok(HttpResponseAccepted(instance.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Fetch instance serial console
-#[endpoint {
-    method = GET,
-    path = "/v1/instances/{instance}/serial-console",
-    tags = ["instances"],
-}]
-async fn instance_serial_console(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+    async fn instance_stop(
+        rqctx: RequestContext,
+        query_params: Query,
+        path_params: Path,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
         let instance_selector = params::InstanceSelector {
-            project: query.project.clone(),
+            project: query.project,
             instance: path.instance,
         };
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let data = nexus
-            .instance_serial_console_data(&opctx, &instance_lookup, &query)
-            .await?;
-        Ok(HttpResponseOk(data))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            let instance =
+                nexus.instance_stop(&opctx, &instance_lookup).await?;
+            Ok(HttpResponseAccepted(instance.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Stream instance serial console
-#[channel {
-    protocol = WEBSOCKETS,
-    path = "/v1/instances/{instance}/serial-console/stream",
-    tags = ["instances"],
-}]
-async fn instance_serial_console_stream(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-    conn: WebsocketConnection,
-) -> WebsocketChannelResult {
-    let apictx = rqctx.context();
-    let nexus = &apictx.context.nexus;
-    let path = path_params.into_inner();
-    let query = query_params.into_inner();
-    let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-    let instance_selector = params::InstanceSelector {
-        project: query.project.clone(),
-        instance: path.instance,
-    };
-    let mut client_stream = WebSocketStream::from_raw_socket(
-        conn.into_inner(),
-        WebSocketRole::Server,
-        None,
-    )
-    .await;
-    match nexus.instance_lookup(&opctx, instance_selector) {
-        Ok(instance_lookup) => {
-            nexus
-                .instance_serial_console_stream(
-                    &opctx,
-                    client_stream,
-                    &instance_lookup,
-                    &query,
-                )
+    async fn instance_serial_console(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+    ) -> Result, HttpError>
+    {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let path = path_params.into_inner();
+            let query = query_params.into_inner();
+            let instance_selector = params::InstanceSelector {
+                project: query.project.clone(),
+                instance: path.instance,
+            };
+            let instance_lookup =
+                nexus.instance_lookup(&opctx, instance_selector)?;
+            let data = nexus
+                .instance_serial_console_data(&opctx, &instance_lookup, &query)
                 .await?;
-            Ok(())
-        }
-        Err(e) => {
-            let _ = client_stream
-                .close(Some(CloseFrame {
-                    code: CloseCode::Error,
-                    reason: e.to_string().into(),
-                }))
-                .await
-                .is_ok();
-            Err(e.into())
-        }
+            Ok(HttpResponseOk(data))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
     }
-}

-/// List SSH public keys for instance
-///
-/// List SSH public keys injected via cloud-init during instance creation. Note
-/// that this list is a snapshot in time and will not reflect updates made after
-/// the instance is created.
-#[endpoint {
-    method = GET,
-    path = "/v1/instances/{instance}/ssh-public-keys",
-    tags = ["instances"],
-}]
-async fn instance_ssh_public_key_list(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query>,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
+    async fn instance_serial_console_stream(
+        rqctx: RequestContext,
+        path_params: Path,
+        query_params: Query,
+        conn: WebsocketConnection,
+    ) -> WebsocketChannelResult {
+        let apictx = rqctx.context();
         let nexus = &apictx.context.nexus;
         let path = path_params.into_inner();
         let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
-        let scan_params = ScanByNameOrId::from_query(&query)?;
-        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
         let instance_selector = params::InstanceSelector {
-            project: scan_params.selector.project.clone(),
+            project: query.project.clone(),
             instance: path.instance,
         };
-        let instance_lookup =
-            nexus.instance_lookup(&opctx, instance_selector)?;
-        let ssh_keys = nexus
-            .instance_ssh_keys_list(&opctx, &instance_lookup, &paginated_by)
-            .await?
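// NOTE (editor): instance_serial_console_stream is the one handler here that
// cannot use the usual HttpError plumbing: by the time the lookup runs, the
// connection has already been upgraded to a websocket. Hence the explicit
// error arm visible in both versions in this diff:
//
//     Err(e) => {
//         let _ = client_stream
//             .close(Some(CloseFrame {
//                 code: CloseCode::Error,
//                 reason: e.to_string().into(),
//             }))
//             .await
//             .is_ok();
//         Err(e.into())
//     }
//
// i.e. the error is reported in-band as a close frame, best-effort (the
// result of close() is deliberately discarded), and then propagated to the
// channel wrapper.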
- .into_iter() - .map(|k| k.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - ssh_keys, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let mut client_stream = WebSocketStream::from_raw_socket( + conn.into_inner(), + WebSocketRole::Server, + None, + ) + .await; + match nexus.instance_lookup(&opctx, instance_selector) { + Ok(instance_lookup) => { + nexus + .instance_serial_console_stream( + &opctx, + client_stream, + &instance_lookup, + &query, + ) + .await?; + Ok(()) + } + Err(e) => { + let _ = client_stream + .close(Some(CloseFrame { + code: CloseCode::Error, + reason: e.to_string().into(), + })) + .await + .is_ok(); + Err(e.into()) + } + } + } -/// List disks for instance -#[endpoint { - method = GET, - path = "/v1/instances/{instance}/disks", - tags = ["instances"], -}] -async fn instance_disk_list( - rqctx: RequestContext, - query_params: Query>, - path_params: Path, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let instance_selector = params::InstanceSelector { - project: scan_params.selector.project.clone(), - instance: path.instance, + async fn instance_ssh_public_key_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let instance_selector = params::InstanceSelector { + project: scan_params.selector.project.clone(), + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let ssh_keys = nexus + .instance_ssh_keys_list(&opctx, &instance_lookup, &paginated_by) + .await? + .into_iter() + .map(|k| k.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + ssh_keys, + &marker_for_name_or_id, + )?)) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let disks = nexus - .instance_list_disks(&opctx, &instance_lookup, &paginated_by) - .await? 
- .into_iter() - .map(|d| d.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - disks, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Attach disk to instance -#[endpoint { - method = POST, - path = "/v1/instances/{instance}/disks/attach", - tags = ["instances"], -}] -async fn instance_disk_attach( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - disk_to_attach: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let disk = disk_to_attach.into_inner().disk; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, + async fn instance_disk_list( + rqctx: RequestContext, + query_params: Query< + PaginatedByNameOrId, + >, + path_params: Path, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let instance_selector = params::InstanceSelector { + project: scan_params.selector.project.clone(), + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let disks = nexus + .instance_list_disks(&opctx, &instance_lookup, &paginated_by) + .await? 
+ .into_iter() + .map(|d| d.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + disks, + &marker_for_name_or_id, + )?)) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let disk = - nexus.instance_attach_disk(&opctx, &instance_lookup, disk).await?; - Ok(HttpResponseAccepted(disk.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Detach disk from instance -#[endpoint { - method = POST, - path = "/v1/instances/{instance}/disks/detach", - tags = ["instances"], -}] -async fn instance_disk_detach( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - disk_to_detach: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + async fn instance_disk_attach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + disk_to_attach: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; let path = path_params.into_inner(); let query = query_params.into_inner(); - let disk = disk_to_detach.into_inner().disk; - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, + let disk = disk_to_attach.into_inner().disk; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let disk = nexus + .instance_attach_disk(&opctx, &instance_lookup, disk) + .await?; + Ok(HttpResponseAccepted(disk.into())) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let disk = - nexus.instance_detach_disk(&opctx, &instance_lookup, disk).await?; - Ok(HttpResponseAccepted(disk.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - -// Certificates - -/// List certificates for external endpoints -/// -/// Returns a list of TLS certificates used for the external API (for the -/// current Silo). These are sorted by creation date, with the most recent -/// certificates appearing first. -#[endpoint { - method = GET, - path = "/v1/certificates", - tags = ["silos"], -}] -async fn certificate_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let certs = nexus - .certificates_list(&opctx, &paginated_by) - .await? 
- .into_iter() - .map(|d| d.try_into()) - .collect::, Error>>()?; - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - certs, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create new system-wide x.509 certificate -/// -/// This certificate is automatically used by the Oxide Control plane to serve -/// external connections. -#[endpoint { - method = POST, - path = "/v1/certificates", - tags = ["silos"] -}] -async fn certificate_create( - rqctx: RequestContext, - new_cert: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let new_cert_params = new_cert.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let cert = nexus.certificate_create(&opctx, new_cert_params).await?; - Ok(HttpResponseCreated(cert.try_into()?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn instance_disk_detach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + disk_to_detach: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let disk = disk_to_detach.into_inner().disk; + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let disk = nexus + .instance_detach_disk(&opctx, &instance_lookup, disk) + .await?; + Ok(HttpResponseAccepted(disk.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Path parameters for Certificate requests -#[derive(Deserialize, JsonSchema)] -struct CertificatePathParam { - certificate: NameOrId, -} + // Certificates + + async fn certificate_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let certs = nexus + .certificates_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(|d| d.try_into()) + .collect::, Error>>()?; + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + certs, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch certificate -/// -/// Returns the details of a specific certificate -#[endpoint { - method = GET, - path = "/v1/certificates/{certificate}", - tags = ["silos"], -}] -async fn certificate_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let (.., cert) = - nexus.certificate_lookup(&opctx, &path.certificate).fetch().await?; - Ok(HttpResponseOk(cert.try_into()?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn certificate_create( + rqctx: RequestContext, + new_cert: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let new_cert_params = new_cert.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let cert = + nexus.certificate_create(&opctx, new_cert_params).await?; + Ok(HttpResponseCreated(cert.try_into()?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete certificate -/// -/// Permanently delete a certificate. This operation cannot be undone. -#[endpoint { - method = DELETE, - path = "/v1/certificates/{certificate}", - tags = ["silos"], -}] -async fn certificate_delete( - rqctx: RequestContext, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - nexus - .certificate_delete( - &opctx, - nexus.certificate_lookup(&opctx, &path.certificate), - ) - .await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn certificate_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let (.., cert) = nexus + .certificate_lookup(&opctx, &path.certificate) + .fetch() + .await?; + Ok(HttpResponseOk(cert.try_into()?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create address lot -#[endpoint { - method = POST, - path = "/v1/system/networking/address-lot", - tags = ["system/networking"], -}] -async fn networking_address_lot_create( - rqctx: RequestContext, - new_address_lot: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let params = new_address_lot.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus.address_lot_create(&opctx, params).await?; - - let lot: AddressLot = result.lot.into(); - let blocks: Vec = - result.blocks.iter().map(|b| b.clone().into()).collect(); - - Ok(HttpResponseCreated(AddressLotCreateResponse { 
lot, blocks })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn certificate_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + nexus + .certificate_delete( + &opctx, + nexus.certificate_lookup(&opctx, &path.certificate), + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete address lot -#[endpoint { - method = DELETE, - path = "/v1/system/networking/address-lot/{address_lot}", - tags = ["system/networking"], -}] -async fn networking_address_lot_delete( - rqctx: RequestContext, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let address_lot_lookup = - nexus.address_lot_lookup(&opctx, path.address_lot)?; - nexus.address_lot_delete(&opctx, &address_lot_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_address_lot_create( + rqctx: RequestContext, + new_address_lot: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let params = new_address_lot.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let result = nexus.address_lot_create(&opctx, params).await?; + + let lot: AddressLot = result.lot.into(); + let blocks: Vec = + result.blocks.iter().map(|b| b.clone().into()).collect(); + + Ok(HttpResponseCreated(AddressLotCreateResponse { lot, blocks })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List address lots -#[endpoint { - method = GET, - path = "/v1/system/networking/address-lot", - tags = ["system/networking"], -}] -async fn networking_address_lot_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let lots = nexus - .address_lot_list(&opctx, &paginated_by) - .await? 
- .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - lots, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_address_lot_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let address_lot_lookup = + nexus.address_lot_lookup(&opctx, path.address_lot)?; + nexus.address_lot_delete(&opctx, &address_lot_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List blocks in address lot -#[endpoint { - method = GET, - path = "/v1/system/networking/address-lot/{address_lot}/blocks", - tags = ["system/networking"], -}] -async fn networking_address_lot_block_list( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let path = path_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let address_lot_lookup = - nexus.address_lot_lookup(&opctx, path.address_lot)?; - let blocks = nexus - .address_lot_block_list(&opctx, &address_lot_lookup, &pagparams) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanById::results_page( - &query, - blocks, - &|_, x: &AddressLotBlock| x.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_address_lot_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let lots = nexus + .address_lot_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + lots, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create loopback address -#[endpoint { - method = POST, - path = "/v1/system/networking/loopback-address", - tags = ["system/networking"], -}] -async fn networking_loopback_address_create( - rqctx: RequestContext, - new_loopback_address: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let params = new_loopback_address.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus.loopback_address_create(&opctx, params).await?; + async fn networking_address_lot_block_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let path = path_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let address_lot_lookup = + nexus.address_lot_lookup(&opctx, path.address_lot)?; + let blocks = nexus + .address_lot_block_list(&opctx, &address_lot_lookup, &pagparams) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + blocks, + &|_, x: &AddressLotBlock| x.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let addr: LoopbackAddress = result.into(); + async fn networking_loopback_address_create( + rqctx: RequestContext, + new_loopback_address: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let params = new_loopback_address.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let result = nexus.loopback_address_create(&opctx, params).await?; + + let addr: LoopbackAddress = result.into(); + + Ok(HttpResponseCreated(addr)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - Ok(HttpResponseCreated(addr)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_loopback_address_delete( + rqctx: RequestContext, + path: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let addr = match IpNetwork::new(path.address, path.subnet_mask) { + Ok(addr) => Ok(addr), + Err(_) => Err(HttpError::for_bad_request( + None, + "invalid ip address".into(), + )), + }?; + nexus + .loopback_address_delete( + &opctx, + path.rack_id, + path.switch_location.into(), + addr.into(), + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct LoopbackAddressPath { - /// The rack to use when selecting the loopback address. 
- pub rack_id: Uuid, + async fn networking_loopback_address_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let addrs = nexus + .loopback_address_list(&opctx, &pagparams) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + addrs, + &|_, x: &LoopbackAddress| x.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - /// The switch location to use when selecting the loopback address. - pub switch_location: Name, + async fn networking_switch_port_settings_create( + rqctx: RequestContext, + new_settings: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let params = new_settings.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let result = + nexus.switch_port_settings_post(&opctx, params).await?; + + let settings: SwitchPortSettingsView = result.into(); + Ok(HttpResponseCreated(settings)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - /// The IP address and subnet mask to use when selecting the loopback - /// address. - pub address: IpAddr, + async fn networking_switch_port_settings_delete( + rqctx: RequestContext, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let selector = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + nexus.switch_port_settings_delete(&opctx, &selector).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - /// The IP address and subnet mask to use when selecting the loopback - /// address. - pub subnet_mask: u8, -} + async fn networking_switch_port_settings_list( + rqctx: RequestContext, + query_params: Query< + PaginatedByNameOrId, + >, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let settings = nexus + .switch_port_settings_list(&opctx, &paginated_by) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + settings, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete loopback address -#[endpoint { - method = DELETE, - path = "/v1/system/networking/loopback-address/{rack_id}/{switch_location}/{address}/{subnet_mask}", - tags = ["system/networking"], -}] -async fn networking_loopback_address_delete( - rqctx: RequestContext, - path: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let addr = match IpNetwork::new(path.address, path.subnet_mask) { - Ok(addr) => Ok(addr), - Err(_) => Err(HttpError::for_bad_request( - None, - "invalid ip address".into(), - )), - }?; - nexus - .loopback_address_delete( - &opctx, - path.rack_id, - path.switch_location.clone(), - addr.into(), - ) - .await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_switch_port_settings_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = path_params.into_inner().port; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let settings = + nexus.switch_port_settings_get(&opctx, &query).await?; + Ok(HttpResponseOk(settings.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List loopback addresses -#[endpoint { - method = GET, - path = "/v1/system/networking/loopback-address", - tags = ["system/networking"], -}] -async fn networking_loopback_address_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let addrs = nexus - .loopback_address_list(&opctx, &pagparams) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanById::results_page( - &query, - addrs, - &|_, x: &LoopbackAddress| x.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_switch_port_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let addrs = nexus + .switch_port_list(&opctx, &pagparams) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanById::results_page( + &query, + addrs, + &|_, x: &SwitchPort| x.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create switch port settings -#[endpoint { - method = POST, - path = "/v1/system/networking/switch-port-settings", - tags = ["system/networking"], -}] -async fn networking_switch_port_settings_create( - rqctx: RequestContext, - new_settings: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let params = new_settings.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus.switch_port_settings_post(&opctx, params).await?; - - let settings: SwitchPortSettingsView = result.into(); - Ok(HttpResponseCreated(settings)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_switch_port_status( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + Ok(HttpResponseOk( + nexus + .switch_port_status( + &opctx, + query.switch_location, + path.port, + ) + .await?, + )) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete switch port settings -#[endpoint { - method = DELETE, - path = "/v1/system/networking/switch-port-settings", - tags = ["system/networking"], -}] -async fn networking_switch_port_settings_delete( - rqctx: RequestContext, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let selector = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - nexus.switch_port_settings_delete(&opctx, &selector).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_switch_port_apply_settings( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + settings_body: TypedBody, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let port = path_params.into_inner().port; + let query = query_params.into_inner(); + let settings = settings_body.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + nexus + .switch_port_apply_settings(&opctx, &port, &query, &settings) + .await?; + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List switch port settings -#[endpoint { - method = GET, - path = "/v1/system/networking/switch-port-settings", - tags = ["system/networking"], -}] -async fn networking_switch_port_settings_list( - rqctx: RequestContext, - query_params: Query< - PaginatedByNameOrId, - >, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = 
ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let settings = nexus - .switch_port_settings_list(&opctx, &paginated_by) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - settings, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_switch_port_clear_settings( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let port = path_params.into_inner().port; + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + nexus.switch_port_clear_settings(&opctx, &port, &query).await?; + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Get information about switch port -#[endpoint { - method = GET, - path = "/v1/system/networking/switch-port-settings/{port}", - tags = ["system/networking"], -}] -async fn networking_switch_port_settings_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = path_params.into_inner().port; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let settings = nexus.switch_port_settings_get(&opctx, &query).await?; - Ok(HttpResponseOk(settings.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_bgp_config_create( + rqctx: RequestContext, + config: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let config = config.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let result = nexus.bgp_config_create(&opctx, &config).await?; + Ok(HttpResponseCreated::(result.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List switch ports -#[endpoint { - method = GET, - path = "/v1/system/hardware/switch-port", - tags = ["system/hardware"], -}] -async fn networking_switch_port_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let addrs = nexus - .switch_port_list(&opctx, &pagparams) - .await? 
- .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanById::results_page( - &query, - addrs, - &|_, x: &SwitchPort| x.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn networking_bgp_config_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let configs = nexus + .bgp_config_list(&opctx, &paginated_by) + .await? + .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + configs, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Get switch port status -#[endpoint { - method = GET, - path = "/v1/system/hardware/switch-port/{port}/status", - tags = ["system/hardware"], -}] -async fn networking_switch_port_status( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let path = path_params.into_inner(); + //TODO pagination? the normal by-name/by-id stuff does not work here + async fn networking_bgp_status( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - Ok(HttpResponseOk( - nexus - .switch_port_status(&opctx, query.switch_location, path.port) - .await?, - )) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let handler = async { + let nexus = &apictx.context.nexus; + let result = nexus.bgp_peer_status(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Apply switch port settings -#[endpoint { - method = POST, - path = "/v1/system/hardware/switch-port/{port}/settings", - tags = ["system/hardware"], -}] -async fn networking_switch_port_apply_settings( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - settings_body: TypedBody, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let port = path_params.into_inner().port; - let query = query_params.into_inner(); - let settings = settings_body.into_inner(); + //TODO pagination? 
the normal by-name/by-id stuff does not work here
+    async fn networking_bgp_exported(
+        rqctx: RequestContext,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus
-            .switch_port_apply_settings(&opctx, &port, &query, &settings)
-            .await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let result = nexus.bgp_exported(&opctx).await?;
+            Ok(HttpResponseOk(result))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Clear switch port settings
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/hardware/switch-port/{port}/settings",
-    tags = ["system/hardware"],
-}]
-async fn networking_switch_port_clear_settings(
-    rqctx: RequestContext,
-    path_params: Path,
-    query_params: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let port = path_params.into_inner().port;
-        let query = query_params.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus.switch_port_clear_settings(&opctx, &port, &query).await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_message_history(
+        rqctx: RequestContext,
+        query_params: Query,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let sel = query_params.into_inner();
+            let result = nexus.bgp_message_history(&opctx, &sel).await?;
+            Ok(HttpResponseOk(AggregateBgpMessageHistory::new(result)))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Create new BGP configuration
-#[endpoint {
-    method = POST,
-    path = "/v1/system/networking/bgp",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_config_create(
-    rqctx: RequestContext,
-    config: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let config = config.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let result = nexus.bgp_config_set(&opctx, &config).await?;
-        Ok(HttpResponseCreated::(result.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    //TODO pagination? the normal by-name/by-id stuff does not work here
+    async fn networking_bgp_imported_routes_ipv4(
+        rqctx: RequestContext,
+        query_params: Query,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let sel = query_params.into_inner();
+            let result = nexus.bgp_imported_routes_ipv4(&opctx, &sel).await?;
+            Ok(HttpResponseOk(result))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// List BGP configurations
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bgp",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_config_list(
-    rqctx: RequestContext,
-    query_params: Query>,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let query = query_params.into_inner();
-        let pag_params = data_page_params_for(&rqctx, &query)?;
-        let scan_params = ScanByNameOrId::from_query(&query)?;
-        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let configs = nexus
-            .bgp_config_list(&opctx, &paginated_by)
-            .await?
-            .into_iter()
-            .map(|p| p.into())
-            .collect();
-
-        Ok(HttpResponseOk(ScanByNameOrId::results_page(
-            &query,
-            configs,
-            &marker_for_name_or_id,
-        )?))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_config_delete(
+        rqctx: RequestContext,
+        sel: Query,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let sel = sel.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            nexus.bgp_config_delete(&opctx, &sel).await?;
+            Ok(HttpResponseUpdatedNoContent {})
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-//TODO pagination? the normal by-name/by-id stuff does not work here
-/// Get BGP peer status
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bgp-status",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_status(
-    rqctx: RequestContext,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let result = nexus.bgp_peer_status(&opctx).await?;
-        Ok(HttpResponseOk(result))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_announce_set_update(
+        rqctx: RequestContext,
+        config: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let config = config.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let result = nexus.bgp_update_announce_set(&opctx, &config).await?;
+            Ok(HttpResponseCreated::(result.0.into()))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Get BGP router message history
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bgp-message-history",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_message_history(
-    rqctx: RequestContext,
-    query_params: Query,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let sel = query_params.into_inner();
-        let result = nexus.bgp_message_history(&opctx, &sel).await?;
-        Ok(HttpResponseOk(AggregateBgpMessageHistory::new(result)))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_announce_set_list(
+        rqctx: RequestContext,
+        query_params: Query<
+            PaginatedByNameOrId,
+        >,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let query = query_params.into_inner();
+            let pag_params = data_page_params_for(&rqctx, &query)?;
+            let scan_params = ScanByNameOrId::from_query(&query)?;
+            let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let result = nexus
+                .bgp_announce_set_list(&opctx, &paginated_by)
+                .await?
+                .into_iter()
+                .map(|p| p.into())
+                .collect();
+            Ok(HttpResponseOk(result))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-//TODO pagination? the normal by-name/by-id stuff does not work here
-/// Get imported IPv4 BGP routes
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bgp-routes-ipv4",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_imported_routes_ipv4(
-    rqctx: RequestContext,
-    query_params: Query,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let sel = query_params.into_inner();
-        let result = nexus.bgp_imported_routes_ipv4(&opctx, &sel).await?;
-        Ok(HttpResponseOk(result))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_announce_set_delete(
+        rqctx: RequestContext,
+        path_params: Path,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let sel = path_params.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            nexus.bgp_delete_announce_set(&opctx, &sel).await?;
+            Ok(HttpResponseUpdatedNoContent {})
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Delete BGP configuration
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/networking/bgp",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_config_delete(
-    rqctx: RequestContext,
-    sel: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let sel = sel.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus.bgp_config_delete(&opctx, &sel).await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bgp_announcement_list(
+        rqctx: RequestContext,
+        path_params: Path,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let sel = path_params.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+
+            let result = nexus
+                .bgp_announcement_list(&opctx, &sel)
+                .await?
+                .into_iter()
+                .map(|p| p.into())
+                .collect();

-/// Update BGP announce set
-///
-/// If the announce set exists, this endpoint replaces the existing announce
-/// set with the one specified.
-#[endpoint {
-    method = PUT,
-    path = "/v1/system/networking/bgp-announce",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_announce_set_update(
-    rqctx: RequestContext,
-    config: TypedBody,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let config = config.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let result = nexus.bgp_update_announce_set(&opctx, &config).await?;
-        Ok(HttpResponseCreated::(result.0.into()))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+            Ok(HttpResponseOk(result))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-//TODO pagination? the normal by-name/by-id stuff does not work here
-/// Get originated routes for a BGP configuration
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bgp-announce",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_announce_set_list(
-    rqctx: RequestContext,
-    query_params: Query,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let sel = query_params.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let result = nexus
-            .bgp_announce_list(&opctx, &sel)
-            .await?
-            .into_iter()
-            .map(|p| p.into())
-            .collect();
-        Ok(HttpResponseOk(result))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bfd_enable(
+        rqctx: RequestContext,
+        session: TypedBody,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+            nexus.bfd_enable(&opctx, session.into_inner()).await?;
+            Ok(HttpResponseUpdatedNoContent {})
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Delete BGP announce set
-#[endpoint {
-    method = DELETE,
-    path = "/v1/system/networking/bgp-announce",
-    tags = ["system/networking"],
-}]
-async fn networking_bgp_announce_set_delete(
-    rqctx: RequestContext,
-    selector: Query,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let sel = selector.into_inner();
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus.bgp_delete_announce_set(&opctx, &sel).await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bfd_disable(
+        rqctx: RequestContext,
+        session: TypedBody,
+    ) -> Result {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+            nexus.bfd_disable(&opctx, session.into_inner()).await?;
+            Ok(HttpResponseUpdatedNoContent {})
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Enable a BFD session
-#[endpoint {
-    method = POST,
-    path = "/v1/system/networking/bfd-enable",
-    tags = ["system/networking"],
-}]
-async fn networking_bfd_enable(
-    rqctx: RequestContext,
-    session: TypedBody,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
-        nexus.bfd_enable(&opctx, session.into_inner()).await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_bfd_status(
+        rqctx: RequestContext,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+            let status = nexus.bfd_status(&opctx).await?;
+            Ok(HttpResponseOk(status))
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Disable a BFD session
-#[endpoint {
-    method = POST,
-    path = "/v1/system/networking/bfd-disable",
-    tags = ["system/networking"],
-}]
-async fn networking_bfd_disable(
-    rqctx: RequestContext,
-    session: TypedBody,
-) -> Result {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
-        nexus.bfd_disable(&opctx, session.into_inner()).await?;
-        Ok(HttpResponseUpdatedNoContent {})
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_allow_list_view(
+        rqctx: RequestContext,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            nexus
+                .allow_list_view(&opctx)
+                .await
+                .map(HttpResponseOk)
+                .map_err(HttpError::from)
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Get BFD status
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/bfd-status",
-    tags = ["system/networking"],
-}]
-async fn networking_bfd_status(
-    rqctx: RequestContext,
-) -> Result>, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
-        let status = nexus.bfd_status(&opctx).await?;
-        Ok(HttpResponseOk(status))
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    async fn networking_allow_list_update(
+        rqctx: RequestContext,
+        params: TypedBody,
+    ) -> Result, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let nexus = &apictx.context.nexus;
+            let server_kind = apictx.kind;
+            let params = params.into_inner();
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let remote_addr = rqctx.request.remote_addr().ip();
+            nexus
+                .allow_list_upsert(&opctx, remote_addr, server_kind, params)
+                .await
+                .map(HttpResponseOk)
+                .map_err(HttpError::from)
+        };
+        apictx
+            .context
+            .external_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }

-/// Get user-facing services IP allowlist
-#[endpoint {
-    method = GET,
-    path = "/v1/system/networking/allow-list",
-    tags = ["system/networking"],
-}]
-async fn networking_allow_list_view(
-    rqctx: RequestContext,
-) -> Result, HttpError> {
-    let apictx = rqctx.context();
-    let handler = async {
-        let nexus = &apictx.context.nexus;
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        nexus
-            .allow_list_view(&opctx)
-            .await
-            .map(HttpResponseOk)
-            .map_err(HttpError::from)
-    };
-    apictx
-        .context
-        .external_latencies
-        .instrument_dropshot_handler(&rqctx, handler)
-        .await
-}
+    // Images
+
+    async fn image_list(
+        rqctx: RequestContext,
+        query_params: Query<
+            PaginatedByNameOrId,
+        >,
+    ) -> Result>, HttpError> {
+        let apictx = rqctx.context();
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_external_api(&rqctx).await?;
+            let nexus = &apictx.context.nexus;
+            let query =
query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let parent_lookup = match scan_params.selector.project.clone() { + Some(project) => { + let project_lookup = nexus.project_lookup( + &opctx, + params::ProjectSelector { project }, + )?; + ImageParentLookup::Project(project_lookup) + } + None => { + let silo_lookup = nexus.current_silo_lookup(&opctx)?; + ImageParentLookup::Silo(silo_lookup) + } + }; + let images = nexus + .image_list(&opctx, &parent_lookup, &paginated_by) + .await? + .into_iter() + .map(|d| d.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + images, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update user-facing services IP allowlist -#[endpoint { - method = PUT, - path = "/v1/system/networking/allow-list", - tags = ["system/networking"], -}] -async fn networking_allow_list_update( - rqctx: RequestContext, - params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let server_kind = apictx.kind; - let params = params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let remote_addr = rqctx.request.remote_addr().ip(); - nexus - .allow_list_upsert(&opctx, remote_addr, server_kind, params) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn image_create( + rqctx: RequestContext, + query_params: Query, + new_image: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let params = &new_image.into_inner(); + let parent_lookup = match query.project.clone() { + Some(project) => { + let project_lookup = nexus.project_lookup( + &opctx, + params::ProjectSelector { project }, + )?; + ImageParentLookup::Project(project_lookup) + } + None => { + let silo_lookup = nexus.current_silo_lookup(&opctx)?; + ImageParentLookup::Silo(silo_lookup) + } + }; + let image = + nexus.image_create(&opctx, &parent_lookup, ¶ms).await?; + Ok(HttpResponseCreated(image.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Images - -/// List images -/// -/// List images which are global or scoped to the specified project. The images -/// are returned sorted by creation date, with the most recent images appearing first. 
-#[endpoint { - method = GET, - path = "/v1/images", - tags = ["images"], -}] -async fn image_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let parent_lookup = match scan_params.selector.project.clone() { - Some(project) => { - let project_lookup = nexus.project_lookup( + async fn image_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let image: nexus_db_model::Image = match nexus + .image_lookup( &opctx, - params::ProjectSelector { project }, - )?; - ImageParentLookup::Project(project_lookup) - } - None => { - let silo_lookup = nexus.current_silo_lookup(&opctx)?; - ImageParentLookup::Silo(silo_lookup) - } + params::ImageSelector { + image: path.image, + project: query.project, + }, + ) + .await? + { + ImageLookup::ProjectImage(image) => { + let (.., db_image) = image.fetch().await?; + db_image.into() + } + ImageLookup::SiloImage(image) => { + let (.., db_image) = image.fetch().await?; + db_image.into() + } + }; + Ok(HttpResponseOk(image.into())) }; - let images = nexus - .image_list(&opctx, &parent_lookup, &paginated_by) - .await? - .into_iter() - .map(|d| d.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - images, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create image -/// -/// Create a new image in a project. 
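// Image endpoints resolve their parent scope two ways: with `?project=...`
// the image is project-scoped; without it, the caller's current silo is used
// instead. Both `image_list` and `image_create` reduce this choice to an
// `ImageParentLookup`, roughly:
//
//     let parent_lookup = match query.project {
//         Some(project) => ImageParentLookup::Project(
//             nexus.project_lookup(&opctx, params::ProjectSelector { project })?,
//         ),
//         None => ImageParentLookup::Silo(nexus.current_silo_lookup(&opctx)?),
//     };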
-#[endpoint { - method = POST, - path = "/v1/images", - tags = ["images"] -}] -async fn image_create( - rqctx: RequestContext, - query_params: Query, - new_image: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let params = &new_image.into_inner(); - let parent_lookup = match query.project.clone() { - Some(project) => { - let project_lookup = nexus.project_lookup( + async fn image_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let image_lookup = nexus + .image_lookup( &opctx, - params::ProjectSelector { project }, - )?; - ImageParentLookup::Project(project_lookup) - } - None => { - let silo_lookup = nexus.current_silo_lookup(&opctx)?; - ImageParentLookup::Silo(silo_lookup) - } + params::ImageSelector { + image: path.image, + project: query.project, + }, + ) + .await?; + nexus.image_delete(&opctx, &image_lookup).await?; + Ok(HttpResponseDeleted()) }; - let image = nexus.image_create(&opctx, &parent_lookup, ¶ms).await?; - Ok(HttpResponseCreated(image.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch image -/// -/// Fetch the details for a specific image in a project. -#[endpoint { - method = GET, - path = "/v1/images/{image}", - tags = ["images"], -}] -async fn image_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let image: nexus_db_model::Image = match nexus - .image_lookup( - &opctx, - params::ImageSelector { - image: path.image, - project: query.project, - }, - ) - .await? - { - ImageLookup::ProjectImage(image) => { - let (.., db_image) = image.fetch().await?; - db_image.into() - } - ImageLookup::SiloImage(image) => { - let (.., db_image) = image.fetch().await?; - db_image.into() - } + async fn image_promote( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let image_lookup = nexus + .image_lookup( + &opctx, + params::ImageSelector { + image: path.image, + project: query.project, + }, + ) + .await?; + let image = nexus.image_promote(&opctx, &image_lookup).await?; + Ok(HttpResponseAccepted(image.into())) }; - Ok(HttpResponseOk(image.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete image -/// -/// Permanently delete an image from a project. 
This operation cannot be undone. -/// Any instances in the project using the image will continue to run, however -/// new instances can not be created with this image. -#[endpoint { - method = DELETE, - path = "/v1/images/{image}", - tags = ["images"], -}] -async fn image_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let image_lookup = nexus - .image_lookup( - &opctx, - params::ImageSelector { - image: path.image, - project: query.project, - }, - ) - .await?; - nexus.image_delete(&opctx, &image_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn image_demote( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let image_lookup = nexus + .image_lookup( + &opctx, + params::ImageSelector { image: path.image, project: None }, + ) + .await?; -/// Promote project image -/// -/// Promote project image to be visible to all projects in the silo -#[endpoint { - method = POST, - path = "/v1/images/{image}/promote", - tags = ["images"] -}] -async fn image_promote( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let image_lookup = nexus - .image_lookup( - &opctx, - params::ImageSelector { - image: path.image, - project: query.project, - }, - ) - .await?; - let image = nexus.image_promote(&opctx, &image_lookup).await?; - Ok(HttpResponseAccepted(image.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - -/// Demote silo image -/// -/// Demote silo image to be visible only to a specified project -#[endpoint { - method = POST, - path = "/v1/images/{image}/demote", - tags = ["images"] -}] -async fn image_demote( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let image_lookup = nexus - .image_lookup( - &opctx, - params::ImageSelector { image: path.image, project: None }, - ) - .await?; - - let project_lookup = nexus.project_lookup(&opctx, query)?; - - let image = - nexus.image_demote(&opctx, &image_lookup, &project_lookup).await?; - Ok(HttpResponseAccepted(image.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - -/// List network interfaces -#[endpoint { - method = GET, - path = "/v1/network-interfaces", - tags = ["instances"], -}] -async fn instance_network_interface_list( - rqctx: RequestContext, - 
query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let instance_lookup = - nexus.instance_lookup(&opctx, scan_params.selector.clone())?; - let interfaces = nexus - .instance_network_interface_list( - &opctx, - &instance_lookup, - &paginated_by, - ) - .await? - .into_iter() - .map(|d| d.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - interfaces, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let project_lookup = nexus.project_lookup(&opctx, query)?; -/// Create network interface -#[endpoint { - method = POST, - path = "/v1/network-interfaces", - tags = ["instances"], -}] -async fn instance_network_interface_create( - rqctx: RequestContext, - query_params: Query, - interface_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let instance_lookup = nexus.instance_lookup(&opctx, query)?; - let iface = nexus - .network_interface_create( - &opctx, - &instance_lookup, - &interface_params.into_inner(), - ) - .await?; - Ok(HttpResponseCreated(iface.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let image = nexus + .image_demote(&opctx, &image_lookup, &project_lookup) + .await?; + Ok(HttpResponseAccepted(image.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete network interface -/// -/// Note that the primary interface for an instance cannot be deleted if there -/// are any secondary interfaces. A new primary interface must be designated -/// first. The primary interface can be deleted if there are no secondary -/// interfaces. 
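// The network-interface endpoints identify their target with a three-part
// selector: `project` and `instance` come from the query string, while the
// interface itself comes from the path. Each handler folds these into a
// `params::InstanceNetworkInterfaceSelector`, resolves that to a typed
// lookup, and only then performs the fetch, update, or delete:
//
//     let interface_selector = params::InstanceNetworkInterfaceSelector {
//         project: query.project,
//         instance: query.instance,
//         network_interface: path.interface,
//     };
//     let lookup =
//         nexus.instance_network_interface_lookup(&opctx, interface_selector)?;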
-#[endpoint { - method = DELETE, - path = "/v1/network-interfaces/{interface}", - tags = ["instances"], -}] -async fn instance_network_interface_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let interface_selector = params::InstanceNetworkInterfaceSelector { - project: query.project, - instance: query.instance, - network_interface: path.interface, - }; - let interface_lookup = nexus - .instance_network_interface_lookup(&opctx, interface_selector)?; - nexus - .instance_network_interface_delete(&opctx, &interface_lookup) - .await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn instance_network_interface_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let instance_lookup = + nexus.instance_lookup(&opctx, scan_params.selector.clone())?; + let interfaces = nexus + .instance_network_interface_list( + &opctx, + &instance_lookup, + &paginated_by, + ) + .await? + .into_iter() + .map(|d| d.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + interfaces, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch network interface -#[endpoint { - method = GET, - path = "/v1/network-interfaces/{interface}", - tags = ["instances"], -}] -async fn instance_network_interface_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let interface_selector = params::InstanceNetworkInterfaceSelector { - project: query.project, - instance: query.instance, - network_interface: path.interface, - }; - let (.., interface) = nexus - .instance_network_interface_lookup(&opctx, interface_selector)? 
- .fetch() - .await?; - Ok(HttpResponseOk(interface.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn instance_network_interface_create( + rqctx: RequestContext, + query_params: Query, + interface_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let instance_lookup = nexus.instance_lookup(&opctx, query)?; + let iface = nexus + .network_interface_create( + &opctx, + &instance_lookup, + &interface_params.into_inner(), + ) + .await?; + Ok(HttpResponseCreated(iface.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update network interface -#[endpoint { - method = PUT, - path = "/v1/network-interfaces/{interface}", - tags = ["instances"], -}] -async fn instance_network_interface_update( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - updated_iface: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let updated_iface = updated_iface.into_inner(); - let network_interface_selector = - params::InstanceNetworkInterfaceSelector { + async fn instance_network_interface_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let interface_selector = params::InstanceNetworkInterfaceSelector { project: query.project, instance: query.instance, network_interface: path.interface, }; - let network_interface_lookup = nexus - .instance_network_interface_lookup( + let interface_lookup = nexus.instance_network_interface_lookup( &opctx, - network_interface_selector, + interface_selector, )?; - let interface = nexus - .instance_network_interface_update( - &opctx, - &network_interface_lookup, - updated_iface, - ) - .await?; - Ok(HttpResponseOk(InstanceNetworkInterface::from(interface))) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + nexus + .instance_network_interface_delete(&opctx, &interface_lookup) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// External IP addresses for instances - -/// List external IP addresses -#[endpoint { - method = GET, - path = "/v1/instances/{instance}/external-ips", - tags = ["instances"], -}] -async fn instance_external_ip_list( - rqctx: RequestContext, - query_params: Query, - path_params: Path, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, + async fn instance_network_interface_view( + rqctx: 
RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let interface_selector = params::InstanceNetworkInterfaceSelector { + project: query.project, + instance: query.instance, + network_interface: path.interface, + }; + let (.., interface) = nexus + .instance_network_interface_lookup(&opctx, interface_selector)? + .fetch() + .await?; + Ok(HttpResponseOk(interface.into())) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let ips = - nexus.instance_list_external_ips(&opctx, &instance_lookup).await?; - Ok(HttpResponseOk(ResultsPage { items: ips, next_page: None })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Allocate and attach ephemeral IP to instance -#[endpoint { - method = POST, - path = "/v1/instances/{instance}/external-ips/ephemeral", - tags = ["instances"], -}] -async fn instance_ephemeral_ip_attach( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - ip_to_create: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, + async fn instance_network_interface_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + updated_iface: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let updated_iface = updated_iface.into_inner(); + let network_interface_selector = + params::InstanceNetworkInterfaceSelector { + project: query.project, + instance: query.instance, + network_interface: path.interface, + }; + let network_interface_lookup = nexus + .instance_network_interface_lookup( + &opctx, + network_interface_selector, + )?; + let interface = nexus + .instance_network_interface_update( + &opctx, + &network_interface_lookup, + updated_iface, + ) + .await?; + Ok(HttpResponseOk(InstanceNetworkInterface::from(interface))) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let ip = nexus - .instance_attach_ephemeral_ip( - &opctx, - &instance_lookup, - ip_to_create.into_inner().pool, - ) - .await?; - Ok(HttpResponseAccepted(ip)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Detach and deallocate ephemeral IP from instance -#[endpoint { - method = DELETE, - path = "/v1/instances/{instance}/external-ips/ephemeral", - tags = ["instances"], -}] -async fn instance_ephemeral_ip_detach( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - 
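// Ephemeral external IPs are managed as a verb pair on the instance: a POST
// to ".../external-ips/ephemeral" allocates an address (optionally from a
// named pool, via the request body's `pool` field) and attaches it, and a
// DELETE on the same path detaches and frees it through
// `params::ExternalIpDetach::Ephemeral`. A hypothetical request pair, with
// made-up instance and project names:
//
//     POST   /v1/instances/web-0/external-ips/ephemeral?project=prod
//     DELETE /v1/instances/web-0/external-ips/ephemeral?project=prod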
let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, + // External IP addresses for instances + + async fn instance_external_ip_list( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let ips = nexus + .instance_list_external_ips(&opctx, &instance_lookup) + .await?; + Ok(HttpResponseOk(ResultsPage { items: ips, next_page: None })) }; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - nexus - .instance_detach_external_ip( - &opctx, - &instance_lookup, - ¶ms::ExternalIpDetach::Ephemeral, - ) - .await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Snapshots - -/// List snapshots -#[endpoint { - method = GET, - path = "/v1/snapshots", - tags = ["snapshots"], -}] -async fn snapshot_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let project_lookup = - nexus.project_lookup(&opctx, scan_params.selector.clone())?; - let snapshots = nexus - .snapshot_list(&opctx, &project_lookup, &paginated_by) - .await? - .into_iter() - .map(|d| d.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - snapshots, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn instance_ephemeral_ip_attach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ip_to_create: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let ip = nexus + .instance_attach_ephemeral_ip( + &opctx, + &instance_lookup, + ip_to_create.into_inner().pool, + ) + .await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create snapshot -/// -/// Creates a point-in-time snapshot from a disk. 
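// Resource access throughout these handlers is two-phase: a synchronous
// `*_lookup(...)` call validates the selector and builds a typed lookup
// object, and an authorized database operation is then performed through it
// (`.fetch()`, or a nexus method that takes the lookup by reference). The
// destructuring in the view handlers, for example
//
//     let (.., snapshot) =
//         nexus.snapshot_lookup(&opctx, snapshot_selector)?.fetch().await?;
//
// discards the parent authorization handles and keeps only the record itself.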
-#[endpoint { - method = POST, - path = "/v1/snapshots", - tags = ["snapshots"], -}] -async fn snapshot_create( - rqctx: RequestContext, - query_params: Query, - new_snapshot: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let new_snapshot_params = &new_snapshot.into_inner(); - let project_lookup = nexus.project_lookup(&opctx, query)?; - let snapshot = nexus - .snapshot_create(&opctx, project_lookup, &new_snapshot_params) - .await?; - Ok(HttpResponseCreated(snapshot.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn instance_ephemeral_ip_detach( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + nexus + .instance_detach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpDetach::Ephemeral, + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch snapshot -#[endpoint { - method = GET, - path = "/v1/snapshots/{snapshot}", - tags = ["snapshots"], -}] -async fn snapshot_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let snapshot_selector = params::SnapshotSelector { - project: query.project, - snapshot: path.snapshot, - }; - let (.., snapshot) = - nexus.snapshot_lookup(&opctx, snapshot_selector)?.fetch().await?; - Ok(HttpResponseOk(snapshot.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Snapshots + + async fn snapshot_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let project_lookup = + nexus.project_lookup(&opctx, scan_params.selector.clone())?; + let snapshots = nexus + .snapshot_list(&opctx, &project_lookup, &paginated_by) + .await? 
+ .into_iter() + .map(|d| d.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + snapshots, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete snapshot -#[endpoint { - method = DELETE, - path = "/v1/snapshots/{snapshot}", - tags = ["snapshots"], -}] -async fn snapshot_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let snapshot_selector = params::SnapshotSelector { - project: query.project, - snapshot: path.snapshot, - }; - let snapshot_lookup = - nexus.snapshot_lookup(&opctx, snapshot_selector)?; - nexus.snapshot_delete(&opctx, &snapshot_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn snapshot_create( + rqctx: RequestContext, + query_params: Query, + new_snapshot: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let new_snapshot_params = &new_snapshot.into_inner(); + let project_lookup = nexus.project_lookup(&opctx, query)?; + let snapshot = nexus + .snapshot_create(&opctx, project_lookup, &new_snapshot_params) + .await?; + Ok(HttpResponseCreated(snapshot.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// VPCs - -/// List VPCs -#[endpoint { - method = GET, - path = "/v1/vpcs", - tags = ["vpcs"], -}] -async fn vpc_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_lookup = - nexus.project_lookup(&opctx, scan_params.selector.clone())?; - let vpcs = nexus - .vpc_list(&opctx, &project_lookup, &paginated_by) - .await? - .into_iter() - .map(|p| p.into()) - .collect(); - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - vpcs, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn snapshot_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let snapshot_selector = params::SnapshotSelector { + project: query.project, + snapshot: path.snapshot, + }; + let (.., snapshot) = nexus + .snapshot_lookup(&opctx, snapshot_selector)? 
+ .fetch() + .await?; + Ok(HttpResponseOk(snapshot.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create VPC -#[endpoint { - method = POST, - path = "/v1/vpcs", - tags = ["vpcs"], -}] -async fn vpc_create( - rqctx: RequestContext, - query_params: Query, - body: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let new_vpc_params = body.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_lookup = nexus.project_lookup(&opctx, query)?; - let vpc = nexus - .project_create_vpc(&opctx, &project_lookup, &new_vpc_params) - .await?; - Ok(HttpResponseCreated(vpc.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn snapshot_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let snapshot_selector = params::SnapshotSelector { + project: query.project, + snapshot: path.snapshot, + }; + let snapshot_lookup = + nexus.snapshot_lookup(&opctx, snapshot_selector)?; + nexus.snapshot_delete(&opctx, &snapshot_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch VPC -#[endpoint { - method = GET, - path = "/v1/vpcs/{vpc}", - tags = ["vpcs"], -}] -async fn vpc_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_selector = - params::VpcSelector { project: query.project, vpc: path.vpc }; - let (.., vpc) = nexus.vpc_lookup(&opctx, vpc_selector)?.fetch().await?; - Ok(HttpResponseOk(vpc.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // VPCs + + async fn vpc_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_lookup = + nexus.project_lookup(&opctx, scan_params.selector.clone())?; + let vpcs = nexus + .vpc_list(&opctx, &project_lookup, &paginated_by) + .await? 
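// One quirk carried over by this refactor: `vpc_create` (just below) consumes
// its query string and body *before* constructing the instrumented `handler`
// block, where every other handler does so inside it. Dropshot has already
// deserialized both by the time the method body runs, so the difference is
// purely stylistic.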
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + vpcs, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update a VPC -#[endpoint { - method = PUT, - path = "/v1/vpcs/{vpc}", - tags = ["vpcs"], -}] -async fn vpc_update( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - updated_vpc: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { + async fn vpc_create( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); let query = query_params.into_inner(); - let updated_vpc_params = &updated_vpc.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_selector = - params::VpcSelector { project: query.project, vpc: path.vpc }; - let vpc_lookup = nexus.vpc_lookup(&opctx, vpc_selector)?; - let vpc = nexus - .project_update_vpc(&opctx, &vpc_lookup, &updated_vpc_params) - .await?; - Ok(HttpResponseOk(vpc.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let new_vpc_params = body.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_lookup = nexus.project_lookup(&opctx, query)?; + let vpc = nexus + .project_create_vpc(&opctx, &project_lookup, &new_vpc_params) + .await?; + Ok(HttpResponseCreated(vpc.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete VPC -#[endpoint { - method = DELETE, - path = "/v1/vpcs/{vpc}", - tags = ["vpcs"], -}] -async fn vpc_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_selector = - params::VpcSelector { project: query.project, vpc: path.vpc }; - let vpc_lookup = nexus.vpc_lookup(&opctx, vpc_selector)?; - nexus.project_delete_vpc(&opctx, &vpc_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_selector = + params::VpcSelector { project: query.project, vpc: path.vpc }; + let (.., vpc) = + nexus.vpc_lookup(&opctx, vpc_selector)?.fetch().await?; + Ok(HttpResponseOk(vpc.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List subnets -#[endpoint { - method = GET, - path = "/v1/vpc-subnets", - tags = ["vpcs"], -}] -async fn vpc_subnet_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = 
query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_lookup = - nexus.vpc_lookup(&opctx, scan_params.selector.clone())?; - let subnets = nexus - .vpc_subnet_list(&opctx, &vpc_lookup, &paginated_by) - .await? - .into_iter() - .map(|vpc| vpc.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - subnets, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + updated_vpc: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let updated_vpc_params = &updated_vpc.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_selector = + params::VpcSelector { project: query.project, vpc: path.vpc }; + let vpc_lookup = nexus.vpc_lookup(&opctx, vpc_selector)?; + let vpc = nexus + .project_update_vpc(&opctx, &vpc_lookup, &updated_vpc_params) + .await?; + Ok(HttpResponseOk(vpc.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create subnet -#[endpoint { - method = POST, - path = "/v1/vpc-subnets", - tags = ["vpcs"], -}] -async fn vpc_subnet_create( - rqctx: RequestContext, - query_params: Query, - create_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let create = create_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; - let subnet = - nexus.vpc_create_subnet(&opctx, &vpc_lookup, &create).await?; - Ok(HttpResponseCreated(subnet.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_selector = + params::VpcSelector { project: query.project, vpc: path.vpc }; + let vpc_lookup = nexus.vpc_lookup(&opctx, vpc_selector)?; + nexus.project_delete_vpc(&opctx, &vpc_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch subnet -#[endpoint { - method = GET, - path = "/v1/vpc-subnets/{subnet}", - tags = ["vpcs"], -}] -async fn vpc_subnet_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let subnet_selector = params::SubnetSelector { - project: 
query.project, - vpc: query.vpc, - subnet: path.subnet, - }; - let (.., subnet) = - nexus.vpc_subnet_lookup(&opctx, subnet_selector)?.fetch().await?; - Ok(HttpResponseOk(subnet.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_lookup = + nexus.vpc_lookup(&opctx, scan_params.selector.clone())?; + let subnets = nexus + .vpc_subnet_list(&opctx, &vpc_lookup, &paginated_by) + .await? + .into_iter() + .map(|vpc| vpc.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + subnets, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete subnet -#[endpoint { - method = DELETE, - path = "/v1/vpc-subnets/{subnet}", - tags = ["vpcs"], -}] -async fn vpc_subnet_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let subnet_selector = params::SubnetSelector { - project: query.project, - vpc: query.vpc, - subnet: path.subnet, - }; - let subnet_lookup = nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; - nexus.vpc_delete_subnet(&opctx, &subnet_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let create = create_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; + let subnet = + nexus.vpc_create_subnet(&opctx, &vpc_lookup, &create).await?; + Ok(HttpResponseCreated(subnet.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update subnet -#[endpoint { - method = PUT, - path = "/v1/vpc-subnets/{subnet}", - tags = ["vpcs"], -}] -async fn vpc_subnet_update( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - subnet_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let subnet_params = subnet_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let subnet_selector = params::SubnetSelector { - project: query.project, - vpc: query.vpc, - subnet: path.subnet, - }; - let subnet_lookup = nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; - let subnet = nexus - 
.vpc_update_subnet(&opctx, &subnet_lookup, &subnet_params) - .await?; - Ok(HttpResponseOk(subnet.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let subnet_selector = params::SubnetSelector { + project: query.project, + vpc: query.vpc, + subnet: path.subnet, + }; + let (.., subnet) = nexus + .vpc_subnet_lookup(&opctx, subnet_selector)? + .fetch() + .await?; + Ok(HttpResponseOk(subnet.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// This endpoint is likely temporary. We would rather list all IPs allocated in -// a subnet whether they come from NICs or something else. See -// https://github.com/oxidecomputer/omicron/issues/2476 - -/// List network interfaces -#[endpoint { - method = GET, - path = "/v1/vpc-subnets/{subnet}/network-interfaces", - tags = ["vpcs"], -}] -async fn vpc_subnet_list_network_interfaces( - rqctx: RequestContext, - path_params: Path, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let path = path_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let subnet_selector = params::SubnetSelector { - project: scan_params.selector.project.clone(), - vpc: scan_params.selector.vpc.clone(), - subnet: path.subnet, - }; - let subnet_lookup = nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; - let interfaces = nexus - .subnet_list_instance_network_interfaces( - &opctx, - &subnet_lookup, - &paginated_by, - ) - .await? 
- .into_iter() - .map(|interfaces| interfaces.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - interfaces, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let subnet_selector = params::SubnetSelector { + project: query.project, + vpc: query.vpc, + subnet: path.subnet, + }; + let subnet_lookup = + nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; + nexus.vpc_delete_subnet(&opctx, &subnet_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// VPC Firewalls - -/// List firewall rules -#[endpoint { - method = GET, - path = "/v1/vpc-firewall-rules", - tags = ["vpcs"], -}] -async fn vpc_firewall_rules_view( - rqctx: RequestContext, - query_params: Query, -) -> Result, HttpError> { - // TODO: Check If-Match and fail if the ETag doesn't match anymore. - // Without this check, if firewall rules change while someone is listing - // the rules, they will see a mix of the old and new rules. - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; - let rules = nexus.vpc_list_firewall_rules(&opctx, &vpc_lookup).await?; - Ok(HttpResponseOk(VpcFirewallRules { - rules: rules.into_iter().map(|rule| rule.into()).collect(), - })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + subnet_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let subnet_params = subnet_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let subnet_selector = params::SubnetSelector { + project: query.project, + vpc: query.vpc, + subnet: path.subnet, + }; + let subnet_lookup = + nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; + let subnet = nexus + .vpc_update_subnet(&opctx, &subnet_lookup, &subnet_params) + .await?; + Ok(HttpResponseOk(subnet.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Note: the limits in the below comment come from the firewall rules model -// file, nexus/db-model/src/vpc_firewall_rule.rs. - -/// Replace firewall rules -/// -/// The maximum number of rules per VPC is 1024. -/// -/// Targets are used to specify the set of instances to which a firewall rule -/// applies. You can target instances directly by name, or specify a VPC, VPC -/// subnet, IP, or IP subnet, which will apply the rule to traffic going to -/// all matching instances. Targets are additive: the rule applies to instances -/// matching ANY target. The maximum number of targets is 256. 
-/// -/// Filters reduce the scope of a firewall rule. Without filters, the rule -/// applies to all packets to the targets (or from the targets, if it's an -/// outbound rule). With multiple filters, the rule applies only to packets -/// matching ALL filters. The maximum number of each type of filter is 256. -#[endpoint { - method = PUT, - path = "/v1/vpc-firewall-rules", - tags = ["vpcs"], -}] -async fn vpc_firewall_rules_update( - rqctx: RequestContext, - query_params: Query, - router_params: TypedBody, -) -> Result, HttpError> { - // TODO: Check If-Match and fail if the ETag doesn't match anymore. - // TODO: limit size of the ruleset because the GET endpoint is not paginated - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let router_params = router_params.into_inner(); - let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; - let rules = nexus - .vpc_update_firewall_rules(&opctx, &vpc_lookup, &router_params) - .await?; - Ok(HttpResponseOk(VpcFirewallRules { - rules: rules.into_iter().map(|rule| rule.into()).collect(), - })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // This endpoint is likely temporary. We would rather list all IPs allocated in + // a subnet whether they come from NICs or something else. See + // https://github.com/oxidecomputer/omicron/issues/2476 -// VPC Routers - -/// List routers -#[endpoint { - method = GET, - path = "/v1/vpc-routers", - tags = ["vpcs"], -}] -async fn vpc_router_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let vpc_lookup = - nexus.vpc_lookup(&opctx, scan_params.selector.clone())?; - let routers = nexus - .vpc_router_list(&opctx, &vpc_lookup, &paginated_by) - .await? - .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - routers, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_subnet_list_network_interfaces( + rqctx: RequestContext, + path_params: Path, + query_params: Query>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let path = path_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let subnet_selector = params::SubnetSelector { + project: scan_params.selector.project.clone(), + vpc: scan_params.selector.vpc.clone(), + subnet: path.subnet, + }; + let subnet_lookup = + nexus.vpc_subnet_lookup(&opctx, subnet_selector)?; + let interfaces = nexus + .subnet_list_instance_network_interfaces( + &opctx, + &subnet_lookup, + &paginated_by, + ) + .await? 
+ .into_iter() + .map(|interfaces| interfaces.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + interfaces, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch router -#[endpoint { - method = GET, - path = "/v1/vpc-routers/{router}", - tags = ["vpcs"], -}] -async fn vpc_router_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let router_selector = params::RouterSelector { - project: query.project, - vpc: query.vpc, - router: path.router, - }; - let (.., vpc_router) = - nexus.vpc_router_lookup(&opctx, router_selector)?.fetch().await?; - Ok(HttpResponseOk(vpc_router.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // VPC Firewalls + + async fn vpc_firewall_rules_view( + rqctx: RequestContext, + query_params: Query, + ) -> Result, HttpError> { + // TODO: Check If-Match and fail if the ETag doesn't match anymore. + // Without this check, if firewall rules change while someone is listing + // the rules, they will see a mix of the old and new rules. + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; + let rules = + nexus.vpc_list_firewall_rules(&opctx, &vpc_lookup).await?; + Ok(HttpResponseOk(VpcFirewallRules { + rules: rules.into_iter().map(|rule| rule.into()).collect(), + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create VPC router -#[endpoint { - method = POST, - path = "/v1/vpc-routers", - tags = ["vpcs"], -}] -async fn vpc_router_create( - rqctx: RequestContext, - query_params: Query, - create_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let create = create_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; - let router = nexus - .vpc_create_router( - &opctx, - &vpc_lookup, - &db::model::VpcRouterKind::Custom, - &create, - ) - .await?; - Ok(HttpResponseCreated(router.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Note: the limits in the below comment come from the firewall rules model + // file, nexus/db-model/src/vpc_firewall_rule.rs. + + async fn vpc_firewall_rules_update( + rqctx: RequestContext, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError> { + // TODO: Check If-Match and fail if the ETag doesn't match anymore. 
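// The If-Match TODOs here and on `vpc_firewall_rules_view` describe the same
// read-modify-write race: a client GETs the rule set, edits it, and PUTs the
// whole set back; a write that lands in between is silently overwritten. An
// ETag checked against If-Match would surface that as a 412 Precondition
// Failed instead.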
+ // TODO: limit size of the ruleset because the GET endpoint is not paginated + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let router_params = router_params.into_inner(); + let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; + let rules = nexus + .vpc_update_firewall_rules(&opctx, &vpc_lookup, &router_params) + .await?; + Ok(HttpResponseOk(VpcFirewallRules { + rules: rules.into_iter().map(|rule| rule.into()).collect(), + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete router -#[endpoint { - method = DELETE, - path = "/v1/vpc-routers/{router}", - tags = ["vpcs"], -}] -async fn vpc_router_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let router_selector = params::RouterSelector { - project: query.project, - vpc: query.vpc, - router: path.router, - }; - let router_lookup = nexus.vpc_router_lookup(&opctx, router_selector)?; - nexus.vpc_delete_router(&opctx, &router_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // VPC Routers + + async fn vpc_router_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let vpc_lookup = + nexus.vpc_lookup(&opctx, scan_params.selector.clone())?; + let routers = nexus + .vpc_router_list(&opctx, &vpc_lookup, &paginated_by) + .await? 
+ .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + routers, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update router -#[endpoint { - method = PUT, - path = "/v1/vpc-routers/{router}", - tags = ["vpcs"], -}] -async fn vpc_router_update( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - router_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let router_params = router_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let router_selector = params::RouterSelector { - project: query.project, - vpc: query.vpc, - router: path.router, - }; - let router_lookup = nexus.vpc_router_lookup(&opctx, router_selector)?; - let router = nexus - .vpc_update_router(&opctx, &router_lookup, &router_params) - .await?; - Ok(HttpResponseOk(router.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_router_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let router_selector = params::RouterSelector { + project: query.project, + vpc: query.vpc, + router: path.router, + }; + let (.., vpc_router) = nexus + .vpc_router_lookup(&opctx, router_selector)? + .fetch() + .await?; + Ok(HttpResponseOk(vpc_router.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List routes -/// -/// List the routes associated with a router in a particular VPC. -#[endpoint { - method = GET, - path = "/v1/vpc-router-routes", - tags = ["vpcs"], -}] -async fn vpc_router_route_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let router_lookup = - nexus.vpc_router_lookup(&opctx, scan_params.selector.clone())?; - let routes = nexus - .vpc_router_route_list(&opctx, &router_lookup, &paginated_by) - .await? 
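These handlers all funnel path and query parameters into a selector struct (`RouterSelector`, `RouteSelector`, and so on) before doing a lookup. A simplified model of the name-or-ID convention those lookups enforce, as a sketch rather than nexus's actual lookup code: an ID stands alone, while a name is only meaningful together with its parents' names.

```rust
enum NameOrId {
    Name(String),
    Id(u128), // stand-in for a UUID
}

fn resolve_router(
    router: NameOrId,
    project: Option<String>,
    vpc: Option<String>,
) -> Result<String, String> {
    match router {
        // An ID is globally unique, so parent selectors would be redundant.
        NameOrId::Id(id) if project.is_none() && vpc.is_none() => {
            Ok(format!("lookup router {id:#x} directly"))
        }
        NameOrId::Id(_) => {
            Err("when addressing by ID, omit project and vpc".to_string())
        }
        // A name is only unique within its VPC, so both parents are needed.
        NameOrId::Name(name) => match (project, vpc) {
            (Some(p), Some(v)) => Ok(format!("lookup {p}/{v}/{name}")),
            _ => Err("when addressing by name, provide project and vpc".to_string()),
        },
    }
}

fn main() {
    assert!(resolve_router(NameOrId::Id(7), None, None).is_ok());
    assert!(resolve_router(
        NameOrId::Name("default".into()),
        Some("proj".into()),
        Some("vpc0".into())
    )
    .is_ok());
    assert!(resolve_router(NameOrId::Name("default".into()), None, None).is_err());
}
```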
- .into_iter() - .map(|route| route.into()) - .collect(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - routes, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_router_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let create = create_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let vpc_lookup = nexus.vpc_lookup(&opctx, query)?; + let router = nexus + .vpc_create_router( + &opctx, + &vpc_lookup, + &db::model::VpcRouterKind::Custom, + &create, + ) + .await?; + Ok(HttpResponseCreated(router.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Vpc Router Routes - -/// Fetch route -#[endpoint { - method = GET, - path = "/v1/vpc-router-routes/{route}", - tags = ["vpcs"], -}] -async fn vpc_router_route_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let route_selector = params::RouteSelector { - project: query.project, - vpc: query.vpc, - router: Some(query.router), - route: path.route, - }; - let (.., route) = nexus - .vpc_router_route_lookup(&opctx, route_selector)? - .fetch() - .await?; - Ok(HttpResponseOk(route.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn vpc_router_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let router_selector = params::RouterSelector { + project: query.project, + vpc: query.vpc, + router: path.router, + }; + let router_lookup = + nexus.vpc_router_lookup(&opctx, router_selector)?; + nexus.vpc_delete_router(&opctx, &router_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Create route -#[endpoint { - method = POST, - path = "/v1/vpc-router-routes", - tags = ["vpcs"], -}] -async fn vpc_router_route_create( - rqctx: RequestContext, - query_params: Query, - create_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let create = create_params.into_inner(); - let router_lookup = nexus.vpc_router_lookup(&opctx, query)?; - let route = nexus - .router_create_route( - &opctx, - &router_lookup, - &RouterRouteKind::Custom, - &create, + async fn vpc_router_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = 
&apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let router_params = router_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let router_selector = params::RouterSelector { + project: query.project, + vpc: query.vpc, + router: path.router, + }; + let router_lookup = + nexus.vpc_router_lookup(&opctx, router_selector)?; + let router = nexus + .vpc_update_router(&opctx, &router_lookup, &router_params) + .await?; + Ok(HttpResponseOk(router.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn vpc_router_route_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let router_lookup = nexus + .vpc_router_lookup(&opctx, scan_params.selector.clone())?; + let routes = nexus + .vpc_router_route_list(&opctx, &router_lookup, &paginated_by) + .await? + .into_iter() + .map(|route| route.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + routes, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + // Vpc Router Routes + + async fn vpc_router_route_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let route_selector = params::RouteSelector { + project: query.project, + vpc: query.vpc, + router: Some(query.router), + route: path.route, + }; + let (.., route) = nexus + .vpc_router_route_lookup(&opctx, route_selector)? 
+ .fetch() + .await?; + Ok(HttpResponseOk(route.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn vpc_router_route_create( + rqctx: RequestContext, + query_params: Query, + create_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let create = create_params.into_inner(); + let router_lookup = nexus.vpc_router_lookup(&opctx, query)?; + let route = nexus + .router_create_route( + &opctx, + &router_lookup, + &RouterRouteKind::Custom, + &create, + ) + .await?; + Ok(HttpResponseCreated(route.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn vpc_router_route_delete( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let route_selector = params::RouteSelector { + project: query.project, + vpc: query.vpc, + router: query.router, + route: path.route, + }; + let route_lookup = + nexus.vpc_router_route_lookup(&opctx, route_selector)?; + nexus.router_delete_route(&opctx, &route_lookup).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn vpc_router_route_update( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + router_params: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let router_params = router_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let route_selector = params::RouteSelector { + project: query.project, + vpc: query.vpc, + router: query.router, + route: path.route, + }; + let route_lookup = + nexus.vpc_router_route_lookup(&opctx, route_selector)?; + let route = nexus + .router_update_route(&opctx, &route_lookup, &router_params) + .await?; + Ok(HttpResponseOk(route.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + // Racks + + async fn rack_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let racks = nexus + .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) + .await? 
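Note that `vpc_router_route_create` above hard-codes `RouterRouteKind::Custom`: the other route kinds are attached and managed by the system, so user-driven creation is only ever for custom routes. A sketch of that invariant; the kind list mirrors the public API enum, while the enforcement function itself is illustrative.

```rust
#[derive(Debug)]
#[allow(dead_code)] // VpcPeering is listed for completeness
enum RouterRouteKind {
    Default,
    VpcSubnet,
    VpcPeering,
    Custom,
}

fn ensure_user_creatable(kind: &RouterRouteKind) -> Result<(), String> {
    match kind {
        RouterRouteKind::Custom => Ok(()),
        other => Err(format!("{other:?} routes are system-managed")),
    }
}

fn main() {
    assert!(ensure_user_creatable(&RouterRouteKind::Custom).is_ok());
    assert!(ensure_user_creatable(&RouterRouteKind::VpcSubnet).is_err());
    assert!(ensure_user_creatable(&RouterRouteKind::Default).is_err());
}
```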
+ .into_iter() + .map(|r| r.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + racks, + &|_, rack: &Rack| rack.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn rack_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let rack_info = nexus.rack_lookup(&opctx, &path.rack_id).await?; + Ok(HttpResponseOk(rack_info.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn sled_list_uninitialized( + rqctx: RequestContext, + query: Query>, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + // We don't actually support real pagination + let pag_params = query.into_inner(); + if let dropshot::WhichPage::Next(last_seen) = &pag_params.page { + return Err(Error::invalid_value( + last_seen.clone(), + "bad page token", ) - .await?; - Ok(HttpResponseCreated(route.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + .into()); + } + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let sleds = nexus.sled_list_uninitialized(&opctx).await?; + Ok(HttpResponseOk(ResultsPage { items: sleds, next_page: None })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Delete route -#[endpoint { - method = DELETE, - path = "/v1/vpc-router-routes/{route}", - tags = ["vpcs"], -}] -async fn vpc_router_route_delete( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let route_selector = params::RouteSelector { - project: query.project, - vpc: query.vpc, - router: query.router, - route: path.route, - }; - let route_lookup = - nexus.vpc_router_route_lookup(&opctx, route_selector)?; - nexus.router_delete_route(&opctx, &route_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn sled_add( + rqctx: RequestContext, + sled: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let id = nexus + .sled_add(&opctx, sled.into_inner()) + .await? 
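`sled_list_uninitialized` takes pagination parameters for API consistency but, as its comment says, does not actually paginate: any page token is rejected up front and the single page returned never advertises a next page. A sketch of that pattern with stand-in types (not dropshot's):

```rust
struct ResultsPage<T> {
    items: Vec<T>,
    next_page: Option<String>,
}

fn list_uninitialized(
    page_token: Option<&str>,
    sleds: Vec<String>,
) -> Result<ResultsPage<String>, String> {
    // Reject any attempt to page: there is only ever one page.
    if let Some(token) = page_token {
        return Err(format!("bad page token: {token:?}"));
    }
    Ok(ResultsPage { items: sleds, next_page: None })
}

fn main() {
    let page = list_uninitialized(None, vec!["sled7".to_string()]).unwrap();
    assert_eq!(page.items.len(), 1);
    assert!(page.next_page.is_none());
    assert!(list_uninitialized(Some("tok"), vec![]).is_err());
}
```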
+ .into_untyped_uuid(); + Ok(HttpResponseCreated(views::SledId { id })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Update route -#[endpoint { - method = PUT, - path = "/v1/vpc-router-routes/{route}", - tags = ["vpcs"], -}] -async fn vpc_router_route_update( - rqctx: RequestContext, - path_params: Path, - query_params: Query, - router_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let router_params = router_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let route_selector = params::RouteSelector { - project: query.project, - vpc: query.vpc, - router: query.router, - route: path.route, - }; - let route_lookup = - nexus.vpc_router_route_lookup(&opctx, route_selector)?; - let route = nexus - .router_update_route(&opctx, &route_lookup, &router_params) - .await?; - Ok(HttpResponseOk(route.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Sleds + + async fn sled_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let sleds = nexus + .sled_list(&opctx, &data_page_params_for(&rqctx, &query)?) + .await? + .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + sleds, + &|_, sled: &Sled| sled.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Racks - -/// List racks -#[endpoint { - method = GET, - path = "/v1/system/hardware/racks", - tags = ["system/hardware"], -}] -async fn rack_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let racks = nexus - .racks_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await? - .into_iter() - .map(|r| r.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - racks, - &|_, rack: &Rack| rack.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn sled_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let (.., sled) = + nexus.sled_lookup(&opctx, &path.sled_id)?.fetch().await?; + Ok(HttpResponseOk(sled.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Path parameters for Rack requests -#[derive(Deserialize, JsonSchema)] -struct RackPathParam { - /// The rack's unique ID. 
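The `into_untyped_uuid()` call in `sled_add` above is the typed-ID pattern at work: internal code passes strongly typed IDs so a sled ID cannot be confused with, say, an instance ID, and only the API boundary downgrades to a plain UUID for the `SledId` view. A stand-in sketch of the idea (omicron builds this on the `newtype-uuid` crate rather than hand-rolled types like these):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Uuid(u128); // stand-in for uuid::Uuid

// A sled-specific ID type: it cannot be passed where another entity's ID is
// expected, which is the point of the pattern.
#[derive(Clone, Copy)]
struct SledUuid(Uuid);

impl SledUuid {
    fn into_untyped_uuid(self) -> Uuid {
        self.0
    }
}

fn main() {
    let sled_id = SledUuid(Uuid(0xfeed));
    // Only the external view exposes the untyped form.
    assert_eq!(sled_id.into_untyped_uuid(), Uuid(0xfeed));
}
```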
- rack_id: Uuid, -} + async fn sled_set_provision_policy( + rqctx: RequestContext, + path_params: Path, + new_provision_state: TypedBody, + ) -> Result, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; -/// Fetch rack -#[endpoint { - method = GET, - path = "/v1/system/hardware/racks/{rack_id}", - tags = ["system/hardware"], -}] -async fn rack_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let rack_info = nexus.rack_lookup(&opctx, &path.rack_id).await?; - Ok(HttpResponseOk(rack_info.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let path = path_params.into_inner(); + let new_state = new_provision_state.into_inner().state; -/// List uninitialized sleds -#[endpoint { - method = GET, - path = "/v1/system/hardware/sleds-uninitialized", - tags = ["system/hardware"] -}] -async fn sled_list_uninitialized( - rqctx: RequestContext, - query: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - // We don't actually support real pagination - let pag_params = query.into_inner(); - if let dropshot::WhichPage::Next(last_seen) = &pag_params.page { - return Err( - Error::invalid_value(last_seen.clone(), "bad page token").into() - ); - } - let handler = async { - let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let sleds = nexus.sled_list_uninitialized(&opctx).await?; - Ok(HttpResponseOk(ResultsPage { items: sleds, next_page: None })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; -/// The unique ID of a sled. -#[derive(Clone, Debug, Serialize, JsonSchema)] -pub struct SledId { - pub id: Uuid, -} + let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; -/// Add sled to initialized rack -// -// TODO: In the future this should really be a PUT request, once we resolve -// https://github.com/oxidecomputer/omicron/issues/4494. It should also -// explicitly be tied to a rack via a `rack_id` path param. For now we assume -// we are only operating on single rack systems. -#[endpoint { - method = POST, - path = "/v1/system/hardware/sleds", - tags = ["system/hardware"] -}] -async fn sled_add( - rqctx: RequestContext, - sled: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let id = nexus - .sled_add(&opctx, sled.into_inner()) - .await? 
- .into_untyped_uuid(); - Ok(HttpResponseCreated(SledId { id })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let old_state = nexus + .sled_set_provision_policy(&opctx, &sled_lookup, new_state) + .await?; -// Sleds - -/// List sleds -#[endpoint { - method = GET, - path = "/v1/system/hardware/sleds", - tags = ["system/hardware"], -}] -async fn sled_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let sleds = nexus - .sled_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await? - .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - sleds, - &|_, sled: &Sled| sled.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let response = + params::SledProvisionPolicyResponse { old_state, new_state }; -/// Fetch sled -#[endpoint { - method = GET, - path = "/v1/system/hardware/sleds/{sled_id}", - tags = ["system/hardware"], -}] -async fn sled_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let (.., sled) = - nexus.sled_lookup(&opctx, &path.sled_id)?.fetch().await?; - Ok(HttpResponseOk(sled.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + Ok(HttpResponseOk(response)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Set sled provision policy -#[endpoint { - method = PUT, - path = "/v1/system/hardware/sleds/{sled_id}/provision-policy", - tags = ["system/hardware"], -}] -async fn sled_set_provision_policy( - rqctx: RequestContext, - path_params: Path, - new_provision_state: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; + async fn sled_instance_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; + let sled_instances = nexus + .sled_instance_list( + &opctx, + &sled_lookup, + &data_page_params_for(&rqctx, &query)?, + ) + .await? 
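`sled_set_provision_policy` returns both the old and the new policy, which lets callers detect concurrent changes and makes the call safe to retry. A sketch of that exchange; the variant names follow the API's spirit but are assumptions here, and the state swap stands in for the real datastore update.

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum SledProvisionPolicy {
    Provisionable,
    NonProvisionable,
}

struct Sled {
    policy: SledProvisionPolicy,
}

// Returns (old, new), mirroring the response body's `old_state`/`new_state`.
fn set_provision_policy(
    sled: &mut Sled,
    new_state: SledProvisionPolicy,
) -> (SledProvisionPolicy, SledProvisionPolicy) {
    let old_state = std::mem::replace(&mut sled.policy, new_state);
    (old_state, new_state)
}

fn main() {
    let mut sled = Sled { policy: SledProvisionPolicy::Provisionable };
    let (old, new) =
        set_provision_policy(&mut sled, SledProvisionPolicy::NonProvisionable);
    assert_eq!(old, SledProvisionPolicy::Provisionable);
    assert_eq!(new, SledProvisionPolicy::NonProvisionable);
    // Retrying is harmless: the second call reports the no-op.
    let (old, new) =
        set_provision_policy(&mut sled, SledProvisionPolicy::NonProvisionable);
    assert_eq!(old, new);
}
```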
+ .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + sled_instances, + &|_, sled_instance: &views::SledInstance| { + sled_instance.identity.id + }, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let path = path_params.into_inner(); - let new_state = new_provision_state.into_inner().state; + // Physical disks + + async fn physical_disk_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let disks = nexus + .physical_disk_list( + &opctx, + &data_page_params_for(&rqctx, &query)?, + ) + .await? + .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + disks, + &|_, disk: &PhysicalDisk| disk.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn physical_disk_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + + let (.., physical_disk) = + nexus.physical_disk_lookup(&opctx, &path)?.fetch().await?; + Ok(HttpResponseOk(physical_disk.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + // Switches + + async fn switch_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let switches = nexus + .switch_list(&opctx, &data_page_params_for(&rqctx, &query)?) + .await? + .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + switches, + &|_, switch: &views::Switch| switch.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn switch_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let (.., switch) = nexus + .switch_lookup( + &opctx, + params::SwitchSelector { switch: path.switch_id }, + )? + .fetch() + .await?; + Ok(HttpResponseOk(switch.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn sled_physical_disk_list( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let disks = nexus + .sled_list_physical_disks( + &opctx, + path.sled_id, + &data_page_params_for(&rqctx, &query)?, + ) + .await? 
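The ID-paginated lists here (sleds, instances on a sled, racks, physical disks, switches) all use the same `ScanById` marker convention: results are ordered by ID and the page marker is just the last ID seen, so each page resumes strictly after it. A toy model of the scheme, with illustrative names:

```rust
// Items must already be sorted by ID for the marker to make sense.
fn page_of_ids(sorted_ids: &[u64], after: Option<u64>, limit: usize) -> (Vec<u64>, Option<u64>) {
    let page: Vec<u64> = sorted_ids
        .iter()
        .copied()
        .filter(|id| after.map_or(true, |marker| *id > marker))
        .take(limit)
        .collect();
    // A full page may have more behind it; a short page is the last one.
    let next_marker = if page.len() == limit { page.last().copied() } else { None };
    (page, next_marker)
}

fn main() {
    let ids = [1u64, 2, 3, 4, 5];
    let (page1, marker) = page_of_ids(&ids, None, 2);
    assert_eq!(page1, vec![1, 2]);
    let (page2, marker) = page_of_ids(&ids, marker, 2);
    assert_eq!(page2, vec![3, 4]);
    let (page3, marker) = page_of_ids(&ids, marker, 2);
    assert_eq!(page3, vec![5]);
    assert_eq!(marker, None);
}
```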
+ .into_iter() + .map(|s| s.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + disks, + &|_, disk: &PhysicalDisk| disk.identity.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + // Metrics - let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; + async fn system_metric( + rqctx: RequestContext, + path_params: Path, + pag_params: Query< + PaginationParams, + >, + other_params: Query, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let metric_name = path_params.into_inner().metric_name; + let pagination = pag_params.into_inner(); + let limit = rqctx.page_limit(&pagination)?; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let silo_lookup = match other_params.into_inner().silo { + Some(silo) => Some(nexus.silo_lookup(&opctx, silo)?), + _ => None, + }; - let old_state = nexus - .sled_set_provision_policy(&opctx, &sled_lookup, new_state) - .await?; + let result = nexus + .system_metric_list( + &opctx, + metric_name, + silo_lookup, + pagination, + limit, + ) + .await?; - let response = - params::SledProvisionPolicyResponse { old_state, new_state }; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - Ok(HttpResponseOk(response)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn silo_metric( + rqctx: RequestContext, + path_params: Path, + pag_params: Query< + PaginationParams, + >, + other_params: Query, + ) -> Result>, HttpError> + { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let metric_name = path_params.into_inner().metric_name; + + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_lookup = match other_params.into_inner().project { + Some(project) => { + let project_selector = params::ProjectSelector { project }; + Some(nexus.project_lookup(&opctx, project_selector)?) + } + _ => None, + }; -/// List instances running on given sled -#[endpoint { - method = GET, - path = "/v1/system/hardware/sleds/{sled_id}/instances", - tags = ["system/hardware"], -}] -async fn sled_instance_list( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let sled_lookup = nexus.sled_lookup(&opctx, &path.sled_id)?; - let sled_instances = nexus - .sled_instance_list( - &opctx, - &sled_lookup, - &data_page_params_for(&rqctx, &query)?, - ) - .await? 
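`system_metric` treats the silo as an optional scope: when one is supplied it is resolved to a lookup and the results are restricted to it, otherwise the query runs fleet-wide. A much-simplified sketch of that optional-scope filtering, with stand-in types:

```rust
#[derive(Clone)]
struct Measurement {
    silo: String,
    value: u64,
}

fn system_metric_list(all: &[Measurement], silo: Option<&str>) -> Vec<Measurement> {
    all.iter()
        .filter(|m| silo.map_or(true, |s| m.silo == s))
        .cloned()
        .collect()
}

fn main() {
    let data = vec![
        Measurement { silo: "eng".to_string(), value: 10 },
        Measurement { silo: "ops".to_string(), value: 20 },
    ];
    assert_eq!(system_metric_list(&data, None).len(), 2);
    let eng = system_metric_list(&data, Some("eng"));
    assert_eq!(eng.len(), 1);
    assert_eq!(eng[0].value, 10);
}
```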
- .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - sled_instances, - &|_, sled_instance: &views::SledInstance| sled_instance.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let pagination = pag_params.into_inner(); + let limit = rqctx.page_limit(&pagination)?; -// Physical disks - -/// List physical disks -#[endpoint { - method = GET, - path = "/v1/system/hardware/disks", - tags = ["system/hardware"], -}] -async fn physical_disk_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let disks = nexus - .physical_disk_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await? - .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - disks, - &|_, disk: &PhysicalDisk| disk.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let result = nexus + .silo_metric_list( + &opctx, + metric_name, + project_lookup, + pagination, + limit, + ) + .await?; -/// Get a physical disk -#[endpoint { - method = GET, - path = "/v1/system/hardware/disks/{disk_id}", - tags = ["system/hardware"], -}] -async fn physical_disk_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let (.., physical_disk) = - nexus.physical_disk_lookup(&opctx, &path)?.fetch().await?; - Ok(HttpResponseOk(physical_disk.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn timeseries_schema_list( + rqctx: RequestContext, + pag_params: Query, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let pagination = pag_params.into_inner(); + let limit = rqctx.page_limit(&pagination)?; + nexus + .timeseries_schema_list(&opctx, &pagination, limit) + .await + .map(HttpResponseOk) + .map_err(HttpError::from) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Switches - -/// List switches -#[endpoint { - method = GET, - path = "/v1/system/hardware/switches", - tags = ["system/hardware"], -}] -async fn switch_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let switches = nexus - .switch_list(&opctx, &data_page_params_for(&rqctx, &query)?) - .await? 
- .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - switches, - &|_, switch: &views::Switch| switch.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn timeseries_query( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let query = body.into_inner().query; + nexus + .timeseries_query(&opctx, &query) + .await + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) + .map_err(HttpError::from) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch switch -#[endpoint { - method = GET, - path = "/v1/system/hardware/switches/{switch_id}", - tags = ["system/hardware"], - }] -async fn switch_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let (.., switch) = nexus - .switch_lookup( - &opctx, - params::SwitchSelector { switch: path.switch_id }, - )? - .fetch() - .await?; - Ok(HttpResponseOk(switch.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Updates + + async fn system_update_put_repository( + rqctx: RequestContext, + query: Query, + body: StreamingBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let query = query.into_inner(); + let body = body.into_stream(); + let update = nexus + .updates_put_repository(&opctx, body, query.file_name) + .await?; + Ok(HttpResponseOk(update)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// List physical disks attached to sleds -#[endpoint { - method = GET, - path = "/v1/system/hardware/sleds/{sled_id}/disks", - tags = ["system/hardware"], -}] -async fn sled_physical_disk_list( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let disks = nexus - .sled_list_physical_disks( - &opctx, - path.sled_id, - &data_page_params_for(&rqctx, &query)?, - ) - .await? 
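`timeseries_query` takes a single OxQL string in the request body and returns a list of tables. Illustrative only: the query below is a plausible shape for such a string, not a timeseries schema guaranteed to exist on any given system.

```rust
fn main() {
    let query = "get http_service:request_latency_histogram \
                 | filter timestamp > @2024-01-01";
    // A client would POST this as `{ "query": "<the string above>" }` to
    // /v1/timeseries/query and receive tables in response.
    println!("{query}");
}
```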
- .into_iter() - .map(|s| s.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - disks, - &|_, disk: &PhysicalDisk| disk.identity.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn system_update_get_repository( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let params = path_params.into_inner(); + let description = nexus + .updates_get_repository(&opctx, params.system_version) + .await?; + Ok(HttpResponseOk(TufRepoGetResponse { + description: description.into_external(), + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Metrics + // Silo users + + async fn user_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let scan_params = ScanById::from_query(&query)?; + + // TODO: a valid UUID gets parsed here and will 404 if it doesn't exist + // (as expected) but a non-UUID string just gets let through as None + // (i.e., ignored) instead of 400ing + + let users = if let Some(group_id) = scan_params.selector.group { + nexus + .current_silo_group_users_list( + &opctx, &pagparams, &group_id, + ) + .await? + } else { + nexus.silo_users_list_current(&opctx, &pagparams).await? + }; -#[derive(Display, Deserialize, JsonSchema)] -#[display(style = "snake_case")] -#[serde(rename_all = "snake_case")] -pub enum SystemMetricName { - VirtualDiskSpaceProvisioned, - CpusProvisioned, - RamProvisioned, -} + Ok(HttpResponseOk(ScanById::results_page( + &query, + users.into_iter().map(|i| i.into()).collect(), + &|_, user: &User| user.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -#[derive(Deserialize, JsonSchema)] -struct SystemMetricsPathParam { - metric_name: SystemMetricName, -} + // Silo groups -/// View metrics -/// -/// View CPU, memory, or storage utilization metrics at the fleet or silo level. -#[endpoint { - method = GET, - path = "/v1/system/metrics/{metric_name}", - tags = ["system/metrics"], -}] -async fn system_metric( - rqctx: RequestContext, - path_params: Path, - pag_params: Query< - PaginationParams, - >, - other_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { + async fn group_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let metric_name = path_params.into_inner().metric_name; - let pagination = pag_params.into_inner(); - let limit = rqctx.page_limit(&pagination)?; + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)?; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let groups = nexus + .silo_groups_list(&opctx, &pagparams) + .await? 
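The TODO in `user_list` notes that a malformed `group` value is silently ignored rather than rejected. A sketch of the stricter parsing it asks for, treating a present-but-malformed value as a 400-class error; this assumes the `uuid` crate, and the parameter name simply mirrors the query above.

```rust
use uuid::Uuid;

fn parse_group(raw: Option<&str>) -> Result<Option<Uuid>, String> {
    match raw {
        None => Ok(None),
        Some(s) => Uuid::parse_str(s)
            .map(Some)
            .map_err(|e| format!("expected 'group' to be a UUID: {e}")),
    }
}

fn main() {
    assert!(parse_group(None).unwrap().is_none());
    assert!(parse_group(Some("8c5d5f6b-36b6-4e8c-9d3a-5f7f2f9a9f11"))
        .unwrap()
        .is_some());
    assert!(parse_group(Some("not-a-uuid")).is_err());
}
```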
+ .into_iter() + .map(|i| i.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + groups, + &|_, group: &Group| group.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let silo_lookup = match other_params.into_inner().silo { - Some(silo) => Some(nexus.silo_lookup(&opctx, silo)?), - _ => None, + async fn group_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let (.., group) = + nexus.silo_group_lookup(&opctx, &path.group_id).fetch().await?; + Ok(HttpResponseOk(group.into())) }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let result = nexus - .system_metric_list( - &opctx, - metric_name, - silo_lookup, - pagination, - limit, - ) - .await?; - - Ok(HttpResponseOk(result)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Built-in (system) users -/// View metrics -/// -/// View CPU, memory, or storage utilization metrics at the silo or project level. -#[endpoint { - method = GET, - path = "/v1/metrics/{metric_name}", - tags = ["metrics"], -}] -async fn silo_metric( - rqctx: RequestContext, - path_params: Path, - pag_params: Query< - PaginationParams, - >, - other_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { + async fn user_builtin_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let metric_name = path_params.into_inner().metric_name; - - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let project_lookup = match other_params.into_inner().project { - Some(project) => { - let project_selector = params::ProjectSelector { project }; - Some(nexus.project_lookup(&opctx, project_selector)?) - } - _ => None, + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&rqctx, &query)? + .map_name(|n| Name::ref_cast(n)); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let users = nexus + .users_builtin_list(&opctx, &pagparams) + .await? + .into_iter() + .map(|i| i.into()) + .collect(); + Ok(HttpResponseOk(ScanByName::results_page( + &query, + users, + &marker_for_name, + )?)) }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let pagination = pag_params.into_inner(); - let limit = rqctx.page_limit(&pagination)?; + async fn user_builtin_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let user_selector = path_params.into_inner(); + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let (.., user) = nexus + .user_builtin_lookup(&opctx, &user_selector)? 
+ .fetch() + .await?; + Ok(HttpResponseOk(user.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus - .silo_metric_list( - &opctx, - metric_name, - project_lookup, - pagination, - limit, - ) - .await?; - - Ok(HttpResponseOk(result)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Built-in roles -/// List timeseries schemas -#[endpoint { - method = GET, - path = "/v1/timeseries/schema", - tags = ["metrics"], -}] -async fn timeseries_schema_list( - rqctx: RequestContext, - pag_params: Query, -) -> Result>, HttpError> -{ - let apictx = rqctx.context(); - let handler = async { + async fn role_list( + rqctx: RequestContext, + query_params: Query< + PaginationParams, + >, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let pagination = pag_params.into_inner(); - let limit = rqctx.page_limit(&pagination)?; - nexus - .timeseries_schema_list(&opctx, &pagination, limit) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let query = query_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let marker = match &query.page { + WhichPage::First(..) => None, + WhichPage::Next(params::RolePage { last_seen }) => { + Some(last_seen.split_once('.').ok_or_else(|| { + Error::invalid_value( + last_seen.clone(), + "bad page token", + ) + })?) + .map(|(s1, s2)| (s1.to_string(), s2.to_string())) + } + }; + let pagparams = DataPageParams { + limit: rqctx.page_limit(&query)?, + direction: PaginationOrder::Ascending, + marker: marker.as_ref(), + }; + let roles = nexus + .roles_builtin_list(&opctx, &pagparams) + .await? + .into_iter() + .map(|i| i.into()) + .collect(); + Ok(HttpResponseOk(dropshot::ResultsPage::new( + roles, + &EmptyScanParams {}, + |role: &Role, _| params::RolePage { + last_seen: role.name.to_string(), + }, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// TODO: can we link to an OxQL reference? Do we have one? Can we even do links? - -/// Run timeseries query -/// -/// Queries are written in OxQL. 
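The built-in roles list above uses its own page token because role names are not the usual name or ID types: a name looks like "<resource_type>.<role_name>" (e.g. "fleet.admin"), so the marker round-trips as a plain string split on the first '.'. A small sketch of exactly that parse:

```rust
fn parse_role_marker(last_seen: &str) -> Result<(String, String), String> {
    last_seen
        .split_once('.')
        .map(|(s1, s2)| (s1.to_string(), s2.to_string()))
        .ok_or_else(|| format!("bad page token: {last_seen:?}"))
}

fn main() {
    assert_eq!(
        parse_role_marker("fleet.admin").unwrap(),
        ("fleet".to_string(), "admin".to_string())
    );
    assert!(parse_role_marker("fleetadmin").is_err());
}
```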
-#[endpoint { - method = POST, - path = "/v1/timeseries/query", - tags = ["metrics"], -}] -async fn timeseries_query( - rqctx: RequestContext, - body: TypedBody, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { + async fn role_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); let nexus = &apictx.context.nexus; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let query = body.into_inner().query; - nexus - .timeseries_query(&opctx, &query) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + let path = path_params.into_inner(); + let role_name = &path.role_name; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let role = nexus.role_builtin_fetch(&opctx, &role_name).await?; + Ok(HttpResponseOk(role.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Updates - -/// Upload TUF repository -#[endpoint { - method = PUT, - path = "/v1/system/update/repository", - tags = ["system/update"], - unpublished = true, -}] -async fn system_update_put_repository( - rqctx: RequestContext, - query: Query, - body: StreamingBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let query = query.into_inner(); - let body = body.into_stream(); - let update = - nexus.updates_put_repository(&opctx, body, query.file_name).await?; - Ok(HttpResponseOk(update)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + // Current user + + async fn current_user_view( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let nexus = &apictx.context.nexus; + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let user = nexus.silo_user_fetch_self(&opctx).await?; + let (_, silo) = nexus.current_silo_lookup(&opctx)?.fetch().await?; + Ok(HttpResponseOk(views::CurrentUser { + user: user.into(), + silo_name: silo.name().clone(), + })) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch TUF repository description -/// -/// Fetch description of TUF repository by system version. 
-#[endpoint { - method = GET, - path = "/v1/system/update/repository/{system_version}", - tags = ["system/update"], - unpublished = true, -}] -async fn system_update_get_repository( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let params = path_params.into_inner(); - let description = - nexus.updates_get_repository(&opctx, params.system_version).await?; - Ok(HttpResponseOk(TufRepoGetResponse { - description: description.into_external(), - })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn current_user_groups( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let groups = nexus + .silo_user_fetch_groups_for_self( + &opctx, + &data_page_params_for(&rqctx, &query)?, + ) + .await? + .into_iter() + .map(|d| d.into()) + .collect(); + Ok(HttpResponseOk(ScanById::results_page( + &query, + groups, + &|_, group: &views::Group| group.id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Silo users - -/// List users -#[endpoint { - method = GET, - path = "/v1/users", - tags = ["silos"], -}] -async fn user_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let scan_params = ScanById::from_query(&query)?; + async fn current_user_ssh_key_list( + rqctx: RequestContext, + query_params: Query, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let &actor = opctx + .authn + .actor_required() + .internal_context("listing current user's ssh keys")?; + let ssh_keys = nexus + .ssh_keys_list(&opctx, actor.actor_id(), &paginated_by) + .await? 
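The ssh-key handlers call `actor_required` with an `internal_context` string because, by the time a handler body runs, authentication has already happened: a missing actor is a bug to be reported as an internal error, not a 401. A stand-in sketch of that contract (not the real authn machinery):

```rust
#[derive(Clone, Copy)]
struct Actor {
    id: u64,
}

struct Authn {
    actor: Option<Actor>,
}

impl Authn {
    // The `context` string only decorates the *internal* error message.
    fn actor_required(&self, context: &str) -> Result<Actor, String> {
        self.actor
            .ok_or_else(|| format!("internal error: no actor while {context}"))
    }
}

fn main() {
    let authed = Authn { actor: Some(Actor { id: 42 }) };
    let actor = authed
        .actor_required("listing current user's ssh keys")
        .unwrap();
    assert_eq!(actor.id, 42);
    let unauthed = Authn { actor: None };
    assert!(unauthed.actor_required("creating ssh key").is_err());
}
```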
+ .into_iter() + .map(SshKey::from) + .collect::>(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + ssh_keys, + &marker_for_name_or_id, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn current_user_ssh_key_create( + rqctx: RequestContext, + new_key: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let &actor = opctx + .authn + .actor_required() + .internal_context("creating ssh key for current user")?; + let ssh_key = nexus + .ssh_key_create(&opctx, actor.actor_id(), new_key.into_inner()) + .await?; + Ok(HttpResponseCreated(ssh_key.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - // TODO: a valid UUID gets parsed here and will 404 if it doesn't exist - // (as expected) but a non-UUID string just gets let through as None - // (i.e., ignored) instead of 400ing + async fn current_user_ssh_key_view( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let &actor = opctx + .authn + .actor_required() + .internal_context("fetching one of current user's ssh keys")?; + let ssh_key_selector = params::SshKeySelector { + silo_user_id: actor.actor_id(), + ssh_key: path.ssh_key, + }; + let ssh_key_lookup = + nexus.ssh_key_lookup(&opctx, &ssh_key_selector)?; + let (.., silo_user, _, ssh_key) = ssh_key_lookup.fetch().await?; + // Ensure the SSH key exists in the current silo + assert_eq!(silo_user.id(), actor.actor_id()); + Ok(HttpResponseOk(ssh_key.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } - let users = if let Some(group_id) = scan_params.selector.group { + async fn current_user_ssh_key_delete( + rqctx: RequestContext, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let &actor = opctx + .authn + .actor_required() + .internal_context("deleting one of current user's ssh keys")?; + let ssh_key_selector = params::SshKeySelector { + silo_user_id: actor.actor_id(), + ssh_key: path.ssh_key, + }; + let ssh_key_lookup = + nexus.ssh_key_lookup(&opctx, &ssh_key_selector)?; nexus - .current_silo_group_users_list(&opctx, &pagparams, &group_id) - .await? - } else { - nexus.silo_users_list_current(&opctx, &pagparams).await? 
- }; - - Ok(HttpResponseOk(ScanById::results_page( - &query, - users.into_iter().map(|i| i.into()).collect(), - &|_, user: &User| user.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + .ssh_key_delete(&opctx, actor.actor_id(), &ssh_key_lookup) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Silo groups - -/// List groups -#[endpoint { - method = GET, - path = "/v1/groups", - tags = ["silos"], -}] -async fn group_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let groups = nexus - .silo_groups_list(&opctx, &pagparams) - .await? - .into_iter() - .map(|i| i.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - groups, - &|_, group: &Group| group.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn probe_list( + rqctx: RequestContext, + query_params: Query>, + ) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + let nexus = &apictx.context.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let project_lookup = + nexus.project_lookup(&opctx, scan_params.selector.clone())?; + + let probes = nexus + .probe_list(&opctx, &project_lookup, &paginated_by) + .await?; -/// Fetch group -#[endpoint { - method = GET, - path = "/v1/groups/{group_id}", - tags = ["silos"], -}] -async fn group_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let (.., group) = - nexus.silo_group_lookup(&opctx, &path.group_id).fetch().await?; - Ok(HttpResponseOk(group.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + probes, + &|_, p: &ProbeInfo| match paginated_by { + PaginatedBy::Id(_) => NameOrId::Id(p.id), + PaginatedBy::Name(_) => NameOrId::Name(p.name.clone()), + }, + )?)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Built-in (system) users - -/// List built-in users -#[endpoint { - method = GET, - path = "/v1/system/users-builtin", - tags = ["system/silos"], -}] -async fn user_builtin_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pagparams = - data_page_params_for(&rqctx, &query)?.map_name(|n| Name::ref_cast(n)); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let users = 
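The probe handlers check `authz::Action::ListChildren` (or `Modify`) against the fleet before performing any lookups, so an unauthorized caller fails before learning anything about what exists. A greatly oversimplified sketch of that authorize-first shape, with a boolean standing in for the real policy engine:

```rust
enum Action {
    ListChildren,
    Modify,
}

struct OpContext {
    fleet_admin: bool,
}

impl OpContext {
    fn authorize(&self, _action: Action) -> Result<(), String> {
        // The real check is per-action and per-resource; this is not.
        if self.fleet_admin {
            Ok(())
        } else {
            Err("unauthorized".to_string())
        }
    }
}

fn main() {
    let admin = OpContext { fleet_admin: true };
    assert!(admin.authorize(Action::ListChildren).is_ok());
    let viewer = OpContext { fleet_admin: false };
    assert!(viewer.authorize(Action::Modify).is_err());
}
```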
nexus - .users_builtin_list(&opctx, &pagparams) - .await? - .into_iter() - .map(|i| i.into()) - .collect(); - Ok(HttpResponseOk(ScanByName::results_page( - &query, - users, - &marker_for_name, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn probe_view( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + let probe = + nexus.probe_get(&opctx, &project_lookup, &path.probe).await?; + Ok(HttpResponseOk(probe)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -/// Fetch built-in user -#[endpoint { - method = GET, - path = "/v1/system/users-builtin/{user}", - tags = ["system/silos"], -}] -async fn user_builtin_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let nexus = &apictx.context.nexus; - let user_selector = path_params.into_inner(); - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let (.., user) = - nexus.user_builtin_lookup(&opctx, &user_selector)?.fetch().await?; - Ok(HttpResponseOk(user.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn probe_create( + rqctx: RequestContext, + query_params: Query, + new_probe: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let nexus = &apictx.context.nexus; + let new_probe_params = &new_probe.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + let probe = nexus + .probe_create(&opctx, &project_lookup, &new_probe_params) + .await?; + Ok(HttpResponseCreated(probe.into())) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Built-in roles + async fn probe_delete( + rqctx: RequestContext, + query_params: Query, + path_params: Path, + ) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let nexus = &apictx.context.nexus; + let path = path_params.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus.probe_delete(&opctx, &project_lookup, path.probe).await?; + Ok(HttpResponseDeleted()) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } -// Roles have their own pagination scheme because they do not use the usual "id" -// or "name" types. For more, see the comment in dbinit.sql. 
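// A minimal, self-contained sketch of the page-token scheme that the removed
// code below implements (illustrative only; `parse_role_page_token` is a
// hypothetical helper, not part of Nexus). Built-in role names have the form
// "<resource>.<role_name>" (e.g. "fleet.admin"), so the scan can use the
// last-seen name itself as the page token and split it back apart when the
// next page is requested:
fn parse_role_page_token(last_seen: &str) -> Result<(String, String), String> {
    // "fleet.admin" -> ("fleet", "admin"); a token without a '.' is invalid,
    // and the real handler rejects it via Error::invalid_value(..).
    last_seen
        .split_once('.')
        .map(|(s1, s2)| (s1.to_string(), s2.to_string()))
        .ok_or_else(|| format!("bad page token: {last_seen:?}"))
}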
-#[derive(Deserialize, JsonSchema, Serialize)] -struct RolePage { - last_seen: String, -} + async fn login_saml_begin( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + console_api::login_saml_begin(rqctx, path_params, query_params).await + } -/// Path parameters for global (system) role requests -#[derive(Deserialize, JsonSchema)] -struct RolePathParam { - /// The built-in role's unique name. - role_name: String, -} + async fn login_saml_redirect( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result { + console_api::login_saml_redirect(rqctx, path_params, query_params).await + } -/// List built-in roles -#[endpoint { - method = GET, - path = "/v1/system/roles", - tags = ["roles"], -}] -async fn role_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let marker = match &query.page { - WhichPage::First(..) => None, - WhichPage::Next(RolePage { last_seen }) => { - Some(last_seen.split_once('.').ok_or_else(|| { - Error::invalid_value(last_seen.clone(), "bad page token") - })?) - .map(|(s1, s2)| (s1.to_string(), s2.to_string())) - } - }; - let pagparams = DataPageParams { - limit: rqctx.page_limit(&query)?, - direction: PaginationOrder::Ascending, - marker: marker.as_ref(), - }; - let roles = nexus - .roles_builtin_list(&opctx, &pagparams) - .await? - .into_iter() - .map(|i| i.into()) - .collect(); - Ok(HttpResponseOk(dropshot::ResultsPage::new( - roles, - &EmptyScanParams {}, - |role: &Role, _| RolePage { last_seen: role.name.to_string() }, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn login_saml( + rqctx: RequestContext, + path_params: Path, + body_bytes: dropshot::UntypedBody, + ) -> Result { + console_api::login_saml(rqctx, path_params, body_bytes).await + } -/// Fetch built-in role -#[endpoint { - method = GET, - path = "/v1/system/roles/{role_name}", - tags = ["roles"], -}] -async fn role_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let role_name = &path.role_name; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let role = nexus.role_builtin_fetch(&opctx, &role_name).await?; - Ok(HttpResponseOk(role.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn login_local_begin( + rqctx: RequestContext, + path_params: Path, + query_params: Query, + ) -> Result, HttpError> { + console_api::login_local_begin(rqctx, path_params, query_params).await + } -// Current user - -/// Fetch user for current session -#[endpoint { - method = GET, - path = "/v1/me", - tags = ["session"], -}] -pub(crate) async fn current_user_view( - rqctx: RequestContext, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let user = nexus.silo_user_fetch_self(&opctx).await?; - let (_, silo) = nexus.current_silo_lookup(&opctx)?.fetch().await?; - Ok(HttpResponseOk(views::CurrentUser { - user: user.into(), - silo_name: 
silo.name().clone(), - })) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn login_local( + rqctx: RequestContext, + path_params: Path, + credentials: TypedBody, + ) -> Result, HttpError> + { + console_api::login_local(rqctx, path_params, credentials).await + } -/// Fetch current user's groups -#[endpoint { - method = GET, - path = "/v1/me/groups", - tags = ["session"], - }] -pub(crate) async fn current_user_groups( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let groups = nexus - .silo_user_fetch_groups_for_self( - &opctx, - &data_page_params_for(&rqctx, &query)?, - ) - .await? - .into_iter() - .map(|d| d.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - groups, - &|_, group: &views::Group| group.id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn logout( + rqctx: RequestContext, + cookies: Cookies, + ) -> Result, HttpError> + { + console_api::logout(rqctx, cookies).await + } -// Per-user SSH public keys - -/// List SSH public keys -/// -/// Lists SSH public keys for the currently authenticated user. -#[endpoint { - method = GET, - path = "/v1/me/ssh-keys", - tags = ["session"], -}] -async fn current_user_ssh_key_list( - rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let &actor = opctx - .authn - .actor_required() - .internal_context("listing current user's ssh keys")?; - let ssh_keys = nexus - .ssh_keys_list(&opctx, actor.actor_id(), &paginated_by) - .await? - .into_iter() - .map(SshKey::from) - .collect::>(); - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - ssh_keys, - &marker_for_name_or_id, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn login_begin( + rqctx: RequestContext, + query_params: Query, + ) -> Result { + console_api::login_begin(rqctx, query_params).await + } -/// Create SSH public key -/// -/// Create an SSH public key for the currently authenticated user. 
-#[endpoint { - method = POST, - path = "/v1/me/ssh-keys", - tags = ["session"], -}] -async fn current_user_ssh_key_create( - rqctx: RequestContext, - new_key: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let &actor = opctx - .authn - .actor_required() - .internal_context("creating ssh key for current user")?; - let ssh_key = nexus - .ssh_key_create(&opctx, actor.actor_id(), new_key.into_inner()) - .await?; - Ok(HttpResponseCreated(ssh_key.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn console_projects( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + console_api::console_projects(rqctx, path_params).await + } -/// Fetch SSH public key -/// -/// Fetch SSH public key associated with the currently authenticated user. -#[endpoint { - method = GET, - path = "/v1/me/ssh-keys/{ssh_key}", - tags = ["session"], -}] -async fn current_user_ssh_key_view( - rqctx: RequestContext, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let &actor = opctx - .authn - .actor_required() - .internal_context("fetching one of current user's ssh keys")?; - let ssh_key_selector = params::SshKeySelector { - silo_user_id: actor.actor_id(), - ssh_key: path.ssh_key, - }; - let ssh_key_lookup = nexus.ssh_key_lookup(&opctx, &ssh_key_selector)?; - let (.., silo_user, _, ssh_key) = ssh_key_lookup.fetch().await?; - // Ensure the SSH key exists in the current silo - assert_eq!(silo_user.id(), actor.actor_id()); - Ok(HttpResponseOk(ssh_key.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn console_settings_page( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + console_api::console_settings_page(rqctx, path_params).await + } -/// Delete SSH public key -/// -/// Delete an SSH public key associated with the currently authenticated user. 
-#[endpoint { - method = DELETE, - path = "/v1/me/ssh-keys/{ssh_key}", - tags = ["session"], -}] -async fn current_user_ssh_key_delete( - rqctx: RequestContext, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let &actor = opctx - .authn - .actor_required() - .internal_context("deleting one of current user's ssh keys")?; - let ssh_key_selector = params::SshKeySelector { - silo_user_id: actor.actor_id(), - ssh_key: path.ssh_key, - }; - let ssh_key_lookup = nexus.ssh_key_lookup(&opctx, &ssh_key_selector)?; - nexus.ssh_key_delete(&opctx, actor.actor_id(), &ssh_key_lookup).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn console_system_page( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + console_api::console_system_page(rqctx, path_params).await + } -/// List instrumentation probes -#[endpoint { - method = GET, - path = "/v1/probes", - tags = ["system/probes"], -}] -async fn probe_list( - rqctx: RequestContext, - query_params: Query>, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + async fn console_lookup( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + console_api::console_lookup(rqctx, path_params).await + } - let nexus = &apictx.context.nexus; - let query = query_params.into_inner(); - let pag_params = data_page_params_for(&rqctx, &query)?; - let scan_params = ScanByNameOrId::from_query(&query)?; - let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; - let project_lookup = - nexus.project_lookup(&opctx, scan_params.selector.clone())?; - - let probes = - nexus.probe_list(&opctx, &project_lookup, &paginated_by).await?; - - Ok(HttpResponseOk(ScanByNameOrId::results_page( - &query, - probes, - &|_, p: &ProbeInfo| match paginated_by { - PaginatedBy::Id(_) => NameOrId::Id(p.id), - PaginatedBy::Name(_) => NameOrId::Name(p.name.clone()), - }, - )?)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn console_root( + rqctx: RequestContext, + ) -> Result, HttpError> { + console_api::console_root(rqctx).await + } -/// View instrumentation probe -#[endpoint { - method = GET, - path = "/v1/probes/{probe}", - tags = ["system/probes"], -}] -async fn probe_view( - rqctx: RequestContext, - path_params: Path, - query_params: Query, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + async fn console_projects_new( + rqctx: RequestContext, + ) -> Result, HttpError> { + console_api::console_projects_new(rqctx).await + } - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let project_selector = query_params.into_inner(); - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - let probe = - nexus.probe_get(&opctx, &project_lookup, &path.probe).await?; - Ok(HttpResponseOk(probe)) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + 
async fn console_silo_images( + rqctx: RequestContext, + ) -> Result, HttpError> { + console_api::console_silo_images(rqctx).await + } -/// Create instrumentation probe -#[endpoint { - method = POST, - path = "/v1/probes", - tags = ["system/probes"], -}] -async fn probe_create( - rqctx: RequestContext, - query_params: Query, - new_probe: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + async fn console_silo_utilization( + rqctx: RequestContext, + ) -> Result, HttpError> { + console_api::console_silo_utilization(rqctx).await + } - let nexus = &apictx.context.nexus; - let new_probe_params = &new_probe.into_inner(); - let project_selector = query_params.into_inner(); - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - let probe = nexus - .probe_create(&opctx, &project_lookup, &new_probe_params) - .await?; - Ok(HttpResponseCreated(probe.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn console_silo_access( + rqctx: RequestContext, + ) -> Result, HttpError> { + console_api::console_silo_access(rqctx).await + } -/// Delete instrumentation probe -#[endpoint { - method = DELETE, - path = "/v1/probes/{probe}", - tags = ["system/probes"], -}] -async fn probe_delete( - rqctx: RequestContext, - query_params: Query, - path_params: Path, -) -> Result { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + async fn asset( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + console_api::asset(rqctx, path_params).await + } - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let project_selector = query_params.into_inner(); - let project_lookup = nexus.project_lookup(&opctx, project_selector)?; - nexus.probe_delete(&opctx, &project_lookup, path.probe).await?; - Ok(HttpResponseDeleted()) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} + async fn device_auth_request( + rqctx: RequestContext, + params: TypedBody, + ) -> Result, HttpError> { + device_auth::device_auth_request(rqctx, params).await + } + + async fn device_auth_verify( + rqctx: RequestContext, + ) -> Result, HttpError> { + device_auth::device_auth_verify(rqctx).await + } + + async fn device_auth_success( + rqctx: RequestContext, + ) -> Result, HttpError> { + device_auth::device_auth_success(rqctx).await + } -#[cfg(test)] -mod test { - use super::external_api; + async fn device_auth_confirm( + rqctx: RequestContext, + params: TypedBody, + ) -> Result { + device_auth::device_auth_confirm(rqctx, params).await + } - #[test] - fn test_nexus_tag_policy() { - // This will fail if any of the endpoints don't match the policy in - // ./tag-config.json - let _ = external_api(); + async fn device_access_token( + rqctx: RequestContext, + params: TypedBody, + ) -> Result, HttpError> { + device_auth::device_access_token(rqctx, params.into_inner()).await } } diff --git a/nexus/src/external_api/tag-config.json b/nexus/src/external_api/tag-config.json deleted file mode 100644 index 6974906507d..00000000000 --- a/nexus/src/external_api/tag-config.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "allow_other_tags": false, - "endpoint_tag_policy": 
"ExactlyOne", - "tag_definitions": { - "disks": { - "description": "Virtual disks are used to store instance-local data which includes the operating system.", - "external_docs": { - "url": "http://docs.oxide.computer/api/disks" - } - }, - "floating-ips": { - "description": "Floating IPs allow a project to allocate well-known IPs to instances.", - "external_docs": { - "url": "http://docs.oxide.computer/api/floating-ips" - } - }, - "hidden": { - "description": "TODO operations that will not ship to customers", - "external_docs": { - "url": "http://docs.oxide.computer/api" - } - }, - "images": { - "description": "Images are read-only virtual disks that may be used to boot virtual machines.", - "external_docs": { - "url": "http://docs.oxide.computer/api/images" - } - }, - "instances": { - "description": "Virtual machine instances are the basic unit of computation. These operations are used for provisioning, controlling, and destroying instances.", - "external_docs": { - "url": "http://docs.oxide.computer/api/instances" - } - }, - "login": { - "description": "Authentication endpoints", - "external_docs": { - "url": "http://docs.oxide.computer/api/login" - } - }, - "metrics": { - "description": "Silo-scoped metrics", - "external_docs": { - "url": "http://docs.oxide.computer/api/metrics" - } - }, - "policy": { - "description": "System-wide IAM policy", - "external_docs": { - "url": "http://docs.oxide.computer/api/policy" - } - }, - "projects": { - "description": "Projects are a grouping of associated resources such as instances and disks within a silo for purposes of billing and access control.", - "external_docs": { - "url": "http://docs.oxide.computer/api/projects" - } - }, - "roles": { - "description": "Roles are a component of Identity and Access Management (IAM) that allow a user or agent account access to additional permissions.", - "external_docs": { - "url": "http://docs.oxide.computer/api/roles" - } - }, - "session": { - "description": "Information pertaining to the current session.", - "external_docs": { - "url": "http://docs.oxide.computer/api/session" - } - }, - "silos": { - "description": "Silos represent a logical partition of users and resources.", - "external_docs": { - "url": "http://docs.oxide.computer/api/silos" - } - }, - "snapshots": { - "description": "Snapshots of virtual disks at a particular point in time.", - "external_docs": { - "url": "http://docs.oxide.computer/api/snapshots" - } - }, - "vpcs": { - "description": "Virtual Private Clouds (VPCs) provide isolated network environments for managing and deploying services.", - "external_docs": { - "url": "http://docs.oxide.computer/api/vpcs" - } - }, - "system/probes": { - "description": "Probes for testing network connectivity", - "external_docs": { - "url": "http://docs.oxide.computer/api/probes" - } - }, - "system/status": { - "description": "Endpoints related to system health", - "external_docs": { - "url": "http://docs.oxide.computer/api/system-status" - } - }, - "system/hardware": { - "description": "These operations pertain to hardware inventory and management. Racks are the unit of expansion of an Oxide deployment. Racks are in turn composed of sleds, switches, power supplies, and a cabled backplane.", - "external_docs": { - "url": "http://docs.oxide.computer/api/system-hardware" - } - }, - "system/metrics": { - "description": "Metrics provide insight into the operation of the Oxide deployment. 
These include telemetry on hardware and software components that can be used to understand the current state as well as to diagnose issues.", - "external_docs": { - "url": "http://docs.oxide.computer/api/system-metrics" - } - }, - "system/networking": { - "description": "This provides rack-level network configuration.", - "external_docs": { - "url": "http://docs.oxide.computer/api/system-networking" - } - }, - "system/silos": { - "description": "Silos represent a logical partition of users and resources.", - "external_docs": { - "url": "http://docs.oxide.computer/api/system-silos" - } - } - } -} diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index c5322e3930a..66a8090f112 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -30,6 +30,7 @@ use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::ProbeInfo; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::SledPolicy; +use nexus_types::internal_api::params::InstanceMigrateRequest; use nexus_types::internal_api::params::SledAgentInfo; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; @@ -42,6 +43,7 @@ use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; +use omicron_common::api::external::Instance; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::DownstairsClientStopRequest; use omicron_common::api::internal::nexus::DownstairsClientStopped; @@ -50,7 +52,7 @@ use omicron_common::api::internal::nexus::ProducerRegistrationResponse; use omicron_common::api::internal::nexus::RepairFinishInfo; use omicron_common::api::internal::nexus::RepairProgress; use omicron_common::api::internal::nexus::RepairStartInfo; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::update::ArtifactId; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; @@ -166,8 +168,8 @@ impl NexusInternalApi for NexusInternalApiImpl { async fn cpapi_instances_put( rqctx: RequestContext, - path_params: Path, - new_runtime_state: TypedBody, + path_params: Path, + new_runtime_state: TypedBody, ) -> Result { let apictx = &rqctx.context().context; let nexus = &apictx.nexus; @@ -176,13 +178,36 @@ impl NexusInternalApi for NexusInternalApiImpl { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let handler = async { nexus - .notify_instance_updated( + .notify_vmm_updated(&opctx, path.propolis_id, &new_state) + .await?; + Ok(HttpResponseUpdatedNoContent()) + }; + apictx + .internal_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> Result, HttpError> { + let apictx = &rqctx.context().context; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let migrate = migrate_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_internal_api(&rqctx).await; + let instance = nexus + .instance_migrate( &opctx, InstanceUuid::from_untyped_uuid(path.instance_id), - &new_state, 
+ migrate, ) .await?; - Ok(HttpResponseUpdatedNoContent()) + Ok(HttpResponseOk(instance.into())) }; apictx .internal_latencies diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index d5c853b15b4..eaabbd748b7 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -53,18 +53,6 @@ use uuid::Uuid; #[macro_use] extern crate slog; -/// Run the OpenAPI generator for the external API, which emits the OpenAPI spec -/// to stdout. -pub fn run_openapi_external() -> Result<(), String> { - external_api() - .openapi("Oxide Region API", "20240821.0") - .description("API for interacting with the Oxide control plane") - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} - /// A partially-initialized Nexus server, which exposes an internal interface, /// but is not ready to receive external requests. pub struct InternalServer { @@ -384,12 +372,7 @@ impl nexus_test_interface::NexusServer for Server { self.apictx .context .nexus - .upsert_dataset( - dataset_id, - zpool_id, - address, - nexus_db_queries::db::model::DatasetKind::Crucible, - ) + .upsert_crucible_dataset(dataset_id, zpool_id, address) .await .unwrap(); } diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 4fcb1263560..f026b1b504c 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -380,7 +380,7 @@ mod test { let logctx = dev::test_setup_log("test_populator"); let mut db = test_setup_database(&logctx.log).await; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); let datastore = Arc::new( db::DataStore::new(&logctx.log, pool, None).await.unwrap(), ); @@ -422,19 +422,13 @@ mod test { }) .unwrap(); - // Test again with the database offline. In principle we could do this - // immediately without creating a new pool and datastore. However, the - // pool's default behavior is to wait 30 seconds for a connection, which - // makes this test take a long time. (See the note in - // nexus/src/db/pool.rs about this.) So let's create a pool with an - // arbitrarily short timeout now. (We wouldn't want to do this above - // because we do want to wait a bit when we expect things to work, in - // case the test system is busy.) + // Test again with the database offline. In principle we could do this + // immediately without creating a new pool and datastore. // - // Anyway, if we try again with a broken database, we should get a + // If we try again with a broken database, we should get a // ServiceUnavailable error, which indicates a transient failure. let pool = - Arc::new(db::Pool::new_failfast_for_tests(&logctx.log, &cfg)); + Arc::new(db::Pool::new_single_host_failfast(&logctx.log, &cfg)); // We need to create the datastore before tearing down the database, as // it verifies the schema version of the DB while booting. 
let datastore = Arc::new( diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index a883bc83c56..ff89af7c6c4 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -25,6 +25,7 @@ http.workspace = true hyper.workspace = true illumos-utils.workspace = true internal-dns.workspace = true +nexus-client.workspace = true nexus-config.workspace = true nexus-db-queries.workspace = true nexus-sled-agent-shared.workspace = true @@ -46,7 +47,7 @@ sled-agent-client.workspace = true slog.workspace = true tokio.workspace = true tokio-util.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/background.rs b/nexus/test-utils/src/background.rs new file mode 100644 index 00000000000..58792e547de --- /dev/null +++ b/nexus/test-utils/src/background.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Helper functions related to Nexus background tasks + +use crate::http_testing::NexusRequest; +use dropshot::test_util::ClientTestContext; +use nexus_client::types::BackgroundTask; +use nexus_client::types::CurrentStatus; +use nexus_client::types::CurrentStatusRunning; +use nexus_client::types::LastResult; +use nexus_client::types::LastResultCompleted; +use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; +use std::time::Duration; + +/// Return the most recent start time for a background task +fn most_recent_start_time( + task: &BackgroundTask, +) -> Option> { + match task.current { + CurrentStatus::Idle => match task.last { + LastResult::Completed(LastResultCompleted { + start_time, .. + }) => Some(start_time), + LastResult::NeverCompleted => None, + }, + CurrentStatus::Running(CurrentStatusRunning { start_time, .. }) => { + Some(start_time) + } + } +} + +/// Given the name of a background task, activate it, then wait for it to +/// complete. Return the last polled `BackgroundTask` object. +pub async fn activate_background_task( + internal_client: &ClientTestContext, + task_name: &str, +) -> BackgroundTask { + let task = NexusRequest::object_get( + internal_client, + &format!("/bgtasks/view/{task_name}"), + ) + .execute_and_parse_unwrap::() + .await; + + let last_start = most_recent_start_time(&task); + + internal_client + .make_request( + http::Method::POST, + "/bgtasks/activate", + Some(serde_json::json!({ + "bgtask_names": vec![String::from(task_name)] + })), + http::StatusCode::NO_CONTENT, + ) + .await + .unwrap(); + + // Wait for the task to finish + let last_task_poll = wait_for_condition( + || async { + let task = NexusRequest::object_get( + internal_client, + &format!("/bgtasks/view/{task_name}"), + ) + .execute_and_parse_unwrap::() + .await; + + if matches!(&task.current, CurrentStatus::Idle) + && most_recent_start_time(&task) > last_start + { + Ok(task) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &Duration::from_millis(500), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + last_task_poll +} diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 7c190974a12..c3efbb83cf3 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -4,6 +4,7 @@ //! 
Integration testing facilities for Nexus +#[cfg(feature = "omicron-dev")] use anyhow::Context; use anyhow::Result; use camino::Utf8Path; @@ -17,6 +18,11 @@ use dropshot::HandlerTaskMode; use futures::future::BoxFuture; use futures::FutureExt; use gateway_test_utils::setup::GatewayTestContext; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -73,16 +79,12 @@ use std::collections::HashMap; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; use uuid::Uuid; pub use sim::TEST_HARDWARE_THREADS; pub use sim::TEST_RESERVOIR_RAM; +pub mod background; pub mod db; pub mod http_testing; pub mod resource_helpers; @@ -1427,6 +1429,7 @@ pub async fn start_oximeter( address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, + replicated: false, }; let config = oximeter_collector::Config { nexus_address: Some(nexus_address), @@ -1586,12 +1589,12 @@ pub async fn start_dns_server( socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); - let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + let mut resolver_opts = ResolverOpts::default(); + resolver_opts.edns0 = true; + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok((dns_server, http_server, resolver)) } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index ac7188f2323..14180459aba 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -432,6 +432,28 @@ pub async fn create_disk( .await } +pub async fn create_snapshot( + client: &ClientTestContext, + project_name: &str, + disk_name: &str, + snapshot_name: &str, +) -> views::Snapshot { + let snapshots_url = format!("/v1/snapshots?project={}", project_name); + + object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: snapshot_name.parse().unwrap(), + description: format!("snapshot {:?}", snapshot_name), + }, + disk: disk_name.to_string().try_into().unwrap(), + }, + ) + .await +} + pub async fn delete_disk( client: &ClientTestContext, project_name: &str, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 8f65a73204a..bd338469e08 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -137,6 +137,10 @@ lookup_region_port.period_secs = 60 # Therefore, disable the background task during tests. 
instance_updater.disable = true instance_updater.period_secs = 60 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 +region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index c2277ba776d..7ae42235569 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -15,10 +15,7 @@ use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::temp_file_path; use omicron_test_utils::dev::test_cmds::EXIT_FAILURE; -use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use omicron_test_utils::dev::test_cmds::EXIT_USAGE; -use openapiv3::OpenAPI; -use std::collections::BTreeMap; use std::fs; use std::path::PathBuf; use subprocess::Exec; @@ -78,105 +75,3 @@ fn test_nexus_invalid_config() { ); assert!(&stderr_text.starts_with(&expected_err)); } - -#[track_caller] -fn run_command_with_arg(arg: &str) -> (String, String) { - // This is a little goofy: we need a config file for the program. - // (Arguably, --openapi shouldn't require a config file, but it's - // conceivable that the API metadata or the exposed endpoints would depend - // on the configuration.) We ship a config file in "examples", and we may - // as well use it here -- it would be a bug if that one didn't work for this - // purpose. However, it's not clear how to reliably locate it at runtime. - // But we do know where it is at compile time, so we load it then. - let config = include_str!("../../examples/config.toml"); - let config_path = write_config(config); - let exec = Exec::cmd(path_to_nexus()).arg(&config_path).arg(arg); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - fs::remove_file(&config_path).expect("failed to remove temporary file"); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - - (stdout_text, stderr_text) -} - -#[test] -fn test_nexus_openapi() { - let (stdout_text, stderr_text) = run_command_with_arg("--openapi"); - assert_contents("tests/output/cmd-nexus-openapi-stderr", &stderr_text); - - // Make sure the result parses as a valid OpenAPI spec and sanity-check a - // few fields. - let spec: OpenAPI = serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - assert_eq!(spec.openapi, "3.0.3"); - assert_eq!(spec.info.title, "Oxide Region API"); - assert_eq!(spec.info.version, "20240821.0"); - - // Spot check a couple of items. - assert!(!spec.paths.paths.is_empty()); - assert!(spec.paths.paths.get("/v1/projects").is_some()); - - // Check for lint errors. - let errors = openapi_lint::validate_external(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Construct a string that helps us identify the organization of tags and - // operations. - let mut ops_by_tag = - BTreeMap::>::new(); - for (path, method, op) in spec.operations() { - // Make sure each operation has exactly one tag. Note, we intentionally - // do this before validating the OpenAPI output as fixing an error here - // would necessitate refreshing the spec file again. 
- assert_eq!( - op.tags.len(), - 1, - "operation '{}' has {} tags rather than 1", - op.operation_id.as_ref().unwrap(), - op.tags.len() - ); - - // Every non-hidden endpoint must have a summary - if !op.tags.contains(&"hidden".to_string()) { - assert!( - op.summary.is_some(), - "operation '{}' is missing a summary doc comment", - op.operation_id.as_ref().unwrap() - ); - } - - ops_by_tag - .entry(op.tags.first().unwrap().to_string()) - .or_default() - .push(( - op.operation_id.as_ref().unwrap().to_string(), - method.to_string().to_uppercase(), - path.to_string(), - )); - } - - let mut tags = String::new(); - for (tag, mut ops) in ops_by_tag { - ops.sort(); - tags.push_str(&format!(r#"API operations found with tag "{}""#, tag)); - tags.push_str(&format!( - "\n{:40} {:8} {}\n", - "OPERATION ID", "METHOD", "URL PATH" - )); - for (operation_id, method, path) in ops { - tags.push_str(&format!( - "{:40} {:8} {}\n", - operation_id, method, path - )); - } - tags.push('\n'); - } - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. - assert_contents("../openapi/nexus.json", &stdout_text); - - // When this fails, verify that operations on which you're adding, - // renaming, or changing the tags are what you intend. - assert_contents("tests/output/nexus_tags.txt", &tags); -} diff --git a/nexus/tests/integration_tests/device_auth.rs b/nexus/tests/integration_tests/device_auth.rs index 5bb34eb19e3..65730f6cc85 100644 --- a/nexus/tests/integration_tests/device_auth.rs +++ b/nexus/tests/integration_tests/device_auth.rs @@ -4,11 +4,11 @@ use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::views::{ - DeviceAccessTokenGrant, DeviceAccessTokenType, DeviceAuthResponse, -}; -use omicron_nexus::external_api::device_auth::{ - DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, +use nexus_types::external_api::{ + params::{DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify}, + views::{ + DeviceAccessTokenGrant, DeviceAccessTokenType, DeviceAuthResponse, + }, }; use http::{header, method::Method, StatusCode}; diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 234ab5f3821..fe6aab2770d 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -188,12 +188,13 @@ async fn set_instance_state( } async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { - let sa = nexus - .instance_sled_by_id(id) + let info = nexus + .active_instance_info(id, None) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + info.sled_client.vmm_finish_transition(info.propolis_id).await; } #[nexus_test] diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 6e4e59688a7..9703004c733 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -359,12 +359,6 @@ pub static DEMO_INSTANCE_REBOOT_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); -pub static DEMO_INSTANCE_MIGRATE_URL: Lazy = Lazy::new(|| { - format!( - "/v1/instances/{}/migrate?{}", - *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR - ) -}); pub static DEMO_INSTANCE_SERIAL_URL: Lazy = Lazy::new(|| { format!( 
"/v1/instances/{}/serial-console?{}", @@ -579,7 +573,7 @@ pub static DEMO_BGP_CONFIG: Lazy = shaper: None, }); pub const DEMO_BGP_ANNOUNCE_SET_URL: &'static str = - "/v1/system/networking/bgp-announce?name_or_id=a-bag-of-addrs"; + "/v1/system/networking/bgp-announce-set"; pub static DEMO_BGP_ANNOUNCE: Lazy = Lazy::new(|| params::BgpAnnounceSetCreate { identity: IdentityMetadataCreateParams { @@ -591,8 +585,14 @@ pub static DEMO_BGP_ANNOUNCE: Lazy = network: "10.0.0.0/16".parse().unwrap(), }], }); +pub const DEMO_BGP_ANNOUNCE_SET_DELETE_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs"; +pub const DEMO_BGP_ANNOUNCEMENT_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs/announcement"; pub const DEMO_BGP_STATUS_URL: &'static str = "/v1/system/networking/bgp-status"; +pub const DEMO_BGP_EXPORTED_URL: &'static str = + "/v1/system/networking/bgp-exported"; pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = "/v1/system/networking/bgp-routes-ipv4?asn=47"; pub const DEMO_BGP_MESSAGE_HISTORY_URL: &'static str = @@ -1823,18 +1823,6 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Post(serde_json::Value::Null) ], }, - VerifyEndpoint { - url: &DEMO_INSTANCE_MIGRATE_URL, - visibility: Visibility::Protected, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![ - AllowedMethod::Post(serde_json::to_value( - params::InstanceMigrate { - dst_sled_id: uuid::Uuid::new_v4(), - } - ).unwrap()), - ], - }, VerifyEndpoint { url: &DEMO_INSTANCE_SERIAL_URL, visibility: Visibility::Protected, @@ -2290,6 +2278,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::GetNonexistent ], }, + VerifyEndpoint { url: &DEMO_BGP_CONFIG_CREATE_URL, visibility: Visibility::Public, @@ -2311,11 +2300,28 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Put( serde_json::to_value(&*DEMO_BGP_ANNOUNCE).unwrap(), ), - AllowedMethod::GetNonexistent, + AllowedMethod::Get, + ], + }, + + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCE_SET_DELETE_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ AllowedMethod::Delete ], }, + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCEMENT_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_STATUS_URL, visibility: Visibility::Public, @@ -2325,6 +2331,15 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_BGP_EXPORTED_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_ROUTES_IPV4_URL, visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2e41fac3a4e..a7228e0841f 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -48,6 +48,7 @@ use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::external_api::views::SshKey; use nexus_types::external_api::{params, views}; use nexus_types::identity::Resource; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -737,6 +738,7 @@ async fn test_instance_migrate(cptestctx: 
&ControlPlaneTestContext) { } let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -778,12 +780,13 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { let instance_next = instance_get(&client, &instance_url).await; assert_eq!(instance_next.runtime.run_state, InstanceState::Running); - let original_sled = nexus - .instance_sled_id(&instance_id) + let sled_info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should have a sled"); + let original_sled = sled_info.sled_id; let dst_sled_id = if original_sled == default_sled_id { other_sled_id } else { @@ -791,10 +794,10 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let instance = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -806,12 +809,13 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { .parsed_body::() .unwrap(); - let current_sled = nexus - .instance_sled_id(&instance_id) + let new_sled_info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should have a sled"); + let current_sled = new_sled_info.sled_id; assert_eq!(current_sled, original_sled); // Ensure that both sled agents report that the migration is in progress. @@ -838,6 +842,15 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(migration.target_state, MigrationState::Pending.into()); assert_eq!(migration.source_state, MigrationState::Pending.into()); + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Simulate the migration. We will use `instance_single_step_on_sled` to // single-step both sled-agents through the migration state machine and // ensure that the migration state looks nice at each step. @@ -845,15 +858,15 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { cptestctx, nexus, original_sled, - instance_id, + src_propolis_id, migration_id, ) .await; // Move source to "migrating". - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -863,9 +876,9 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move target to "migrating". 
- instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -875,7 +888,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move the source to "completed" - instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; let migration = dbg!(migration_fetch(cptestctx, migration_id).await); @@ -885,15 +898,16 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(instance.runtime.run_state, InstanceState::Migrating); // Move the target to "completed". - instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; let current_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("migrated instance should still have a sled"); + .expect("migrated instance should still have a sled") + .sled_id; assert_eq!(current_sled, dst_sled_id); @@ -907,6 +921,7 @@ async fn test_instance_migrate_v2p_and_routes( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let datastore = nexus.datastore(); @@ -975,11 +990,13 @@ async fn test_instance_migrate_v2p_and_routes( .derive_guest_network_interface_info(&opctx, &authz_instance) .await .unwrap(); + let original_sled_id = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("running instance should have a sled"); + .expect("running instance should have a sled") + .sled_id; let mut sled_agents = vec![cptestctx.sled_agent.sled_agent.clone()]; sled_agents.extend(other_sleds.iter().map(|tup| tup.1.sled_agent.clone())); @@ -997,10 +1014,10 @@ async fn test_instance_migrate_v2p_and_routes( // Kick off migration and simulate its completion on the target. let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -1032,25 +1049,35 @@ async fn test_instance_migrate_v2p_and_routes( .expect("since we've started a migration, the instance record must have a migration id!") }; + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Tell both sled-agents to pretend to do the migration. 
instance_simulate_migration_source( cptestctx, nexus, original_sled_id, - instance_id, + src_propolis_id, migration_id, ) .await; - instance_simulate_on_sled(cptestctx, nexus, original_sled_id, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled_id, src_propolis_id) .await; - instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; let current_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("migrated instance should have a sled"); + .expect("migrated instance should have a sled") + .sled_id; assert_eq!(current_sled, dst_sled_id); for sled_agent in &sled_agents { @@ -1293,6 +1320,7 @@ async fn test_instance_metrics_with_migration( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -1369,10 +1397,11 @@ async fn test_instance_metrics_with_migration( // Request migration to the other sled. This reserves resources on the // target sled, but shouldn't change the virtual provisioning counters. let original_sled = nexus - .instance_sled_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("running instance should have a sled"); + .expect("running instance should have a sled") + .sled_id; let dst_sled_id = if original_sled == default_sled_id { other_sled_id @@ -1381,10 +1410,10 @@ async fn test_instance_metrics_with_migration( }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(¶ms::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -1416,6 +1445,15 @@ async fn test_instance_metrics_with_migration( .expect("since we've started a migration, the instance record must have a migration id!") }; + let info = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("instance should be on a sled"); + let src_propolis_id = info.propolis_id; + let dst_propolis_id = + info.dst_propolis_id.expect("instance should have a migration target"); + // Wait for the instance to be in the `Migrating` state. Otherwise, the // subsequent `instance_wait_for_state(..., Running)` may see the `Running` // state from the *old* VMM, rather than waiting for the migration to @@ -1424,13 +1462,13 @@ async fn test_instance_metrics_with_migration( cptestctx, nexus, original_sled, - instance_id, + src_propolis_id, migration_id, ) .await; - instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + vmm_single_step_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id) .await; instance_wait_for_state(&client, instance_id, InstanceState::Migrating) .await; @@ -1440,9 +1478,9 @@ async fn test_instance_metrics_with_migration( // Complete migration on the target. Simulated migrations always succeed. 
// After this the instance should be running and should continue to appear // to be provisioned. - instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + vmm_simulate_on_sled(cptestctx, nexus, original_sled, src_propolis_id) .await; - instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + vmm_simulate_on_sled(cptestctx, nexus, dst_sled_id, dst_propolis_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; check_provisioning_state(4, 1).await; @@ -3333,10 +3371,11 @@ async fn test_disks_detached_when_instance_destroyed( let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let sa = nexus - .instance_sled_by_id(&instance_id) + .active_instance_info(&instance_id, None) .await .unwrap() - .expect("instance should be on a sled while it's running"); + .expect("instance should be on a sled while it's running") + .sled_client; // Stop and delete instance instance_post(&client, instance_name, InstanceOp::Stop).await; @@ -5076,28 +5115,29 @@ pub async fn assert_sled_vpc_routes( /// instance, and then tell it to finish simulating whatever async transition is /// going on. pub async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { - let sa = nexus - .instance_sled_by_id(id) + let sled_info = nexus + .active_instance_info(id, None) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + sled_info.sled_client.vmm_finish_transition(sled_info.propolis_id).await; } /// Simulate one step of an ongoing instance state transition. To do this, we /// have to look up the instance, then get the sled agent associated with that /// instance, and then tell it to finish simulating whatever async transition is /// going on. -async fn instance_single_step_on_sled( +async fn vmm_single_step_on_sled( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ) { info!(&cptestctx.logctx.log, "Single-stepping simulated instance on sled"; - "instance_id" => %instance_id, "sled_id" => %sled_id); + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_single_step(instance_id.into_untyped_uuid()).await; + sa.vmm_single_step(propolis_id).await; } pub async fn instance_simulate_with_opctx( @@ -5105,27 +5145,28 @@ pub async fn instance_simulate_with_opctx( id: &InstanceUuid, opctx: &OpContext, ) { - let sa = nexus - .instance_sled_by_id_with_opctx(id, opctx) + let sled_info = nexus + .active_instance_info(id, Some(opctx)) .await .unwrap() .expect("instance must be on a sled to simulate a state change"); - sa.instance_finish_transition(id.into_untyped_uuid()).await; + + sled_info.sled_client.vmm_finish_transition(sled_info.propolis_id).await; } /// Simulates state transitions for the incarnation of the instance on the /// supplied sled (which may not be the sled ID currently stored in the /// instance's CRDB record). 
-async fn instance_simulate_on_sled( +async fn vmm_simulate_on_sled( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, ) { info!(&cptestctx.logctx.log, "Poking simulated instance on sled"; - "instance_id" => %instance_id, "sled_id" => %sled_id); + "propolis_id" => %propolis_id, "sled_id" => %sled_id); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_finish_transition(instance_id.into_untyped_uuid()).await; + sa.vmm_finish_transition(propolis_id).await; } /// Simulates a migration source for the provided instance ID, sled ID, and @@ -5134,19 +5175,19 @@ async fn instance_simulate_migration_source( cptestctx: &ControlPlaneTestContext, nexus: &Arc, sled_id: SledUuid, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, migration_id: Uuid, ) { info!( &cptestctx.logctx.log, "Simulating migration source sled"; - "instance_id" => %instance_id, + "propolis_id" => %propolis_id, "sled_id" => %sled_id, "migration_id" => %migration_id, ); let sa = nexus.sled_client(&sled_id).await.unwrap(); - sa.instance_simulate_migration_source( - instance_id.into_untyped_uuid(), + sa.vmm_simulate_migration_source( + propolis_id, sled_agent_client::SimulateMigrationSource { migration_id, result: sled_agent_client::SimulatedMigrationResult::Success, diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index e872cc6fe3b..f56755d85cc 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -1344,12 +1344,12 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( .expect("Failed to stop instance"); // Simulate the transition, wait until it is in fact stopped. - let sa = nexus - .instance_sled_by_id(&instance_id) + let info = nexus + .active_instance_info(&instance_id, None) .await .unwrap() .expect("running instance should be on a sled"); - sa.instance_finish_transition(instance.identity.id).await; + info.sled_client.vmm_finish_transition(info.propolis_id).await; instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; // Delete the instance diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 9cfa0350e8c..f51f57d4147 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -19,11 +19,15 @@ use nexus_test_utils::resource_helpers::{ }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views::OxqlQueryResult; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use oximeter::types::Datum; +use oximeter::types::FieldValue; use oximeter::types::Measurement; use oximeter::TimeseriesSchema; +use std::borrow::Borrow; +use std::collections::HashMap; use uuid::Uuid; pub async fn query_for_metrics( @@ -284,7 +288,7 @@ async fn test_timeseries_schema_list( pub async fn timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, -) -> Vec { +) -> Vec { // first, make sure the latest timeseries have been collected. 
cptestctx.oximeter.force_collect().await; @@ -307,12 +311,14 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body().unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ query: {query}\nresponse: {rsp:#?}" - ); - }) + ); + }) + .tables } #[nexus_test] @@ -341,7 +347,6 @@ async fn test_instance_watcher_metrics( ); }}; } - use oximeter::types::FieldValue; const INSTANCE_ID_FIELD: &str = "instance_id"; const STATE_FIELD: &str = "state"; const STATE_STARTING: &str = "starting"; @@ -354,86 +359,23 @@ async fn test_instance_watcher_metrics( let nexus = &cptestctx.server.server_context().nexus; let oximeter = &cptestctx.oximeter; - // TODO(eliza): consider factoring this out to a generic - // `activate_background_task` function in `nexus-test-utils` eventually? let activate_instance_watcher = || async { - use nexus_client::types::BackgroundTask; - use nexus_client::types::CurrentStatus; - use nexus_client::types::CurrentStatusRunning; - use nexus_client::types::LastResult; - use nexus_client::types::LastResultCompleted; - - fn most_recent_start_time( - task: &BackgroundTask, - ) -> Option> { - match task.current { - CurrentStatus::Idle => match task.last { - LastResult::Completed(LastResultCompleted { - start_time, - .. - }) => Some(start_time), - LastResult::NeverCompleted => None, - }, - CurrentStatus::Running(CurrentStatusRunning { - start_time, - .. - }) => Some(start_time), - } - } + use nexus_test_utils::background::activate_background_task; + + let _ = activate_background_task(&internal_client, "instance_watcher") + .await; - eprintln!("\n --- activating instance watcher ---\n"); - let task = NexusRequest::object_get( - internal_client, - "/bgtasks/view/instance_watcher", - ) - .execute_and_parse_unwrap::() - .await; - let last_start = most_recent_start_time(&task); - - internal_client - .make_request( - http::Method::POST, - "/bgtasks/activate", - Some(serde_json::json!({ - "bgtask_names": vec![String::from("instance_watcher")] - })), - http::StatusCode::NO_CONTENT, - ) - .await - .unwrap(); - // Wait for the instance watcher task to finish - wait_for_condition( - || async { - let task = NexusRequest::object_get( - internal_client, - "/bgtasks/view/instance_watcher", - ) - .execute_and_parse_unwrap::() - .await; - if matches!(&task.current, CurrentStatus::Idle) - && most_recent_start_time(&task) > last_start - { - Ok(()) - } else { - Err(CondCheckError::<()>::NotYet) - } - }, - &Duration::from_millis(500), - &Duration::from_secs(60), - ) - .await - .unwrap(); // Make sure that the latest metrics have been collected. 
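The hand-rolled activation dance deleted above (snapshot the task's last start time, POST to `/bgtasks/activate`, then poll until the task is idle again with a newer start time) now lives behind `nexus_test_utils::background::activate_background_task`. A condensed sketch of that wait loop, with a stand-in `TaskStatus` rather than the real nexus-client types:

```
use std::time::{Duration, Instant};

#[derive(Clone, Copy, PartialEq)]
enum TaskStatus {
    Idle { last_start: u64 },
    Running,
}

// Wait until the task has gone idle *after* a run newer than `prev_start`.
// Checking only for `Idle` would race: the task may not have started yet.
fn wait_for_activation(
    mut poll: impl FnMut() -> TaskStatus,
    prev_start: u64,
    timeout: Duration,
) -> Result<(), &'static str> {
    let deadline = Instant::now() + timeout;
    while Instant::now() < deadline {
        if let TaskStatus::Idle { last_start } = poll() {
            if last_start > prev_start {
                return Ok(());
            }
        }
        std::thread::sleep(Duration::from_millis(100));
    }
    Err("background task did not complete in time")
}
```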
        oximeter.force_collect().await;
    };

    #[track_caller]
    fn count_state(
-        table: &oximeter_db::oxql::Table,
+        table: &oxql_types::Table,
         instance_id: InstanceUuid,
         state: &'static str,
     ) -> i64 {
-        use oximeter_db::oxql::point::ValueArray;
+        use oxql_types::point::ValueArray;
         let uuid = FieldValue::Uuid(instance_id.into_untyped_uuid());
         let state = FieldValue::String(state.into());
         let mut timeserieses = table.timeseries().filter(|ts| {
@@ -586,6 +528,229 @@ async fn test_instance_watcher_metrics(
     assert_gte!(ts2_running, 2);
 }

+#[nexus_test]
+async fn test_mgs_metrics(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    // Make an MGS
+    let (mut mgs_config, sp_sim_config) =
+        gateway_test_utils::setup::load_test_config();
+    let mgs = {
+        // munge the already-parsed MGS config file to point it at the test
+        // Nexus' address.
+        mgs_config.metrics = Some(gateway_test_utils::setup::MetricsConfig {
+            disabled: false,
+            dev_bind_loopback: true,
+            dev_nexus_address: Some(cptestctx.internal_client.bind_address),
+        });
+        gateway_test_utils::setup::test_setup_with_config(
+            "test_mgs_metrics",
+            gateway_messages::SpPort::One,
+            mgs_config,
+            &sp_sim_config,
+            None,
+        )
+        .await
+    };
+
+    // Let's look at all the simulated SP components in the config file which
+    // have sensor readings, so we can assert that there are timeseries for all
+    // of them.
+    let all_sp_configs = {
+        let gimlet_configs =
+            sp_sim_config.simulated_sps.gimlet.iter().map(|g| &g.common);
+        let sidecar_configs =
+            sp_sim_config.simulated_sps.sidecar.iter().map(|s| &s.common);
+        gimlet_configs.chain(sidecar_configs)
+    };
+    // XXX(eliza): yes, this code is repetitive. We could probably make it a
+    // little less ugly with nested hash maps, but like...I already wrote it, so
+    // you don't have to. :)
+    //
+    // TODO(eliza): presently, we just expect that the number of timeseries for
+    // each serial number and sensor type lines up. If we wanted to be *really*
+    // fancy, we could also assert that all the component IDs, component kinds,
+    // and measurement values line up with the config. But, honestly, it's
+    // pretty unlikely that a bug in MGS' sensor metrics subsystem would mess
+    // that up --- the most important thing is just to make sure that the sensor
+    // data is *present*, as that should catch most regressions.
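The nested-map alternative the comment above alludes to would collapse the seven per-kind maps built below into a single map keyed by (serial, kind). A sketch of that shape, with an illustrative `Kind` enum standing in for `gateway_messages::measurement::MeasurementKind`:

```
use std::collections::HashMap;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum Kind {
    Temperature,
    Current,
    Voltage,
}

// One map keyed by (serial, sensor kind) instead of one map per kind; the
// entry API makes the per-sensor accumulation a single line.
fn count_sensors(
    sps: &[(String, Vec<Kind>)],
) -> HashMap<(String, Kind), usize> {
    let mut counts = HashMap::new();
    for (serial, sensors) in sps {
        for &kind in sensors {
            *counts.entry((serial.clone(), kind)).or_insert(0) += 1;
        }
    }
    counts
}
```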
+ let mut temp_sensors = HashMap::new(); + let mut current_sensors = HashMap::new(); + let mut voltage_sensors = HashMap::new(); + let mut power_sensors = HashMap::new(); + let mut input_voltage_sensors = HashMap::new(); + let mut input_current_sensors = HashMap::new(); + let mut fan_speed_sensors = HashMap::new(); + for sp in all_sp_configs { + let mut temp = 0; + let mut current = 0; + let mut voltage = 0; + let mut input_voltage = 0; + let mut input_current = 0; + let mut power = 0; + let mut speed = 0; + for component in &sp.components { + for sensor in &component.sensors { + use gateway_messages::measurement::MeasurementKind as Kind; + match sensor.def.kind { + Kind::Temperature => temp += 1, + Kind::Current => current += 1, + Kind::Voltage => voltage += 1, + Kind::InputVoltage => input_voltage += 1, + Kind::InputCurrent => input_current += 1, + Kind::Speed => speed += 1, + Kind::Power => power += 1, + } + } + } + temp_sensors.insert(sp.serial_number.clone(), temp); + current_sensors.insert(sp.serial_number.clone(), current); + voltage_sensors.insert(sp.serial_number.clone(), voltage); + input_voltage_sensors.insert(sp.serial_number.clone(), input_voltage); + input_current_sensors.insert(sp.serial_number.clone(), input_current); + fan_speed_sensors.insert(sp.serial_number.clone(), speed); + power_sensors.insert(sp.serial_number.clone(), power); + } + + async fn check_all_timeseries_present( + cptestctx: &ControlPlaneTestContext, + name: &str, + expected: HashMap, + ) { + let metric_name = format!("hardware_component:{name}"); + eprintln!("\n=== checking timeseries for {metric_name} ===\n"); + + if expected.values().all(|&v| v == 0) { + eprintln!( + "-> SP sim config contains no {name} sensors, skipping it" + ); + return; + } + + let query = format!("get {metric_name}"); + + // MGS polls SP sensor data once every second. It's possible that, when + // we triggered Oximeter to collect samples from MGS, it may not have + // run a poll yet, so retry this a few times to avoid a flaky failure if + // no simulated SPs have been polled yet. + // + // We really don't need to wait that long to know that the sensor + // metrics will never be present. This could probably be shorter + // than 30 seconds, but I want to be fairly generous to make sure + // there are no flaky failures even when things take way longer than + // expected... + const MAX_RETRY_DURATION: Duration = Duration::from_secs(30); + let result = wait_for_condition( + || async { + match check_inner(cptestctx, &metric_name, &query, &expected).await { + Ok(_) => Ok(()), + Err(e) => { + eprintln!("{e}; will try again to ensure all samples are collected"); + Err(CondCheckError::<()>::NotYet) + } + } + }, + &Duration::from_secs(1), + &MAX_RETRY_DURATION, + ) + .await; + if result.is_err() { + panic!( + "failed to find expected timeseries when running OxQL query \ + {query:?} within {MAX_RETRY_DURATION:?}" + ) + }; + + // Note that *some* of these checks panic if they fail, but others call + // `anyhow::ensure!`. This is because, if we don't see all the expected + // timeseries, it's possible that this is because some sensor polls + // haven't completed yet, so we'll retry those checks a few times. On + // the other hand, if we see malformed timeseries, or timeseries that we + // don't expect to exist, that means something has gone wrong, and we + // will fail the test immediately. 
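The split described above (retry on "not yet", fail fast on malformed data) is the key property of this loop. A self-contained sketch of the same retry shape, without the real `wait_for_condition` helper; `CheckOutcome` stands in for the `Result<(), CondCheckError>` the real code uses:

```
use std::time::{Duration, Instant};

enum CheckOutcome {
    Ready,
    NotYet,          // samples may still be in flight: keep retrying
    Broken(String),  // malformed timeseries: fail immediately
}

fn retry_until_ready(
    mut check: impl FnMut() -> CheckOutcome,
    period: Duration,
    max: Duration,
) -> Result<(), String> {
    let deadline = Instant::now() + max;
    loop {
        match check() {
            CheckOutcome::Ready => return Ok(()),
            CheckOutcome::Broken(why) => return Err(why),
            CheckOutcome::NotYet if Instant::now() < deadline => {
                std::thread::sleep(period);
            }
            CheckOutcome::NotYet => {
                return Err("condition not met before deadline".into())
            }
        }
    }
}
```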
+    async fn check_inner(
+        cptestctx: &ControlPlaneTestContext,
+        name: &str,
+        query: &str,
+        expected: &HashMap<String, usize>,
+    ) -> anyhow::Result<()> {
+        cptestctx.oximeter.force_collect().await;
+        let table = timeseries_query(&cptestctx, &query)
+            .await
+            .into_iter()
+            .find(|t| t.name() == name)
+            .ok_or_else(|| {
+                anyhow::anyhow!("failed to find table for {query}")
+            })?;
+
+        let mut found = expected
+            .keys()
+            .map(|serial| (serial.clone(), 0))
+            .collect::<HashMap<_, _>>();
+        for timeseries in table.timeseries() {
+            let fields = &timeseries.fields;
+            let n_points = timeseries.points.len();
+            anyhow::ensure!(
+                n_points > 0,
+                "{name} timeseries {fields:?} should have points"
+            );
+            let serial_str: &str = match timeseries.fields.get("chassis_serial")
+            {
+                Some(FieldValue::String(s)) => s.borrow(),
+                Some(x) => panic!(
+                    "{name} `chassis_serial` field should be a string, but got: {x:?}"
+                ),
+                None => {
+                    panic!("{name} timeseries should have a `chassis_serial` field")
+                }
+            };
+            if let Some(count) = found.get_mut(serial_str) {
+                *count += 1;
+            } else {
+                panic!(
+                    "{name} timeseries had an unexpected chassis serial \
+                    number {serial_str:?} (not in the config file)",
+                );
+            }
+        }
+
+        eprintln!("-> {name}: found timeseries: {found:#?}");
+        anyhow::ensure!(
+            &found == expected,
+            "number of {name} timeseries didn't match expected in {table:#?}",
+        );
+        eprintln!("-> okay, looks good!");
+        Ok(())
+    }
+    }
+
+    // Wait until the MGS registers as a producer with Oximeter.
+    wait_for_producer(&cptestctx.oximeter, &mgs.gateway_id).await;
+
+    check_all_timeseries_present(&cptestctx, "temperature", temp_sensors).await;
+    check_all_timeseries_present(&cptestctx, "voltage", voltage_sensors).await;
+    check_all_timeseries_present(&cptestctx, "current", current_sensors).await;
+    check_all_timeseries_present(&cptestctx, "power", power_sensors).await;
+    check_all_timeseries_present(
+        &cptestctx,
+        "input_voltage",
+        input_voltage_sensors,
+    )
+    .await;
+    check_all_timeseries_present(
+        &cptestctx,
+        "input_current",
+        input_current_sensors,
+    )
+    .await;
+    check_all_timeseries_present(&cptestctx, "fan_speed", fan_speed_sensors)
+        .await;
+
+    // Because the `ControlPlaneTestContext` isn't managing the MGS we made for
+    // this test, we are responsible for removing its logs.
+    mgs.logctx.cleanup_successful();
+}
+
 /// Wait until a producer is registered with Oximeter.
 ///
 /// This blocks until the producer is registered, for up to 60s. It panics if
diff --git a/nexus/tests/integration_tests/pantry.rs b/nexus/tests/integration_tests/pantry.rs
index d77ad49db69..22d35b01b52 100644
--- a/nexus/tests/integration_tests/pantry.rs
+++ b/nexus/tests/integration_tests/pantry.rs
@@ -88,12 +88,12 @@ async fn set_instance_state(
 }

 async fn instance_simulate(nexus: &Arc<Nexus>, id: &InstanceUuid) {
-    let sa = nexus
-        .instance_sled_by_id(id)
+    let info = nexus
+        .active_instance_info(id, None)
         .await
         .unwrap()
         .expect("instance must be on a sled to simulate a state change");
-    sa.instance_finish_transition(id.into_untyped_uuid()).await;
+    info.sled_client.vmm_finish_transition(info.propolis_id).await;
 }

 async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk {
diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs
index bf73855ea77..5201b5c9717 100644
--- a/nexus/tests/integration_tests/schema.rs
+++ b/nexus/tests/integration_tests/schema.rs
@@ -954,12 +954,12 @@ async fn dbinit_equals_sum_of_all_up() {
     // Create a connection pool after we apply the first schema version but
     // before applying the rest, and grab a connection from that pool. We'll use
     // it for an extra check later.
-    let pool = nexus_db_queries::db::Pool::new(
+    let pool = nexus_db_queries::db::Pool::new_single_host(
         log,
         &nexus_db_queries::db::Config { url: crdb.pg_config().clone() },
     );
     let conn_from_pool =
-        pool.pool().get().await.expect("failed to get pooled connection");
+        pool.claim().await.expect("failed to get pooled connection");

     // Go from the second version to the latest version.
     for version in all_versions.iter_versions().skip(1) {
diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs
index 2c861ff1595..0de4d313958 100644
--- a/nexus/tests/integration_tests/silos.rs
+++ b/nexus/tests/integration_tests/silos.rs
@@ -37,6 +37,7 @@ use std::fmt::Write;
 use std::str::FromStr;

 use base64::Engine;
+use hickory_resolver::error::ResolveErrorKind;
 use http::method::Method;
 use http::StatusCode;
 use httptest::{matchers::*, responders::*, Expectation, Server};
@@ -44,7 +45,6 @@ use nexus_types::external_api::shared::{FleetRole, SiloRole};
 use std::convert::Infallible;
 use std::net::Ipv4Addr;
 use std::time::Duration;
-use trust_dns_resolver::error::ResolveErrorKind;
 use uuid::Uuid;

 type ControlPlaneTestContext =
@@ -2164,7 +2164,7 @@ pub async fn verify_silo_dns_name(
         .await
     {
         Ok(result) => {
-            let addrs: Vec<_> = result.iter().collect();
+            let addrs: Vec<_> = result.iter().map(|a| &a.0).collect();
             if addrs.is_empty() {
                 false
             } else {
diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs
index 8314d221735..6e482bc1adf 100644
--- a/nexus/tests/integration_tests/sp_updater.rs
+++ b/nexus/tests/integration_tests/sp_updater.rs
@@ -434,9 +434,23 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() {
 #[tokio::test]
 async fn test_sp_updater_delivers_progress() {
     // Start MGS + Sim SP.
-    let mgstestctx =
-        mgs_setup::test_setup("test_sp_updater_delivers_progress", SpPort::One)
-            .await;
+    let mgstestctx = {
+        let (mut mgs_config, sp_sim_config) = mgs_setup::load_test_config();
+        // Enabling SP metrics collection makes this already-flaky test even
+        // flakier, so let's just turn it off.
+        // TODO(eliza): it would be nice if we didn't have to disable metrics in
+        // this test, so that we can better catch regressions that could be
+        // introduced by the metrics subsystem...
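The `get_or_insert_with(Default::default)` call on the next line is the idiomatic way to mutate an optional config section in place: it materializes the section with defaults if the file omitted it, and otherwise leaves existing values alone. A minimal illustration with made-up config types (the field names are illustrative, not the real MGS config):

```
#[derive(Default)]
struct MetricsConfig {
    disabled: bool,
}

struct MgsConfig {
    metrics: Option<MetricsConfig>,
}

fn main() {
    let mut cfg = MgsConfig { metrics: None };
    // If `metrics` is None, insert a default section first, then flip the
    // flag; if it was already present, its other values are preserved.
    cfg.metrics.get_or_insert_with(Default::default).disabled = true;
    assert!(cfg.metrics.as_ref().unwrap().disabled);
}
```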
+        mgs_config.metrics.get_or_insert_with(Default::default).disabled = true;
+        mgs_setup::test_setup_with_config(
+            "test_sp_updater_delivers_progress",
+            SpPort::One,
+            mgs_config,
+            &sp_sim_config,
+            None,
+        )
+        .await
+    };

     // Configure an MGS client.
     let mut mgs_clients =
diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs
index 0b71ddb2cfe..92c44eddadd 100644
--- a/nexus/tests/integration_tests/switch_port.rs
+++ b/nexus/tests/integration_tests/switch_port.rs
@@ -11,9 +11,9 @@ use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params::{
     Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate,
     BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate,
-    BgpPeerConfig, LinkConfigCreate, LldpServiceConfigCreate, Route,
-    RouteConfig, SwitchInterfaceConfigCreate, SwitchInterfaceKind,
-    SwitchPortApplySettings, SwitchPortSettingsCreate,
+    BgpPeerConfig, LinkConfigCreate, LldpLinkConfigCreate, Route, RouteConfig,
+    SwitchInterfaceConfigCreate, SwitchInterfaceKind, SwitchPortApplySettings,
+    SwitchPortSettingsCreate,
 };
 use nexus_types::external_api::views::Rack;
 use omicron_common::api::external::ImportExportPolicy;
@@ -76,7 +76,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) {

     NexusRequest::objects_post(
         client,
-        "/v1/system/networking/bgp-announce",
+        "/v1/system/networking/bgp-announce-set",
         &announce_set,
     )
     .authn_as(AuthnMode::PrivilegedUser)
@@ -118,7 +118,15 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) {
         "phy0".into(),
         LinkConfigCreate {
             mtu: 4700,
-            lldp: LldpServiceConfigCreate { enabled: false, lldp_config: None },
+            lldp: LldpLinkConfigCreate {
+                enabled: true,
+                link_name: Some("Link Name".into()),
+                link_description: Some("Link Description".into()),
+                chassis_id: Some("Chassis ID".into()),
+                system_name: Some("System Name".into()),
+                system_description: Some("System Description".into()),
+                management_ip: None,
+            },
             fec: LinkFec::None,
             speed: LinkSpeed::Speed100G,
             autoneg: false,
@@ -140,6 +148,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) {
                 dst: "1.2.3.0/24".parse().unwrap(),
                 gw: "1.2.3.4".parse().unwrap(),
                 vid: None,
+                local_pref: None,
             }],
         },
     );
@@ -176,8 +185,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) {
     assert_eq!(link0.mtu, 4700);

     let lldp0 = &created.link_lldp[0];
-    assert_eq!(lldp0.enabled, false);
-    assert_eq!(lldp0.lldp_config_id, None);
+    assert_eq!(lldp0.enabled, true);
+    assert_eq!(lldp0.link_name, Some("Link Name".to_string()));
+    assert_eq!(lldp0.link_description, Some("Link Description".to_string()));
+    assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string()));
+    assert_eq!(lldp0.system_name, Some("System Name".to_string()));
+    assert_eq!(
+        lldp0.system_description,
+        Some("System Description".to_string())
+    );
+    assert_eq!(lldp0.management_ip, None);

     let ifx0 = &created.interfaces[0];
     assert_eq!(&ifx0.interface_name, "phy0");
@@ -212,8 +229,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) {
     assert_eq!(link0.mtu, 4700);

     let lldp0 = &roundtrip.link_lldp[0];
-    assert_eq!(lldp0.enabled, false);
-    assert_eq!(lldp0.lldp_config_id, None);
+    assert_eq!(lldp0.enabled, true);
+    assert_eq!(lldp0.link_name, Some("Link Name".to_string()));
+    assert_eq!(lldp0.link_description, Some("Link Description".to_string()));
+    assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string()));
+    assert_eq!(lldp0.system_name, Some("System Name".to_string()));
+
assert_eq!( + lldp0.system_description, + Some("System Description".to_string()) + ); + assert_eq!(lldp0.management_ip, None); let ifx0 = &roundtrip.interfaces[0]; assert_eq!(&ifx0.interface_name, "phy0"); diff --git a/nexus/tests/output/cmd-nexus-noargs-stderr b/nexus/tests/output/cmd-nexus-noargs-stderr index 385248bd0e3..5a218b5c94a 100644 --- a/nexus/tests/output/cmd-nexus-noargs-stderr +++ b/nexus/tests/output/cmd-nexus-noargs-stderr @@ -1,12 +1,11 @@ See README.adoc for more information -Usage: nexus [OPTIONS] [CONFIG_FILE_PATH] +Usage: nexus [CONFIG_FILE_PATH] Arguments: [CONFIG_FILE_PATH] Options: - -O, --openapi Print the external OpenAPI Spec document and exit - -h, --help Print help + -h, --help Print help nexus: CONFIG_FILE_PATH is required diff --git a/nexus/tests/output/cmd-nexus-openapi-stderr b/nexus/tests/output/cmd-nexus-openapi-stderr deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index a4418d2a740..6b31013d49d 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,16 +9,22 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true +base64.workspace = true chrono.workspace = true clap.workspace = true -base64.workspace = true +cookie.workspace = true derive-where.workspace = true derive_more.workspace = true +dropshot.workspace = true futures.workspace = true +http.workspace = true humantime.workspace = true ipnetwork.workspace = true +newtype_derive.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true +oxql-types.workspace = true oxnet.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } diff --git a/nexus/auth/src/authn/external/cookies.rs b/nexus/types/src/authn/cookies.rs similarity index 100% rename from nexus/auth/src/authn/external/cookies.rs rename to nexus/types/src/authn/cookies.rs diff --git a/nexus/types/src/authn/mod.rs b/nexus/types/src/authn/mod.rs new file mode 100644 index 00000000000..f87935428e7 --- /dev/null +++ b/nexus/types/src/authn/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Authentication types for the Nexus API. + +pub mod cookies; diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index cc48f2646a1..58bc35528f3 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -27,20 +27,17 @@ use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::CollectionUuid; -use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; -use slog_error_chain::SlogInlineError; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt; use std::net::Ipv6Addr; use strum::EnumIter; use strum::IntoEnumIterator; -use thiserror::Error; use uuid::Uuid; mod blueprint_diff; @@ -546,7 +543,7 @@ impl BlueprintZonesConfig { } /// Returns true if all zones in the blueprint have a disposition of - // `Expunged`, false otherwise. + /// `Expunged`, false otherwise. 
pub fn are_all_zones_expunged(&self) -> bool { self.zones .iter() @@ -595,13 +592,6 @@ fn zone_sort_key(z: &T) -> impl Ord { (z.kind(), z.id()) } -/// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`]. -#[derive(Debug, Clone, Error, SlogInlineError)] -pub enum InvalidOmicronZoneType { - #[error("Omicron zone {} requires an external IP ID", kind.report_str())] - ExternalIpIdRequired { kind: ZoneKind }, -} - /// Describes one Omicron-managed zone in a blueprint. /// /// Part of [`BlueprintZonesConfig`]. @@ -616,168 +606,6 @@ pub struct BlueprintZoneConfig { pub zone_type: BlueprintZoneType, } -impl BlueprintZoneConfig { - /// Convert from an [`OmicronZoneConfig`]. - /// - /// This method is annoying to call correctly and will become more so over - /// time. Ideally we'd remove all callers and then remove this method, but - /// for now we keep it. - /// - /// # Errors - /// - /// If `config.zone_type` is a zone that has an external IP address (Nexus, - /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this - /// method will return an error. - pub fn from_omicron_zone_config( - config: OmicronZoneConfig, - disposition: BlueprintZoneDisposition, - external_ip_id: Option, - ) -> Result { - let kind = config.zone_type.kind(); - let zone_type = match config.zone_type { - OmicronZoneType::BoundaryNtp { - address, - dns_servers, - domain, - nic, - ntp_servers, - snat_cfg, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::BoundaryNtp( - blueprint_zone_type::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - external_ip: OmicronZoneExternalSnatIp { - id: external_ip_id, - snat_cfg, - }, - }, - ) - } - OmicronZoneType::Clickhouse { address, dataset } => { - BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { - address, - dataset, - }) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { address, dataset }, - ) - } - OmicronZoneType::ClickhouseServer { address, dataset } => { - BlueprintZoneType::ClickhouseServer( - blueprint_zone_type::ClickhouseServer { address, dataset }, - ) - } - OmicronZoneType::CockroachDb { address, dataset } => { - BlueprintZoneType::CockroachDb( - blueprint_zone_type::CockroachDb { address, dataset }, - ) - } - OmicronZoneType::Crucible { address, dataset } => { - BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { - address, - dataset, - }) - } - OmicronZoneType::CruciblePantry { address } => { - BlueprintZoneType::CruciblePantry( - blueprint_zone_type::CruciblePantry { address }, - ) - } - OmicronZoneType::ExternalDns { - dataset, - dns_address, - http_address, - nic, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::ExternalDns( - blueprint_zone_type::ExternalDns { - dataset, - http_address, - dns_address: OmicronZoneExternalFloatingAddr { - id: external_ip_id, - addr: dns_address, - }, - nic, - }, - ) - } - OmicronZoneType::InternalDns { - dataset, - dns_address, - gz_address, - gz_address_index, - http_address, - } => BlueprintZoneType::InternalDns( - blueprint_zone_type::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - }, - ), - OmicronZoneType::InternalNtp { - address, - dns_servers, - domain, - ntp_servers, - } => BlueprintZoneType::InternalNtp( - 
blueprint_zone_type::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - }, - ), - OmicronZoneType::Nexus { - external_dns_servers, - external_ip, - external_tls, - internal_address, - nic, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { - internal_address, - external_ip: OmicronZoneExternalFloatingIp { - id: external_ip_id, - ip: external_ip, - }, - nic, - external_tls, - external_dns_servers, - }) - } - OmicronZoneType::Oximeter { address } => { - BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { - address, - }) - } - }; - Ok(Self { - disposition, - id: OmicronZoneUuid::from_untyped_uuid(config.id), - underlay_address: config.underlay_address, - filesystem_pool: config.filesystem_pool, - zone_type, - }) - } -} - impl From for OmicronZoneConfig { fn from(z: BlueprintZoneConfig) -> Self { Self { diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 1af3636d0e8..a2eec5ca8ac 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -6,6 +6,8 @@ //! blueprints. use super::AddNetworkResourceError; +use super::Blueprint; +use super::BlueprintZoneFilter; use super::OmicronZoneExternalIp; use super::OmicronZoneNetworkResources; use super::OmicronZoneNic; @@ -16,6 +18,7 @@ use crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; use clap::ValueEnum; use ipnetwork::IpNetwork; +use newtype_uuid::GenericUuid; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -25,6 +28,7 @@ use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::VnicUuid; use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::Deserialize; @@ -95,6 +99,10 @@ impl PlanningInput { self.policy.target_nexus_zone_count } + pub fn target_internal_dns_zone_count(&self) -> usize { + self.policy.target_internal_dns_zone_count + } + pub fn target_cockroachdb_zone_count(&self) -> usize { self.policy.target_cockroachdb_zone_count } @@ -280,6 +288,13 @@ pub enum CockroachDbPreserveDowngrade { } impl CockroachDbPreserveDowngrade { + pub fn is_set(self) -> bool { + match self { + CockroachDbPreserveDowngrade::Set(_) => true, + _ => false, + } + } + pub fn from_optional_string( value: &Option, ) -> Result { @@ -702,6 +717,11 @@ pub struct Policy { /// desired total number of deployed Nexus zones pub target_nexus_zone_count: usize, + /// desired total number of internal DNS zones. + /// Must be <= [`omicron_common::policy::MAX_INTERNAL_DNS_REDUNDANCY`], + /// and should be >= [`omicron_common::policy::INTERNAL_DNS_REDUNDANCY`]. + pub target_internal_dns_zone_count: usize, + /// desired total number of deployed CockroachDB zones pub target_cockroachdb_zone_count: usize, @@ -709,6 +729,23 @@ pub struct Policy { /// at present this is hardcoded based on the version of CockroachDB we /// presently ship and the tick-tock pattern described in RFD 469. pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, + + /// Policy information for a replicated clickhouse setup + /// + /// If this policy is `None`, then we are using a single node clickhouse + /// setup. 
Eventually we will only allow multi-node setups and this will no + /// longer be an option. + pub clickhouse_policy: Option, +} + +/// Policy for replicated clickhouse setups +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClickhousePolicy { + /// Desired number of clickhouse servers + pub target_servers: usize, + + /// Desired number of clickhouse keepers + pub target_keepers: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -758,9 +795,11 @@ impl PlanningInputBuilder { service_ip_pool_ranges: Vec::new(), target_boundary_ntp_zone_count: 0, target_nexus_zone_count: 0, + target_internal_dns_zone_count: 0, target_cockroachdb_zone_count: 0, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, + clickhouse_policy: None, }, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -824,6 +863,35 @@ impl PlanningInputBuilder { &mut self.network_resources } + pub fn update_network_resources_from_blueprint( + &mut self, + blueprint: &Blueprint, + ) -> Result<(), PlanningInputBuildError> { + self.network_resources = OmicronZoneNetworkResources::new(); + for (_, zone) in + blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + { + let service_id = zone.id; + if let Some((external_ip, nic)) = + zone.zone_type.external_networking() + { + self.add_omicron_zone_external_ip(service_id, external_ip)?; + self.add_omicron_zone_nic( + service_id, + OmicronZoneNic { + // TODO-cleanup use `TypedUuid` everywhere + id: VnicUuid::from_untyped_uuid(nic.id), + mac: nic.mac, + ip: nic.ip, + slot: nic.slot, + primary: nic.primary, + }, + )?; + } + } + Ok(()) + } + pub fn policy_mut(&mut self) -> &mut Policy { &mut self.policy } diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index e4958fc3c30..e0f389fe2a5 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -73,56 +73,26 @@ impl BlueprintZoneType { /// Identifies whether this is an NTP zone (any flavor) pub fn is_ntp(&self) -> bool { - match self { + matches!( + self, BlueprintZoneType::InternalNtp(_) - | BlueprintZoneType::BoundaryNtp(_) => true, - BlueprintZoneType::Nexus(_) - | BlueprintZoneType::ExternalDns(_) - | BlueprintZoneType::Clickhouse(_) - | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::ClickhouseServer(_) - | BlueprintZoneType::CockroachDb(_) - | BlueprintZoneType::Crucible(_) - | BlueprintZoneType::CruciblePantry(_) - | BlueprintZoneType::InternalDns(_) - | BlueprintZoneType::Oximeter(_) => false, - } + | BlueprintZoneType::BoundaryNtp(_) + ) } /// Identifies whether this is a Nexus zone pub fn is_nexus(&self) -> bool { - match self { - BlueprintZoneType::Nexus(_) => true, - BlueprintZoneType::BoundaryNtp(_) - | BlueprintZoneType::ExternalDns(_) - | BlueprintZoneType::Clickhouse(_) - | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::ClickhouseServer(_) - | BlueprintZoneType::CockroachDb(_) - | BlueprintZoneType::Crucible(_) - | BlueprintZoneType::CruciblePantry(_) - | BlueprintZoneType::InternalDns(_) - | BlueprintZoneType::InternalNtp(_) - | BlueprintZoneType::Oximeter(_) => false, - } + matches!(self, BlueprintZoneType::Nexus(_)) + } + + /// Identifies whether this is an internal DNS zone + pub fn is_internal_dns(&self) -> bool { + matches!(self, BlueprintZoneType::InternalDns(_)) } /// Identifies whether this a Crucible (not Crucible pantry) zone pub fn is_crucible(&self) -> bool { - match self { - BlueprintZoneType::Crucible(_) => true, - 
BlueprintZoneType::BoundaryNtp(_) - | BlueprintZoneType::Clickhouse(_) - | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::ClickhouseServer(_) - | BlueprintZoneType::CockroachDb(_) - | BlueprintZoneType::CruciblePantry(_) - | BlueprintZoneType::ExternalDns(_) - | BlueprintZoneType::InternalDns(_) - | BlueprintZoneType::InternalNtp(_) - | BlueprintZoneType::Nexus(_) - | BlueprintZoneType::Oximeter(_) => false, - } + matches!(self, BlueprintZoneType::Crucible(_)) } /// Returns the durable dataset associated with this zone, if any exists. diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 8dcce913b3d..691f36534d4 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -8,6 +8,7 @@ use crate::external_api::shared; use base64::Engine; use chrono::{DateTime, Utc}; +use http::Uri; use omicron_common::api::external::{ AddressLotKind, AllowedSourceIps, BfdMode, BgpPeer, ByteCount, Hostname, IdentityMetadataCreateParams, IdentityMetadataUpdateParams, @@ -16,6 +17,7 @@ use omicron_common::api::external::{ }; use omicron_common::disk::DiskVariant; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; +use parse_display::Display; use schemars::JsonSchema; use serde::{ de::{self, Visitor}, @@ -83,11 +85,13 @@ path_param!(IpPoolPath, pool, "IP pool"); path_param!(SshKeyPath, ssh_key, "SSH key"); path_param!(AddressLotPath, address_lot, "address lot"); path_param!(ProbePath, probe, "probe"); +path_param!(CertificatePath, certificate, "certificate"); id_path_param!(GroupPath, group_id, "group"); // TODO: The hardware resources should be represented by its UUID or a hardware // ID that can be used to deterministically generate the UUID. +id_path_param!(RackPath, rack_id, "rack"); id_path_param!(SledPath, sled_id, "sled"); id_path_param!(SwitchPath, switch_id, "switch"); id_path_param!(PhysicalDiskPath, disk_id, "physical disk"); @@ -141,6 +145,13 @@ pub struct OptionalSiloSelector { pub silo: Option, } +/// Path parameters for Silo User requests +#[derive(Deserialize, JsonSchema)] +pub struct UserParam { + /// The user's internal ID + pub user_id: Uuid, +} + #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SamlIdentityProviderSelector { /// Name or ID of the silo in which the SAML identity provider is associated @@ -1093,12 +1104,6 @@ impl JsonSchema for UserData { } } -/// Migration parameters for an `Instance` -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrate { - pub dst_sled_id: Uuid, -} - /// Forwarded to a propolis server to request the contents of an Instance's serial console. 
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
pub struct InstanceSerialConsoleRequest {
@@ -1247,6 +1252,24 @@ pub struct RouterRouteUpdate {

 // DISKS

+#[derive(Display, Serialize, Deserialize, JsonSchema)]
+#[display(style = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum DiskMetricName {
+    Activated,
+    Flush,
+    Read,
+    ReadBytes,
+    Write,
+    WriteBytes,
+}
+
+#[derive(Serialize, Deserialize, JsonSchema)]
+pub struct DiskMetricsPath {
+    pub disk: NameOrId,
+    pub metric: DiskMetricName,
+}
+
 #[derive(Copy, Clone, Debug, Deserialize, Serialize)]
 #[serde(try_from = "u32")] // invoke the try_from validation routine below
 pub struct BlockSize(pub u32);
@@ -1427,6 +1450,23 @@ pub struct LoopbackAddressCreate {
     pub anycast: bool,
 }

+#[derive(Serialize, Deserialize, JsonSchema)]
+pub struct LoopbackAddressPath {
+    /// The rack to use when selecting the loopback address.
+    pub rack_id: Uuid,
+
+    /// The switch location to use when selecting the loopback address.
+    pub switch_location: Name,
+
+    /// The IP address to use when selecting the loopback address.
+    pub address: IpAddr,
+
+    /// The subnet mask to use when selecting the loopback address.
+    pub subnet_mask: u8,
+}
+
 /// Parameters for creating a port settings group.
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
 pub struct SwtichPortSettingsGroupCreate {
@@ -1506,7 +1546,7 @@ pub struct LinkConfigCreate {
     pub mtu: u16,

     /// The link-layer discovery protocol (LLDP) configuration for the link.
-    pub lldp: LldpServiceConfigCreate,
+    pub lldp: LldpLinkConfigCreate,

     /// The forward error correction mode of the link.
     pub fec: LinkFec,
@@ -1518,16 +1558,29 @@ pub struct LinkConfigCreate {
     pub autoneg: bool,
 }

-/// The LLDP configuration associated with a port. LLDP may be either enabled or
-/// disabled; if enabled, an LLDP configuration must be provided by name or id.
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
-pub struct LldpServiceConfigCreate {
+/// The LLDP configuration associated with a port.
+#[derive(Clone, Debug, Default, Deserialize, Serialize, JsonSchema)]
+pub struct LldpLinkConfigCreate {
     /// Whether or not LLDP is enabled.
     pub enabled: bool,

-    /// A reference to the LLDP configuration used. Must not be `None` when
-    /// `enabled` is `true`.
-    pub lldp_config: Option<NameOrId>,
+    /// The LLDP link name TLV.
+    pub link_name: Option<String>,
+
+    /// The LLDP link description TLV.
+    pub link_description: Option<String>,
+
+    /// The LLDP chassis identifier TLV.
+    pub chassis_id: Option<String>,
+
+    /// The LLDP system name TLV.
+    pub system_name: Option<String>,
+
+    /// The LLDP system description TLV.
+    pub system_description: Option<String>,
+
+    /// The LLDP management IP TLV.
+    pub management_ip: Option<IpAddr>,
 }

 /// A layer-3 switch interface configuration. When IPv6 is enabled, a link local
@@ -1587,6 +1640,10 @@ pub struct Route {

     /// VLAN id the gateway is reachable over.
     pub vid: Option<u16>,
+
+    /// Local preference for route. Higher preference indicates precedence
+    /// within and across protocols.
+    pub local_pref: Option<u32>,
 }

 /// Select a BGP config by a name or id.
@@ -1618,6 +1675,13 @@ pub struct BgpAnnounceSetCreate {
     pub announcement: Vec<BgpAnnouncementCreate>,
 }

+/// Optionally select a BGP announce set by a name or id.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
+pub struct OptionalBgpAnnounceSetSelector {
+    /// A name or id to use when selecting a BGP announce set.
+    pub name_or_id: Option<NameOrId>,
+}
+
 /// Select a BGP announce set by a name or id.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct BgpAnnounceSetSelector { @@ -1879,6 +1943,20 @@ pub struct SshKeyCreate { // METRICS +#[derive(Display, Deserialize, JsonSchema)] +#[display(style = "snake_case")] +#[serde(rename_all = "snake_case")] +pub enum SystemMetricName { + VirtualDiskSpaceProvisioned, + CpusProvisioned, + RamProvisioned, +} + +#[derive(Deserialize, JsonSchema)] +pub struct SystemMetricsPathParam { + pub metric_name: SystemMetricName, +} + /// Query parameters common to resource metrics endpoints. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct ResourceMetrics { @@ -1940,3 +2018,98 @@ pub struct AllowListUpdate { /// The new list of allowed source IPs. pub allowed_ips: AllowedSourceIps, } + +// Roles + +// Roles have their own pagination scheme because they do not use the usual "id" +// or "name" types. For more, see the comment in dbinit.sql. +#[derive(Deserialize, JsonSchema, Serialize)] +pub struct RolePage { + pub last_seen: String, +} + +/// Path parameters for global (system) role requests +#[derive(Deserialize, JsonSchema)] +pub struct RolePath { + /// The built-in role's unique name. + pub role_name: String, +} + +// Console API + +#[derive(Deserialize, JsonSchema)] +pub struct RestPathParam { + pub path: Vec, +} + +#[derive(Deserialize, JsonSchema)] +pub struct LoginToProviderPathParam { + pub silo_name: Name, + pub provider_name: Name, +} + +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct LoginUrlQuery { + pub redirect_uri: Option, +} + +#[derive(Deserialize, JsonSchema)] +pub struct LoginPath { + pub silo_name: Name, +} + +/// This is meant as a security feature. We want to ensure we never redirect to +/// a URI on a different host. +#[derive(Serialize, Deserialize, Debug, JsonSchema, Clone, Display)] +#[serde(try_from = "String")] +#[display("{0}")] +pub struct RelativeUri(String); + +impl FromStr for RelativeUri { + type Err = String; + + fn from_str(s: &str) -> Result { + Self::try_from(s.to_string()) + } +} + +impl TryFrom for RelativeUri { + type Error = String; + + fn try_from(uri: Uri) -> Result { + if uri.host().is_none() && uri.scheme().is_none() { + Ok(Self(uri.to_string())) + } else { + Err(format!("\"{}\" is not a relative URI", uri)) + } + } +} + +impl TryFrom for RelativeUri { + type Error = String; + + fn try_from(s: String) -> Result { + s.parse::() + .map_err(|_| format!("\"{}\" is not a relative URI", s)) + .and_then(|uri| Self::try_from(uri)) + } +} + +// Device auth + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct DeviceAuthRequest { + pub client_id: Uuid, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct DeviceAuthVerify { + pub user_code: String, +} + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct DeviceAccessTokenRequest { + pub grant_type: String, + pub device_code: String, + pub client_id: Uuid, +} diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e241f849eee..e8d81b05bb7 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -498,6 +498,12 @@ pub struct Rack { // SLEDS +/// The unique ID of a sled. +#[derive(Clone, Debug, Serialize, JsonSchema)] +pub struct SledId { + pub id: Uuid, +} + /// An operator's view of a Sled. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct Sled { @@ -971,3 +977,12 @@ pub struct AllowList { /// The allowlist of IPs or subnets. 
pub allowed_ips: ExternalAllowedSourceIps, } + +// OxQL QUERIES + +/// The result of a successful OxQL query. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct OxqlQueryResult { + /// Tables resulting from the query, each containing timeseries. + pub tables: Vec, +} diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 6463aa8ab6e..6f6e80cb60e 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -19,3 +19,37 @@ pub struct LookupRegionPortStatus { pub found_port_ok: Vec, pub errors: Vec, } + +/// The status of a `region_snapshot_replacement_start` background task +/// activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementStartStatus { + pub requests_created_ok: Vec, + pub start_invoked_ok: Vec, + pub errors: Vec, +} + +/// The status of a `region_snapshot_replacement_garbage_collect` background +/// task activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementGarbageCollectStatus { + pub garbage_collect_requested: Vec, + pub errors: Vec, +} + +/// The status of a `region_snapshot_replacement_step` background task +/// activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementStepStatus { + pub step_records_created_ok: Vec, + pub step_garbage_collect_invoked_ok: Vec, + pub step_invoked_ok: Vec, + pub errors: Vec, +} + +/// The status of a `region_snapshot_replacement_finish` background task activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementFinishStatus { + pub records_set_to_done: Vec, + pub errors: Vec, +} diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 3a26dde4ba5..c803f003f1d 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -207,3 +207,10 @@ pub struct OximeterInfo { /// The address on which this oximeter instance listens for requests pub address: SocketAddr, } + +/// Parameters used when migrating an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceMigrateRequest { + /// The ID of the sled to which to migrate the target instance. + pub dst_sled_id: Uuid, +} diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 494573e8341..8a0a3ec80e9 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -29,6 +29,7 @@ //! rules, so our model layer knows about our views. That seems to be a //! relatively minor offense, so it's the way we leave things for now. +pub mod authn; pub mod deployment; pub mod external_api; pub mod identity; diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 7b4f2576703..bd928001bbd 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -732,6 +732,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. 
If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", @@ -767,6 +828,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -1183,6 +1253,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/clickhouse-admin.json b/openapi/clickhouse-admin.json new file mode 100644 index 00000000000..6bb53677121 --- /dev/null +++ b/openapi/clickhouse-admin.json @@ -0,0 +1,84 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "ClickHouse Cluster Admin API", + "description": "API for interacting with the Oxide control plane's ClickHouse cluster", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "0.0.1" + }, + "paths": { + "/node/address": { + "get": { + "summary": "Retrieve the address the ClickHouse server or keeper node is listening on", + "operationId": "clickhouse_address", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClickhouseAddress" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "ClickhouseAddress": { + "type": "object", + "properties": { + "clickhouse_address": { + "type": "string" + } + }, + "required": [ + "clickhouse_address" + ] + }, + "Error": { + 
"description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 5dd7d3dea35..da8bbacf8bf 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -746,10 +746,9 @@ } } }, - "/instances/{instance_id}": { - "put": { - "summary": "Report updated state for an instance.", - "operationId": "cpapi_instances_put", + "/instances/{instance_id}/migrate": { + "post": { + "operationId": "instance_migrate", "parameters": [ { "in": "path", @@ -765,15 +764,22 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/InstanceMigrateRequest" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Instance" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -1426,6 +1432,43 @@ } } }, + "/vmms/{propolis_id}": { + "put": { + "summary": "Report updated state for a VMM.", + "operationId": "cpapi_instances_put", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SledVmmState" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/volume/{volume_id}/remove-read-only-parent": { "post": { "summary": "Request removal of a read_only_parent from a volume.", @@ -2668,39 +2711,8 @@ ] }, "DatasetKind": { - "description": "Describes the purpose of the dataset.", - "oneOf": [ - { - "type": "string", - "enum": [ - "crucible", - "cockroach", - "external_dns", - "internal_dns" - ] - }, - { - "description": "Used for single-node clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - }, - { - "description": "Used for replicated clickhouse deployments", - "type": "string", - "enum": [ - "clickhouse_server" - ] - } - ] + "description": "The kind of dataset. 
See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" }, "DatasetPutRequest": { "description": "Describes a dataset within a pool.", @@ -3300,6 +3312,179 @@ } ] }, + "Instance": { + "description": "View of an Instance", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "hostname": { + "description": "RFC1035-compliant hostname for the Instance.", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "memory": { + "description": "memory allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "ncpus": { + "description": "number of CPUs allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuCount" + } + ] + }, + "project_id": { + "description": "id for the project containing this Instance", + "type": "string", + "format": "uuid" + }, + "run_state": { + "$ref": "#/components/schemas/InstanceState" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + }, + "time_run_state_updated": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "hostname", + "id", + "memory", + "name", + "ncpus", + "project_id", + "run_state", + "time_created", + "time_modified", + "time_run_state_updated" + ] + }, + "InstanceCpuCount": { + "description": "The number of CPUs in an Instance", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "InstanceMigrateRequest": { + "description": "Parameters used when migrating an instance.", + "type": "object", + "properties": { + "dst_sled_id": { + "description": "The ID of the sled to which to migrate the target instance.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dst_sled_id" + ] + }, + "InstanceState": { + "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", + "oneOf": [ + { + "description": "The instance is being created.", + "type": "string", + "enum": [ + "creating" + ] + }, + { + "description": "The instance is currently starting up.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The instance is currently running.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The instance is currently stopped.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis 
is ready to continue execution.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The instance is attempting to recover from a failure.", + "type": "string", + "enum": [ + "repairing" + ] + }, + { + "description": "The instance has encountered a failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The instance has been deleted.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "IpNet": { "x-rust-type": { "crate": "oxnet", @@ -3532,6 +3717,67 @@ "start_time" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. 
If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -3937,6 +4183,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4156,6 +4411,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] }, @@ -4448,6 +4710,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -4760,50 +5030,6 @@ "id" ] }, - "SledInstanceState": { - "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", - "type": "object", - "properties": { - "migration_in": { - "nullable": true, - "description": "The current state of any inbound migration to this VMM.", - "allOf": [ - { - "$ref": "#/components/schemas/MigrationRuntimeState" - } - ] - }, - "migration_out": { - "nullable": true, - "description": "The state of any outbound migration from this VMM.", - "allOf": [ - { - "$ref": "#/components/schemas/MigrationRuntimeState" - } - ] - }, - "propolis_id": { - "description": "The ID of the VMM whose state is being reported.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, - "vmm_state": { - "description": "The most recent state of the sled's VMM process.", - "allOf": [ - { - "$ref": "#/components/schemas/VmmRuntimeState" - } - ] - } - }, - "required": [ - "propolis_id", - "vmm_state" - ] - }, "SledPolicy": { "description": "The operator-defined policy of a sled.", "oneOf": [ @@ -4918,6 +5144,41 @@ } ] }, + "SledVmmState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a VMM.", + "type": "object", + "properties": { + "migration_in": { + "nullable": true, + "description": "The current state of any inbound migration to this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "migration_out": { + "nullable": true, + "description": "The state of any outbound migration from this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "vmm_state": { + "description": "The most recent state of the sled's VMM process.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] + } + }, + "required": [ + "vmm_state" + ] + }, "SourceNatConfig": { "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", "type": "object", @@ -5030,10 +5291,6 @@ "type": "string", "format": "uuid" }, - "TypedUuidForPropolisKind": { - "type": "string", - "format": "uuid" - }, "TypedUuidForSledKind": { "type": "string", "format": "uuid" @@ -5295,6 +5552,10 @@ ] } ] + }, + "TypedUuidForPropolisKind": { + "type": "string", + "format": "uuid" } }, "responses": { diff --git a/openapi/nexus.json b/openapi/nexus.json index da77eec2a86..a855378cd4d 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json 
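Before the external `nexus.json` hunks below, it is worth pausing on the internal-API change above: `SledInstanceState` is replaced by a slimmer `SledVmmState` that drops the `propolis_id` field, since VMM state is now keyed by Propolis ID in the request itself (see the `/vmms/{propolis_id}` routes in `sled-agent.json` further down). A minimal deserialization sketch of the new shape, assuming only `serde`/`serde_json` — the nested runtime-state objects and the sample payload fields are placeholders, not the real `MigrationRuntimeState`/`VmmRuntimeState` schemas:

```rust
use serde::Deserialize;

// Minimal stand-in for the `SledVmmState` schema above. The nested
// migration/VMM runtime-state objects are kept as raw JSON values so the
// sketch stays self-contained.
#[derive(Debug, Deserialize)]
struct SledVmmState {
    // `nullable: true` in the schema, hence `Option` with a default.
    #[serde(default)]
    migration_in: Option<serde_json::Value>,
    #[serde(default)]
    migration_out: Option<serde_json::Value>,
    // The only required property; note there is no `propolis_id` anymore.
    vmm_state: serde_json::Value,
}

fn main() -> Result<(), serde_json::Error> {
    // Hypothetical payload; the inner field is a placeholder, not the
    // actual `VmmRuntimeState` schema.
    let body = r#"{ "vmm_state": { "state": "running" } }"#;
    let state: SledVmmState = serde_json::from_str(body)?;
    assert!(state.migration_in.is_none() && state.migration_out.is_none());
    println!("{state:?}");
    Ok(())
}
```

Callers that previously read `propolis_id` back out of the body already know which VMM they asked about, so dropping the field presumably removes one place where the two could disagree.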
@@ -468,6 +468,7 @@ { "in": "path", "name": "certificate", + "description": "Name or ID of the certificate", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" }
@@ -504,6 +505,7 @@ { "in": "path", "name": "certificate", + "description": "Name or ID of the certificate", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" }
@@ -2276,62 +2278,6 @@ } } }, - "/v1/instances/{instance}/migrate": { - "post": { - "tags": [ - "instances" - ], - "summary": "Migrate an instance", - "operationId": "instance_migrate", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "path", - "name": "instance", - "description": "Name or ID of the instance", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceMigrate" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Instance" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/v1/instances/{instance}/reboot": { "post": { "tags": [
@@ -4223,7 +4169,7 @@ { "in": "path", "name": "rack_id", - "description": "The rack's unique ID.", + "description": "ID of the rack", "required": true, "schema": { "type": "string",
@@ -5084,7 +5030,7 @@ { "in": "path", "name": "user_id", - "description": "The user's internal id", + "description": "The user's internal ID", "required": true, "schema": { "type": "string",
@@ -5126,7 +5072,7 @@ { "in": "path", "name": "user_id", - "description": "The user's internal id", + "description": "The user's internal ID", "required": true, "schema": { "type": "string",
@@ -6589,22 +6535,48 @@ } } }, - "/v1/system/networking/bgp-announce": { + "/v1/system/networking/bgp-announce-set": { "get": { "tags": [ "system/networking" ], - "summary": "Get originated routes for a BGP configuration", + "summary": "List BGP announce sets", "operationId": "networking_bgp_announce_set_list", "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, { "in": "query", "name": "name_or_id", - "description": "A name or id to use when selecting BGP port settings", - "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } } ], "responses": {
@@ -6613,10 +6585,10 @@ "content": { "application/json": { "schema": { - "title": "Array_of_BgpAnnouncement", + "title": "Array_of_BgpAnnounceSet", "type": "array", "items": { - "$ref": "#/components/schemas/BgpAnnouncement" + "$ref": "#/components/schemas/BgpAnnounceSet" } } }
@@ -6628,6 +6600,9 @@ "5XX": { "$ref": "#/components/responses/Error" } + }, + "x-dropshot-pagination": { + "required": [] } }, "put": {
@@ -6665,7 +6640,9 @@ "$ref": 
"#/components/responses/Error" } } - }, + } + }, + "/v1/system/networking/bgp-announce-set/{name_or_id}": { "delete": { "tags": [ "system/networking" @@ -6674,7 +6651,7 @@ "operationId": "networking_bgp_announce_set_delete", "parameters": [ { - "in": "query", + "in": "path", "name": "name_or_id", "description": "A name or id to use when selecting BGP port settings", "required": true, @@ -6696,6 +6673,75 @@ } } }, + "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get originated routes for a specified BGP announce set", + "operationId": "networking_bgp_announcement_list", + "parameters": [ + { + "in": "path", + "name": "name_or_id", + "description": "A name or id to use when selecting BGP port settings", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_BgpAnnouncement", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpAnnouncement" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/networking/bgp-exported": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get BGP exported routes", + "operationId": "networking_bgp_exported", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BgpExported" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/networking/bgp-message-history": { "get": { "tags": [ @@ -7828,7 +7874,7 @@ { "in": "path", "name": "user_id", - "description": "The user's internal id", + "description": "The user's internal ID", "required": true, "schema": { "type": "string", @@ -8082,11 +8128,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_Table", - "type": "array", - "items": { - "$ref": "#/components/schemas/Table" - } + "$ref": "#/components/schemas/OxqlQueryResult" } } } @@ -10411,6 +10453,25 @@ "items" ] }, + "BgpExported": { + "description": "The current status of a BGP peer.", + "type": "object", + "properties": { + "exports": { + "description": "Exported routes indexed by peer address.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4Net" + } + } + } + }, + "required": [ + "exports" + ] + }, "BgpImportedRouteIpv4": { "description": "A route imported from a BGP peer.", "type": "object", @@ -15271,19 +15332,6 @@ } ] }, - "InstanceMigrate": { - "description": "Migration parameters for an `Instance`", - "type": "object", - "properties": { - "dst_sled_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dst_sled_id" - ] - }, "InstanceNetworkInterface": { "description": "An `InstanceNetworkInterface` represents a virtual network interface device attached to an instance.", "type": "object", @@ -16053,7 +16101,7 @@ "description": "The link-layer discovery protocol (LLDP) configuration for the link.", "allOf": [ { - "$ref": "#/components/schemas/LldpServiceConfigCreate" + "$ref": "#/components/schemas/LldpLinkConfigCreate" } ] }, @@ -16174,10 +16222,15 @@ } ] }, - "LldpServiceConfig": { + "LldpLinkConfig": { "description": "A link layer discovery protocol (LLDP) 
service configuration.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not the LLDP service is enabled.", "type": "boolean" @@ -16187,11 +16240,34 @@ "type": "string", "format": "uuid" }, - "lldp_config_id": { + "link_description": { "nullable": true, - "description": "The link-layer discovery protocol configuration for this service.", - "type": "string", - "format": "uuid" + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { + "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16199,22 +16275,44 @@ "id" ] }, - "LldpServiceConfigCreate": { - "description": "The LLDP configuration associated with a port. LLDP may be either enabled or disabled, if enabled, an LLDP configuration must be provided by name or id.", + "LldpLinkConfigCreate": { + "description": "The LLDP configuration associated with a port.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not LLDP is enabled.", "type": "boolean" }, - "lldp_config": { + "link_description": { "nullable": true, - "description": "A reference to the LLDP configuration used. Must not be `None` when `enabled` is `true`.", - "allOf": [ - { - "$ref": "#/components/schemas/NameOrId" - } - ] + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { + "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "type": "string", + "format": "ip" + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16570,6 +16668,22 @@ } ] }, + "OxqlQueryResult": { + "description": "The result of a successful OxQL query.", + "type": "object", + "properties": { + "tables": { + "description": "Tables resulting from the query, each containing timeseries.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + }, + "required": [ + "tables" + ] + }, "Password": { "title": "A password used to authenticate a user", "description": "Passwords may be subject to additional constraints.", @@ -17237,6 +17351,13 @@ "type": "string", "format": "ip" }, + "local_pref": { + "nullable": true, + "description": "Local preference for route. 
Higher preference indicates precedence within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "vid": { "nullable": true, "description": "VLAN id the gateway is reachable over.",
@@ -19215,7 +19336,8 @@ "description": "The name of this link.", "type": "string" }, - "lldp_service_config_id": { + "lldp_link_config_id": { + "nullable": true, "description": "The link-layer discovery protocol service configuration id for this link.", "type": "string", "format": "uuid"
@@ -19244,7 +19366,6 @@ "autoneg", "fec", "link_name", - "lldp_service_config_id", "mtu", "port_settings_id", "speed"
@@ -19295,6 +19416,13 @@ "description": "The interface name this route configuration is assigned to.", "type": "string" }, + "local_pref": { + "nullable": true, + "description": "Local preference indicating priority within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "port_settings_id": { "description": "The port settings object this route configuration belongs to.", "type": "string",
@@ -19499,7 +19627,7 @@ "description": "Link-layer discovery protocol (LLDP) settings.", "type": "array", "items": { - "$ref": "#/components/schemas/LldpServiceConfig" + "$ref": "#/components/schemas/LldpLinkConfig" } }, "links": {
@@ -19808,7 +19936,8 @@ "nanoseconds", "volts", "amps", - "degrees_celcius" + "watts", + "degrees_celsius" ] }, {
@@ -20200,10 +20329,20 @@ "type": "object", "properties": { "metric_type": { - "$ref": "#/components/schemas/MetricType" + "description": "The type of this metric.", + "allOf": [ + { + "$ref": "#/components/schemas/MetricType" + } + ] }, "values": { - "$ref": "#/components/schemas/ValueArray" + "description": "The data values.", + "allOf": [ + { + "$ref": "#/components/schemas/ValueArray" + } + ] } }, "required": [
diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f596ac6ee6b..327351d9616 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json
@@ -277,6 +277,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] }
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 9dcc4805218..e4777e0aae3 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json
@@ -176,25 +176,37 @@ } } }, - "/disks/{disk_id}": { - "put": { - "operationId": "disk_put", - "parameters": [ - { - "in": "path", - "name": "disk_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" + "/datasets": { + "get": { + "summary": "Lists the datasets that this sled is configured to use", + "operationId": "datasets_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DatasetsConfig" + } + } } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" } - ], + } + }, + "put": { + "summary": "Configures datasets to be used on this sled", + "operationId": "datasets_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DiskEnsureBody" + "$ref": "#/components/schemas/DatasetsConfig" } } },
@@ -206,7 +218,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DiskRuntimeState" + "$ref": "#/components/schemas/DatasetsManagementResult" } } }
@@ -220,16 +232,17 @@ } } }, - "/instances/{instance_id}": { + "/disks/{disk_id}": { "put": { - "operationId": "instance_register", + "operationId": "disk_put", "parameters": [ { "in": "path", - "name": "instance_id", + "name": "disk_id", "required": true, "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" + "type": "string", + "format": "uuid" } } ],
@@ -237,7 +250,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceEnsureBody" + "$ref": "#/components/schemas/DiskEnsureBody" } } },
@@ -249,7 +262,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/DiskRuntimeState" } } }
@@ -261,26 +274,19 @@ "$ref": "#/components/responses/Error" } } - }, - "delete": { - "operationId": "instance_unregister", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + } + }, + "/inventory": { + "get": { + "summary": "Fetch basic information about this sled", + "operationId": "inventory", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceUnregisterResponse" + "$ref": "#/components/schemas/Inventory" } } }
@@ -294,47 +300,18 @@ } } }, - "/instances/{instance_id}/disks/{disk_id}/snapshot": { - "post": { - "summary": "Take a snapshot of a disk that is attached to an instance", - "operationId": "instance_issue_disk_snapshot_request", - "parameters": [ - { - "in": "path", - "name": "disk_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - }, - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceIssueDiskSnapshotRequestBody" - } - } - }, - "required": true - }, + "/network-bootstore-config": { + "get": { + "summary": "This API endpoint is only reading the local sled agent's view of the", + "description": "bootstore. The bootstore is a distributed data store that is eventually consistent.
Reads from individual nodes may not represent the latest state.", + "operationId": "read_network_bootstore_config_cache", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceIssueDiskSnapshotRequestResponse" + "$ref": "#/components/schemas/EarlyNetworkConfig" } } } @@ -346,26 +323,14 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/instances/{instance_id}/external-ip": { + }, "put": { - "operationId": "instance_put_external_ip", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "operationId": "write_network_bootstore_config", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceExternalIpBody" + "$ref": "#/components/schemas/EarlyNetworkConfig" } } }, @@ -382,32 +347,52 @@ "$ref": "#/components/responses/Error" } } - }, - "delete": { - "operationId": "instance_delete_external_ip", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" + } + }, + "/omicron-physical-disks": { + "get": { + "operationId": "omicron_physical_disks_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + } + } } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" } - ], + } + }, + "put": { + "operationId": "omicron_physical_disks_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceExternalIpBody" + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DisksManagementResult" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -418,26 +403,16 @@ } } }, - "/instances/{instance_id}/state": { + "/omicron-zones": { "get": { - "operationId": "instance_get_state", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "operationId": "omicron_zones_get", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/OmicronZonesConfig" } } } @@ -451,37 +426,20 @@ } }, "put": { - "operationId": "instance_put_state", - "parameters": [ - { - "in": "path", - "name": "instance_id", - "required": true, - "schema": { - "$ref": "#/components/schemas/TypedUuidForInstanceKind" - } - } - ], + "operationId": "omicron_zones_put", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstancePutStateBody" + "$ref": "#/components/schemas/OmicronZonesConfig" } } }, "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstancePutStateResponse" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -492,17 +450,17 @@ } } }, - 
"/inventory": { + "/sled-identifiers": { "get": { - "summary": "Fetch basic information about this sled", - "operationId": "inventory", + "summary": "Fetch sled identifiers", + "operationId": "sled_identifiers", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Inventory" + "$ref": "#/components/schemas/SledIdentifiers" } } } @@ -516,18 +474,16 @@ } } }, - "/network-bootstore-config": { + "/sled-role": { "get": { - "summary": "This API endpoint is only reading the local sled agent's view of the", - "description": "bootstore. The boostore is a distributed data store that is eventually consistent. Reads from individual nodes may not represent the latest state.", - "operationId": "read_network_bootstore_config_cache", + "operationId": "sled_role_get", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EarlyNetworkConfig" + "$ref": "#/components/schemas/SledRole" } } } @@ -539,14 +495,17 @@ "$ref": "#/components/responses/Error" } } - }, + } + }, + "/sleds": { "put": { - "operationId": "write_network_bootstore_config", + "summary": "Add a sled to a rack that was already initialized via RSS", + "operationId": "sled_add", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EarlyNetworkConfig" + "$ref": "#/components/schemas/AddSledRequest" } } }, @@ -565,16 +524,42 @@ } } }, - "/omicron-physical-disks": { + "/switch-ports": { + "post": { + "operationId": "uplink_ensure", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SwitchPorts" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/timesync": { "get": { - "operationId": "omicron_physical_disks_get", + "operationId": "timesync_get", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + "$ref": "#/components/schemas/TimeSync" } } } @@ -586,29 +571,24 @@ "$ref": "#/components/responses/Error" } } - }, - "put": { - "operationId": "omicron_physical_disks_put", + } + }, + "/update": { + "post": { + "operationId": "update_artifact", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + "$ref": "#/components/schemas/UpdateArtifactId" } } }, "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DisksManagementResult" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -619,16 +599,21 @@ } } }, - "/omicron-zones": { + "/v2p": { "get": { - "operationId": "omicron_zones_get", + "summary": "List v2p mappings present on sled", + "operationId": "list_v2p", "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronZonesConfig" + "title": "Array_of_VirtualNetworkInterfaceHost", + "type": "array", + "items": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } } } } @@ -642,12 +627,38 @@ } }, "put": { - "operationId": "omicron_zones_put", + 
"summary": "Create a mapping from a virtual NIC to a physical host", + "operationId": "set_v2p", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/OmicronZonesConfig" + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Delete a mapping from a virtual NIC to a physical host", + "operationId": "del_v2p", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" } } }, @@ -666,17 +677,36 @@ } } }, - "/sled-identifiers": { - "get": { - "summary": "Fetch sled identifiers", - "operationId": "sled_identifiers", + "/vmms/{propolis_id}": { + "put": { + "operationId": "vmm_register", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceEnsureBody" + } + } + }, + "required": true + }, "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledIdentifiers" + "$ref": "#/components/schemas/SledVmmState" } } } @@ -688,18 +718,26 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/sled-role": { - "get": { - "operationId": "sled_role_get", + }, + "delete": { + "operationId": "vmm_unregister", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SledRole" + "$ref": "#/components/schemas/VmmUnregisterResponse" } } } @@ -713,23 +751,49 @@ } } }, - "/sleds": { - "put": { - "summary": "Add a sled to a rack that was already initialized via RSS", - "operationId": "sled_add", + "/vmms/{propolis_id}/disks/{disk_id}/snapshot": { + "post": { + "summary": "Take a snapshot of a disk that is attached to an instance", + "operationId": "vmm_issue_disk_snapshot_request", + "parameters": [ + { + "in": "path", + "name": "disk_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AddSledRequest" + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestBody" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmIssueDiskSnapshotRequestResponse" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -740,14 +804,24 @@ } } }, - "/switch-ports": { - "post": { - "operationId": "uplink_ensure", + "/vmms/{propolis_id}/external-ip": { + "put": { + "operationId": "vmm_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": 
"#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SwitchPorts" + "$ref": "#/components/schemas/InstanceExternalIpBody" } } }, @@ -764,39 +838,24 @@ "$ref": "#/components/responses/Error" } } - } - }, - "/timesync": { - "get": { - "operationId": "timesync_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/TimeSync" - } - } + }, + "delete": { + "operationId": "vmm_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" } - } - } - }, - "/update": { - "post": { - "operationId": "update_artifact", + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UpdateArtifactId" + "$ref": "#/components/schemas/InstanceExternalIpBody" } } }, @@ -815,21 +874,26 @@ } } }, - "/v2p": { + "/vmms/{propolis_id}/state": { "get": { - "summary": "List v2p mappings present on sled", - "operationId": "list_v2p", + "operationId": "vmm_get_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" + } + } + ], "responses": { "200": { "description": "successful operation", "content": { "application/json": { "schema": { - "title": "Array_of_VirtualNetworkInterfaceHost", - "type": "array", - "items": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" - } + "$ref": "#/components/schemas/SledVmmState" } } } @@ -843,46 +907,37 @@ } }, "put": { - "summary": "Create a mapping from a virtual NIC to a physical host", - "operationId": "set_v2p", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" - } + "operationId": "vmm_put_state", + "parameters": [ + { + "in": "path", + "name": "propolis_id", + "required": true, + "schema": { + "$ref": "#/components/schemas/TypedUuidForPropolisKind" } - }, - "required": true - }, - "responses": { - "204": { - "description": "resource updated" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" } - } - }, - "delete": { - "summary": "Delete a mapping from a virtual NIC to a physical host", - "operationId": "del_v2p", + ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VirtualNetworkInterfaceHost" + "$ref": "#/components/schemas/VmmPutStateBody" } } }, "required": true }, "responses": { - "204": { - "description": "resource updated" + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VmmPutStateResponse" + } + } + } }, "4XX": { "$ref": "#/components/responses/Error" @@ -2006,6 +2061,112 @@ } ] }, + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "gzip" + ] + } + }, + 
"required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "level": { + "$ref": "#/components/schemas/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + }, + "required": [ + "level", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "CrucibleOpts": { "description": "CrucibleOpts\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"lossy\", \"read_only\", \"target\" ], \"properties\": { \"cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"control\": { \"type\": [ \"string\", \"null\" ] }, \"flush_timeout\": { \"type\": [ \"number\", \"null\" ], \"format\": \"float\" }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"key\": { \"type\": [ \"string\", \"null\" ] }, \"key_pem\": { \"type\": [ \"string\", \"null\" ] }, \"lossy\": { \"type\": \"boolean\" }, \"read_only\": { \"type\": \"boolean\" }, \"root_cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"target\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } } } } ```
", "type": "object", @@ -2059,6 +2220,128 @@ "target" ] }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/components/schemas/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetName" + } + ] + }, + "quota": { + "nullable": true, + "description": "The upper bound on the amount of storage used by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "reservation": { + "nullable": true, + "description": "The lower bound on the amount of storage usable by this dataset", + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "compression", + "id", + "name" + ] + }, + "DatasetKind": { + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetManagementStatus": { + "description": "Identifies how a single dataset management operation may have succeeded or failed.", + "type": "object", + "properties": { + "dataset_name": { + "$ref": "#/components/schemas/DatasetName" + }, + "err": { + "nullable": true, + "type": "string" + } + }, + "required": [ + "dataset_name" + ] + }, + "DatasetName": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/DatasetKind" + }, + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "kind", + "pool_name" + ] + }, + "DatasetsConfig": { + "type": "object", + "properties": { + "datasets": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/DatasetConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "datasets", + "generation" + ] + }, + "DatasetsManagementResult": { + "description": "The result from attempting to manage datasets.", + "type": "object", + "properties": { + "status": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetManagementStatus" + } + } + }, + "required": [ + "status" + ] + }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -2701,6 +2984,11 @@ "format": "uint64", "minimum": 0 }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "HostIdentifier": { "description": "A `HostIdentifier` represents either an IP host or network (v4 or v6), or an entire VPC (identified by its VNI). 
It is used in firewall rule host filters.", "oneOf": [ @@ -2752,6 +3040,14 @@ "$ref": "#/components/schemas/UplinkAddressConfig" } }, + "lldp": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Switchport to use for external connectivity", "type": "string" @@ -2829,6 +3125,14 @@ } ] }, + "instance_id": { + "description": "The ID of the instance for which this VMM is being created.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForInstanceKind" + } + ] + }, "instance_runtime": { "description": "The instance runtime state for the instance being registered.", "allOf": [ @@ -2849,14 +3153,6 @@ "description": "The address at which this VMM should serve a Propolis server API.", "type": "string" }, - "propolis_id": { - "description": "The ID of the VMM being registered. This may not be the active VMM ID in the instance runtime state (e.g. if the new VMM is going to be a migration target).", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, "vmm_runtime": { "description": "The initial VMM runtime state for the VMM being registered.", "allOf": [ @@ -2868,10 +3164,10 @@ }, "required": [ "hardware", + "instance_id", "instance_runtime", "metadata", "propolis_addr", - "propolis_id", "vmm_runtime" ] }, @@ -2977,30 +3273,6 @@ "source_nat" ] }, - "InstanceIssueDiskSnapshotRequestBody": { - "type": "object", - "properties": { - "snapshot_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "snapshot_id" - ] - }, - "InstanceIssueDiskSnapshotRequestResponse": { - "type": "object", - "properties": { - "snapshot_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "snapshot_id" - ] - }, "InstanceMetadata": { "description": "Metadata used to track statistics about an instance.", "type": "object", @@ -3063,38 +3335,6 @@ "ncpus" ] }, - "InstancePutStateBody": { - "description": "The body of a request to move a previously-ensured instance into a specific runtime state.", - "type": "object", - "properties": { - "state": { - "description": "The state into which the instance should be driven.", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceStateRequested" - } - ] - } - }, - "required": [ - "state" - ] - }, - "InstancePutStateResponse": { - "description": "The response sent from a request to move an instance into a specific runtime state.", - "type": "object", - "properties": { - "updated_runtime": { - "nullable": true, - "description": "The current runtime state of the instance after handling the request to change its state. 
If the instance's state did not change, this field is `None`.", - "allOf": [ - { - "$ref": "#/components/schemas/SledInstanceState" - } - ] - } - } - }, "InstanceRuntimeState": { "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", "type": "object", @@ -3121,110 +3361,26 @@ "description": "If a migration is active, the ID of that migration.", "type": "string", "format": "uuid" - }, - "propolis_id": { - "nullable": true, - "description": "The instance's currently active VMM ID.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, - "time_updated": { - "description": "Timestamp for this information.", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "gen", - "time_updated" - ] - }, - "InstanceStateRequested": { - "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", - "oneOf": [ - { - "description": "Run this instance by migrating in from a previous running incarnation of the instance.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "migration_target" - ] - }, - "value": { - "$ref": "#/components/schemas/InstanceMigrationTargetParams" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "description": "Start the instance if it is not already running.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "running" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Stop the instance.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "stopped" - ] - } - }, - "required": [ - "type" - ] - }, - { - "description": "Immediately reset the instance, as though it had stopped and immediately began to run again.", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "reboot" - ] - } - }, - "required": [ - "type" - ] - } - ] - }, - "InstanceUnregisterResponse": { - "description": "The response sent from a request to unregister an instance.", - "type": "object", - "properties": { - "updated_runtime": { + }, + "propolis_id": { "nullable": true, - "description": "The current state of the instance after handling the request to unregister it. If the instance's state did not change, this field is `None`.", + "description": "The instance's currently active VMM ID.", "allOf": [ { - "$ref": "#/components/schemas/SledInstanceState" + "$ref": "#/components/schemas/TypedUuidForPropolisKind" } ] + }, + "time_updated": { + "description": "Timestamp for this information.", + "type": "string", + "format": "date-time" } - } + }, + "required": [ + "gen", + "time_updated" + ] }, "Inventory": { "description": "Identity and basic status information about this sled agent", @@ -3434,6 +3590,67 @@ "minLength": 1, "maxLength": 11 }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. 
If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -4134,6 +4351,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4409,6 +4635,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -4611,8 +4845,27 @@ "sled_id" ] }, - "SledInstanceState": { - "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", + "SledRole": { + "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", + "oneOf": [ + { + "description": "The sled is a general compute sled.", + "type": "string", + "enum": [ + "gimlet" + ] + }, + { + "description": "The sled is attached to the network switch, and has additional responsibilities.", + "type": "string", + "enum": [ + "scrimlet" + ] + } + ] + }, + "SledVmmState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a VMM.", "type": "object", "properties": { "migration_in": { @@ -4633,14 +4886,6 @@ } ] }, - "propolis_id": { - "description": "The ID of the VMM whose state is being reported.", - "allOf": [ - { - "$ref": "#/components/schemas/TypedUuidForPropolisKind" - } - ] - }, "vmm_state": { "description": "The most recent state of the sled's VMM process.", "allOf": [ @@ -4651,29 +4896,9 @@ } }, "required": [ - "propolis_id", "vmm_state" ] }, - "SledRole": { - "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", - "oneOf": [ - { - "description": "The sled is a general compute sled.", - "type": "string", - "enum": [ - "gimlet" - ] - }, - { - "description": "The sled is 
attached to the network switch, and has additional responsibilities.", - "type": "string", - "enum": [ - "scrimlet" - ] - } - ] - }, "Slot": { "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\n\n
JSON schema\n\n```json { \"description\": \"A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\", \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 } ```
", "type": "integer", @@ -4856,6 +5081,14 @@ "sync" ] }, + "TypedUuidForDatasetKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForInstanceKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForPropolisKind": { "type": "string", "format": "uuid" @@ -4940,6 +5173,62 @@ "vni" ] }, + "VmmIssueDiskSnapshotRequestBody": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmIssueDiskSnapshotRequestResponse": { + "type": "object", + "properties": { + "snapshot_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "snapshot_id" + ] + }, + "VmmPutStateBody": { + "description": "The body of a request to move a previously-ensured instance into a specific runtime state.", + "type": "object", + "properties": { + "state": { + "description": "The state into which the instance should be driven.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmStateRequested" + } + ] + } + }, + "required": [ + "state" + ] + }, + "VmmPutStateResponse": { + "description": "The response sent from a request to move an instance into a specific runtime state.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current runtime state of the instance after handling the request to change its state. If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, "VmmRuntimeState": { "description": "The dynamic runtime properties of an individual VMM process.", "type": "object", @@ -5033,6 +5322,90 @@ } ] }, + "VmmStateRequested": { + "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", + "oneOf": [ + { + "description": "Run this instance by migrating in from a previous running incarnation of the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "migration_target" + ] + }, + "value": { + "$ref": "#/components/schemas/InstanceMigrationTargetParams" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "Start the instance if it is not already running.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "running" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Stop the instance.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "stopped" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Immediately reset the instance, as though it had stopped and immediately began to run again.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "reboot" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, + "VmmUnregisterResponse": { + "description": "The response sent from a request to unregister an instance.", + "type": "object", + "properties": { + "updated_runtime": { + "nullable": true, + "description": "The current state of the instance after handling the request to unregister it. 
If the instance's state did not change, this field is `None`.", + "allOf": [ + { + "$ref": "#/components/schemas/SledVmmState" + } + ] + } + } + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", @@ -5352,10 +5725,6 @@ "A", "B" ] - }, - "TypedUuidForInstanceKind": { - "type": "string", - "format": "uuid" } }, "responses": { diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 757383897bf..87cfe045d33 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1773,6 +1773,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", @@ -3062,6 +3123,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -6296,6 +6365,15 @@ "$ref": "#/components/schemas/UserSpecifiedBgpPeerConfig" } }, + "lldp": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "routes": { "type": "array", "items": { diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 5da9a1dfa8a..b13fbd3938c 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -17,8 +17,6 @@ use futures::TryStreamExt; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::IdSortMode; -use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::backoff; use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; @@ -381,6 +379,7 @@ impl OximeterAgent { db_config: DbConfig, resolver: &Resolver, log: &Logger, + replicated: bool, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); let log = log.new(o!( @@ -394,10 +393,15 @@ impl OximeterAgent { // database. let db_address = if let Some(address) = db_config.address { address + } else if replicated { + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::ClickhouseServer) + .await?, + ) } else { - SocketAddr::new( - resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + SocketAddr::V6( + resolver.lookup_socket_v6(ServiceName::Clickhouse).await?, ) }; @@ -423,7 +427,6 @@ impl OximeterAgent { .. 
}) => { debug!(log, "oximeter database does not exist, creating"); - let replicated = client.is_oximeter_cluster().await?; client .initialize_db_with_version( replicated, @@ -816,7 +819,7 @@ async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { async fn resolve_nexus_with_backoff( log: &Logger, resolver: &Resolver, -) -> SocketAddr { +) -> SocketAddrV6 { let log_failure = |error, delay| { warn!( log, @@ -827,12 +830,9 @@ async fn resolve_nexus_with_backoff( }; let do_lookup = || async { resolver - .lookup_ipv6(ServiceName::Nexus) + .lookup_socket_v6(ServiceName::Nexus) .await .map_err(|e| BackoffError::transient(e.to_string())) - .map(|ip| { - SocketAddr::V6(SocketAddrV6::new(ip, NEXUS_INTERNAL_PORT, 0, 0)) - }) }; backoff::retry_notify( backoff::retry_policy_internal_service(), diff --git a/oximeter/collector/src/bin/clickhouse-schema-updater.rs b/oximeter/collector/src/bin/clickhouse-schema-updater.rs index 20780c37e07..8e432e87c61 100644 --- a/oximeter/collector/src/bin/clickhouse-schema-updater.rs +++ b/oximeter/collector/src/bin/clickhouse-schema-updater.rs @@ -11,7 +11,7 @@ use anyhow::Context; use camino::Utf8PathBuf; use clap::Parser; use clap::Subcommand; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; use oximeter_db::model::OXIMETER_VERSION; use oximeter_db::Client; use slog::Drain; @@ -24,7 +24,7 @@ use std::net::SocketAddrV6; const DEFAULT_HOST: SocketAddr = SocketAddr::V6(SocketAddrV6::new( Ipv6Addr::LOCALHOST, - CLICKHOUSE_PORT, + CLICKHOUSE_HTTP_PORT, 0, 0, )); diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 02bf9152f42..0576c7d5325 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -14,7 +14,6 @@ use dropshot::HttpServerStarter; use internal_dns::resolver::ResolveError; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use omicron_common::FileKv; @@ -79,12 +78,18 @@ pub struct DbConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub address: Option, - /// Batch size of samples at which to insert + /// Batch size of samples at which to insert. pub batch_size: usize, /// Interval on which to insert data into the database, regardless of the number of collected /// samples. Value is in seconds. pub batch_interval: u64, + + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): This field + // should be removed if single node functionality is removed. + /// Whether ClickHouse is running as a replicated cluster or + /// single-node server. + pub replicated: bool, } impl DbConfig { @@ -96,12 +101,16 @@ impl DbConfig { /// ClickHouse. pub const DEFAULT_BATCH_INTERVAL: u64 = 5; + /// Default ClickHouse topology. 
+ pub const DEFAULT_REPLICATED: bool = false; + // Construct config with an address, using the defaults for other fields fn with_address(address: SocketAddr) -> Self { Self { address: Some(address), batch_size: Self::DEFAULT_BATCH_SIZE, batch_interval: Self::DEFAULT_BATCH_INTERVAL, + replicated: Self::DEFAULT_REPLICATED, } } } @@ -208,6 +217,7 @@ impl Oximeter { config.db, &resolver, &log, + config.db.replicated, ) .await?, )) @@ -251,14 +261,14 @@ impl Oximeter { let nexus_address = if let Some(address) = config.nexus_address { address } else { - SocketAddr::V6(SocketAddrV6::new( - resolver.lookup_ipv6(ServiceName::Nexus).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?, - NEXUS_INTERNAL_PORT, - 0, - 0, - )) + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::Nexus) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + ) }; let client = nexus_client::Client::new( &format!("http://{nexus_address}"), diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json new file mode 100644 index 00000000000..5d325281abe --- /dev/null +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -0,0 +1,91 @@ +{ + "oximeter_collector:collections": { + "timeseries_name": "oximeter_collector:collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.069658599Z" + }, + "oximeter_collector:failed_collections": { + "timeseries_name": "oximeter_collector:failed_collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + }, + { + "name": "reason", + "field_type": "string", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.070765692Z" + } +} \ No newline at end of file diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index e3cf089cb5e..2a9c615da23 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -24,6 +24,7 @@ num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true +oxql-types.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true @@ -89,6 +90,7 @@ expectorate.workspace = true indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = true +oximeter-test-utils.workspace = true slog-dtrace.workspace = true sqlformat.workspace = true sqlparser.workspace = 
true diff --git a/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql new file mode 100644 index 00000000000..04158b36ce3 --- /dev/null +++ b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql new file mode 100644 index 00000000000..2e35dd27939 --- /dev/null +++ b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql new file mode 100644 index 00000000000..25e5303e5ad --- /dev/null +++ b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql new file mode 100644 index 00000000000..f26fdedbb69 --- /dev/null +++ b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql new file mode 100644 index 00000000000..1bc623f4187 --- /dev/null +++ b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql new file mode 100644 index 00000000000..b98bba1e88e --- /dev/null +++ b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql new file mode 100644 index 00000000000..bf3c16dde5a --- /dev/null +++ b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql new file mode 100644 index 00000000000..3ddb0eec845 --- /dev/null 
+++ b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql new file mode 100644 index 00000000000..58d599cf49a --- /dev/null +++ b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql new file mode 100644 index 00000000000..94696b7b060 --- /dev/null +++ b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql new file mode 100644 index 00000000000..f621033d564 --- /dev/null +++ b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql new file mode 100644 index 00000000000..4a01da9e740 --- /dev/null +++ b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql new file mode 100644 index 00000000000..173d8034374 --- /dev/null +++ b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql new file mode 100644 index 00000000000..d9fcc84eba2 --- /dev/null +++ b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql new file mode 100644 index 00000000000..8c9aecca9d6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql new file mode 100644 index 00000000000..8d071424f65 --- /dev/null +++ b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql new file mode 100644 index 00000000000..ac5fa948ae9 --- /dev/null +++ b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql new file mode 100644 index 00000000000..3caa1b93f64 --- /dev/null +++ b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql new file mode 100644 index 00000000000..ed6978c7e61 --- /dev/null +++ b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql new file mode 100644 index 00000000000..81ce8626a7a --- /dev/null +++ b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql new file mode 100644 index 00000000000..2a7c757dc89 --- /dev/null +++ b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql new file mode 100644 index 00000000000..cbe0b08fe44 --- /dev/null +++ b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql new file mode 100644 index 00000000000..d4854807b7c --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql new file mode 100644 index 00000000000..c84b634a005 --- /dev/null +++ b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql new file mode 100644 index 00000000000..60c28c0047e --- /dev/null +++ b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql new file mode 100644 index 00000000000..b38cdda831a --- /dev/null +++ b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql new file mode 100644 index 00000000000..cd533ffd8f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql new file mode 100644 index 00000000000..1ea7093d8fc --- /dev/null +++ b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql new file mode 100644 index 00000000000..f9f64647294 --- /dev/null +++ b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql new file mode 100644 index 00000000000..7c37ee9b21b --- /dev/null +++ b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql 
b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql new file mode 100644 index 00000000000..b15eab93877 --- /dev/null +++ b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql new file mode 100644 index 00000000000..caa96ab5eb8 --- /dev/null +++ b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql new file mode 100644 index 00000000000..25af5ee660f --- /dev/null +++ b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql new file mode 100644 index 00000000000..e85bd845d4d --- /dev/null +++ b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql new file mode 100644 index 00000000000..d287a02c6f2 --- /dev/null +++ b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql new file mode 100644 index 00000000000..02eb09c3001 --- /dev/null +++ b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/timeseries-to-delete.txt b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt new file mode 100644 index 00000000000..40b90e05ffc --- /dev/null +++ b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/replicated/11/timeseries-to-delete.txt b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt new file mode 100644 index 00000000000..4f0301a6b5c --- /dev/null +++ b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git 
a/oximeter/db/schema/replicated/12/timeseries-to-delete.txt b/oximeter/db/schema/replicated/12/timeseries-to-delete.txt new file mode 100644 index 00000000000..40b90e05ffc --- /dev/null +++ b/oximeter/db/schema/replicated/12/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/replicated/db-init-1.sql b/oximeter/db/schema/replicated/db-init-1.sql index 176e5b64f71..4eac2b4e37a 100644 --- a/oximeter/db/schema/replicated/db-init-1.sql +++ b/oximeter/db/schema/replicated/db-init-1.sql @@ -78,10 +78,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster AS oximeter.fields_i64_local @@ -93,10 +95,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_uuid_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster AS oximeter.fields_uuid_local diff --git a/oximeter/db/schema/replicated/db-init-2.sql b/oximeter/db/schema/replicated/db-init-2.sql index ae0431ec848..51e64e20e07 100644 --- a/oximeter/db/schema/replicated/db-init-2.sql +++ b/oximeter/db/schema/replicated/db-init-2.sql @@ -595,10 +595,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster AS oximeter.fields_bool_local @@ -609,10 +611,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_ipaddr_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster AS oximeter.fields_ipaddr_local @@ -623,10 +627,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value 
String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_string_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster AS oximeter.fields_string_local @@ -637,10 +643,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster AS oximeter.fields_i8_local @@ -651,10 +659,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster AS oximeter.fields_u8_local @@ -665,10 +675,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster AS oximeter.fields_i16_local @@ -679,10 +691,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster AS oximeter.fields_u16_local @@ -693,10 +707,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT 
EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster AS oximeter.fields_i32_local @@ -707,10 +723,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster AS oximeter.fields_u32_local @@ -721,10 +739,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster AS oximeter.fields_u64_local diff --git a/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql new file mode 100644 index 00000000000..86f46a43bf3 --- /dev/null +++ b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql new file mode 100644 index 00000000000..6ebec2d5068 --- /dev/null +++ b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql new file mode 100644 index 00000000000..cc07b8cd1db --- /dev/null +++ b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql new file mode 100644 index 00000000000..884b5ffed60 --- /dev/null +++ b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql new file mode 100644 index 00000000000..ef569d80c30 --- /dev/null +++ b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MATERIALIZE COLUMN last_updated_at; diff --git 
a/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql new file mode 100644 index 00000000000..adfc3dd1a40 --- /dev/null +++ b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql new file mode 100644 index 00000000000..0f4e43ce2c8 --- /dev/null +++ b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql new file mode 100644 index 00000000000..8dcbb32bb2b --- /dev/null +++ b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql new file mode 100644 index 00000000000..11a83bde7a9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql new file mode 100644 index 00000000000..d27f38f94f6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql new file mode 100644 index 00000000000..cd60a2a1e9e --- /dev/null +++ b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql new file mode 100644 index 00000000000..5b1b2fcfb61 --- /dev/null +++ b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql new file mode 100644 index 00000000000..a71753f95d1 --- /dev/null +++ b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql new file mode 100644 index 00000000000..c8dbfb494ed --- /dev/null +++ 
b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql new file mode 100644 index 00000000000..30da688c8c9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql new file mode 100644 index 00000000000..eb0f377e2d0 --- /dev/null +++ b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql new file mode 100644 index 00000000000..9cd4fa05c8c --- /dev/null +++ b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql new file mode 100644 index 00000000000..52306340974 --- /dev/null +++ b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql new file mode 100644 index 00000000000..9d967784e92 --- /dev/null +++ b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql new file mode 100644 index 00000000000..f625138b59b --- /dev/null +++ b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql new file mode 100644 index 00000000000..fc80ce7102d --- /dev/null +++ b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql new file mode 100644 index 00000000000..26256d3924e --- /dev/null +++ b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git 
a/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql new file mode 100644 index 00000000000..a81294e5356 --- /dev/null +++ b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql new file mode 100644 index 00000000000..43ca166755d --- /dev/null +++ b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql new file mode 100644 index 00000000000..46074c79ce0 --- /dev/null +++ b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql new file mode 100644 index 00000000000..a68d449de79 --- /dev/null +++ b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql new file mode 100644 index 00000000000..48afb51bf1f --- /dev/null +++ b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql new file mode 100644 index 00000000000..d3c6be90726 --- /dev/null +++ b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql new file mode 100644 index 00000000000..5bdffd4b2ed --- /dev/null +++ b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql new file mode 100644 index 00000000000..4551db90cde --- /dev/null +++ b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql new file mode 100644 index 00000000000..024c5f8f94f --- /dev/null 
+++ b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql new file mode 100644 index 00000000000..67d3b7a5966 --- /dev/null +++ b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql new file mode 100644 index 00000000000..c5272df459a --- /dev/null +++ b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql new file mode 100644 index 00000000000..8d01b382fec --- /dev/null +++ b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql new file mode 100644 index 00000000000..06fbd94d02f --- /dev/null +++ b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql new file mode 100644 index 00000000000..481055d4f5e --- /dev/null +++ b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/timeseries-to-delete.txt b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt new file mode 100644 index 00000000000..40b90e05ffc --- /dev/null +++ b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/single-node/11/timeseries-to-delete.txt b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt new file mode 100644 index 00000000000..4f0301a6b5c --- /dev/null +++ b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git a/oximeter/db/schema/single-node/12/timeseries-to-delete.txt b/oximeter/db/schema/single-node/12/timeseries-to-delete.txt new file mode 100644 index 00000000000..40b90e05ffc --- /dev/null +++ b/oximeter/db/schema/single-node/12/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 38e9d0b70c7..184951feeb6 
100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -504,126 +504,158 @@ TTL toDateTime(timestamp) + INTERVAL 30 DAY; * timeseries name and then key, since it would improve lookups where one * already has the key. Realistically though, these tables are quite small and * so performance benefits will be low in absolute terms. + * + * TTL: We use a materialized column to expire old field table records. This + * column is generated automatically by the database whenever a new row is + * inserted. It cannot be inserted directly, nor is it returned in a `SELECT *` + * query. Since these tables are `ReplacingMergeTree`s, that means the last + * record will remain during a deduplication, which will have the last + * timestamp. ClickHouse will then expire old data for us, similar to the + * measurement tables. */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) 
ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; /* The timeseries schema table stores the extracted schema for the samples * oximeter collects. 
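-- Illustrative sketch (not part of the migrations above): how the
-- MATERIALIZED TTL column described in the db-init comment behaves. Table and
-- column names come from the schema above; the sample values are examples only.
-- An INSERT names only the real columns; ClickHouse fills in `last_updated_at`
-- with now() automatically (a MATERIALIZED column cannot be inserted directly):
INSERT INTO oximeter.fields_uuid (timeseries_name, timeseries_key, field_name, field_value)
VALUES ('demo:metric', 1, 'id', generateUUIDv4());
-- `SELECT *` omits MATERIALIZED columns, so ask for the column by name to
-- inspect it:
SELECT *, last_updated_at FROM oximeter.fields_uuid LIMIT 1;
-- During merges, rows older than the table's TTL
-- (`last_updated_at + INTERVAL 30 DAY`) are expired, and because the engine is
-- a ReplacingMergeTree, deduplication keeps the most recently inserted row.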
diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 30ae4b68d22..c2b07ebaa6f 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -22,8 +22,6 @@ use crate::Error; use crate::Metric; use crate::Target; use crate::Timeseries; -use crate::TimeseriesKey; -use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; @@ -31,7 +29,9 @@ use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; +use oximeter::schema::TimeseriesKey; use oximeter::types::Sample; +use oximeter::TimeseriesName; use regex::Regex; use regex::RegexBuilder; use slog::debug; @@ -1191,7 +1191,6 @@ mod tests { }; use omicron_test_utils::dev::test_setup_log; use oximeter::histogram::Histogram; - use oximeter::test_util; use oximeter::types::MissingDatum; use oximeter::Datum; use oximeter::FieldValue; @@ -1723,7 +1722,7 @@ mod tests { let samples = { let mut s = Vec::with_capacity(8); for _ in 0..s.capacity() { - s.push(test_util::make_hist_sample()) + s.push(oximeter_test_utils::make_hist_sample()) } s }; @@ -1762,7 +1761,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample]).await.unwrap(); let bad_name = name_mismatch::TestTarget { @@ -1770,7 +1769,7 @@ mod tests { name2: "second_name".into(), num: 2, }; - let metric = test_util::TestMetric { + let metric = oximeter_test_utils::TestMetric { id: uuid::Uuid::new_v4(), good: true, datum: 1, @@ -1792,7 +1791,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); // Verify that this sample is considered new, i.e., we return rows to update the timeseries // schema table. @@ -1867,7 +1866,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let sample = samples.first().unwrap(); @@ -1956,7 +1955,7 @@ mod tests { // we'd like to exercise the logic of ClickHouse's replacing merge tree engine. 
let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; async fn assert_table_count( @@ -2631,7 +2630,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let original_schema = client.schema.lock().await.clone(); @@ -2656,7 +2655,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 100u32.try_into().unwrap(); @@ -2691,7 +2690,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 7u32.try_into().unwrap(); @@ -3364,7 +3363,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data hasn't been dropped. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3438,7 +3437,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data gets dropped later. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3464,7 +3463,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; client.insert_samples(&samples).await.unwrap(); // Get the count of schema directly from the DB, which should have just @@ -3549,7 +3548,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; // We're using the components of the `insert_samples()` method here, // which has been refactored explicitly for this test. We need to insert diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 29586b8189b..4005fa873e8 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -18,7 +18,7 @@ use crate::query::field_table_name; use crate::Error; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; +use oximeter::schema::TimeseriesKey; use oximeter::TimeseriesSchema; use slog::debug; use slog::trace; @@ -68,7 +68,7 @@ pub struct OxqlResult { pub query_summaries: Vec, /// The list of OxQL tables returned from the query. 
- pub tables: Vec, + pub tables: Vec, } /// The maximum number of data values fetched from the database for an OxQL @@ -479,7 +479,9 @@ impl Client { query_id, total_duration: query_start.elapsed(), query_summaries, - tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + tables: vec![oxql_types::Table::new( + schema.timeseries_name.as_str(), + )], }; return Ok(result); } @@ -503,7 +505,7 @@ impl Client { // At this point, let's construct a set of tables and run the results // through the transformation pipeline. - let mut tables = vec![oxql::Table::from_timeseries( + let mut tables = vec![oxql_types::Table::from_timeseries( schema.timeseries_name.as_str(), timeseries_by_key.into_values(), )?]; @@ -553,7 +555,7 @@ impl Client { limit: Option, total_rows_fetched: &mut u64, ) -> Result< - (Vec, BTreeMap), + (Vec, BTreeMap), Error, > { // We'll create timeseries for each key on the fly. To enable computing @@ -624,25 +626,25 @@ impl Client { for (key, measurements) in measurements_by_key.into_iter() { // Construct a new timeseries, from the target/metric info. let (target, metric) = info.get(&key).unwrap(); - let mut timeseries = oxql::Timeseries::new( + let mut timeseries = oxql_types::Timeseries::new( target .fields .iter() .chain(metric.fields.iter()) .map(|field| (field.name.clone(), field.value.clone())), - oxql::point::DataType::try_from(schema.datum_type)?, + oxql_types::point::DataType::try_from(schema.datum_type)?, if schema.datum_type.is_cumulative() { - oxql::point::MetricType::Delta + oxql_types::point::MetricType::Delta } else { - oxql::point::MetricType::Gauge + oxql_types::point::MetricType::Gauge }, )?; // Convert its oximeter measurements into OxQL data types. let points = if schema.datum_type.is_cumulative() { - oxql::point::Points::delta_from_cumulative(&measurements)? + oxql_types::point::Points::delta_from_cumulative(&measurements)? } else { - oxql::point::Points::gauge_from_gauge(&measurements)? + oxql_types::point::Points::gauge_from_gauge(&measurements)? }; timeseries.points = points; debug!( @@ -1108,10 +1110,7 @@ fn update_total_rows_and_check( mod tests { use super::ConsistentKeyGroup; use crate::client::oxql::chunk_consistent_key_groups_impl; - use crate::{ - oxql::{point::Points, Table, Timeseries}, - Client, DbWrite, - }; + use crate::{Client, DbWrite}; use crate::{Metric, Target}; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; @@ -1119,6 +1118,7 @@ mod tests { use omicron_test_utils::dev::test_setup_log; use oximeter::{types::Cumulative, FieldValue}; use oximeter::{DatumType, Sample}; + use oxql_types::{point::Points, Table, Timeseries}; use std::collections::BTreeMap; use std::time::Duration; diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 9ad382c97da..2b3c2d6118d 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -10,10 +10,9 @@ use crate::query::StringFieldSelector; use anyhow::Context as _; use chrono::DateTime; use chrono::Utc; -use dropshot::EmptyScanParams; -use dropshot::PaginationParams; pub use oximeter::schema::FieldSchema; pub use oximeter::schema::FieldSource; +use oximeter::schema::TimeseriesKey; pub use oximeter::schema::TimeseriesName; pub use oximeter::schema::TimeseriesSchema; pub use oximeter::DatumType; @@ -234,10 +233,6 @@ impl From for DbFieldSource { } } -/// Type used to paginate request to list timeseries schema.
-pub type TimeseriesSchemaPaginationParams = - PaginationParams; - #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct TimeseriesScanParams { pub timeseries_name: TimeseriesName, @@ -267,8 +262,6 @@ pub async fn make_client( Ok(client) } -pub(crate) type TimeseriesKey = u64; - // TODO-cleanup: Add the timeseries version in to the computation of the key. // This will require a full drop of the database, since we're changing the // sorting key and the timeseries key on each past sample. See diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index f27df4ed498..d57819b0d01 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -11,13 +11,13 @@ use crate::FieldSchema; use crate::FieldSource; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; use crate::TimeseriesSchema; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; use num::traits::Zero; use oximeter::histogram::Histogram; +use oximeter::schema::TimeseriesKey; use oximeter::traits; use oximeter::types::Cumulative; use oximeter::types::Datum; @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 9; +pub const OXIMETER_VERSION: u64 = 12; // Wrapper type to represent a boolean in the database. // @@ -1880,7 +1880,6 @@ mod tests { use super::*; use chrono::Timelike; use oximeter::histogram::Record; - use oximeter::test_util; use oximeter::Datum; #[test] @@ -1983,7 +1982,7 @@ mod tests { #[test] fn test_unroll_from_source() { - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); let out = unroll_from_source(&sample); assert_eq!(out["oximeter.fields_string"].len(), 2); assert_eq!(out["oximeter.fields_i64"].len(), 1); @@ -2003,8 +2002,8 @@ mod tests { // datum. #[test] fn test_unroll_missing_measurement_row() { - let sample = test_util::make_sample(); - let missing_sample = test_util::make_missing_sample(); + let sample = oximeter_test_utils::make_sample(); + let missing_sample = oximeter_test_utils::make_missing_sample(); let (table_name, row) = unroll_measurement_row(&sample); let (missing_table_name, missing_row) = unroll_measurement_row(&missing_sample); @@ -2022,7 +2021,7 @@ mod tests { #[test] fn test_unroll_measurement_row() { - let sample = test_util::make_hist_sample(); + let sample = oximeter_test_utils::make_hist_sample(); let (table_name, row) = unroll_measurement_row(&sample); assert_eq!(table_name, "oximeter.measurements_histogramf64"); let unpacked: HistogramF64MeasurementRow = diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index a7585402b66..62182ec553a 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -189,11 +189,11 @@ peg::parser! { rule dashed_uuid_literal() -> Uuid = s:$( "\"" - ['a'..='f' | '0'..='9']*<8> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<12> + ['a'..='f' | 'A'..='F' | '0'..='9']*<8> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<12> "\"" ) {? let Some(middle) = s.get(1..37) else { @@ -202,7 +202,7 @@ peg::parser! 
{ middle.parse().or(Err("invalid UUID literal")) } rule undashed_uuid_literal() -> Uuid - = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + = s:$("\"" ['a'..='f' | 'A'..='F' | '0'..='9']*<32> "\"") {? let Some(middle) = s.get(1..33) else { return Err("invalid UUID literal"); }; @@ -279,11 +279,27 @@ peg::parser! { pub rule string_literal() -> Literal = s:string_literal_impl() { Literal::String(s) } + pub(super) rule hex_integer_literal_impl() -> i128 + = n:$("0x" ['0'..='9' | 'a'..='f' | 'A'..='F']+ !['.']) + {? + let Some((maybe_sign, digits)) = n.split_once("0x") else { + return Err("hex literals should start with '0x'"); + }; + i128::from_str_radix(digits, 16).map_err(|_| "invalid hex literal") + } + + pub(super) rule dec_integer_literal_impl() -> i128 + = n:$(['0'..='9']+ !['e' | 'E' | '.']) + {? + n.parse().map_err(|_| "integer literal") + } + pub(super) rule integer_literal_impl() -> i128 - = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + = maybe_sign:$("-"?) n:(hex_integer_literal_impl() / dec_integer_literal_impl()) {? - let Ok(x) = n.parse() else { - return Err("integer literal"); + let sign = if maybe_sign == "-" { -1 } else { 1 }; + let Some(x) = n.checked_mul(sign) else { + return Err("negative overflow"); }; if x < i128::from(i64::MIN) { Err("negative overflow") @@ -734,17 +750,49 @@ mod tests { .is_err()); } + #[test] + fn test_uuid_literal_is_case_insensitive() { + const ID: Uuid = uuid::uuid!("880D82A1-102F-4699-BE1A-7E2A6A469E8E"); + let as_str = format!("\"{ID}\""); + let as_lower = as_str.to_lowercase(); + assert_eq!(query_parser::uuid_literal_impl(&as_str).unwrap(), ID,); + assert_eq!(query_parser::uuid_literal_impl(&as_lower).unwrap(), ID,); + } + #[test] fn test_integer_literal() { assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); - assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); assert!(query_parser::integer_literal_impl("-1.0").is_err()); assert!(query_parser::integer_literal_impl("-1.").is_err()); assert!(query_parser::integer_literal_impl("1e3").is_err()); } + #[test] + fn test_hex_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("0x1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-0x1").unwrap(), -1); + assert_eq!(query_parser::integer_literal_impl("-0xa").unwrap(), -0xa); + assert_eq!( + query_parser::integer_literal_impl("0xfeed").unwrap(), + 0xfeed + ); + assert_eq!( + query_parser::integer_literal_impl("0xFEED").unwrap(), + 0xfeed + ); + + // Out of range in either direction + assert!(query_parser::integer_literal_impl("0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + assert!(query_parser::integer_literal_impl("-0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + + assert!(query_parser::integer_literal_impl("-0x1.0").is_err()); + assert!(query_parser::integer_literal_impl("-0x1.").is_err()); + } + #[test] fn test_double_literal() { assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs index cf54ebc3125..b0cd7d80f12 100644 --- a/oximeter/db/src/oxql/ast/table_ops/align.rs +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -6,19 +6,19 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::query::Alignment; -use crate::oxql::Error; -use 
crate::oxql::Table; -use crate::oxql::Timeseries; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::TimeDelta; use chrono::Utc; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::point::Values; +use oxql_types::Alignment; +use oxql_types::Table; +use oxql_types::Timeseries; use std::time::Duration; // The maximum factor by which an alignment operation may upsample data. @@ -144,7 +144,7 @@ fn align_mean_within( "Alignment by mean requires a gauge or delta metric, not {}", metric_type, ); - verify_max_upsampling_ratio(&points.timestamps, &period)?; + verify_max_upsampling_ratio(points.timestamps(), &period)?; // Always convert the output to doubles, when computing the mean. The // output is always a gauge, so we do not need the start times of the @@ -179,7 +179,7 @@ fn align_mean_within( // - Compute the mean of those. let period_ = TimeDelta::from_std(*period).context("time delta out of range")?; - let first_timestamp = points.timestamps[0]; + let first_timestamp = points.timestamps()[0]; let mut ix: u32 = 0; loop { // Compute the next output timestamp, by shifting the query end time @@ -220,15 +220,15 @@ fn align_mean_within( // entries. let output_value = if matches!(metric_type, MetricType::Gauge) { mean_gauge_value_in_window( - &points.timestamps, + points.timestamps(), &input_points, window_start, output_time, ) } else { mean_delta_value_in_window( - points.start_times.as_ref().unwrap(), - &points.timestamps, + points.start_times().unwrap(), + points.timestamps(), &input_points, window_start, output_time, @@ -255,10 +255,9 @@ fn align_mean_within( ValueArray::Double(output_values.into_iter().rev().collect()); let timestamps = output_timestamps.into_iter().rev().collect(); let values = Values { values, metric_type: MetricType::Gauge }; - new_timeseries.points = - Points { start_times: None, timestamps, values: vec![values] }; - new_timeseries.alignment = - Some(Alignment { end_time: *query_end, period: *period }); + new_timeseries.points = Points::new(None, timestamps, vec![values]); + new_timeseries + .set_alignment(Alignment { end_time: *query_end, period: *period }); output_table.insert(new_timeseries).unwrap(); } Ok(output_table) diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index b6fc533e4d8..ad398da9831 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -12,18 +12,18 @@ use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use crate::shells::special_idents; use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; use oximeter::FieldValue; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use regex::Regex; use std::collections::BTreeSet; use std::fmt; @@ -340,16 +340,13 @@ impl Filter { // Apply the filter to the data points as well. 
let points = self.filter_points(&input.points)?; - // Similar to above, if the filter removes all data points in - // the timeseries, let's remove the timeseries altogether. - if points.is_empty() { - continue; + if let Some(new_timeseries) = input.copy_with_points(points) { + timeseries.push(new_timeseries); + } else { + // None means that the filter removed all data points in + // the timeseries. In that case, we remove the timeseries + // altogether. } - timeseries.push(Timeseries { - fields: input.fields.clone(), - points, - alignment: input.alignment, - }) } output_tables.push(Table::from_timeseries( table.name(), @@ -823,7 +820,7 @@ impl SimpleFilter { ) -> Result, Error> { let ident = self.ident.as_str(); if ident == "timestamp" { - self.filter_points_by_timestamp(negated, &points.timestamps) + self.filter_points_by_timestamp(negated, points.timestamps()) } else if ident == "datum" { anyhow::ensure!( points.dimensionality() == 1, @@ -1151,15 +1148,15 @@ impl SimpleFilter { mod tests { use crate::oxql::ast::grammar::query_parser; use crate::oxql::ast::logical_op::LogicalOp; - use crate::oxql::point::DataType; - use crate::oxql::point::MetricType; - use crate::oxql::point::Points; - use crate::oxql::point::ValueArray; - use crate::oxql::point::Values; - use crate::oxql::Table; - use crate::oxql::Timeseries; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::point::DataType; + use oxql_types::point::MetricType; + use oxql_types::point::Points; + use oxql_types::point::ValueArray; + use oxql_types::point::Values; + use oxql_types::Table; + use oxql_types::Timeseries; use std::time::Duration; use uuid::Uuid; @@ -1172,7 +1169,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); // This filter should remove the first point based on its timestamp. 
let t = Utc::now() + Duration::from_secs(10); @@ -1205,7 +1202,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); let filter = query_parser::filter("filter datum < \"something\"").unwrap(); diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index f40572d762c..c48804a788e 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -10,13 +10,13 @@ use chrono::DateTime; use chrono::Utc; use crate::oxql::ast::ident::Ident; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::ValueArray; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; -use crate::TimeseriesKey; +use anyhow::Error; +use oximeter::schema::TimeseriesKey; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -98,7 +98,7 @@ impl GroupBy { ValueArray::Double(new_values), ValueArray::Double(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -106,7 +106,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -152,10 +152,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -166,7 +163,7 @@ impl GroupBy { ValueArray::Integer(new_values), ValueArray::Integer(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -174,7 +171,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -220,10 +217,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -286,14 +280,15 @@ impl GroupBy { else { unreachable!(); }; - let new_timestamps = &new_points.timestamps; + let new_timestamps = new_points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree // to which we can easily do this on the `existing` // entry in the output table. Instead, aggregate // everything into a copy of the expected data. 
- let mut timestamps = existing.points.timestamps.clone(); + let mut timestamps = + existing.points.timestamps().to_owned(); let mut values = existing .points .values(0) @@ -360,10 +355,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -388,7 +380,7 @@ impl GroupBy { // _zero_ for any where the values are none. let counts = new_timeseries .points - .timestamps + .timestamps() .iter() .zip(values) .map(|(timestamp, maybe_value)| { @@ -434,16 +426,16 @@ pub enum Reducer { #[cfg(test)] mod tests { use super::{GroupBy, Reducer}; - use crate::oxql::{ - ast::{ - ident::Ident, - table_ops::align::{Align, AlignmentMethod}, - }, - point::{DataType, MetricType, ValueArray}, - Table, Timeseries, + use crate::oxql::ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, }; use chrono::{DateTime, Utc}; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; // Which timeseries the second data point is missing from. @@ -495,8 +487,8 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts0.points.start_times = None; - ts0.points.timestamps.clone_from(×tamps); + ts0.points.clear_start_times(); + ts0.points.set_timestamps(timestamps.clone()); *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(1.0), if matches!( @@ -527,7 +519,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts1.points.start_times = None; + ts1.points.clear_start_times(); // Non-overlapping in this test setup means that we just shift one // value from this array backward in time by one additional second. @@ -538,7 +530,7 @@ mod tests { // // When reducing, t0 is never changed, and t1-t2 are always reduced // together, if the values are present. - ts1.points.timestamps = if cfg.overlapping_times { + let new_timestamps = if cfg.overlapping_times { timestamps.clone() } else { let mut new_timestamps = timestamps.clone(); @@ -546,6 +538,7 @@ mod tests { timestamps.insert(0, new_timestamps[0]); new_timestamps }; + ts1.points.set_timestamps(new_timestamps); *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(2.0), if matches!(cfg.missing_value, MissingValue::Both) { @@ -604,11 +597,13 @@ mod tests { let points = &grouped_timeseries.points; assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); assert_eq!( - points.start_times, None, + points.start_times(), + None, "Points should not have start times" ); assert_eq!( - points.timestamps, test.timestamps, + points.timestamps(), + test.timestamps, "Points do not have correct timestamps" ); diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs index 3c150a4acf9..2893f6cf3e0 100644 --- a/oximeter/db/src/oxql/ast/table_ops/join.rs +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -6,12 +6,10 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; use anyhow::Context; +use anyhow::Error; +use oxql_types::point::MetricType; +use oxql_types::Table; /// An AST node for a natural inner join. #[derive(Clone, Copy, Debug, PartialEq)] @@ -80,10 +78,8 @@ impl Join { // 1. They have the same alignment, and // 2. 
We merge the timepoints rather than simply creating a // ragged array of points. - timeseries.points = inner_join_point_arrays( - ×eries.points, - &next_timeseries.points, - )?; + timeseries.points = + timeseries.points.inner_join(&next_timeseries.points)?; } // We'll also update the name, to indicate the joined data. out.name.push(','); @@ -93,101 +89,6 @@ impl Join { } } -// Given two arrays of points, stack them together at matching timepoints. -// -// For time points in either which do not have a corresponding point in the -// other, the entire time point is elided. -fn inner_join_point_arrays( - left: &Points, - right: &Points, -) -> Result { - // Create an output array with roughly the right capacity, and double the - // number of dimensions. We're trying to stack output value arrays together - // along the dimension axis. - let data_types = - left.data_types().chain(right.data_types()).collect::>(); - let metric_types = - left.metric_types().chain(right.metric_types()).collect::>(); - let mut out = Points::with_capacity( - left.len().max(right.len()), - data_types.iter().copied(), - metric_types.iter().copied(), - )?; - - // Iterate through each array until one is exhausted. We're only inserting - // values from both arrays where the timestamps actually match, since this - // is an inner join. We may want to insert missing values where timestamps - // do not match on either side, when we support an outer join of some kind. - let n_left_dim = left.values.len(); - let mut left_ix = 0; - let mut right_ix = 0; - while left_ix < left.len() && right_ix < right.len() { - let left_timestamp = left.timestamps[left_ix]; - let right_timestamp = right.timestamps[right_ix]; - if left_timestamp == right_timestamp { - out.timestamps.push(left_timestamp); - push_concrete_values( - &mut out.values[..n_left_dim], - &left.values, - left_ix, - ); - push_concrete_values( - &mut out.values[n_left_dim..], - &right.values, - right_ix, - ); - left_ix += 1; - right_ix += 1; - } else if left_timestamp < right_timestamp { - left_ix += 1; - } else { - right_ix += 1; - } - } - Ok(out) -} - -// Push the `i`th value from each dimension of `from` onto `to`. -fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { - assert_eq!(to.len(), from.len()); - for (output, input) in to.iter_mut().zip(from.iter()) { - let input_array = &input.values; - let output_array = &mut output.values; - assert_eq!(input_array.data_type(), output_array.data_type()); - if let Ok(ints) = input_array.as_integer() { - output_array.as_integer_mut().unwrap().push(ints[i]); - continue; - } - if let Ok(doubles) = input_array.as_double() { - output_array.as_double_mut().unwrap().push(doubles[i]); - continue; - } - if let Ok(bools) = input_array.as_boolean() { - output_array.as_boolean_mut().unwrap().push(bools[i]); - continue; - } - if let Ok(strings) = input_array.as_string() { - output_array.as_string_mut().unwrap().push(strings[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_integer_distribution() { - output_array - .as_integer_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_double_distribution() { - output_array - .as_double_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - unreachable!(); - } -} - // Return an error if any metric types are not suitable for joining. 
fn ensure_all_metric_types( mut metric_types: impl ExactSizeIterator, @@ -200,186 +101,3 @@ fn ensure_all_metric_types( ); Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::oxql::point::DataType; - use crate::oxql::point::Datum; - use crate::oxql::point::ValueArray; - use chrono::Utc; - use std::time::Duration; - - #[test] - fn test_push_concrete_values() { - let mut points = Points::with_capacity( - 2, - [DataType::Integer, DataType::Double].into_iter(), - [MetricType::Gauge, MetricType::Gauge].into_iter(), - ) - .unwrap(); - - // Push a concrete value for the integer dimension - let from_ints = vec![Values { - values: ValueArray::Integer(vec![Some(1)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[..1], &from_ints, 0); - - // And another for the double dimension. - let from_doubles = vec![Values { - values: ValueArray::Double(vec![Some(2.0)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[1..], &from_doubles, 0); - - assert_eq!( - points.dimensionality(), - 2, - "Points should have 2 dimensions", - ); - let ints = points.values[0].values.as_integer().unwrap(); - assert_eq!( - ints.len(), - 1, - "Should have pushed one point in the first dimension" - ); - assert_eq!( - ints[0], - Some(1), - "Should have pushed 1 onto the first dimension" - ); - let doubles = points.values[1].values.as_double().unwrap(); - assert_eq!( - doubles.len(), - 1, - "Should have pushed one point in the second dimension" - ); - assert_eq!( - doubles[0], - Some(2.0), - "Should have pushed 2.0 onto the second dimension" - ); - } - - #[test] - fn test_join_point_arrays() { - let now = Utc::now(); - - // Create a set of integer points to join with. - // - // This will have two timestamps, one of which will match the points - // below that are merged in. - let int_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(2), - now, - ], - values: vec![Values { - values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), - metric_type: MetricType::Gauge, - }], - }; - - // Create an additional set of double points. - // - // This also has two timepoints, one of which matches with the above, - // and one of which does not. - let double_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(1), - now, - ], - values: vec![Values { - values: ValueArray::Double(vec![ - Some(4.0), - Some(5.0), - Some(6.0), - ]), - metric_type: MetricType::Gauge, - }], - }; - - // Merge the arrays. - let merged = - inner_join_point_arrays(&int_points, &double_points).unwrap(); - - // Basic checks that we merged in the right values and have the right - // types and dimensions. - assert_eq!( - merged.dimensionality(), - 2, - "Should have appended the dimensions from each input array" - ); - assert_eq!(merged.len(), 2, "Should have merged two common points",); - assert_eq!( - merged.data_types().collect::>(), - &[DataType::Integer, DataType::Double], - "Should have combined the data types of the input arrays" - ); - assert_eq!( - merged.metric_types().collect::>(), - &[MetricType::Gauge, MetricType::Gauge], - "Should have combined the metric types of the input arrays" - ); - - // Check the actual values of the array. - let mut points = merged.iter_points(); - - // The first and last timepoint overlapped between the two arrays, so we - // should have both of them as concrete samples. 
- let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&1)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&4.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And the next point - let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&3)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&6.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And there should be no other values. - assert!(points.next().is_none(), "There should be no more points"); - } -} diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 0205868f5c9..89afb31a7ce 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -6,12 +6,8 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; +use anyhow::Error; +use oxql_types::Table; use std::num::NonZeroUsize; /// The kind of limiting operation @@ -65,58 +61,7 @@ impl Limit { } }; - // Slice the various data arrays. 
- let start_times = input_points - .start_times - .as_ref() - .map(|s| s[start..end].to_vec()); - let timestamps = - input_points.timestamps[start..end].to_vec(); - let values = input_points - .values - .iter() - .map(|vals| { - let values = match &vals.values { - ValueArray::Integer(inner) => { - ValueArray::Integer( - inner[start..end].to_vec(), - ) - } - ValueArray::Double(inner) => { - ValueArray::Double( - inner[start..end].to_vec(), - ) - } - ValueArray::Boolean(inner) => { - ValueArray::Boolean( - inner[start..end].to_vec(), - ) - } - ValueArray::String(inner) => { - ValueArray::String( - inner[start..end].to_vec(), - ) - } - ValueArray::IntegerDistribution(inner) => { - ValueArray::IntegerDistribution( - inner[start..end].to_vec(), - ) - } - ValueArray::DoubleDistribution(inner) => { - ValueArray::DoubleDistribution( - inner[start..end].to_vec(), - ) - } - }; - Values { values, metric_type: vals.metric_type } - }) - .collect(); - let points = Points { start_times, timestamps, values }; - Timeseries { - fields: timeseries.fields.clone(), - points, - alignment: timeseries.alignment, - } + timeseries.limit(start, end) }); Table::from_timeseries(table.name(), timeseries) }) @@ -127,9 +72,12 @@ impl Limit { #[cfg(test)] mod tests { use super::*; - use crate::oxql::point::{DataType, MetricType}; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType}, + Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; fn test_tables() -> Vec { @@ -150,12 +98,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(1), - Some(2), - Some(3), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(1), Some(2), Some(3)]); let table1 = Table::from_timeseries("first", std::iter::once(timeseries)) .unwrap(); @@ -166,12 +116,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(4), - Some(5), - Some(6), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(4), Some(5), Some(6)]); let table2 = Table::from_timeseries("second", std::iter::once(timeseries)) .unwrap(); @@ -223,7 +175,8 @@ mod tests { "Limited table should have the same fields" ); assert_eq!( - timeseries.alignment, limited_timeseries.alignment, + timeseries.alignment(), + limited_timeseries.alignment(), "Limited timeseries should have the same alignment" ); assert_eq!( @@ -237,14 +190,15 @@ mod tests { // These depend on the limit operation. 
let points = ×eries.points; let limited_points = &limited_timeseries.points; - assert_eq!(points.start_times, limited_points.start_times); + assert_eq!(points.start_times(), limited_points.start_times()); assert_eq!( - points.timestamps[start..end], - limited_points.timestamps + &points.timestamps()[start..end], + limited_points.timestamps() ); assert_eq!( - limited_points.values[0].values.as_integer().unwrap(), - &points.values[0].values.as_integer().unwrap()[start..end], + limited_points.values(0).unwrap().as_integer().unwrap(), + &points.values(0).unwrap().as_integer().unwrap() + [start..end], "Points should be limited to [{start}..{end}]", ); } diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index 46f5106a089..8b8d4cbe1b5 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -20,10 +20,10 @@ use self::join::Join; use self::limit::Limit; use crate::oxql::ast::Query; use crate::oxql::Error; -use crate::oxql::Table; use chrono::DateTime; use chrono::Utc; use oximeter::TimeseriesName; +use oxql_types::Table; /// A basic table operation, the atoms of an OxQL query. #[derive(Clone, Debug, PartialEq)] diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs index 3961fae1cce..fcdfb783c5b 100644 --- a/oximeter/db/src/oxql/mod.rs +++ b/oximeter/db/src/oxql/mod.rs @@ -10,13 +10,9 @@ use peg::error::ParseError as PegError; use peg::str::LineCol; pub mod ast; -pub mod point; pub mod query; -pub mod table; pub use self::query::Query; -pub use self::table::Table; -pub use self::table::Timeseries; pub use anyhow::Error; /// Format a PEG parsing error into a nice anyhow error. diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index e1fada9f2aa..46c9bbc92c8 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -23,7 +23,6 @@ use crate::oxql::Error; use crate::TimeseriesName; use chrono::DateTime; use chrono::Utc; -use std::time::Duration; /// A parsed OxQL query. #[derive(Clone, Debug, PartialEq)] @@ -391,15 +390,6 @@ fn restrict_filter_idents( } } -/// Describes the time alignment for an OxQL query. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Alignment { - /// The end time of the query, which the temporal reference point. - pub end_time: DateTime, - /// The alignment period, the interval on which values are produced. 
- pub period: Duration, -} - #[cfg(test)] mod tests { use super::Filter; diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index ceabf008883..556ced04375 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -6,11 +6,12 @@ // Copyright 2021 Oxide Computer Company use crate::{ - Error, FieldSchema, FieldSource, TimeseriesKey, TimeseriesSchema, - DATABASE_NAME, DATABASE_SELECT_FORMAT, + Error, FieldSchema, FieldSource, TimeseriesSchema, DATABASE_NAME, + DATABASE_SELECT_FORMAT, }; use chrono::{DateTime, Utc}; use dropshot::PaginationOrder; +use oximeter::schema::TimeseriesKey; use oximeter::types::{DatumType, FieldType, FieldValue}; use oximeter::{Metric, Target}; use regex::Regex; diff --git a/oximeter/db/src/shells/oxql.rs b/oximeter/db/src/shells/oxql.rs index 0f23ea7d64a..f46d08c0cfc 100644 --- a/oximeter/db/src/shells/oxql.rs +++ b/oximeter/db/src/shells/oxql.rs @@ -7,9 +7,10 @@ // Copyright 2024 Oxide Computer use super::{list_timeseries, prepare_columns}; -use crate::{make_client, oxql::Table, Client, OxqlResult}; +use crate::{make_client, Client, OxqlResult}; use clap::Args; use crossterm::style::Stylize; +use oxql_types::Table; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index 732683c414e..f5d81d51d18 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -10,7 +10,6 @@ use clickward::{ use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::poll; use omicron_test_utils::dev::test_setup_log; -use oximeter::test_util; use oximeter_db::{Client, DbWrite, OxqlResult, Sample, TestDbWrite}; use slog::{debug, info, Logger}; use std::collections::BTreeSet; @@ -199,7 +198,7 @@ async fn test_cluster() -> anyhow::Result<()> { // Let's write some samples to our first replica and wait for them to show // up on replica 2. 
let start = tokio::time::Instant::now(); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -261,7 +260,7 @@ async fn test_cluster() -> anyhow::Result<()> { info!(log, "successfully stopped server 1"); // Generate some new samples and insert them at replica3 - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -298,7 +297,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); // We still have a quorum (2 of 3 keepers), so we should be able to insert - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -321,7 +320,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); info!(log, "Attempting to insert samples without keeper quorum"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -350,7 +349,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -370,7 +369,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, diff --git a/oximeter/impl/src/test_util.rs b/oximeter/impl/src/test_util.rs deleted file mode 100644 index c2ac7b34bdf..00000000000 --- a/oximeter/impl/src/test_util.rs +++ /dev/null @@ -1,130 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Utilities for testing the oximeter crate. 
-// Copyright 2024 Oxide Computer Company - -use crate::histogram; -use crate::histogram::{Histogram, Record}; -use crate::types::{Cumulative, Sample}; -use uuid::Uuid; - -#[derive(oximeter::Target)] -pub struct TestTarget { - pub name1: String, - pub name2: String, - pub num: i64, -} - -impl Default for TestTarget { - fn default() -> Self { - TestTarget { - name1: "first_name".into(), - name2: "second_name".into(), - num: 0, - } - } -} - -#[derive(oximeter::Metric)] -pub struct TestMetric { - pub id: Uuid, - pub good: bool, - pub datum: i64, -} - -#[derive(oximeter::Metric)] -pub struct TestCumulativeMetric { - pub id: Uuid, - pub good: bool, - pub datum: Cumulative, -} - -#[derive(oximeter::Metric)] -pub struct TestHistogram { - pub id: Uuid, - pub good: bool, - pub datum: Histogram, -} - -const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); - -pub fn make_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new(&target, &metric).unwrap() -} - -pub fn make_missing_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new_missing(&target, &metric).unwrap() -} - -pub fn make_hist_sample() -> Sample { - let target = TestTarget::default(); - let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); - hist.sample(1.0).unwrap(); - hist.sample(2.0).unwrap(); - hist.sample(6.0).unwrap(); - let metric = TestHistogram { id: ID, good: true, datum: hist }; - Sample::new(&target, &metric).unwrap() -} - -/// A target identifying a single virtual machine instance -#[derive(Debug, Clone, Copy, oximeter::Target)] -pub struct VirtualMachine { - pub project_id: Uuid, - pub instance_id: Uuid, -} - -/// A metric recording the total time a vCPU is busy, by its ID -#[derive(Debug, Clone, Copy, oximeter::Metric)] -pub struct CpuBusy { - cpu_id: i64, - datum: Cumulative, -} - -pub fn generate_test_samples( - n_projects: usize, - n_instances: usize, - n_cpus: usize, - n_samples: usize, -) -> Vec { - let n_timeseries = n_projects * n_instances * n_cpus; - let mut samples = Vec::with_capacity(n_samples * n_timeseries); - for _ in 0..n_projects { - let project_id = Uuid::new_v4(); - for _ in 0..n_instances { - let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; - for cpu in 0..n_cpus { - for sample in 0..n_samples { - let cpu_busy = CpuBusy { - cpu_id: cpu as _, - datum: Cumulative::new(sample as f64), - }; - let sample = Sample::new(&vm, &cpu_busy).unwrap(); - samples.push(sample); - } - } - } - } - samples -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gen_test_samples() { - let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); - let samples = - generate_test_samples(n_projects, n_instances, n_cpus, n_samples); - assert_eq!( - samples.len(), - n_projects * n_instances * n_cpus * n_samples - ); - } -} diff --git a/oximeter/instruments/src/http.rs b/oximeter/instruments/src/http.rs index 6a0a35ce634..efd053ad66e 100644 --- a/oximeter/instruments/src/http.rs +++ b/oximeter/instruments/src/http.rs @@ -6,17 +6,14 @@ // Copyright 2024 Oxide Computer Company -use dropshot::{ - HttpError, HttpResponse, RequestContext, RequestInfo, ServerContext, -}; +use dropshot::{HttpError, HttpResponse, RequestContext, ServerContext}; use futures::Future; use http::StatusCode; -use http::Uri; use oximeter::{ histogram::Histogram, histogram::Record, MetricsError, Producer, Sample, }; -use std::borrow::Cow; 
-use std::collections::BTreeMap; +use std::collections::HashMap; +use std::hash::{DefaultHasher, Hash as _, Hasher}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -24,65 +21,59 @@ oximeter::use_timeseries!("http-service.toml"); pub use http_service::HttpService; pub use http_service::RequestLatencyHistogram; -// Return the route portion of the request, normalized to include a single -// leading slash and no trailing slashes. -fn normalized_uri_path(uri: &Uri) -> Cow<'static, str> { - Cow::Owned(format!( - "/{}", - uri.path().trim_end_matches('/').trim_start_matches('/') - )) -} - impl RequestLatencyHistogram { /// Build a new `RequestLatencyHistogram` with a specified histogram. /// - /// Latencies are expressed in seconds. + /// Latencies are expressed in nanoseconds. pub fn new( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, - histogram: Histogram, + histogram: Histogram, ) -> Self { Self { - route: normalized_uri_path(request.uri()), - method: request.method().to_string().into(), - status_code: status_code.as_u16().into(), + operation_id: operation_id.to_string().into(), + status_code: status_code.as_u16(), datum: histogram, } } - /// Build a `RequestLatencyHistogram` with a histogram whose bins span the given decades. + /// Build a histogram whose bins span the given powers of ten. /// - /// `start_decade` and `end_decade` specify the lower and upper limits of the histogram's - /// range, as a power of 10. For example, passing `-3` and `2` results in a histogram with bins - /// spanning `[10 ** -3, 10 ** 2)`. There are 10 bins in each decade. See the - /// [`Histogram::span_decades`] method for more details. + /// `start_power` and `end_power` specify the lower and upper limits of + /// the histogram's range, as powers of 10. For example, passing 2 and 4 + /// results in a histogram with bins from `[10 ** 2, 10 ** 4)`. There are 10 + /// bins in each power of 10. /// - /// Latencies are expressed as seconds. - pub fn with_latency_decades( - request: &RequestInfo, + /// See the [`Histogram::span_decades`] method for more details. + /// + /// Latencies are expressed in nanoseconds. + pub fn with_log_linear_bins( + operation_id: &str, status_code: StatusCode, - start_decade: i16, - end_decade: i16, + start_power: u16, + end_power: u16, ) -> Result { Ok(Self::new( - request, + operation_id, status_code, - Histogram::span_decades(start_decade, end_decade)?, + Histogram::span_decades(start_power, end_power)?, )) } - fn key_for(request: &RequestInfo, status_code: StatusCode) -> String { - format!( - "{}:{}:{}", - normalized_uri_path(request.uri()), - request.method(), - status_code.as_u16() - ) + /// Return a key used to ID this histogram. + /// + /// This is a quick way to look up the histogram tracking any particular + /// request and response. + fn key_for(operation_id: &str, status_code: StatusCode) -> u64 { + let mut hasher = DefaultHasher::new(); + operation_id.hash(&mut hasher); + status_code.hash(&mut hasher); + hasher.finish() } } /// The `LatencyTracker` is an [`oximeter::Producer`] that tracks the latencies of requests for an -/// HTTP service, in seconds. +/// HTTP service, in nanoseconds. /// /// Consumers should construct one `LatencyTracker` for each HTTP service they wish to instrument. /// As requests are received, the [`LatencyTracker::update`] method can be called with the @@ -92,35 +83,47 @@ impl RequestLatencyHistogram { /// The `LatencyTracker` can be used to produce metric data collected by `oximeter`. 
#[derive(Debug, Clone)] pub struct LatencyTracker { + /// The HTTP service target for which we're tracking request histograms. pub service: HttpService, - latencies: Arc>>, - histogram: Histogram, + /// The latency histogram for each request. + /// + /// The map here uses a hash of the request fields (operation and status + /// code) as the key to each histogram. It's a bit redundant to then store + /// that in a hashmap, but this lets us avoid creating a new + /// `RequestLatencyHistogram` when handling a request that we already have + /// one for. Instead, we use this key to get the existing entry. + latencies: Arc>>, + /// The histogram used to track each request. + /// + /// We store it here to clone as we see new requests. + histogram: Histogram, } impl LatencyTracker { /// Build a new tracker for the given `service`, using `histogram` to track latencies. /// /// Note that the same histogram is used for each tracked timeseries. - pub fn new(service: HttpService, histogram: Histogram) -> Self { + pub fn new(service: HttpService, histogram: Histogram) -> Self { Self { service, - latencies: Arc::new(Mutex::new(BTreeMap::new())), + latencies: Arc::new(Mutex::new(HashMap::new())), histogram, } } - /// Build a new tracker for the given `service`, with a histogram that spans the given decades - /// (powers of 10). See [`RequestLatencyHistogram::with_latency_decades`] for details on the + /// Build a new tracker with log-linear bins. + /// + /// This creates a tracker for the `service`, using 10 bins per power of 10, + /// from `[10 ** start_power, 10 ** end_power)`. + /// + /// See [`RequestLatencyHistogram::with_log_linear_bins`] for details on the /// arguments. - pub fn with_latency_decades( + pub fn with_log_linear_bins( service: HttpService, - start_decade: i16, - end_decade: i16, + start_power: u16, + end_power: u16, ) -> Result { - Ok(Self::new( - service, - Histogram::span_decades(start_decade, end_decade)?, - )) + Ok(Self::new(service, Histogram::span_decades(start_power, end_power)?)) } /// Update (or create) a timeseries in response to a new request. @@ -129,20 +132,20 @@ impl LatencyTracker { /// to which the other arguments belong. (One is created if it does not exist.) pub fn update( &self, - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, latency: Duration, ) -> Result<(), MetricsError> { - let key = RequestLatencyHistogram::key_for(request, status_code); + let key = RequestLatencyHistogram::key_for(operation_id, status_code); let mut latencies = self.latencies.lock().unwrap(); let entry = latencies.entry(key).or_insert_with(|| { RequestLatencyHistogram::new( - request, + operation_id, status_code, self.histogram.clone(), ) }); - entry.datum.sample(latency.as_secs_f64()).map_err(MetricsError::from) + entry.datum.sample(latency.as_nanos() as _).map_err(MetricsError::from) } /// Instrument the given Dropshot endpoint handler function. 
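Aside: a self-contained sketch of the keying scheme the hunk above introduces. The operation ID and status code are hashed into a `u64` that keys the `HashMap` of histograms; the hash only needs to be stable within a single process, since the map lives in memory for the tracker's lifetime. A bare `u16` stands in for `http::StatusCode` here so the sketch needs only std:

```rust
use std::hash::{DefaultHasher, Hash as _, Hasher};

// Mirrors `RequestLatencyHistogram::key_for`, with `u16` in place of
// `http::StatusCode` to avoid the extra dependency.
fn key_for(operation_id: &str, status_code: u16) -> u64 {
    let mut hasher = DefaultHasher::new();
    operation_id.hash(&mut hasher);
    status_code.hash(&mut hasher);
    hasher.finish()
}

fn main() {
    // The same (operation, status) pair always selects the same entry...
    assert_eq!(key_for("project_list", 200), key_for("project_list", 200));
    // ...while a different status code selects a different histogram.
    assert_ne!(key_for("project_list", 200), key_for("project_list", 404));
}
```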
@@ -170,14 +173,14 @@ impl LatencyTracker { Ok(response) => response.status_code(), Err(ref e) => e.status_code, }; - if let Err(e) = self.update(&context.request, status_code, latency) { + if let Err(e) = self.update(&context.operation_id, status_code, latency) + { slog::error!( &context.log, "error instrumenting dropshot handler"; "error" => ?e, "status_code" => status_code.as_u16(), - "method" => %context.request.method(), - "uri" => %context.request.uri(), + "operation_id" => &context.operation_id, "remote_addr" => context.request.remote_addr(), "latency" => ?latency, ); @@ -218,43 +221,26 @@ mod tests { fn test_latency_tracker() { let service = HttpService { name: "my-service".into(), id: ID.parse().unwrap() }; - let hist = Histogram::new(&[0.0, 1.0]).unwrap(); + let hist = Histogram::new(&[100, 1000]).unwrap(); let tracker = LatencyTracker::new(service, hist); - let request = http::request::Builder::new() - .method(http::Method::GET) - .uri("/some/uri") - .body(()) + let status_code0 = StatusCode::OK; + let status_code1 = StatusCode::NOT_FOUND; + let operation_id = "some_operation_id"; + tracker + .update(operation_id, status_code0, Duration::from_nanos(200)) .unwrap(); - let status_code = StatusCode::OK; tracker - .update( - &RequestInfo::new(&request, "0.0.0.0:0".parse().unwrap()), - status_code, - Duration::from_secs_f64(0.5), - ) + .update(operation_id, status_code1, Duration::from_nanos(200)) .unwrap(); - - let key = "/some/uri:GET:200"; - let actual_hist = tracker.latencies.lock().unwrap()[key].datum.clone(); - assert_eq!(actual_hist.n_samples(), 1); - let bins = actual_hist.iter().collect::>(); - assert_eq!(bins[1].count, 1); - } - - #[test] - fn test_normalize_uri_path() { - const EXPECTED: &str = "/foo/bar"; - const TESTS: &[&str] = &[ - "/foo/bar", - "/foo/bar/", - "//foo/bar", - "//foo/bar/", - "/foo/bar//", - "////foo/bar/////", - ]; - for test in TESTS.iter() { - println!("{test}"); - assert_eq!(normalized_uri_path(&test.parse().unwrap()), EXPECTED); + let key0 = RequestLatencyHistogram::key_for(operation_id, status_code0); + let key1 = RequestLatencyHistogram::key_for(operation_id, status_code1); + let latencies = tracker.latencies.lock().unwrap(); + assert_eq!(latencies.len(), 2); + for key in [key0, key1] { + let actual_hist = &latencies[&key].datum; + assert_eq!(actual_hist.n_samples(), 1); + let bins = actual_hist.iter().collect::>(); + assert_eq!(bins[1].count, 1); } } } diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index c04d1bd3ae6..63b370bee6d 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -13,9 +13,10 @@ anyhow.workspace = true clap.workspace = true chrono.workspace = true omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true oximeter-macro-impl.workspace = true +oximeter-schema.workspace = true oximeter-timeseries-macro.workspace = true +oximeter-types.workspace = true prettyplease.workspace = true syn.workspace = true toml.workspace = true diff --git a/oximeter/oximeter/schema/hardware-component.toml b/oximeter/oximeter/schema/hardware-component.toml new file mode 100644 index 00000000000..30a1d6510fb --- /dev/null +++ b/oximeter/oximeter/schema/hardware-component.toml @@ -0,0 +1,183 @@ +format_version = 1 + +[target] +name = "hardware_component" +description = "A hardware component on a compute sled, switch, or power shelf" +authz_scope = "fleet" +versions = [ + { version = 1, fields = [ + "rack_id", + "slot", + "chassis_kind", + "chassis_serial", + "chassis_model", + 
"chassis_revision", + "hubris_archive_id", + "gateway_id", + "component_kind", + "component_id", + "description", + ]} +] + +[fields.rack_id] +type = "uuid" +description = "ID of the rack on which this measurement was recorded." + +[fields.slot] +type = "u32" +description = """ +The cubby number or switch slot of the service processor reporting the \ +measurement""" + +[fields.chassis_model] +type = "string" +description = "Model number of the sled, switch, or power shelf" + +[fields.chassis_revision] +type = "u32" +description = "Revision number of the sled, switch, or power shelf" + +[fields.chassis_serial] +type = "string" +description = "Serial number of the sled, switch, or power shelf" + +[fields.hubris_archive_id] +type = "string" +description = """ +Hubris firmware archive ID of the service processor when the measurement \ +was recorded.""" + +[fields.gateway_id] +type = "uuid" +description = """ +ID of the Management Gateway Service process which recorded the measurement.""" + +[fields.chassis_kind] +type = "string" +description = """ +What kind of thing the component resides on. + +This will be one of 'sled', for components on compute sleds; 'switch', for \ +components on rack switches; or 'power', for components on power shelves.""" + +[fields.component_id] +type = "string" +description = """ +The service processor component ID uniquely identifying the hardware \ +component on the sled, switch, or power shelf.""" + +[fields.component_kind] +type = "string" +description = "What type of hardware component this thing is." + +[fields.description] +type = "string" +description = """ +A human-readable description of the hardware component. This may include \ +its location or role in the system (e.g. a DIMM's number, or a temperature \ +sensor's location).""" + +[fields.sensor] +type = "string" +description = """The name of a sensor that recorded a sensor reading.""" + +[fields.error] +type = "string" +description = "The kind of sensor error that occurred" + +[fields.sensor_kind] +type = "string" +description = """ +Which kind of sensor could not be read due to a sensor error. + +This will be one of 'temperature', 'current', 'power', 'voltage', \ +'input_current', 'input_voltage', or 'fan_speed' (the same names as \ +the metrics emitted by these sensors when they are read successfully).""" + +[[metrics]] +name = "temperature" +description = "A temperature reading from a hardware component." 
+units = "degrees_celsius" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "current" +description = "Output current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "power" +description = "Power reading, in watts" +units = "watts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "voltage" +description = "Output voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_current" +description = "Input electric current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_voltage" +description = "Input electric voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + + +[[metrics]] +name = "fan_speed" +description = "A fan speed measurement, in rotations per minute" +units = "rpm" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "sensor_error_count" +description = "Cumulative count of errors reported by a sensor" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["sensor", "error", "sensor_kind"]} +] + +[[metrics]] +name = "poll_error_count" +description = """ +Cumulative count of errors encountered whilst polling a component's sensors. + +Unlike the `sensor_error_count` metric, this counts errors encountered by \ +the management gateway while polling the component, rather than errors \ +reported by the component itself.""" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["error"] } +] diff --git a/oximeter/oximeter/schema/http-service.toml b/oximeter/oximeter/schema/http-service.toml index 90981106562..2e2e6fb3592 100644 --- a/oximeter/oximeter/schema/http-service.toml +++ b/oximeter/oximeter/schema/http-service.toml @@ -11,10 +11,10 @@ versions = [ [[metrics]] name = "request_latency_histogram" description = "Duration for the server to handle a request" -units = "seconds" -datum_type = "histogram_f64" +units = "nanoseconds" +datum_type = "histogram_u64" versions = [ - { added_in = 1, fields = [ "route", "method", "status_code" ] } + { added_in = 1, fields = [ "operation_id", "status_code" ] } ] [fields.name] @@ -25,14 +25,15 @@ description = "The name of the HTTP server, or program running it" type = "uuid" description = "UUID of the HTTP server" -[fields.route] +[fields.operation_id] type = "string" -description = "HTTP route in the request" +description = """\ +The identifier for the HTTP operation.\ -[fields.method] -type = "string" -description = "HTTP method in the request" +In most cases, this the OpenAPI `operationId` field that uniquely identifies the +endpoint the request is targeted to and the HTTP method used. 
+""" [fields.status_code] -type = "i64" +type = "u16" description = "HTTP status code in the server's response" diff --git a/oximeter/oximeter/schema/virtual-disk.toml b/oximeter/oximeter/schema/virtual-disk.toml new file mode 100644 index 00000000000..54cedae6e6f --- /dev/null +++ b/oximeter/oximeter/schema/virtual-disk.toml @@ -0,0 +1,127 @@ +format_version = 1 + +[target] +name = "virtual_disk" +description = "A virtual disk" +authz_scope = "project" +versions = [ + { version = 1, fields = [ "attached_instance_id", "block_size", "disk_id", "project_id", "silo_id", ] }, +] + +[fields.attached_instance_id] +type = "uuid" +description = "ID of the instance the disk is attached to" + +[fields.block_size] +type = "u32" +description = "Block size of the disk, in bytes" + +[fields.disk_id] +type = "uuid" +description = "ID of the disk" + +[fields.failure_reason] +type = "string" +description = "The reason an I/O operation failed" + +[fields.io_kind] +type = "string" +description = "The kind of I/O operation" + +[fields.project_id] +type = "uuid" +description = "ID of the project containing the disk" + +[fields.silo_id] +type = "uuid" +description = "ID for the silo containing the disk" + +[[metrics]] +name = "bytes_read" +description = "Number of bytes read from the disk" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "reads" +description = "Total number of read operations from the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_reads" +description = "Total number of failed read operations from the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "bytes_written" +description = "Number of bytes written to the disk" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "writes" +description = "Total number of write operations to the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_writes" +description = "Total number of failed write operations to the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "flushes" +description = "Total number of flush operations on the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [] } +] + +[[metrics]] +name = "failed_flushes" +description = "Total number of failed flush operations on the disk" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "failure_reason" ] } +] + +[[metrics]] +name = "io_latency" +description = "Histogram of latency for I/O operations by kind" +units = "nanoseconds" +datum_type = "histogram_u64" +versions = [ + { added_in = 1, fields = [ "io_kind" ] } +] + +[[metrics]] +name = "io_size" +description = "Histogram of sizes for I/O operations by kind" +units = "bytes" +datum_type = "histogram_u64" +versions = [ + { added_in = 1, fields = [ "io_kind" ] } +] diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 5ec6a49e5ce..913318b8a89 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -185,14 +185,15 @@ //! `Producer`s may be registered with the same `ProducerServer`, each with potentially different //! 
sampling intervals.
 
-pub use oximeter_impl::*;
+pub use oximeter_macro_impl::{Metric, Target};
 pub use oximeter_timeseries_macro::use_timeseries;
+pub use oximeter_types::*;
 
 #[cfg(test)]
 mod test {
-    use oximeter_impl::schema::ir::load_schema;
-    use oximeter_impl::schema::{FieldSource, SCHEMA_DIRECTORY};
-    use oximeter_impl::TimeseriesSchema;
+    use oximeter_schema::ir::load_schema;
+    use oximeter_types::schema::{FieldSource, SCHEMA_DIRECTORY};
+    use oximeter_types::TimeseriesSchema;
     use std::collections::BTreeMap;
     use std::fs;
diff --git a/oximeter/oxql-types/Cargo.toml b/oximeter/oxql-types/Cargo.toml
new file mode 100644
index 00000000000..da7c7bcd1c6
--- /dev/null
+++ b/oximeter/oxql-types/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "oxql-types"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow.workspace = true
+chrono.workspace = true
+highway.workspace = true
+num.workspace = true
+omicron-workspace-hack.workspace = true
+oximeter-types.workspace = true
+schemars.workspace = true
+serde.workspace = true
diff --git a/oximeter/oxql-types/src/lib.rs b/oximeter/oxql-types/src/lib.rs
new file mode 100644
index 00000000000..00468705a93
--- /dev/null
+++ b/oximeter/oxql-types/src/lib.rs
@@ -0,0 +1,23 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Core types for OxQL.
+
+use chrono::{DateTime, Utc};
+use std::time::Duration;
+
+pub mod point;
+pub mod table;
+
+pub use self::table::Table;
+pub use self::table::Timeseries;
+
+/// Describes the time alignment for an OxQL query.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Alignment {
+    /// The end time of the query, which is the temporal reference point.
+    pub end_time: DateTime<Utc>,
+    /// The alignment period, the interval on which values are produced.
+    pub period: Duration,
+}
diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/oxql-types/src/point.rs
similarity index 82%
rename from oximeter/db/src/oxql/point.rs
rename to oximeter/oxql-types/src/point.rs
index e04193e8b83..6e3c7143dc1 100644
--- a/oximeter/db/src/oxql/point.rs
+++ b/oximeter/oxql-types/src/point.rs
@@ -6,15 +6,15 @@
 
 // Copyright 2024 Oxide Computer Company
 
-use super::Error;
 use anyhow::Context;
+use anyhow::Error;
 use chrono::DateTime;
 use chrono::Utc;
 use num::ToPrimitive;
-use oximeter::traits::HistogramSupport;
-use oximeter::DatumType;
-use oximeter::Measurement;
-use oximeter::Quantile;
+use oximeter_types::traits::HistogramSupport;
+use oximeter_types::DatumType;
+use oximeter_types::Measurement;
+use oximeter_types::Quantile;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
@@ -131,32 +131,32 @@ impl CumulativeDatum {
     // not cumulative.
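    // (For example: a `CumulativeU64` sample becomes `CumulativeDatum::Integer`
    // after a checked u64-to-i64 conversion, and a histogram sample becomes an
    // integer or double distribution. Gauge datum types are rejected with an
    // error.)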
    fn from_cumulative(meas: &Measurement) -> Result<Self, Error> {
         let datum = match meas.datum() {
-            oximeter::Datum::CumulativeI64(val) => {
+            oximeter_types::Datum::CumulativeI64(val) => {
                 CumulativeDatum::Integer(val.value())
             }
-            oximeter::Datum::CumulativeU64(val) => {
+            oximeter_types::Datum::CumulativeU64(val) => {
                 let int = val
                     .value()
                     .try_into()
                     .context("Overflow converting u64 to i64")?;
                 CumulativeDatum::Integer(int)
             }
-            oximeter::Datum::CumulativeF32(val) => {
+            oximeter_types::Datum::CumulativeF32(val) => {
                 CumulativeDatum::Double(val.value().into())
             }
-            oximeter::Datum::CumulativeF64(val) => {
+            oximeter_types::Datum::CumulativeF64(val) => {
                 CumulativeDatum::Double(val.value())
             }
-            oximeter::Datum::HistogramI8(hist) => hist.into(),
-            oximeter::Datum::HistogramU8(hist) => hist.into(),
-            oximeter::Datum::HistogramI16(hist) => hist.into(),
-            oximeter::Datum::HistogramU16(hist) => hist.into(),
-            oximeter::Datum::HistogramI32(hist) => hist.into(),
-            oximeter::Datum::HistogramU32(hist) => hist.into(),
-            oximeter::Datum::HistogramI64(hist) => hist.into(),
-            oximeter::Datum::HistogramU64(hist) => hist.try_into()?,
-            oximeter::Datum::HistogramF32(hist) => hist.into(),
-            oximeter::Datum::HistogramF64(hist) => hist.into(),
+            oximeter_types::Datum::HistogramI8(hist) => hist.into(),
+            oximeter_types::Datum::HistogramU8(hist) => hist.into(),
+            oximeter_types::Datum::HistogramI16(hist) => hist.into(),
+            oximeter_types::Datum::HistogramU16(hist) => hist.into(),
+            oximeter_types::Datum::HistogramI32(hist) => hist.into(),
+            oximeter_types::Datum::HistogramU32(hist) => hist.into(),
+            oximeter_types::Datum::HistogramI64(hist) => hist.into(),
+            oximeter_types::Datum::HistogramU64(hist) => hist.try_into()?,
+            oximeter_types::Datum::HistogramF32(hist) => hist.into(),
+            oximeter_types::Datum::HistogramF64(hist) => hist.into(),
             other => anyhow::bail!(
                 "Input datum of type {} is not cumulative",
                 other.datum_type(),
@@ -169,10 +169,10 @@ impl CumulativeDatum {
 
 /// A single list of values, for one dimension of a timeseries.
 #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)]
 pub struct Values {
-    // The data values.
-    pub(super) values: ValueArray,
-    // The type of this metric.
-    pub(super) metric_type: MetricType,
+    /// The data values.
+    pub values: ValueArray,
+    /// The type of this metric.
+    pub metric_type: MetricType,
 }
 
 impl Values {
@@ -285,14 +285,23 @@ impl<'a> fmt::Display for Datum<'a> {
 #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)]
 pub struct Points {
     // The start time points for cumulative or delta metrics.
-    pub(super) start_times: Option<Vec<DateTime<Utc>>>,
+    pub(crate) start_times: Option<Vec<DateTime<Utc>>>,
     // The timestamp of each value.
-    pub(super) timestamps: Vec<DateTime<Utc>>,
+    pub(crate) timestamps: Vec<DateTime<Utc>>,
     // The array of data values, one for each dimension.
-    pub(super) values: Vec<Values>,
+    pub(crate) values: Vec<Values>,
 }
 
 impl Points {
+    /// Construct a new `Points` with the provided data.
+    pub fn new(
+        start_times: Option<Vec<DateTime<Utc>>>,
+        timestamps: Vec<DateTime<Utc>>,
+        values: Vec<Values>,
+    ) -> Self {
+        Self { start_times, timestamps, values }
+    }
+
     /// Construct an empty array of points to hold data of the provided type.
     pub fn empty(data_type: DataType, metric_type: MetricType) -> Self {
         Self::with_capacity(
@@ -303,8 +312,28 @@ impl Points {
         .unwrap()
     }
 
-    // Return a mutable reference to the value array of the specified dimension, if any.
-    pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> {
+    /// Return the start times of the points, if any.
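+    ///
+    /// Start times are only present for cumulative or delta metrics; gauge
+    /// timeseries carry no start times, so this returns `None` for them.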
+    pub fn start_times(&self) -> Option<&[DateTime<Utc>]> {
+        self.start_times.as_deref()
+    }
+
+    /// Clear the start times of the points.
+    pub fn clear_start_times(&mut self) {
+        self.start_times = None;
+    }
+
+    /// Return the timestamps of the points.
+    pub fn timestamps(&self) -> &[DateTime<Utc>] {
+        &self.timestamps
+    }
+
+    /// Set the timestamps of the points.
+    pub fn set_timestamps(&mut self, timestamps: Vec<DateTime<Utc>>) {
+        self.timestamps = timestamps;
+    }
+
+    /// Return a mutable reference to the value array of the specified
+    /// dimension, if any.
+    pub fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> {
         self.values.get_mut(dim).map(|val| &mut val.values)
     }
 
@@ -563,8 +592,8 @@ impl Points {
         })
     }
 
-    // Filter points in self to those where `to_keep` is true.
-    pub(crate) fn filter(&self, to_keep: Vec<bool>) -> Result<Self, Error> {
+    /// Filter points in self to those where `to_keep` is true.
+    pub fn filter(&self, to_keep: Vec<bool>) -> Result<Self, Error> {
         anyhow::ensure!(
             to_keep.len() == self.len(),
             "Filter array must be the same length as self",
         );
@@ -646,8 +675,8 @@ impl Points {
         Ok(out)
     }
 
-    // Return a new set of points, with the values casted to the provided types.
-    pub(crate) fn cast(&self, types: &[DataType]) -> Result<Self, Error> {
+    /// Return a new set of points, with the values cast to the provided types.
+    pub fn cast(&self, types: &[DataType]) -> Result<Self, Error> {
         anyhow::ensure!(
             types.len() == self.dimensionality(),
             "Cannot cast to {} types, the data has dimensionality {}",
@@ -863,12 +892,104 @@ impl Points {
         Ok(Self { start_times, timestamps, values: new_values })
     }
 
+    /// Given two arrays of points, stack them together at matching timepoints.
+    ///
+    /// Time points in either array that have no corresponding point in the
+    /// other are elided entirely.
+    pub fn inner_join(&self, right: &Points) -> Result<Self, Error> {
+        // Create an output array with roughly the right capacity, and double the
+        // number of dimensions. We're trying to stack output value arrays together
+        // along the dimension axis.
+        let data_types =
+            self.data_types().chain(right.data_types()).collect::<Vec<_>>();
+        let metric_types =
+            self.metric_types().chain(right.metric_types()).collect::<Vec<_>>();
+        let mut out = Points::with_capacity(
+            self.len().max(right.len()),
+            data_types.iter().copied(),
+            metric_types.iter().copied(),
+        )?;
+
+        // Iterate through each array until one is exhausted. We're only inserting
+        // values from both arrays where the timestamps actually match, since this
+        // is an inner join. We may want to insert missing values where timestamps
+        // do not match on either side, when we support an outer join of some kind.
+        let n_left_dim = self.dimensionality();
+        let mut left_ix = 0;
+        let mut right_ix = 0;
+        while left_ix < self.len() && right_ix < right.len() {
+            let left_timestamp = self.timestamps()[left_ix];
+            let right_timestamp = right.timestamps()[right_ix];
+            if left_timestamp == right_timestamp {
+                out.timestamps.push(left_timestamp);
+                push_concrete_values(
+                    &mut out.values[..n_left_dim],
+                    &self.values,
+                    left_ix,
+                );
+                push_concrete_values(
+                    &mut out.values[n_left_dim..],
+                    &right.values,
+                    right_ix,
+                );
+                left_ix += 1;
+                right_ix += 1;
+            } else if left_timestamp < right_timestamp {
+                left_ix += 1;
+            } else {
+                right_ix += 1;
+            }
+        }
+        Ok(out)
+    }
+
     /// Return true if self contains no data points.
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 }
 
+// Push the `i`th value from each dimension of `from` onto `to`.
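+//
+// For example (an illustrative aside): if `from` holds an integer dimension
+// and a double dimension, then calling this with `i = 1` appends the second
+// integer to `to`'s integer array and the second double to its double array.
+// Both slices must have the same length and matching data types, which the
+// function asserts.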
+fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array + .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + /// List of data values for one timeseries. /// /// Each element is an option, where `None` represents a missing sample. @@ -900,8 +1021,8 @@ impl ValueArray { } } - // Return the data type in self. - pub(super) fn data_type(&self) -> DataType { + /// Return the data type in self. + pub fn data_type(&self) -> DataType { match self { ValueArray::Integer(_) => DataType::Integer, ValueArray::Double(_) => DataType::Double, @@ -947,10 +1068,8 @@ impl ValueArray { Ok(inner) } - // Access the inner array of integers, if possible. - pub(super) fn as_integer_mut( - &mut self, - ) -> Result<&mut Vec>, Error> { + /// Access the inner array of integers, if possible. + pub fn as_integer_mut(&mut self) -> Result<&mut Vec>, Error> { let ValueArray::Integer(inner) = self else { anyhow::bail!( "Cannot access value array as integer type, it has type {}", @@ -1107,91 +1226,97 @@ impl ValueArray { // Push a value directly from a datum, without modification. 
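     //
     // "Without modification" means the raw datum value is appended as-is, with
     // no delta computed against a previous sample. Errors arise only from type
     // mismatches, unsupported byte arrays, or `u64` values that overflow an
     // `i64`.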
fn push_value_from_datum( &mut self, - datum: &oximeter::Datum, + datum: &oximeter_types::Datum, ) -> Result<(), Error> { match datum { - oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), - oximeter::Datum::I8(i) => { + oximeter_types::Datum::Bool(b) => { + self.as_boolean_mut()?.push(Some(*b)) + } + oximeter_types::Datum::I8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U8(i) => { + oximeter_types::Datum::U8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I16(i) => { + oximeter_types::Datum::I16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U16(i) => { + oximeter_types::Datum::U16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I32(i) => { + oximeter_types::Datum::I32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U32(i) => { + oximeter_types::Datum::U32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), - oximeter::Datum::U64(i) => { + oximeter_types::Datum::I64(i) => { + self.as_integer_mut()?.push(Some(*i)) + } + oximeter_types::Datum::U64(i) => { let i = i.to_i64().context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(i)); } - oximeter::Datum::F32(f) => { + oximeter_types::Datum::F32(f) => { self.as_double_mut()?.push(Some(f64::from(*f))) } - oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), - oximeter::Datum::String(s) => { + oximeter_types::Datum::F64(f) => { + self.as_double_mut()?.push(Some(*f)) + } + oximeter_types::Datum::String(s) => { self.as_string_mut()?.push(Some(s.clone())) } - oximeter::Datum::Bytes(_) => { + oximeter_types::Datum::Bytes(_) => { anyhow::bail!("Bytes data types are not yet supported") } - oximeter::Datum::CumulativeI64(c) => { + oximeter_types::Datum::CumulativeI64(c) => { self.as_integer_mut()?.push(Some(c.value())) } - oximeter::Datum::CumulativeU64(c) => { + oximeter_types::Datum::CumulativeU64(c) => { let c = c .value() .to_i64() .context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(c)); } - oximeter::Datum::CumulativeF32(c) => { + oximeter_types::Datum::CumulativeF32(c) => { self.as_double_mut()?.push(Some(f64::from(c.value()))) } - oximeter::Datum::CumulativeF64(c) => { + oximeter_types::Datum::CumulativeF64(c) => { self.as_double_mut()?.push(Some(c.value())) } - oximeter::Datum::HistogramI8(h) => self + oximeter_types::Datum::HistogramI8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU8(h) => self + oximeter_types::Datum::HistogramU8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI16(h) => self + oximeter_types::Datum::HistogramI16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU16(h) => self + oximeter_types::Datum::HistogramU16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI32(h) => self + oximeter_types::Datum::HistogramI32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU32(h) => self + oximeter_types::Datum::HistogramU32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI64(h) => self + oximeter_types::Datum::HistogramI64(h) => self .as_integer_distribution_mut()? 
.push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU64(h) => self + oximeter_types::Datum::HistogramU64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::try_from(h)?)), - oximeter::Datum::HistogramF32(h) => self + oximeter_types::Datum::HistogramF32(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramF64(h) => self + oximeter_types::Datum::HistogramF64(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::Missing(missing) => { + oximeter_types::Datum::Missing(missing) => { self.push_missing(missing.datum_type())? } } @@ -1216,7 +1341,7 @@ impl ValueArray { fn push_diff_from_last_to_datum( &mut self, last_datum: &Option, - new_datum: &oximeter::Datum, + new_datum: &oximeter_types::Datum, data_type: DataType, ) -> Result<(), Error> { match (last_datum.as_ref(), new_datum.is_missing()) { @@ -1253,49 +1378,49 @@ impl ValueArray { match (last_datum, new_datum) { ( CumulativeDatum::Integer(last), - oximeter::Datum::I8(new), + oximeter_types::Datum::I8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U8(new), + oximeter_types::Datum::U8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I16(new), + oximeter_types::Datum::I16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U16(new), + oximeter_types::Datum::U16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I32(new), + oximeter_types::Datum::I32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U32(new), + oximeter_types::Datum::U32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I64(new), + oximeter_types::Datum::I64(new), ) => { let diff = new .checked_sub(*last) @@ -1304,7 +1429,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::U64(new), + oximeter_types::Datum::U64(new), ) => { let new = new .to_i64() @@ -1316,20 +1441,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::F32(new), + oximeter_types::Datum::F32(new), ) => { self.as_double_mut()? .push(Some(f64::from(*new) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::F64(new), + oximeter_types::Datum::F64(new), ) => { self.as_double_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeI64(new), + oximeter_types::Datum::CumulativeI64(new), ) => { let new = new.value(); let diff = new @@ -1339,7 +1464,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeU64(new), + oximeter_types::Datum::CumulativeU64(new), ) => { let new = new .value() @@ -1352,20 +1477,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF32(new), + oximeter_types::Datum::CumulativeF32(new), ) => { self.as_double_mut()? 
.push(Some(f64::from(new.value()) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF64(new), + oximeter_types::Datum::CumulativeF64(new), ) => { self.as_double_mut()?.push(Some(new.value() - last)); } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI8(new), + oximeter_types::Datum::HistogramI8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1373,7 +1498,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU8(new), + oximeter_types::Datum::HistogramU8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1381,7 +1506,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI16(new), + oximeter_types::Datum::HistogramI16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1389,7 +1514,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU16(new), + oximeter_types::Datum::HistogramU16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1397,7 +1522,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI32(new), + oximeter_types::Datum::HistogramI32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1405,7 +1530,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU32(new), + oximeter_types::Datum::HistogramU32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1413,7 +1538,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI64(new), + oximeter_types::Datum::HistogramI64(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1421,7 +1546,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU64(new), + oximeter_types::Datum::HistogramU64(new), ) => { let new = Distribution::try_from(new)?; self.as_integer_distribution_mut()? @@ -1429,7 +1554,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF32(new), + oximeter_types::Datum::HistogramF32(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1437,7 +1562,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF64(new), + oximeter_types::Datum::HistogramF64(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1486,8 +1611,8 @@ impl ValueArray { } } - // Swap the value in self with other, asserting they're the same type. - pub(crate) fn swap(&mut self, mut values: ValueArray) { + /// Swap the value in self with other, asserting they're the same type. + pub fn swap(&mut self, mut values: ValueArray) { use std::mem::swap; match (self, &mut values) { (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), @@ -1733,8 +1858,10 @@ where macro_rules! 
i64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(i64::from).collect(), @@ -1750,8 +1877,10 @@ macro_rules! i64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::IntegerDistribution(hist.into()) } } @@ -1766,10 +1895,10 @@ i64_dist_from!(i32); i64_dist_from!(u32); i64_dist_from!(i64); -impl TryFrom<&oximeter::histogram::Histogram> for Distribution { +impl TryFrom<&oximeter_types::histogram::Histogram> for Distribution { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { let (bins, counts) = hist.bins_and_counts(); let bins = bins @@ -1791,10 +1920,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for Distribution { } } -impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { +impl TryFrom<&oximeter_types::histogram::Histogram> for CumulativeDatum { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { hist.try_into().map(CumulativeDatum::IntegerDistribution) } @@ -1802,8 +1931,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { macro_rules! f64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(f64::from).collect(), @@ -1819,8 +1950,10 @@ macro_rules! 
f64_dist_from {
     }
 
-        impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum {
-            fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self {
+        impl From<&oximeter_types::histogram::Histogram<$t>>
+            for CumulativeDatum
+        {
+            fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self {
                 CumulativeDatum::DoubleDistribution(hist.into())
             }
         }
@@ -1833,9 +1966,9 @@ f64_dist_from!(f64);
 
 #[cfg(test)]
 mod tests {
     use super::{Distribution, MetricType, Points, Values};
-    use crate::oxql::point::{DataType, ValueArray};
+    use crate::point::{push_concrete_values, DataType, Datum, ValueArray};
     use chrono::{DateTime, Utc};
-    use oximeter::{
+    use oximeter_types::{
         histogram::Record, types::Cumulative, Measurement, Quantile,
     };
     use std::time::Duration;
@@ -1939,12 +2072,12 @@ mod tests {
         let now = Utc::now();
         let current1 = now + Duration::from_secs(1);
         let mut hist1 =
-            oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap();
+            oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap();
         hist1.sample(1).unwrap();
         hist1.set_start_time(current1);
         let current2 = now + Duration::from_secs(2);
         let mut hist2 =
-            oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap();
+            oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap();
         hist2.sample(5).unwrap();
         hist2.sample(10).unwrap();
         hist2.sample(15).unwrap();
@@ -2273,4 +2406,176 @@ mod tests {
         .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution])
         .is_err());
     }
+
+    #[test]
+    fn test_push_concrete_values() {
+        let mut points = Points::with_capacity(
+            2,
+            [DataType::Integer, DataType::Double].into_iter(),
+            [MetricType::Gauge, MetricType::Gauge].into_iter(),
+        )
+        .unwrap();
+
+        // Push a concrete value for the integer dimension
+        let from_ints = vec![Values {
+            values: ValueArray::Integer(vec![Some(1)]),
+            metric_type: MetricType::Gauge,
+        }];
+        push_concrete_values(&mut points.values[..1], &from_ints, 0);
+
+        // And another for the double dimension.
+        let from_doubles = vec![Values {
+            values: ValueArray::Double(vec![Some(2.0)]),
+            metric_type: MetricType::Gauge,
+        }];
+        push_concrete_values(&mut points.values[1..], &from_doubles, 0);
+
+        assert_eq!(
+            points.dimensionality(),
+            2,
+            "Points should have 2 dimensions",
+        );
+        let ints = points.values[0].values.as_integer().unwrap();
+        assert_eq!(
+            ints.len(),
+            1,
+            "Should have pushed one point in the first dimension"
+        );
+        assert_eq!(
+            ints[0],
+            Some(1),
+            "Should have pushed 1 onto the first dimension"
+        );
+        let doubles = points.values[1].values.as_double().unwrap();
+        assert_eq!(
+            doubles.len(),
+            1,
+            "Should have pushed one point in the second dimension"
+        );
+        assert_eq!(
+            doubles[0],
+            Some(2.0),
+            "Should have pushed 2.0 onto the second dimension"
+        );
+    }
+
+    #[test]
+    fn test_join_point_arrays() {
+        let now = Utc::now();
+
+        // Create a set of integer points to join with.
+        //
+        // This has three timestamps, two of which match the double points
+        // below that are merged in.
+        let int_points = Points {
+            start_times: None,
+            timestamps: vec![
+                now - Duration::from_secs(3),
+                now - Duration::from_secs(2),
+                now,
+            ],
+            values: vec![Values {
+                values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]),
+                metric_type: MetricType::Gauge,
+            }],
+        };
+
+        // Create an additional set of double points.
+        //
+        // This also has three timepoints, two of which match the integer
+        // points above, and one of which does not.
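+        //
+        // As a concrete timeline (an illustrative aside): the integer points
+        // cover {now - 3s, now - 2s, now} and the double points cover
+        // {now - 3s, now - 1s, now}, so an inner join should keep exactly
+        // {now - 3s, now}.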
+ let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = int_points.inner_join(&double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. 
+ assert!(points.next().is_none(), "There should be no more points"); + } } diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/oxql-types/src/table.rs similarity index 75% rename from oximeter/db/src/oxql/table.rs rename to oximeter/oxql-types/src/table.rs index 2cd141d2faa..f37992942fd 100644 --- a/oximeter/db/src/oxql/table.rs +++ b/oximeter/oxql-types/src/table.rs @@ -6,14 +6,16 @@ // Copyright 2024 Oxide Computer Company -use super::point::DataType; -use super::point::MetricType; -use super::point::Points; -use super::query::Alignment; -use super::Error; -use crate::TimeseriesKey; +use crate::point::DataType; +use crate::point::MetricType; +use crate::point::Points; +use crate::point::ValueArray; +use crate::point::Values; +use crate::Alignment; +use anyhow::Error; use highway::HighwayHasher; -use oximeter::FieldValue; +use oximeter_types::schema::TimeseriesKey; +use oximeter_types::FieldValue; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -67,10 +69,20 @@ impl Timeseries { hasher.finish() } + /// Return the alignment of this timeseries, if any. + pub fn alignment(&self) -> Option { + self.alignment + } + + /// Set the alignment of this timeseries. + pub fn set_alignment(&mut self, alignment: Alignment) { + self.alignment = Some(alignment); + } + /// Return a copy of the timeseries, keeping only the provided fields. /// /// An error is returned if the timeseries does not contain those fields. - pub(crate) fn copy_with_fields( + pub fn copy_with_fields( &self, kept_fields: &[&str], ) -> Result { @@ -88,6 +100,20 @@ impl Timeseries { }) } + /// Return a copy of the timeseries, keeping only the provided points. + /// + /// Returns `None` if `kept_points` is empty. + pub fn copy_with_points(&self, kept_points: Points) -> Option { + if kept_points.is_empty() { + return None; + } + Some(Self { + fields: self.fields.clone(), + points: kept_points, + alignment: self.alignment, + }) + } + // Return `true` if the schema in `other` matches that of `self`. fn matches_schema(&self, other: &Timeseries) -> bool { if self.fields.len() != other.fields.len() { @@ -125,7 +151,7 @@ impl Timeseries { /// This returns an error if the points cannot be so cast, or the /// dimensionality of the types requested differs from the dimensionality of /// the points themselves. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + pub fn cast(&self, types: &[DataType]) -> Result { let fields = self.fields.clone(); Ok(Self { fields, @@ -133,6 +159,49 @@ impl Timeseries { alignment: self.alignment, }) } + + /// Return a new timeseries, with the points limited to the provided range. + pub fn limit(&self, start: usize, end: usize) -> Self { + let input_points = &self.points; + + // Slice the various data arrays. 
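+        // The range is half-open, `[start, end)`, following Rust slice
+        // indexing; for example (illustrative), `timeseries.limit(0, 10)`
+        // keeps the first ten points. Fields and alignment carry over
+        // unchanged below.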
+ let start_times = + input_points.start_times().map(|s| s[start..end].to_vec()); + let timestamps = input_points.timestamps()[start..end].to_vec(); + let values = input_points + .values + .iter() + .map(|vals| { + let values = match &vals.values { + ValueArray::Integer(inner) => { + ValueArray::Integer(inner[start..end].to_vec()) + } + ValueArray::Double(inner) => { + ValueArray::Double(inner[start..end].to_vec()) + } + ValueArray::Boolean(inner) => { + ValueArray::Boolean(inner[start..end].to_vec()) + } + ValueArray::String(inner) => { + ValueArray::String(inner[start..end].to_vec()) + } + ValueArray::IntegerDistribution(inner) => { + ValueArray::IntegerDistribution( + inner[start..end].to_vec(), + ) + } + ValueArray::DoubleDistribution(inner) => { + ValueArray::DoubleDistribution( + inner[start..end].to_vec(), + ) + } + }; + Values { values, metric_type: vals.metric_type } + }) + .collect(); + let points = Points::new(start_times, timestamps, values); + Self { fields: self.fields.clone(), points, alignment: self.alignment } + } } /// A table represents one or more timeseries with the same schema. @@ -146,7 +215,7 @@ pub struct Table { // // This starts as the name of the timeseries schema the data is derived // from, but can be modified as operations are done. - pub(super) name: String, + pub name: String, // The set of timeseries in the table, ordered by key. timeseries: BTreeMap, } diff --git a/oximeter/schema/Cargo.toml b/oximeter/schema/Cargo.toml new file mode 100644 index 00000000000..fe2e28705a2 --- /dev/null +++ b/oximeter/schema/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "oximeter-schema" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +clap.workspace = true +heck.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +prettyplease.workspace = true +proc-macro2.workspace = true +quote.workspace = true +schemars.workspace = true +serde.workspace = true +slog-error-chain.workspace = true +syn.workspace = true +toml.workspace = true diff --git a/oximeter/oximeter/src/bin/oximeter-schema.rs b/oximeter/schema/src/bin/oximeter-schema.rs similarity index 93% rename from oximeter/oximeter/src/bin/oximeter-schema.rs rename to oximeter/schema/src/bin/oximeter-schema.rs index 14fb31b1e8c..5595a28639d 100644 --- a/oximeter/oximeter/src/bin/oximeter-schema.rs +++ b/oximeter/schema/src/bin/oximeter-schema.rs @@ -9,7 +9,7 @@ use anyhow::Context as _; use clap::Parser; use clap::Subcommand; -use oximeter::schema::ir::TimeseriesDefinition; +use oximeter_schema::ir::TimeseriesDefinition; use std::num::NonZeroU8; use std::path::PathBuf; @@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> { println!("{def:#?}"); } Cmd::Schema { timeseries, version } => { - let schema = oximeter_impl::schema::ir::load_schema(&contents)?; + let schema = oximeter_schema::ir::load_schema(&contents)?; match (timeseries, version) { (None, None) => { for each in schema.into_iter() { @@ -87,7 +87,7 @@ fn main() -> anyhow::Result<()> { } } Cmd::Emit => { - let code = oximeter::schema::codegen::use_timeseries(&contents)?; + let code = oximeter_schema::codegen::use_timeseries(&contents)?; let formatted = prettyplease::unparse(&syn::parse_file(&format!("{code}"))?); println!("{formatted}"); diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/schema/src/codegen.rs similarity index 73% rename from oximeter/impl/src/schema/codegen.rs rename to oximeter/schema/src/codegen.rs 
index 4778cf49704..1e6e352c155 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/schema/src/codegen.rs @@ -6,18 +6,18 @@ //! Generate Rust types and code from oximeter schema definitions. -use crate::schema::ir::find_schema_version; -use crate::schema::ir::load_schema; -use crate::schema::AuthzScope; -use crate::schema::FieldSource; -use crate::schema::Units; -use crate::DatumType; -use crate::FieldSchema; -use crate::FieldType; -use crate::MetricsError; -use crate::TimeseriesSchema; +use crate::ir::find_schema_version; +use crate::ir::load_schema; use chrono::prelude::DateTime; use chrono::prelude::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use proc_macro2::TokenStream; use quote::quote; @@ -34,7 +34,7 @@ pub fn use_timeseries(contents: &str) -> Result { let latest = find_schema_version(schema.iter().cloned(), None); let mod_name = quote::format_ident!("{}", latest[0].target_name()); let types = emit_schema_types(latest); - let func = emit_schema_function(schema.into_iter()); + let func = emit_schema_function(schema.iter()); Ok(quote! { pub mod #mod_name { #types @@ -43,9 +43,10 @@ pub fn use_timeseries(contents: &str) -> Result { }) } -fn emit_schema_function( - list: impl Iterator, +fn emit_schema_function<'a>( + list: impl Iterator, ) -> TokenStream { + let list = list.map(quote_timeseries_schema); quote! { pub fn timeseries_schema() -> Vec<::oximeter::schema::TimeseriesSchema> { vec![ @@ -310,66 +311,63 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { // This is used so that we can emit a function that will return the same data as // we parse from the TOML file with the timeseries definition, as a way to // export the definitions without needing that actual file at runtime. -impl quote::ToTokens for DatumType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, - DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, - DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, - DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, - DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, - DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, - DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, - DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, - DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, - DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, - DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, - DatumType::String => quote! { ::oximeter::DatumType::String }, - DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, - DatumType::CumulativeI64 => { - quote! { ::oximeter::DatumType::CumulativeI64 } - } - DatumType::CumulativeU64 => { - quote! { ::oximeter::DatumType::CumulativeU64 } - } - DatumType::CumulativeF32 => { - quote! { ::oximeter::DatumType::CumulativeF32 } - } - DatumType::CumulativeF64 => { - quote! { ::oximeter::DatumType::CumulativeF64 } - } - DatumType::HistogramI8 => { - quote! { ::oximeter::DatumType::HistogramI8 } - } - DatumType::HistogramU8 => { - quote! { ::oximeter::DatumType::HistogramU8 } - } - DatumType::HistogramI16 => { - quote! { ::oximeter::DatumType::HistogramI16 } - } - DatumType::HistogramU16 => { - quote! 
{ ::oximeter::DatumType::HistogramU16 } - } - DatumType::HistogramI32 => { - quote! { ::oximeter::DatumType::HistogramI32 } - } - DatumType::HistogramU32 => { - quote! { ::oximeter::DatumType::HistogramU32 } - } - DatumType::HistogramI64 => { - quote! { ::oximeter::DatumType::HistogramI64 } - } - DatumType::HistogramU64 => { - quote! { ::oximeter::DatumType::HistogramU64 } - } - DatumType::HistogramF32 => { - quote! { ::oximeter::DatumType::HistogramF32 } - } - DatumType::HistogramF64 => { - quote! { ::oximeter::DatumType::HistogramF64 } - } - }; - toks.to_tokens(tokens); +fn quote_datum_type(datum_type: DatumType) -> TokenStream { + match datum_type { + DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, + DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, + DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, + DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, + DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, + DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, + DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, + DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, + DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, + DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, + DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, + DatumType::String => quote! { ::oximeter::DatumType::String }, + DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, + DatumType::CumulativeI64 => { + quote! { ::oximeter::DatumType::CumulativeI64 } + } + DatumType::CumulativeU64 => { + quote! { ::oximeter::DatumType::CumulativeU64 } + } + DatumType::CumulativeF32 => { + quote! { ::oximeter::DatumType::CumulativeF32 } + } + DatumType::CumulativeF64 => { + quote! { ::oximeter::DatumType::CumulativeF64 } + } + DatumType::HistogramI8 => { + quote! { ::oximeter::DatumType::HistogramI8 } + } + DatumType::HistogramU8 => { + quote! { ::oximeter::DatumType::HistogramU8 } + } + DatumType::HistogramI16 => { + quote! { ::oximeter::DatumType::HistogramI16 } + } + DatumType::HistogramU16 => { + quote! { ::oximeter::DatumType::HistogramU16 } + } + DatumType::HistogramI32 => { + quote! { ::oximeter::DatumType::HistogramI32 } + } + DatumType::HistogramU32 => { + quote! { ::oximeter::DatumType::HistogramU32 } + } + DatumType::HistogramI64 => { + quote! { ::oximeter::DatumType::HistogramI64 } + } + DatumType::HistogramU64 => { + quote! { ::oximeter::DatumType::HistogramU64 } + } + DatumType::HistogramF32 => { + quote! { ::oximeter::DatumType::HistogramF32 } + } + DatumType::HistogramF64 => { + quote! { ::oximeter::DatumType::HistogramF64 } + } } } @@ -452,55 +450,46 @@ fn emit_rust_type_for_field(field_type: FieldType) -> TokenStream { } } -impl quote::ToTokens for FieldSource { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldSource::Target => { - quote! { ::oximeter::schema::FieldSource::Target } - } - FieldSource::Metric => { - quote! { ::oximeter::schema::FieldSource::Metric } - } - }; - toks.to_tokens(tokens); +fn quote_field_source(source: FieldSource) -> TokenStream { + match source { + FieldSource::Target => { + quote! { ::oximeter::schema::FieldSource::Target } + } + FieldSource::Metric => { + quote! { ::oximeter::schema::FieldSource::Metric } + } } } -impl quote::ToTokens for FieldType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldType::String => quote! { ::oximeter::FieldType::String }, - FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, - FieldType::U8 => quote! 
{ ::oximeter::FieldType::U8 }, - FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, - FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, - FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, - FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, - FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, - FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, - FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, - FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, - FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, - }; - toks.to_tokens(tokens); +fn quote_field_type(field_type: FieldType) -> TokenStream { + match field_type { + FieldType::String => quote! { ::oximeter::FieldType::String }, + FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, + FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, + FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, + FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, + FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, + FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, + FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, + FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, + FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, + FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, + FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, } } -impl quote::ToTokens for AuthzScope { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - AuthzScope::Fleet => { - quote! { ::oximeter::schema::AuthzScope::Fleet } - } - AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, - AuthzScope::Project => { - quote! { ::oximeter::schema::AuthzScope::Project } - } - AuthzScope::ViewableToAll => { - quote! { ::oximeter::schema::AuthzScope::ViewableToAll } - } - }; - toks.to_tokens(tokens); +fn quote_authz_scope(authz_scope: AuthzScope) -> TokenStream { + match authz_scope { + AuthzScope::Fleet => { + quote! { ::oximeter::schema::AuthzScope::Fleet } + } + AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, + AuthzScope::Project => { + quote! { ::oximeter::schema::AuthzScope::Project } + } + AuthzScope::ViewableToAll => { + quote! { ::oximeter::schema::AuthzScope::ViewableToAll } + } } } @@ -512,85 +501,80 @@ fn quote_creation_time(created: DateTime) -> TokenStream { } } -impl quote::ToTokens for Units { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - Units::None => quote! { ::oximeter::schema::Units::None }, - Units::Count => quote! { ::oximeter::schema::Units::Count }, - Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, - Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, - Units::Nanoseconds => { - quote! { ::oximeter::schema::Units::Nanoseconds } - } - Units::Amps => quote! { ::oximeter::schema::Units::Amps }, - Units::Volts => quote! { ::oximeter::schema::Units::Volts }, - Units::DegreesCelcius => { - quote! { ::oximeter::schema::Units::DegreesCelcius } - } - Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, - }; - toks.to_tokens(tokens); +fn quote_units(units: Units) -> TokenStream { + match units { + Units::None => quote! { ::oximeter::schema::Units::None }, + Units::Count => quote! { ::oximeter::schema::Units::Count }, + Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, + Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, + Units::Nanoseconds => { + quote! 
{ ::oximeter::schema::Units::Nanoseconds } + } + Units::Amps => quote! { ::oximeter::schema::Units::Amps }, + Units::Volts => quote! { ::oximeter::schema::Units::Volts }, + Units::Watts => quote! { ::oximeter::schema::Units::Watts }, + Units::DegreesCelsius => { + quote! { ::oximeter::schema::Units::DegreesCelsius } + } + Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, } } -impl quote::ToTokens for FieldSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let name = self.name.as_str(); - let field_type = self.field_type; - let source = self.source; - let description = self.description.as_str(); - let toks = quote! { - ::oximeter::FieldSchema { - name: String::from(#name), - field_type: #field_type, - source: #source, - description: String::from(#description), - } - }; - toks.to_tokens(tokens); +fn quote_field_schema(field_schema: &FieldSchema) -> TokenStream { + let name = field_schema.name.as_str(); + let field_type = quote_field_type(field_schema.field_type); + let source = quote_field_source(field_schema.source); + let description = field_schema.description.as_str(); + quote! { + ::oximeter::FieldSchema { + name: String::from(#name), + field_type: #field_type, + source: #source, + description: String::from(#description), + } } } -impl quote::ToTokens for TimeseriesSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let field_schema = &self.field_schema; - let timeseries_name = self.timeseries_name.to_string(); - let target_description = self.description.target.as_str(); - let metric_description = self.description.metric.as_str(); - let authz_scope = self.authz_scope; - let units = self.units; - let datum_type = self.datum_type; - let ver = self.version.get(); - let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; - let created = quote_creation_time(self.created); - let toks = quote! { - ::oximeter::schema::TimeseriesSchema { - timeseries_name: - <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( - #timeseries_name - ).unwrap(), - description: ::oximeter::schema::TimeseriesDescription { - target: String::from(#target_description), - metric: String::from(#metric_description), - }, - authz_scope: #authz_scope, - units: #units, - field_schema: ::std::collections::BTreeSet::from([ - #(#field_schema),* - ]), - datum_type: #datum_type, - version: #version, - created: #created, - } - }; - toks.to_tokens(tokens); +fn quote_timeseries_schema( + timeseries_schema: &TimeseriesSchema, +) -> TokenStream { + let field_schema = + timeseries_schema.field_schema.iter().map(quote_field_schema); + let timeseries_name = timeseries_schema.timeseries_name.to_string(); + let target_description = timeseries_schema.description.target.as_str(); + let metric_description = timeseries_schema.description.metric.as_str(); + let authz_scope = quote_authz_scope(timeseries_schema.authz_scope); + let units = quote_units(timeseries_schema.units); + let datum_type = quote_datum_type(timeseries_schema.datum_type); + let ver = timeseries_schema.version.get(); + let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; + let created = quote_creation_time(timeseries_schema.created); + quote! 
{ + ::oximeter::schema::TimeseriesSchema { + timeseries_name: + <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( + #timeseries_name + ).unwrap(), + description: ::oximeter::schema::TimeseriesDescription { + target: String::from(#target_description), + metric: String::from(#metric_description), + }, + authz_scope: #authz_scope, + units: #units, + field_schema: ::std::collections::BTreeSet::from([ + #(#field_schema),* + ]), + datum_type: #datum_type, + version: #version, + created: #created, + } } } #[cfg(test)] mod tests { use super::*; - use crate::schema::TimeseriesDescription; + use oximeter_types::TimeseriesDescription; use std::{collections::BTreeSet, num::NonZeroU8}; #[test] diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/schema/src/ir.rs similarity index 99% rename from oximeter/impl/src/schema/ir.rs rename to oximeter/schema/src/ir.rs index f7a209294f2..370236000ac 100644 --- a/oximeter/impl/src/schema/ir.rs +++ b/oximeter/schema/src/ir.rs @@ -11,17 +11,17 @@ //! inspected or used to generate code that contains the equivalent Rust types //! and trait implementations. -use crate::schema::AuthzScope; -use crate::schema::DatumType; -use crate::schema::FieldSource; -use crate::schema::FieldType; -use crate::schema::TimeseriesDescription; -use crate::schema::Units; -use crate::FieldSchema; -use crate::MetricsError; -use crate::TimeseriesName; -use crate::TimeseriesSchema; use chrono::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesDescription; +use oximeter_types::TimeseriesName; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use serde::Deserialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; diff --git a/oximeter/schema/src/lib.rs b/oximeter/schema/src/lib.rs new file mode 100644 index 00000000000..b1ce73a940b --- /dev/null +++ b/oximeter/schema/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Tools for working with schemas for fields and timeseries. +//! +//! The actual schema type definitions are in [`oximeter_types::schema`]. + +pub mod codegen; +pub mod ir; diff --git a/oximeter/test-utils/Cargo.toml b/oximeter/test-utils/Cargo.toml new file mode 100644 index 00000000000..f463e74aca5 --- /dev/null +++ b/oximeter/test-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "oximeter-test-utils" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +omicron-workspace-hack.workspace = true +oximeter-macro-impl.workspace = true +oximeter-types.workspace = true +uuid.workspace = true diff --git a/oximeter/test-utils/src/lib.rs b/oximeter/test-utils/src/lib.rs new file mode 100644 index 00000000000..04c49add65c --- /dev/null +++ b/oximeter/test-utils/src/lib.rs @@ -0,0 +1,295 @@ +// Copyright 2024 Oxide Computer Company + +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for testing the oximeter crate. + +// Export the current crate as `oximeter`. 
The macros defined in `oximeter-macro-impl` generate +// code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's +// fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. +// +// See https://github.com/rust-lang/rust/pull/55275 for the PR introducing this fix, which links to +// lots of related issues and discussion. +extern crate self as oximeter; + +use oximeter_macro_impl::{Metric, Target}; +use oximeter_types::histogram; +use oximeter_types::histogram::{Histogram, Record}; +use oximeter_types::traits; +use oximeter_types::types::{ + Cumulative, Datum, DatumType, FieldType, FieldValue, Measurement, Sample, +}; +use oximeter_types::{Metric, Target}; +use uuid::Uuid; + +#[derive(Target)] +pub struct TestTarget { + pub name1: String, + pub name2: String, + pub num: i64, +} + +impl Default for TestTarget { + fn default() -> Self { + TestTarget { + name1: "first_name".into(), + name2: "second_name".into(), + num: 0, + } + } +} + +#[derive(Metric)] +pub struct TestMetric { + pub id: Uuid, + pub good: bool, + pub datum: i64, +} + +#[derive(Metric)] +pub struct TestCumulativeMetric { + pub id: Uuid, + pub good: bool, + pub datum: Cumulative<i64>, +} + +#[derive(Metric)] +pub struct TestHistogram { + pub id: Uuid, + pub good: bool, + pub datum: Histogram<f64>, +} + +const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); + +pub fn make_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new(&target, &metric).unwrap() +} + +pub fn make_missing_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new_missing(&target, &metric).unwrap() +} + +pub fn make_hist_sample() -> Sample { + let target = TestTarget::default(); + let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); + hist.sample(1.0).unwrap(); + hist.sample(2.0).unwrap(); + hist.sample(6.0).unwrap(); + let metric = TestHistogram { id: ID, good: true, datum: hist }; + Sample::new(&target, &metric).unwrap() +} + +/// A target identifying a single virtual machine instance +#[derive(Debug, Clone, Copy, oximeter::Target)] +pub struct VirtualMachine { + pub project_id: Uuid, + pub instance_id: Uuid, +} + +/// A metric recording the total time a vCPU is busy, by its ID +#[derive(Debug, Clone, Copy, oximeter::Metric)] +pub struct CpuBusy { + cpu_id: i64, + datum: Cumulative<f64>, +} + +pub fn generate_test_samples( + n_projects: usize, + n_instances: usize, + n_cpus: usize, + n_samples: usize, +) -> Vec<Sample> { + let n_timeseries = n_projects * n_instances * n_cpus; + let mut samples = Vec::with_capacity(n_samples * n_timeseries); + for _ in 0..n_projects { + let project_id = Uuid::new_v4(); + for _ in 0..n_instances { + let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; + for cpu in 0..n_cpus { + for sample in 0..n_samples { + let cpu_busy = CpuBusy { + cpu_id: cpu as _, + datum: Cumulative::new(sample as f64), + }; + let sample = Sample::new(&vm, &cpu_busy).unwrap(); + samples.push(sample); + } + } + } + } + samples +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use oximeter_types::{ + schema::{ + default_schema_version, AuthzScope, FieldSchema, FieldSource, + TimeseriesSchema, Units, + }, + TimeseriesName, + }; + + use super::*; + + #[test] + fn test_gen_test_samples() { + let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); + let samples = + generate_test_samples(n_projects,
n_instances, n_cpus, n_samples); + assert_eq!( + samples.len(), + n_projects * n_instances * n_cpus * n_samples + ); + } + + #[test] + fn test_sample_struct() { + let t = TestTarget::default(); + let m = TestMetric { id: Uuid::new_v4(), good: true, datum: 1i64 }; + let sample = Sample::new(&t, &m).unwrap(); + assert_eq!( + sample.timeseries_name, + format!("{}:{}", t.name(), m.name()) + ); + assert!(sample.measurement.start_time().is_none()); + assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); + + let m = TestCumulativeMetric { + id: Uuid::new_v4(), + good: true, + datum: 1i64.into(), + }; + let sample = Sample::new(&t, &m).unwrap(); + assert!(sample.measurement.start_time().is_some()); + } + + #[derive(Target)] + struct MyTarget { + id: Uuid, + name: String, + } + + const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); + + impl Default for MyTarget { + fn default() -> Self { + Self { id: ID, name: String::from("name") } + } + } + + #[derive(Metric)] + struct MyMetric { + happy: bool, + datum: u64, + } + + impl Default for MyMetric { + fn default() -> Self { + Self { happy: true, datum: 0 } + } + } + + #[test] + fn test_timeseries_schema_from_parts() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + + assert_eq!(schema.timeseries_name, "my_target:my_metric"); + let f = schema.schema_for_field("id").unwrap(); + assert_eq!(f.name, "id"); + assert_eq!(f.field_type, FieldType::Uuid); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("name").unwrap(); + assert_eq!(f.name, "name"); + assert_eq!(f.field_type, FieldType::String); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("happy").unwrap(); + assert_eq!(f.name, "happy"); + assert_eq!(f.field_type, FieldType::Bool); + assert_eq!(f.source, FieldSource::Metric); + assert_eq!(schema.datum_type, DatumType::U64); + } + + #[test] + fn test_timeseries_schema_from_sample() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let sample = Sample::new(&target, &metric).unwrap(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + let schema_from_sample = TimeseriesSchema::from(&sample); + assert_eq!(schema, schema_from_sample); + } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. + // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. 
+ #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + field_type: FieldType::U64, + source: FieldSource::Target, + description: String::new(), + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + field_type: FieldType::U64, + source: FieldSource::Metric, + description: String::new(), + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + description: Default::default(), + field_schema, + datum_type, + version: default_schema_version(), + authz_scope: AuthzScope::Fleet, + units: Units::Count, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = TimeseriesSchema::from(&sample); + assert_eq!(derived_schema, expected_schema); + } +} diff --git a/oximeter/timeseries-macro/Cargo.toml b/oximeter/timeseries-macro/Cargo.toml index db591aed060..2fb8b8f3129 100644 --- a/oximeter/timeseries-macro/Cargo.toml +++ b/oximeter/timeseries-macro/Cargo.toml @@ -8,7 +8,8 @@ proc-macro = true [dependencies] omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true +oximeter-schema.workspace = true +oximeter-types.workspace = true proc-macro2.workspace = true quote.workspace = true syn.workspace = true diff --git a/oximeter/timeseries-macro/src/lib.rs b/oximeter/timeseries-macro/src/lib.rs index 317a8533a4a..12ec2cc417f 100644 --- a/oximeter/timeseries-macro/src/lib.rs +++ b/oximeter/timeseries-macro/src/lib.rs @@ -8,7 +8,7 @@ extern crate proc_macro; -use oximeter_impl::schema::SCHEMA_DIRECTORY; +use oximeter_types::schema::SCHEMA_DIRECTORY; /// Generate code to use the timeseries from one target. /// @@ -45,7 +45,7 @@ pub fn use_timeseries( .into(); } }; - match oximeter_impl::schema::codegen::use_timeseries(&contents) { + match oximeter_schema::codegen::use_timeseries(&contents) { Ok(toks) => { let path_ = path.display().to_string(); return quote::quote! 
{ @@ -59,9 +59,8 @@ pub fn use_timeseries( Err(e) => { let msg = format!( "Failed to generate timeseries types \ - from '{}': {:?}", + from '{}': {e}", path.display(), - e, ); return syn::Error::new(token.span(), msg) .into_compile_error() diff --git a/oximeter/impl/Cargo.toml b/oximeter/types/Cargo.toml similarity index 78% rename from oximeter/impl/Cargo.toml rename to oximeter/types/Cargo.toml index 91277d9d47c..6d6bbc07e6b 100644 --- a/oximeter/impl/Cargo.toml +++ b/oximeter/types/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "oximeter-impl" +name = "oximeter-types" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -11,22 +11,13 @@ workspace = true bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true float-ord.workspace = true -heck.workspace = true num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true -oximeter-macro-impl.workspace = true -prettyplease.workspace = true -proc-macro2.workspace = true -quote.workspace = true regex.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true -serde_json.workspace = true -slog-error-chain.workspace = true strum.workspace = true -syn.workspace = true -toml.workspace = true thiserror.workspace = true uuid.workspace = true @@ -34,6 +25,7 @@ uuid.workspace = true approx.workspace = true # For benchmark criterion.workspace = true +oximeter-macro-impl.workspace = true rand = { workspace = true, features = ["std_rng"] } rand_distr.workspace = true rstest.workspace = true diff --git a/oximeter/impl/benches/quantile.rs b/oximeter/types/benches/quantile.rs similarity index 97% rename from oximeter/impl/benches/quantile.rs rename to oximeter/types/benches/quantile.rs index 4540ba8f6a7..b88cb211e6a 100644 --- a/oximeter/impl/benches/quantile.rs +++ b/oximeter/types/benches/quantile.rs @@ -8,7 +8,7 @@ // Copyright 2024 Oxide Computer Company use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use oximeter_impl::Quantile; +use oximeter_types::Quantile; use rand_distr::{Distribution, Normal}; /// Emulates baseline code in a Python implementation of the P² diff --git a/oximeter/impl/src/histogram.rs b/oximeter/types/src/histogram.rs similarity index 95% rename from oximeter/impl/src/histogram.rs rename to oximeter/types/src/histogram.rs index 40df0a1b412..2507f2f5c60 100644 --- a/oximeter/impl/src/histogram.rs +++ b/oximeter/types/src/histogram.rs @@ -523,9 +523,9 @@ where /// Example /// ------- /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::Histogram; /// /// let hist = Histogram::with_bins(&[(0..10).into(), (10..100).into()]).unwrap(); @@ -922,9 +922,9 @@ where /// ------- /// /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::{Histogram, BinRange}; /// use std::ops::{RangeBounds, Bound}; /// @@ -1029,8 +1029,13 @@ where return Err(QuantizationError::InvalidSteps); } - // The highest power must be representable in the target type. 
- if self.checked_pow(hi.into()).is_none() { + // The highest power must be representable in the target type. Note that + // we have to convert to that target type _before_ doing this check. + let base = >::from(*self); + let Some(highest) = base.checked_pow(hi.into()) else { + return Err(QuantizationError::Overflow); + }; + if ::from(highest).is_none() { return Err(QuantizationError::Overflow); } @@ -1039,7 +1044,6 @@ where // // Note that we unwrap in a few places below, where we're sure the // narrowing conversion cannot fail, such as to a u32. - let base = >::from(*self); let lo = >::from(lo); let hi = >::from(hi); let count = ::from(count.get()) @@ -1057,7 +1061,6 @@ where let lo = base.pow(lo as _); let hi = base.pow(hi as _); let distance = hi - lo; - dbg!(distance, count); distance.is_multiple_of(&count) }) } @@ -1188,6 +1191,47 @@ where } } +pub trait Bits: Integer { + const BITS: u32; + fn next_power(self) -> Option<Self>; +} + +macro_rules! impl_bits { + ($type_:ty) => { + impl Bits for $type_ { + const BITS: u32 = Self::BITS; + + fn next_power(self) -> Option<Self> { + self.checked_mul(2) + } + } + }; +} + +impl_bits!(u8); +impl_bits!(u16); +impl_bits!(u32); +impl_bits!(u64); + +impl<T> Histogram<T> +where + T: Bits + HistogramSupport, +{ + /// Create a histogram with logarithmically spaced bins at each power of 2. + /// + /// This is only available for unsigned integer support types. + pub fn power_of_two() -> Self { + let mut bins = Vec::with_capacity(T::BITS as _); + let mut x = T::one(); + bins.push(x); + while let Some(next) = x.next_power() { + bins.push(next); + x = next; + } + Self::new(&bins).expect("Bits is statically known") + } +} + // Helper to ensure all values are comparable, i.e., not NaN. fn ensure_finite<T>(value: T) -> Result<(), HistogramError> where @@ -1767,4 +1811,46 @@ mod tests { HistogramError::EmptyBins )); } + + #[test] + fn test_log_linear_bins_does_not_overflow_wide_bin_type() { + let start: u16 = 3; + // 10u16 ** 10u16 overflows, but what we should be computing is 10u64 ** + // 10u16, which would not overflow. We need to compute whether it + // overflows in the _support_ type. + let stop = 10; + Histogram::<u64>::span_decades(start, stop).expect( + "expected not to overflow, since support type is wide enough", + ); + } + + #[test] + fn test_log_linear_bins_does_overflow_narrow_bin_type() { + // In this case, the start / stop powers _and_ their resulting bins are + // both representable as u16s and also u64s. But we're generating bins + // that are u8s, which the powers _do_ overflow.
+ let start: u16 = 1; + let stop: u16 = 4; + Histogram::<u32>::span_decades(start, stop).expect( + "expected not to overflow a u32, since support type is wide enough", + ); + Histogram::<u8>::span_decades(start, stop).expect_err( + "expected to overflow a u8, since support type is not wide enough", + ); + } + + #[test] + fn test_log_bins_u8() { + let (bins, _) = Histogram::<u8>::power_of_two().bins_and_counts(); + assert_eq!(bins, [0, 1, 2, 4, 8, 16, 32, 64, 128],); + } + + #[test] + fn test_log_bins_u64() { + let (bins, _) = Histogram::<u64>::power_of_two().bins_and_counts(); + assert_eq!(bins[0], 0); + for (i, bin) in bins.iter().skip(1).enumerate() { + assert_eq!(*bin, 1u64 << i); + } + } } diff --git a/oximeter/impl/src/lib.rs b/oximeter/types/src/lib.rs similarity index 92% rename from oximeter/impl/src/lib.rs rename to oximeter/types/src/lib.rs index 5acbeb94220..7a1a480f8d7 100644 --- a/oximeter/impl/src/lib.rs +++ b/oximeter/types/src/lib.rs @@ -4,8 +4,6 @@ // Copyright 2024 Oxide Computer Company -pub use oximeter_macro_impl::*; - // Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate // code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's // fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. @@ -17,15 +15,18 @@ extern crate self as oximeter; pub mod histogram; pub mod quantile; pub mod schema; -pub mod test_util; pub mod traits; pub mod types; pub use quantile::Quantile; pub use quantile::QuantileError; +pub use schema::AuthzScope; pub use schema::FieldSchema; +pub use schema::FieldSource; +pub use schema::TimeseriesDescription; pub use schema::TimeseriesName; pub use schema::TimeseriesSchema; +pub use schema::Units; pub use traits::Metric; pub use traits::Producer; pub use traits::Target; diff --git a/oximeter/impl/src/quantile.rs b/oximeter/types/src/quantile.rs similarity index 97% rename from oximeter/impl/src/quantile.rs rename to oximeter/types/src/quantile.rs index fafe9c9ecec..40777217e57 100644 --- a/oximeter/impl/src/quantile.rs +++ b/oximeter/types/src/quantile.rs @@ -78,9 +78,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::new(0.5).unwrap(); /// @@ -116,9 +116,9 @@ impl Quantile { /// /// # Examples /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::from_parts( /// 0.5, @@ -200,9 +200,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl.
- /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.5).unwrap(); /// for o in 1..=100 { @@ -243,9 +243,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.9).unwrap(); /// q.append(10).unwrap(); diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/types/src/schema.rs similarity index 75% rename from oximeter/impl/src/schema/mod.rs rename to oximeter/types/src/schema.rs index 250604d7be8..135c77462a7 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/types/src/schema.rs @@ -6,9 +6,6 @@ //! Tools for working with schema for fields and timeseries. -pub mod codegen; -pub mod ir; - use crate::types::DatumType; use crate::types::FieldType; use crate::types::MetricsError; @@ -31,6 +28,8 @@ use std::num::NonZeroU8; pub const SCHEMA_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../oximeter/schema"); +pub type TimeseriesKey = u64; + /// The name and type information for a field of a timeseries schema. #[derive( Clone, @@ -190,7 +189,8 @@ pub enum Units { Nanoseconds, Volts, Amps, - DegreesCelcius, + Watts, + DegreesCelsius, /// Rotations per minute. Rpm, } @@ -402,7 +402,6 @@ pub enum AuthzScope { mod tests { use super::*; use std::convert::TryFrom; - use uuid::Uuid; #[test] fn test_timeseries_name() { @@ -426,127 +425,6 @@ mod tests { assert!(TimeseriesName::try_from("x.a:b").is_err()); } - #[derive(Target)] - struct MyTarget { - id: Uuid, - name: String, - } - - const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); - - impl Default for MyTarget { - fn default() -> Self { - Self { id: ID, name: String::from("name") } - } - } - - #[derive(Metric)] - struct MyMetric { - happy: bool, - datum: u64, - } - - impl Default for MyMetric { - fn default() -> Self { - Self { happy: true, datum: 0 } - } - } - - #[test] - fn test_timeseries_schema_from_parts() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - - assert_eq!(schema.timeseries_name, "my_target:my_metric"); - let f = schema.schema_for_field("id").unwrap(); - assert_eq!(f.name, "id"); - assert_eq!(f.field_type, FieldType::Uuid); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("name").unwrap(); - assert_eq!(f.name, "name"); - assert_eq!(f.field_type, FieldType::String); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("happy").unwrap(); - assert_eq!(f.name, "happy"); - assert_eq!(f.field_type, FieldType::Bool); - assert_eq!(f.source, FieldSource::Metric); - assert_eq!(schema.datum_type, DatumType::U64); - } - - #[test] - fn test_timeseries_schema_from_sample() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let sample = Sample::new(&target, &metric).unwrap(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - let schema_from_sample = TimeseriesSchema::from(&sample); - assert_eq!(schema, schema_from_sample); - } - - // Test that we correctly order field across a target and metric. 
- // - // In an earlier commit, we switched from storing fields in an unordered Vec - // to using a BTree{Map,Set} to ensure ordering by name. However, the - // `TimeseriesSchema` type stored all its fields by chaining the sorted - // fields from the target and metric, without then sorting _across_ them. - // - // This was exacerbated by the error reporting, where we did in fact sort - // all fields across the target and metric, making it difficult to tell how - // the derived schema was different, if at all. - // - // This test generates a sample with a schema where the target and metric - // fields are sorted within them, but not across them. We check that the - // derived schema are actually equal, which means we've imposed that - // ordering when deriving the schema. - #[test] - fn test_schema_field_ordering_across_target_metric() { - let target_field = FieldSchema { - name: String::from("later"), - field_type: FieldType::U64, - source: FieldSource::Target, - description: String::new(), - }; - let metric_field = FieldSchema { - name: String::from("earlier"), - field_type: FieldType::U64, - source: FieldSource::Metric, - description: String::new(), - }; - let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); - let datum_type = DatumType::U64; - let field_schema = - [target_field.clone(), metric_field.clone()].into_iter().collect(); - let expected_schema = TimeseriesSchema { - timeseries_name, - description: Default::default(), - field_schema, - datum_type, - version: default_schema_version(), - authz_scope: AuthzScope::Fleet, - units: Units::Count, - created: Utc::now(), - }; - - #[derive(oximeter::Target)] - struct Foo { - later: u64, - } - #[derive(oximeter::Metric)] - struct Bar { - earlier: u64, - datum: u64, - } - - let target = Foo { later: 1 }; - let metric = Bar { earlier: 2, datum: 10 }; - let sample = Sample::new(&target, &metric).unwrap(); - let derived_schema = TimeseriesSchema::from(&sample); - assert_eq!(derived_schema, expected_schema); - } - #[test] fn test_field_schema_ordering() { let mut fields = BTreeSet::new(); diff --git a/oximeter/impl/src/traits.rs b/oximeter/types/src/traits.rs similarity index 96% rename from oximeter/impl/src/traits.rs rename to oximeter/types/src/traits.rs index 16baa4f6198..91ecca817de 100644 --- a/oximeter/impl/src/traits.rs +++ b/oximeter/types/src/traits.rs @@ -45,9 +45,9 @@ use std::ops::AddAssign; /// -------- /// /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{traits::Target, types::FieldType}; /// use uuid::Uuid; @@ -75,9 +75,9 @@ use std::ops::AddAssign; /// supported types. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
-/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(oximeter::Target)] /// struct Bad { @@ -160,9 +160,9 @@ pub trait Target { /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use chrono::Utc; /// use oximeter::Metric; @@ -185,9 +185,9 @@ pub trait Target { /// an unsupported type. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(Metric)] /// pub struct BadType { @@ -364,9 +364,9 @@ pub use crate::histogram::HistogramSupport; /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{Datum, MetricsError, Metric, Producer, Target}; /// use oximeter::types::{Measurement, Sample, Cumulative}; @@ -464,6 +464,8 @@ pub trait Producer: Send + Sync + std::fmt::Debug + 'static { #[cfg(test)] mod tests { + use oximeter_macro_impl::{Metric, Target}; + use crate::types; use crate::{ Datum, DatumType, FieldType, FieldValue, Metric, MetricsError, diff --git a/oximeter/impl/src/types.rs b/oximeter/types/src/types.rs similarity index 97% rename from oximeter/impl/src/types.rs rename to oximeter/types/src/types.rs index 370557f7f77..60260e36490 100644 --- a/oximeter/impl/src/types.rs +++ b/oximeter/types/src/types.rs @@ -850,7 +850,7 @@ pub struct Sample { /// The version of the timeseries this sample belongs to // // TODO-cleanup: This should be removed once schema are tracked in CRDB. 
- #[serde(default = "::oximeter::schema::default_schema_version")] + #[serde(default = "crate::schema::default_schema_version")] pub timeseries_version: NonZeroU8, // Target name and fields @@ -1104,15 +1104,10 @@ mod tests { use super::Measurement; use super::MetricsError; use super::Sample; - use crate::test_util; - use crate::types; - use crate::Metric; - use crate::Target; use bytes::Bytes; use std::collections::BTreeMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; - use uuid::Uuid; #[test] fn test_cumulative_i64() { @@ -1176,31 +1171,6 @@ mod tests { assert!(measurement.timestamp() >= measurement.start_time().unwrap()); } - #[test] - fn test_sample_struct() { - let t = test_util::TestTarget::default(); - let m = test_util::TestMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64, - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert_eq!( - sample.timeseries_name, - format!("{}:{}", t.name(), m.name()) - ); - assert!(sample.measurement.start_time().is_none()); - assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); - - let m = test_util::TestCumulativeMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64.into(), - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert!(sample.measurement.start_time().is_some()); - } - #[rstest::rstest] #[case::as_string("some string", FieldValue::String("some string".into()))] #[case::as_i8("2", FieldValue::I8(2))] diff --git a/oximeter/impl/tests/fail/failures.rs b/oximeter/types/tests/fail/failures.rs similarity index 100% rename from oximeter/impl/tests/fail/failures.rs rename to oximeter/types/tests/fail/failures.rs diff --git a/oximeter/impl/tests/fail/failures.stderr b/oximeter/types/tests/fail/failures.stderr similarity index 100% rename from oximeter/impl/tests/fail/failures.stderr rename to oximeter/types/tests/fail/failures.stderr diff --git a/oximeter/impl/tests/test_compilation.rs b/oximeter/types/tests/test_compilation.rs similarity index 100% rename from oximeter/impl/tests/test_compilation.rs rename to oximeter/types/tests/test_compilation.rs diff --git a/package-manifest.toml b/package-manifest.toml index 2c682570508..cab3c1877eb 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -140,19 +140,24 @@ source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true source.paths = [ - { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, + { from = "smf/oximeter/{{clickhouse-topology}}/config.toml", to = "/var/svc/manifest/site/oximeter/config.toml" }, + { from = "smf/oximeter/manifest.xml", to = "/var/svc/manifest/site/oximeter/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true [package.clickhouse] +# This service runs a single-node ClickHouse server. service_name = "clickhouse" only_for_targets.image = "standard" source.type = "composite" source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", + # TODO: This package is solely for testing purposes. + # Remove once replicated clickhouse is up and running.
+ "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -166,19 +171,52 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse" }, { from = "smf/clickhouse/manifest.xml", to = "/var/svc/manifest/site/clickhouse/manifest.xml" }, { from = "smf/clickhouse/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse.sh" }, - { from = "smf/clickhouse/config_replica.xml", to = "/opt/oxide/clickhouse/config.d/config_replica.xml" }, +] +output.type = "zone" +output.intermediate_only = true +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" + +[package.clickhouse_server] +# This service runs a server for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_keeper service. +# One cannot be run without the other. +service_name = "clickhouse_server" +only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ + "clickhouse_server_svc.tar.gz", + "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" +] +output.type = "zone" + +[package.clickhouse_server_svc] +service_name = "clickhouse_server_svc" +only_for_targets.image = "standard" +source.type = "local" +source.paths = [ + { from = "out/clickhouse", to = "/opt/oxide/clickhouse_server" }, + { from = "smf/clickhouse_server/manifest.xml", to = "/var/svc/manifest/site/clickhouse_server/manifest.xml" }, + { from = "smf/clickhouse_server/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse_server.sh" }, + { from = "smf/clickhouse_server/config_replica.xml", to = "/opt/oxide/clickhouse_server/config.d/config_replica.xml" }, ] output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.clickhouse_keeper] +# This service runs a keeper for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_server service. +# One cannot be run without the other. service_name = "clickhouse_keeper" only_for_targets.image = "standard" source.type = "composite" source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -198,6 +236,18 @@ output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" +[package.omicron-clickhouse-admin] +service_name = "clickhouse-admin" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickhouse-admin"] +source.rust.release = true +source.paths = [ + { from = "smf/clickhouse-admin", to = "/var/svc/manifest/site/clickhouse-admin" }, +] +output.type = "zone" +output.intermediate_only = true + [package.cockroachdb] service_name = "cockroachdb" only_for_targets.image = "standard" @@ -578,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "b0f08e754f7c834d7ca05093b13a574863f500cff56210591ef4cc7eaf20159b" +source.sha256 = "c33915998894dd36a2d1078f7e13717aa20760924c30640d7647d4791dd5f2ee" output.type = "tarball" [package.mg-ddm] @@ -594,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "499962b57404626aff1ecd62d5045ba2ee06070d45f7cb2a8fc284e53eed17d6" +source.sha256 = "be9d657ec22a69468b18f2b4d48e55621538eade8b8d3e367a1d8d5cc686cfbe" output.type = "zone" output.intermediate_only = true @@ -609,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" +source.sha256 = "e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" output.type = "zone" output.intermediate_only = true @@ -620,8 +670,8 @@ output.intermediate_only = true service_name = "lldp" source.type = "prebuilt" source.repo = "lldp" -source.commit = "30e5d89fae9190c69258ca77d5d5a1acec064742" -source.sha256 = "f58bfd1b77748544b5b1a99a07e52bab8dc5673b9bd3a745ebbfdd614d492328" +source.commit = "188f0f6d4c066f1515bd707050407cedd790fcf1" +source.sha256 = "132d0760be5208f60b58bcaed98fa6384b09f41dd5febf51970f5cbf46138ecf" output.type = "zone" output.intermediate_only = true @@ -660,8 +710,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8293f28df659c070b48e13f87a51b836238b406e" -source.sha256 = "7400e4b0942b33af64a9aad1a429b0e2446e126f58a780328cf10eb46c63b7f8" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" output.type = "zone" output.intermediate_only = true @@ -687,8 +737,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8293f28df659c070b48e13f87a51b836238b406e" -source.sha256 = "68bf16452a3159529fb1bd11f43adfb002020d086e0f64f48bd766bf47843ae9" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "0e68ea8fbb609bbe2c643fc8cadc0197bd641006a323149159893bfd0d816805" output.type = "zone" output.intermediate_only = true @@ -707,8 +757,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "8293f28df659c070b48e13f87a51b836238b406e" -source.sha256 = "b7d6a1a20f302ded9c6e4bbba66b9432bec5edda593edfcdbb9429a95201655a" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "45484d6d8557a0656984d0e6db879589d841d43ab6a11116cb1da314b928a425" output.type = "zone" output.intermediate_only = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index b2b87030154..cd88345d0a0 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -265,12 +265,19 @@ async fn do_target( format!("failed to create directory {}", target_dir) })?; match subcommand { - TargetCommand::Create { image, machine, switch, rack_topology } => { + TargetCommand::Create { + image, + machine, + switch, + rack_topology, + clickhouse_topology, + } => { let target = KnownTarget::new( image.clone(), machine.clone(), switch.clone(), rack_topology.clone(), + clickhouse_topology.clone(), )?; let path = get_single_target(&target_dir, name).await?; diff --git a/package/src/lib.rs b/package/src/lib.rs index 2009de9dfea..b37c1774fda 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -68,6 +68,21 @@ pub enum TargetCommand { /// fail in a single-sled environment. `single-sled` relaxes this /// requirement. rack_topology: crate::target::RackTopology, + + #[clap( + short, + long, + default_value = Some("single-node"), + required = false + )] + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): Remove + // once single-node functionality is removed. + /// Specify whether clickhouse will be deployed as a replicated cluster + /// or single-node configuration. + /// + /// Replicated cluster configuration is an experimental feature to be + /// used only for testing. + clickhouse_topology: crate::target::ClickhouseTopology, }, /// List all existing targets List, diff --git a/package/src/target.rs b/package/src/target.rs index 589dba7870b..6a6cbd32d8b 100644 --- a/package/src/target.rs +++ b/package/src/target.rs @@ -62,6 +62,18 @@ pub enum RackTopology { SingleSled, } +/// Topology of the ClickHouse installation within the rack. +#[derive(Clone, Debug, strum::EnumString, strum::Display, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +#[clap(rename_all = "kebab-case")] +pub enum ClickhouseTopology { + /// Use configurations suitable for a replicated ClickHouse cluster deployment. + ReplicatedCluster, + + /// Use configurations suitable for a single-node ClickHouse deployment. + SingleNode, +} + /// A strongly-typed variant of [Target]. 
#[derive(Clone, Debug)] pub struct KnownTarget { @@ -69,6 +81,7 @@ pub struct KnownTarget { machine: Option<Machine>, switch: Option<Switch>, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, } impl KnownTarget { @@ -77,6 +90,7 @@ impl KnownTarget { machine: Option<Machine>, switch: Option<Switch>, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, ) -> Result<Self> { if matches!(image, Image::Trampoline) { if machine.is_some() { @@ -93,7 +107,7 @@ impl KnownTarget { bail!("'switch=asic' is only valid with 'machine=gimlet'"); } - Ok(Self { image, machine, switch, rack_topology }) + Ok(Self { image, machine, switch, rack_topology, clickhouse_topology }) } } @@ -104,6 +118,7 @@ impl Default for KnownTarget { machine: Some(Machine::NonGimlet), switch: Some(Switch::Stub), rack_topology: RackTopology::MultiSled, + clickhouse_topology: ClickhouseTopology::SingleNode, } } } @@ -119,6 +134,10 @@ impl From<KnownTarget> for Target { map.insert("switch".to_string(), switch.to_string()); } map.insert("rack-topology".to_string(), kt.rack_topology.to_string()); + map.insert( + "clickhouse-topology".to_string(), + kt.clickhouse_topology.to_string(), + ); Target(map) } } @@ -140,6 +159,7 @@ impl std::str::FromStr for KnownTarget { let mut machine = None; let mut switch = None; let mut rack_topology = None; + let mut clickhouse_topology = None; for (k, v) in target.0.into_iter() { match k.as_str() { @@ -155,6 +175,9 @@ "rack-topology" => { rack_topology = Some(v.parse()?); } + "clickhouse-topology" => { + clickhouse_topology = Some(v.parse()?); + } _ => { bail!( "Unknown target key {k}\nValid keys include: [{}]", @@ -173,6 +196,7 @@ machine, switch, rack_topology.unwrap_or(RackTopology::MultiSled), + clickhouse_topology.unwrap_or(ClickhouseTopology::SingleNode), ) } } diff --git a/schema/crdb/README.adoc b/schema/crdb/README.adoc index e017c013165..3567821ea66 100644 --- a/schema/crdb/README.adoc +++ b/schema/crdb/README.adoc @@ -126,19 +126,47 @@ same `NEW_VERSION`:**, then your `OLD_VERSION` has changed and so _your_ new version that came in from "main"). * Update the version in `dbinit.sql` to match the new `NEW_VERSION`. -=== General notes +=== Constraints on Schema Updates -CockroachDB's representation of the schema includes some opaque -internally-generated fields that are order dependent, like the names of -anonymous CHECK constraints. Our schema comparison tools intentionally ignore -these values. As a result, when performing schema changes, the order of new -tables and constraints should generally not be important. +==== Adding a new column without a default value [[add_column_constraint]] + +When adding a new non-nullable column to an existing table, that column must +contain a default value to back-fill any rows that may already exist in that +table. Without this default value, the schema upgrade might fail with +an error like `null value in column "..." violates not-null constraint`. +Unfortunately, it's possible that the schema upgrade might NOT fail with that +error, if no rows are present in the table when the schema is updated. This +results in an inconsistent state, where the schema upgrade might succeed on +some deployments but fail on others. + +If you'd like to add a column without a default value, and a `DEFAULT` value +makes sense as a one-time update, we recommend the following (sketched just +after this list): + +1. Adding the column with a `DEFAULT` value. +2. Dropping the `DEFAULT` constraint.
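As a rough sketch of both approaches, the statements below use a hypothetical `omicron.public.widget` table with a hypothetical `flavor` column (neither exists in the real schema; the names are placeholders only). Each statement would live in its own `upN.sql` file within a new schema-update directory, and each is written to be idempotent, in the same `IF NOT EXISTS` style as the schema updates in this repository:

[source,sql]
----
-- Approach 1: a one-time DEFAULT makes sense.
-- up1.sql: add the column with a DEFAULT, back-filling any existing rows.
ALTER TABLE omicron.public.widget
    ADD COLUMN IF NOT EXISTS flavor TEXT NOT NULL DEFAULT 'vanilla';
-- up2.sql: drop the DEFAULT so new rows must supply a value explicitly.
ALTER TABLE omicron.public.widget ALTER COLUMN flavor DROP DEFAULT;

-- Approach 2: no DEFAULT makes sense; see the multi-step process described
-- below.
-- up1.sql: add the column as nullable.
ALTER TABLE omicron.public.widget ADD COLUMN IF NOT EXISTS flavor TEXT;
-- up2.sql: back-fill existing rows; re-runnable, since it only touches NULLs.
UPDATE omicron.public.widget SET flavor = 'vanilla' WHERE flavor IS NULL;
-- up3.sql: enforce NOT NULL once every row has a value.
ALTER TABLE omicron.public.widget ALTER COLUMN flavor SET NOT NULL;
----

Both `DROP DEFAULT` and `SET NOT NULL` are no-ops when re-run after succeeding once, which keeps these steps safe to retry.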
+ +If a `DEFAULT` value does not make sense, then you need to implement a +multi-step process. + +. Add the column without a `NOT NULL` constraint. +. Migrate existing data to a non-null value. +. Once all data has been migrated to a non-null value, alter the table again to +add the `NOT NULL` constraint. -As convention, however, we recommend keeping the `db_metadata` file at the end -of `dbinit.sql`, so that the database does not contain a version until it is -fully populated. +For the time being, if you can write the data migration in SQL (e.g., using a +SQL `UPDATE`), then you can do this with a single new schema version where the +second step is an `UPDATE`. See schema version 54 (`blueprint-add-external-ip-id`) +for an example of this (though that one did not make the new column `NOT NULL` -- +you'd want to do that in another step). Update the `validate_data_migration()` +test in `nexus/tests/integration_tests/schema.rs` to add a test for this. -=== Scenario-specific gotchas +In the future when schema updates happen while the control plane is online, +this may not be a tenable path because the operation may take a very long time +on large tables. + +If you cannot write the data migration in SQL, you would need to figure out a +different way to backfill the data before you can apply the step that adds the +`NOT NULL` constraint. This is likely a substantial project. ==== Renaming columns @@ -151,3 +179,66 @@ functions as a workaround.) An (imperfect) workaround is to use the `#[diesel(column_name = foo)]` attribute in Rust code to preserve the existing name of a column in the database while giving its corresponding struct field a different, more meaningful name. + +Note that this constraint does not apply to renaming tables: the statement +`ALTER TABLE IF EXISTS ... RENAME TO ...` is valid and idempotent. + +=== Fixing broken Schema Updates + +WARNING: This section is somewhat speculative - what "broken" means may differ +significantly from one schema update to the next. Take this as a recommendation +based on experience, but not as a hard truth that will apply to all broken schema +updates. + +In cases where a schema update cannot complete successfully, additional steps +may be necessary to enable schema updates to proceed (for example, if a schema +update tried <<add_column_constraint>>). In these situations, the goal should be +the following: + +. Fix the schema update such that deployments which have not applied it yet +do not fail. +.. It is important to update the *exact* "upN.sql" file which failed, rather than +re-numbering or otherwise changing the order of schema updates. Internally, Nexus +tracks which individual step of a schema update has been applied, to avoid applying +older schema upgrades which may no longer be relevant. +. Add a follow-up named schema update to ensure that deployments which have +*already* applied it arrive at the same state. This is only necessary if it is +possible for the schema update to apply successfully in any possible +deployment. This schema update should be added like any other "new" schema update, +appended to the list of all updates, rather than re-ordering history. This +schema update will run on systems that deployed both versions of the earlier +schema update. +. Determine whether any of the schema versions after the broken one need to +change because they depended on the specific behavior that you changed to _fix_ +that version. + +We can use the following terminology here: + +* `S(bad)`: The particular `upN.sql` schema update which is "broken".
+* `S(fixed)`: That same `upN.sql` file after being updated to a non-broken version. +* `S(converge)`: Some later schema update that converges the deployment to a known-good +state. + +**This process is risky**. By changing the contents of the old schema update `S(bad)` +to `S(fixed)`, we create two divergent histories on our deployments: one where `S(bad)` +may have been applied, and one where only `S(fixed)` was applied. + +Although the goal of `S(converge)` is to make sure that these deployments end +up looking the same, there are no guarantees that other schema updates between +`S(bad)` and `S(converge)` will be identical between these two variant update +timelines. When fixing broken schema updates, do so with caution, and consider +all schema updates between `S(bad)` and `S(converge)` - these updates must be +able to complete successfully regardless of which one of `S(bad)` or `S(fixed)` +was applied. + +=== General notes + +CockroachDB's representation of the schema includes some opaque +internally-generated fields that are order dependent, like the names of +anonymous CHECK constraints. Our schema comparison tools intentionally ignore +these values. As a result, when performing schema changes, the order of new +tables and constraints should generally not be important. + +As convention, however, we recommend keeping the `db_metadata` row insertion at +the end of `dbinit.sql`, so that the database does not contain a version until +it is fully populated. diff --git a/schema/crdb/add-management-gateway-producer-kind/up.sql b/schema/crdb/add-management-gateway-producer-kind/up.sql new file mode 100644 index 00000000000..e872278e2f3 --- /dev/null +++ b/schema/crdb/add-management-gateway-producer-kind/up.sql @@ -0,0 +1,2 @@ +ALTER TYPE omicron.public.producer_kind + ADD VALUE IF NOT EXISTS 'management_gateway' AFTER 'instance'; diff --git a/schema/crdb/collapse_lldp_settings/up1.sql b/schema/crdb/collapse_lldp_settings/up1.sql new file mode 100644 index 00000000000..f7fb05d7267 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up1.sql @@ -0,0 +1,4 @@ +/* + * The old lldp_service_config_id is being replaced with lldp_link_config_id. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config DROP COLUMN IF EXISTS lldp_service_config_id; diff --git a/schema/crdb/collapse_lldp_settings/up2.sql b/schema/crdb/collapse_lldp_settings/up2.sql new file mode 100644 index 00000000000..8ead8a29b48 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up2.sql @@ -0,0 +1,4 @@ +/* + * Add a pointer to this link's LLDP config settings. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS lldp_link_config_id UUID; diff --git a/schema/crdb/collapse_lldp_settings/up3.sql b/schema/crdb/collapse_lldp_settings/up3.sql new file mode 100644 index 00000000000..9c4ef8549b7 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up3.sql @@ -0,0 +1,5 @@ +/* + * Drop the old lldp_service_config table, which has been incorporated into the + * new lldp_link_config. + */ +DROP TABLE IF EXISTS omicron.public.lldp_service_config; diff --git a/schema/crdb/collapse_lldp_settings/up4.sql b/schema/crdb/collapse_lldp_settings/up4.sql new file mode 100644 index 00000000000..3c8d4e86cfc --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up4.sql @@ -0,0 +1,4 @@ +/* + * Drop the old lldp_config table, which has been replaced by lldp_link_config. 
+ */ +DROP TABLE IF EXISTS omicron.public.lldp_config; diff --git a/schema/crdb/collapse_lldp_settings/up5.sql b/schema/crdb/collapse_lldp_settings/up5.sql new file mode 100644 index 00000000000..50dcd618d80 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up5.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config ( + id UUID PRIMARY KEY, + enabled BOOL NOT NULL, + link_name STRING(63), + link_description STRING(512), + chassis_id STRING(63), + system_name STRING(63), + system_description STRING(512), + management_ip TEXT, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/collapse_lldp_settings/up6.sql b/schema/crdb/collapse_lldp_settings/up6.sql new file mode 100644 index 00000000000..3b16af6f4bf --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up6.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS lldp_config_by_name; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up01.sql b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql new file mode 100644 index 00000000000..1cfe718d002 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone_root' AFTER 'internal_dns'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up02.sql b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql new file mode 100644 index 00000000000..93178e36856 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up02.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'zone' AFTER 'zone_root'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up03.sql b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql new file mode 100644 index 00000000000..58d215d177f --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up03.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'debug' AFTER 'zone'; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up04.sql b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql new file mode 100644 index 00000000000..b92bce1b6ce --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up04.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ADD COLUMN IF NOT EXISTS zone_name TEXT; diff --git a/schema/crdb/dataset-kinds-zone-and-debug/up05.sql b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql new file mode 100644 index 00000000000..3f33b79c720 --- /dev/null +++ b/schema/crdb/dataset-kinds-zone-and-debug/up05.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.dataset ADD CONSTRAINT IF NOT EXISTS zone_name_for_zone_kind CHECK ( + (kind != 'zone') OR + (kind = 'zone' AND zone_name IS NOT NULL) +) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index dc0a29971b3..d4eec6ed884 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -509,7 +509,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM ( 'clickhouse_keeper', 'clickhouse_server', 'external_dns', - 'internal_dns' + 'internal_dns', + 'zone_root', + 'zone', + 'debug' ); /* @@ -535,6 +538,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( /* An upper bound on the amount of space that might be in-use */ size_used INT, + /* Only valid if kind = zone -- the name of this zone */ + zone_name TEXT, + /* Crucible must make use of 'size_used'; other datasets manage their own storage */ CONSTRAINT size_used_column_set_for_crucible CHECK ( (kind != 'crucible') OR @@ -544,6 +550,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset 
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index dc0a29971b3..d4eec6ed884 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -509,7 +509,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.dataset_kind AS ENUM (
     'clickhouse_keeper',
     'clickhouse_server',
     'external_dns',
-    'internal_dns'
+    'internal_dns',
+    'zone_root',
+    'zone',
+    'debug'
 );

 /*
@@ -535,6 +538,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset (
     /* An upper bound on the amount of space that might be in-use */
     size_used INT,

+    /* Only valid if kind = zone -- the name of this zone */
+    zone_name TEXT,
+
     /* Crucible must make use of 'size_used'; other datasets manage their own storage */
     CONSTRAINT size_used_column_set_for_crucible CHECK (
       (kind != 'crucible') OR
@@ -544,6 +550,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset (
     CONSTRAINT ip_and_port_set_for_crucible CHECK (
       (kind != 'crucible') OR
       (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL)
+    ),
+
+    CONSTRAINT zone_name_for_zone_kind CHECK (
+      (kind != 'zone') OR
+      (kind = 'zone' AND zone_name IS NOT NULL)
     )
 );
@@ -1334,7 +1345,9 @@ CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM (
     -- removed).
     'service',
     -- A Propolis VMM for an instance in the omicron.public.instance table
-    'instance'
+    'instance',
+    -- A management gateway service on a scrimlet.
+    'management_gateway'
 );

 /*
@@ -2650,40 +2663,30 @@ CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM (
 CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_link_config (
     port_settings_id UUID,
-    lldp_service_config_id UUID NOT NULL,
     link_name TEXT,
     mtu INT4,
     fec omicron.public.switch_link_fec,
     speed omicron.public.switch_link_speed,
     autoneg BOOL NOT NULL DEFAULT false,
+    lldp_link_config_id UUID,

     PRIMARY KEY (port_settings_id, link_name)
 );

-CREATE TABLE IF NOT EXISTS omicron.public.lldp_service_config (
+CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config (
     id UUID PRIMARY KEY,
-    lldp_config_id UUID,
-    enabled BOOL NOT NULL
-);
-
-CREATE TABLE IF NOT EXISTS omicron.public.lldp_config (
-    id UUID PRIMARY KEY,
-    name STRING(63) NOT NULL,
-    description STRING(512) NOT NULL,
+    enabled BOOL NOT NULL,
+    link_name STRING(63),
+    link_description STRING(512),
+    chassis_id STRING(63),
+    system_name STRING(63),
+    system_description STRING(512),
+    management_ip TEXT,
     time_created TIMESTAMPTZ NOT NULL,
     time_modified TIMESTAMPTZ NOT NULL,
-    time_deleted TIMESTAMPTZ,
-    chassis_id TEXT,
-    system_name TEXT,
-    system_description TEXT,
-    management_ip TEXT
+    time_deleted TIMESTAMPTZ
 );

-CREATE UNIQUE INDEX IF NOT EXISTS lldp_config_by_name ON omicron.public.lldp_config (
-    name
-) WHERE
-    time_deleted IS NULL;
-
 CREATE TYPE IF NOT EXISTS omicron.public.switch_interface_kind AS ENUM (
     'primary',
     'vlan',
@@ -2715,6 +2718,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_route_config (
     dst INET,
     gw INET,
     vid INT4,
+    local_pref INT8,

     /* TODO https://github.com/oxidecomputer/omicron/issues/3013 */
     PRIMARY KEY (port_settings_id, interface_name, dst, gw)
@@ -2790,6 +2794,10 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_bgp_config_by_name ON omicron.public.bg
 ) WHERE
     time_deleted IS NULL;

+CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config (
+    asn
+) WHERE time_deleted IS NULL;
+
 CREATE TABLE IF NOT EXISTS omicron.public.bgp_announce_set (
     id UUID PRIMARY KEY,
     name STRING(63) NOT NULL,
@@ -4247,7 +4255,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '88.0.0', NULL)
+    (TRUE, NOW(), NOW(), '94.0.0', NULL)
 ON CONFLICT DO NOTHING;

 COMMIT;
diff --git a/schema/crdb/lldp-link-config-nullable/up1.sql b/schema/crdb/lldp-link-config-nullable/up1.sql
new file mode 100644
index 00000000000..c8e1122f68a
--- /dev/null
+++ b/schema/crdb/lldp-link-config-nullable/up1.sql
@@ -0,0 +1,20 @@
+-- Refer to https://github.com/oxidecomputer/omicron/issues/6433 for the justification
+-- behind this schema change.
+--
+-- In short: the "collapse_lldp_settings" schema change was edited after
+-- merging. That change included a statement which added a non-null column
+-- to an existing table. Such a data-modifying statement is only valid for
+-- tables with no rows - however, in our test systems, we observed rows, which
+-- prevented this schema change from progressing.
+--
+-- To resolve:
+-- 1. Within the old "collapse_lldp_settings" change, we retroactively dropped
+-- the non-null constraint. For systems with populated
+-- "switch_port_settings_link_config" tables, this allows the schema update to
+-- complete without an error.
+-- 2. Within this new "lldp-link-config-nullable" change, we ALSO dropped the
+-- non-null constraint. For systems without populated
+-- "switch_port_settings_link_config" tables -- which may have been able to
+-- apply the "collapse_lldp_settings" change successfully -- this converges the
+-- state of the database to the same outcome, where the column is nullable.
+ALTER TABLE omicron.public.switch_port_settings_link_config ALTER COLUMN lldp_link_config_id DROP NOT NULL;
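-- [Editor's aside, not part of the patch: the reason the same statement can
-- safely appear in both the old "collapse_lldp_settings" change and this new
-- one is that DROP NOT NULL is idempotent -- dropping NOT NULL from an
-- already-nullable column succeeds as a no-op. A minimal sketch with a
-- hypothetical throwaway table:]
CREATE TABLE demo_link_config (
    id INT PRIMARY KEY,
    lldp_link_config_id UUID NOT NULL
);
ALTER TABLE demo_link_config ALTER COLUMN lldp_link_config_id DROP NOT NULL;
-- Running the identical statement a second time also succeeds, so systems
-- that took either upgrade path converge on the same nullable column:
ALTER TABLE demo_link_config ALTER COLUMN lldp_link_config_id DROP NOT NULL;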
diff --git a/schema/crdb/lookup-bgp-config-by-asn/up01.sql b/schema/crdb/lookup-bgp-config-by-asn/up01.sql
new file mode 100644
index 00000000000..e886015a291
--- /dev/null
+++ b/schema/crdb/lookup-bgp-config-by-asn/up01.sql
@@ -0,0 +1,3 @@
+CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config (
+    asn
+) WHERE time_deleted IS NULL;
diff --git a/schema/crdb/route-local-pref/up.sql b/schema/crdb/route-local-pref/up.sql
new file mode 100644
index 00000000000..d1051ccd0c5
--- /dev/null
+++ b/schema/crdb/route-local-pref/up.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.switch_port_settings_route_config ADD COLUMN IF NOT EXISTS local_pref INT8;
diff --git a/schema/omicron-datasets.json b/schema/omicron-datasets.json
new file mode 100644
index 00000000000..07fc2cfb13c
--- /dev/null
+++ b/schema/omicron-datasets.json
@@ -0,0 +1,226 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "DatasetsConfig",
+  "type": "object",
+  "required": [
+    "datasets",
+    "generation"
+  ],
+  "properties": {
+    "datasets": {
+      "type": "object",
+      "additionalProperties": {
+        "$ref": "#/definitions/DatasetConfig"
+      }
+    },
+    "generation": {
+      "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\".
This is the default configuration for a sled before any requests have been made.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + }, + "definitions": { + "CompressionAlgorithm": { + "oneOf": [ + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "on" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "off" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "gzip" + ] + } + } + }, + { + "type": "object", + "required": [ + "level", + "type" + ], + "properties": { + "level": { + "$ref": "#/definitions/GzipLevel" + }, + "type": { + "type": "string", + "enum": [ + "gzip_n" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lz4" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "lzjb" + ] + } + } + }, + { + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "zle" + ] + } + } + } + ] + }, + "DatasetConfig": { + "description": "Configuration information necessary to request a single dataset", + "type": "object", + "required": [ + "compression", + "id", + "name" + ], + "properties": { + "compression": { + "description": "The compression mode to be used by the dataset", + "allOf": [ + { + "$ref": "#/definitions/CompressionAlgorithm" + } + ] + }, + "id": { + "description": "The UUID of the dataset being requested", + "allOf": [ + { + "$ref": "#/definitions/TypedUuidForDatasetKind" + } + ] + }, + "name": { + "description": "The dataset's name", + "allOf": [ + { + "$ref": "#/definitions/DatasetName" + } + ] + }, + "quota": { + "description": "The upper bound on the amount of storage used by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + }, + "reservation": { + "description": "The lower bound on the amount of storage usable by this dataset", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0.0 + } + } + }, + "DatasetKind": { + "description": "The kind of dataset. See the `DatasetKind` enum in omicron-common for possible values.", + "type": "string" + }, + "DatasetName": { + "type": "object", + "required": [ + "kind", + "pool_name" + ], + "properties": { + "kind": { + "$ref": "#/definitions/DatasetKind" + }, + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "GzipLevel": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "TypedUuidForDatasetKind": { + "type": "string", + "format": "uuid" + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/schema/rss-service-plan-v4.json b/schema/rss-service-plan-v4.json new file mode 100644 index 00000000000..badfaf4589b --- /dev/null +++ b/schema/rss-service-plan-v4.json @@ -0,0 +1,999 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Plan", + "type": "object", + "required": [ + "dns_config", + "services" + ], + "properties": { + "dns_config": { + "$ref": "#/definitions/DnsConfigParams" + }, + "services": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/SledConfig" + } + } + }, + "definitions": { + "BlueprintZoneConfig": { + "description": "Describes one Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZonesConfig`].", + "type": "object", + "required": [ + "disposition", + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "disposition": { + "description": "The disposition (desired state) of this zone recorded in the blueprint.", + "allOf": [ + { + "$ref": "#/definitions/BlueprintZoneDisposition" + } + ] + }, + "filesystem_pool": { + "anyOf": [ + { + "$ref": "#/definitions/ZpoolName" + }, + { + "type": "null" + } + ] + }, + "id": { + "$ref": "#/definitions/TypedUuidForOmicronZoneKind" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/BlueprintZoneType" + } + } + }, + "BlueprintZoneDisposition": { + "description": "The desired state of an Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZoneConfig`].", + "oneOf": [ + { + "description": "The zone is in-service.", + "type": "string", + "enum": [ + "in_service" + ] + }, + { + "description": "The zone is not in service.", + "type": "string", + "enum": [ + "quiesced" + ] + }, + { + "description": "The zone is permanently gone.", + "type": "string", + "enum": [ + "expunged" + ] + } + ] + }, + "BlueprintZoneType": { + "oneOf": [ + { + "type": "object", + "required": [ + "address", + "dns_servers", + "external_ip", + "nic", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "external_ip": { + "$ref": "#/definitions/OmicronZoneExternalSnatIp" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + } + }, + { + "description": "Used in single-node clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "description": "Used in replicated clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", 
+ "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingAddr" + } + ] + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ], + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus 
server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingIp" + } + ] + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + } + } + ] + }, + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "required": [ + "model", + "serial", + "vendor" + ], + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + } + }, + "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", + "type": "object", + "required": [ + "generation", + "time_created", + "zones" + ], + "properties": { + "generation": { + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "time_created": { + "type": "string", + "format": "date-time" + }, + "zones": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsConfigZone" + } + } + } + }, + "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", + "type": "object", + "required": [ + "records", + "zone_name" + ], + "properties": { + "records": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsRecord" + } + } + }, + "zone_name": { + "type": "string" + } + } + }, + "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", + "oneOf": [ + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv4" + }, + "type": { + "type": "string", + "enum": [ + "A" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv6" + }, + "type": { + "type": "string", + "enum": [ + "AAAA" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "$ref": "#/definitions/Srv" + }, + "type": { + "type": "string", + "enum": [ + "SRV" + ] + } + } + } + ] + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/definitions/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/definitions/Ipv6Net" + } + ] + } + ], + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::IpNet", + "version": "0.1.0" + } + }, + "Ipv4Net": { + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and prefix length", + "examples": [ + "192.168.1.0/24" + ], + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv4Net", + "version": "0.1.0" + } + }, + "Ipv6Net": { + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "examples": [ + "fd12:3456::/64" + ], + "type": "string", + "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv6Net", + "version": "0.1.0" + } + }, + "MacAddr": { + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "examples": [ + "ff:ff:ff:ff:ff:ff" + ], + "type": "string", + "maxLength": 17, + "minLength": 5, + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$" + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", + "type": "string", + "maxLength": 63, + "minLength": 1, + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/definitions/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/definitions/MacAddr" + }, + "name": { + "$ref": "#/definitions/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "subnet": { + "$ref": "#/definitions/IpNet" + }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/IpNet" + } + }, + "vni": { + "$ref": "#/definitions/Vni" + } + } + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + } + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } + } + ] + }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "required": [ + "id", + "identity", + "pool_id" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/definitions/DiskIdentity" + }, + "pool_id": { + "$ref": "#/definitions/TypedUuidForZpoolKind" + } + } + }, + "OmicronPhysicalDisksConfig": { + "type": "object", + "required": [ + "disks", + "generation" + ], + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneExternalFloatingAddr": { + "description": "Floating external address with port allocated to an Omicron-managed zone.", + "type": "object", + "required": [ + "addr", + "id" + ], + "properties": { + "addr": { + "type": "string" + }, + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + } + } + }, + "OmicronZoneExternalFloatingIp": { + "description": "Floating external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "ip" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "ip": { + "type": "string", + "format": "ip" + } + } + }, + "OmicronZoneExternalSnatIp": { + "description": "SNAT (outbound) external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "snat_cfg" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "snat_cfg": { + "$ref": "#/definitions/SourceNatConfig" + } + } + }, + "SledConfig": { + "type": "object", + "required": [ + "disks", + "zones" + ], + "properties": { + "disks": { + "description": "Control plane disks configured for this sled", + "allOf": [ + { + "$ref": "#/definitions/OmicronPhysicalDisksConfig" + } + ] + }, + "zones": { + "description": "zones configured for this sled", + "type": "array", + "items": { + "$ref": "#/definitions/BlueprintZoneConfig" + } + } + } + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "required": [ + "first_port", + "ip", + "last_port" + ], + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", + "type": "object", + "required": [ + "port", + "prio", + "target", + "weight" + ], + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "TypedUuidForExternalIpKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForOmicronZoneKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index a3d34258703..b0abc8c67e1 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -604,6 +604,79 @@ } } }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "required": [ + "status" + ], + "properties": { + "chassis_id": { + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "management_addrs": { + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "array", + "null" + ], + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": [ + "string", + "null" + ] + }, + "port_id": { + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": [ + "string", + "null" + ] + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/definitions/LldpAdminStatus" + } + ] + }, + "system_description": { + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "system_name": { + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + } + } + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.",
@@ -648,6 +721,17 @@
           "$ref": "#/definitions/BgpPeerConfig"
         }
       },
+      "lldp": {
+        "description": "LLDP configuration for this port",
+        "anyOf": [
+          {
+            "$ref": "#/definitions/LldpPortConfig"
+          },
+          {
+            "type": "null"
+          }
+        ]
+      },
       "port": {
         "description": "Name of the port this config applies to.",
         "type": "string"
@@ -894,6 +978,16 @@
         }
       ]
     },
+    "local_pref": {
+      "description": "The local preference associated with this route.",
+      "default": null,
+      "type": [
+        "integer",
+        "null"
+      ],
+      "format": "uint32",
+      "minimum": 0.0
+    },
     "nexthop": {
       "description": "The nexthop/gateway address.",
       "type": "string",
diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs
index c44b24d7121..d9e49a5c56d 100644
--- a/sled-agent/api/src/lib.rs
+++ b/sled-agent/api/src/lib.rs
@@ -15,15 +15,18 @@ use nexus_sled_agent_shared::inventory::{
 };
 use omicron_common::{
     api::internal::{
-        nexus::{DiskRuntimeState, SledInstanceState, UpdateArtifactId},
+        nexus::{DiskRuntimeState, SledVmmState, UpdateArtifactId},
         shared::{
             ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers,
             SwitchPorts, VirtualNetworkInterfaceHost,
         },
     },
-    disk::{DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig},
+    disk::{
+        DatasetsConfig, DatasetsManagementResult, DiskVariant,
+        DisksManagementResult, OmicronPhysicalDisksConfig,
+    },
 };
-use omicron_uuid_kinds::{InstanceUuid, ZpoolUuid};
+use omicron_uuid_kinds::{PropolisUuid, ZpoolUuid};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use sled_agent_types::{
@@ -36,8 +39,8 @@ use sled_agent_types::{
     early_networking::EarlyNetworkConfig,
     firewall_rules::VpcFirewallRulesEnsureBody,
     instance::{
-        InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody,
-        InstancePutStateResponse, InstanceUnregisterResponse,
+        InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody,
+        VmmPutStateResponse, VmmUnregisterResponse,
     },
     sled::AddSledRequest,
     time_sync::TimeSync,
@@ -168,6 +171,25 @@ pub trait SledAgentApi {
         body: TypedBody,
     ) -> Result;

+    /// Configures datasets to be used on this sled
+    #[endpoint {
+        method = PUT,
+        path = "/datasets",
+    }]
+    async fn datasets_put(
+        rqctx: RequestContext<Self::Context>,
+        body: TypedBody<DatasetsConfig>,
+    ) -> Result<HttpResponseOk<DatasetsManagementResult>, HttpError>;
+
+    /// Lists the datasets that this sled is configured to use
+    #[endpoint {
+        method = GET,
+        path = "/datasets",
+    }]
+    async fn datasets_get(
+        rqctx: RequestContext<Self::Context>,
+    ) -> Result<HttpResponseOk<DatasetsConfig>, HttpError>;
+
     #[endpoint {
         method = GET,
         path = "/omicron-physical-disks",
@@ -212,59 +234,59 @@ pub trait SledAgentApi {
     #[endpoint {
         method = PUT,
-        path = "/instances/{instance_id}",
+        path = "/vmms/{propolis_id}",
     }]
-    async fn instance_register(
+    async fn vmm_register(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<InstancePathParam>,
+        path_params: Path<VmmPathParam>,
         body: TypedBody<InstanceEnsureBody>,
-    ) -> Result<HttpResponseOk<SledInstanceState>, HttpError>;
+    ) -> Result<HttpResponseOk<SledVmmState>, HttpError>;

     #[endpoint {
         method = DELETE,
-        path = "/instances/{instance_id}",
+        path = "/vmms/{propolis_id}",
     }]
-    async fn instance_unregister(
+    async fn vmm_unregister(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<InstancePathParam>,
-    ) -> Result<HttpResponseOk<InstanceUnregisterResponse>, HttpError>;
+        path_params: Path<VmmPathParam>,
+    ) -> Result<HttpResponseOk<VmmUnregisterResponse>, HttpError>;

     #[endpoint {
         method = PUT,
-        path = "/instances/{instance_id}/state",
+        path = "/vmms/{propolis_id}/state",
     }]
-    async fn instance_put_state(
+    async fn vmm_put_state(
         rqctx: RequestContext<Self::Context>,
-        path_params: Path<InstancePathParam>,
-        body: TypedBody<InstancePutStateBody>,
-    ) -> Result<HttpResponseOk<InstancePutStateResponse>, HttpError>;
+        path_params: Path<VmmPathParam>,
+        body: TypedBody<VmmPutStateBody>,
+    ) -> Result<HttpResponseOk<VmmPutStateResponse>, HttpError>;

     #[endpoint {
         method = GET,
-        path = "/instances/{instance_id}/state",
+        path =
"/vmms/{propolis_id}/state", }] - async fn instance_get_state( + async fn vmm_get_state( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError>; + path_params: Path, + ) -> Result, HttpError>; #[endpoint { method = PUT, - path = "/instances/{instance_id}/external-ip", + path = "/vmms/{propolis_id}/external-ip", }] - async fn instance_put_external_ip( + async fn vmm_put_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result; #[endpoint { method = DELETE, - path = "/instances/{instance_id}/external-ip", + path = "/vmms/{propolis_id}/external-ip", }] - async fn instance_delete_external_ip( + async fn vmm_delete_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result; @@ -290,16 +312,13 @@ pub trait SledAgentApi { /// Take a snapshot of a disk that is attached to an instance #[endpoint { method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", + path = "/vmms/{propolis_id}/disks/{disk_id}/snapshot", }] - async fn instance_issue_disk_snapshot_request( + async fn vmm_issue_disk_snapshot_request( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result< - HttpResponseOk, - HttpError, - >; + path_params: Path, + body: TypedBody, + ) -> Result, HttpError>; #[endpoint { method = PUT, @@ -516,8 +535,8 @@ impl From for DiskType { /// Path parameters for Instance requests (sled agent API) #[derive(Deserialize, JsonSchema)] -pub struct InstancePathParam { - pub instance_id: InstanceUuid, +pub struct VmmPathParam { + pub propolis_id: PropolisUuid, } /// Path parameters for Disk requests (sled agent API) @@ -527,18 +546,18 @@ pub struct DiskPathParam { } #[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - pub instance_id: Uuid, +pub struct VmmIssueDiskSnapshotRequestPathParam { + pub propolis_id: PropolisUuid, pub disk_id: Uuid, } #[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { +pub struct VmmIssueDiskSnapshotRequestBody { pub snapshot_id: Uuid, } #[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { +pub struct VmmIssueDiskSnapshotRequestResponse { pub snapshot_id: Uuid, } diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs index 2e9ea4c8d91..a0f7826db38 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -25,6 +25,7 @@ use camino::Utf8PathBuf; use illumos_utils::zfs::{ EnsureFilesystemError, GetValueError, Mountpoint, SizeDetails, Zfs, }; +use omicron_common::disk::CompressionAlgorithm; use std::io; #[derive(Debug, thiserror::Error)] @@ -50,7 +51,7 @@ struct BackingFs<'a> { // Optional quota, in _bytes_ quota: Option, // Optional compression mode - compression: Option<&'static str>, + compression: CompressionAlgorithm, // Linked service service: Option<&'static str>, // Subdirectories to ensure @@ -63,7 +64,7 @@ impl<'a> BackingFs<'a> { name, mountpoint: "legacy", quota: None, - compression: None, + compression: CompressionAlgorithm::Off, service: None, subdirs: None, } @@ -79,8 +80,8 @@ impl<'a> BackingFs<'a> { self } - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); + const fn compression(mut self, compression: CompressionAlgorithm) -> Self { + self.compression = compression; self } @@ -101,7 +102,7 @@ const BACKING_FMD_SUBDIRS: [&'static str; 3] = ["rsrc", "ckpt", "xprt"]; const BACKING_FMD_SERVICE: &'static str = 
"svc:/system/fmd:default"; const BACKING_FMD_QUOTA: usize = 500 * (1 << 20); // 500 MiB -const BACKING_COMPRESSION: &'static str = "on"; +const BACKING_COMPRESSION: CompressionAlgorithm = CompressionAlgorithm::On; const BACKINGFS_COUNT: usize = 1; static BACKINGFS: [BackingFs; BACKINGFS_COUNT] = @@ -137,6 +138,7 @@ pub(crate) fn ensure_backing_fs( let size_details = Some(SizeDetails { quota: bfs.quota, + reservation: None, compression: bfs.compression, }); diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 95a1f873f6c..abc88d67c16 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -631,7 +631,8 @@ impl<'a> EarlyNetworkSetup<'a> { IpAddr::V6(_) => continue, }; let vlan_id = r.vlan_id; - let sr = StaticRoute4 { nexthop, prefix, vlan_id }; + let local_pref = r.local_pref; + let sr = StaticRoute4 { nexthop, prefix, vlan_id, local_pref }; rq.routes.list.push(sr); } } diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index adbeb9158f4..f95bf0cb647 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -7,10 +7,9 @@ use chrono::{DateTime, Utc}; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ - MigrationRuntimeState, MigrationState, SledInstanceState, VmmRuntimeState, + MigrationRuntimeState, MigrationState, SledVmmState, VmmRuntimeState, VmmState, }; -use omicron_uuid_kinds::PropolisUuid; use propolis_client::types::{ InstanceMigrationStatus, InstanceState as PropolisApiState, InstanceStateMonitorResponse, MigrationState as PropolisMigrationState, @@ -21,7 +20,6 @@ use uuid::Uuid; #[derive(Clone, Debug)] pub struct InstanceStates { vmm: VmmRuntimeState, - propolis_id: PropolisUuid, migration_in: Option, migration_out: Option, } @@ -173,11 +171,7 @@ pub enum Action { } impl InstanceStates { - pub fn new( - vmm: VmmRuntimeState, - propolis_id: PropolisUuid, - migration_id: Option, - ) -> Self { + pub fn new(vmm: VmmRuntimeState, migration_id: Option) -> Self { // If this instance is created with a migration ID, we are the intended // target of a migration in. Set that up now. let migration_in = @@ -187,17 +181,13 @@ impl InstanceStates { gen: Generation::new(), time_updated: Utc::now(), }); - InstanceStates { vmm, propolis_id, migration_in, migration_out: None } + InstanceStates { vmm, migration_in, migration_out: None } } pub fn vmm(&self) -> &VmmRuntimeState { &self.vmm } - pub fn propolis_id(&self) -> PropolisUuid { - self.propolis_id - } - pub fn migration_in(&self) -> Option<&MigrationRuntimeState> { self.migration_in.as_ref() } @@ -209,10 +199,9 @@ impl InstanceStates { /// Creates a `SledInstanceState` structure containing the entirety of this /// structure's runtime state. This requires cloning; for simple read access /// use the `instance` or `vmm` accessors instead. 
- pub fn sled_instance_state(&self) -> SledInstanceState { - SledInstanceState { + pub fn sled_instance_state(&self) -> SledVmmState { + SledVmmState { vmm_state: self.vmm.clone(), - propolis_id: self.propolis_id, migration_in: self.migration_in.clone(), migration_out: self.migration_out.clone(), } @@ -377,7 +366,6 @@ mod test { use uuid::Uuid; fn make_instance() -> InstanceStates { - let propolis_id = PropolisUuid::new_v4(); let now = Utc::now(); let vmm = VmmRuntimeState { @@ -386,7 +374,7 @@ mod test { time_updated: now, }; - InstanceStates::new(vmm, propolis_id, None) + InstanceStates::new(vmm, None) } fn make_migration_source_instance() -> InstanceStates { @@ -406,7 +394,6 @@ mod test { } fn make_migration_target_instance() -> InstanceStates { - let propolis_id = PropolisUuid::new_v4(); let now = Utc::now(); let vmm = VmmRuntimeState { @@ -415,7 +402,7 @@ mod test { time_updated: now, }; - InstanceStates::new(vmm, propolis_id, Some(Uuid::new_v4())) + InstanceStates::new(vmm, Some(Uuid::new_v4())) } fn make_observed_state( diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs index 246ef07b605..bd4680563eb 100644 --- a/sled-agent/src/fakes/nexus.rs +++ b/sled-agent/src/fakes/nexus.rs @@ -15,12 +15,11 @@ use hyper::Body; use internal_dns::ServiceName; use nexus_client::types::SledAgentInfo; use omicron_common::api::external::Error; -use omicron_common::api::internal::nexus::{ - SledInstanceState, UpdateArtifactId, -}; -use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_common::api::internal::nexus::{SledVmmState, UpdateArtifactId}; +use omicron_uuid_kinds::{OmicronZoneUuid, PropolisUuid}; use schemars::JsonSchema; use serde::Deserialize; +use sled_agent_api::VmmPathParam; use uuid::Uuid; /// Implements a fake Nexus. @@ -50,8 +49,8 @@ pub trait FakeNexusServer: Send + Sync { fn cpapi_instances_put( &self, - _instance_id: Uuid, - _new_runtime_state: SledInstanceState, + _propolis_id: PropolisUuid, + _new_runtime_state: SledVmmState, ) -> Result<(), Error> { Err(Error::internal_error("Not implemented")) } @@ -118,22 +117,18 @@ async fn sled_agent_put( Ok(HttpResponseUpdatedNoContent()) } -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: Uuid, -} #[endpoint { method = PUT, - path = "/instances/{instance_id}", + path = "/vmms/{propolis_id}", }] async fn cpapi_instances_put( request_context: RequestContext, - path_params: Path, - new_runtime_state: TypedBody, + path_params: Path, + new_runtime_state: TypedBody, ) -> Result { let context = request_context.context(); context.cpapi_instances_put( - path_params.into_inner().instance_id, + path_params.into_inner().propolis_id, new_runtime_state.into_inner(), )?; Ok(HttpResponseUpdatedNoContent()) diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 2bf8067d1cf..1d61d97675f 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -21,16 +21,16 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ - DiskRuntimeState, SledInstanceState, UpdateArtifactId, + DiskRuntimeState, SledVmmState, UpdateArtifactId, }; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskVariant, DisksManagementResult, M2Slot, OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskVariant, + DisksManagementResult, M2Slot, 
OmicronPhysicalDisksConfig, }; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use sled_agent_api::*; use sled_agent_types::boot_disk::{ BootDiskOsWriteStatus, BootDiskPathParams, BootDiskUpdatePathParams, @@ -41,8 +41,8 @@ use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::{ - InstanceEnsureBody, InstanceExternalIpBody, InstancePutStateBody, - InstancePutStateResponse, InstanceUnregisterResponse, + InstanceEnsureBody, InstanceExternalIpBody, VmmPutStateBody, + VmmPutStateResponse, VmmUnregisterResponse, }; use sled_agent_types::sled::AddSledRequest; use sled_agent_types::time_sync::TimeSync; @@ -220,6 +220,23 @@ impl SledAgentApi for SledAgentImpl { .map_err(HttpError::from) } + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } + + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } + async fn zone_bundle_cleanup( rqctx: RequestContext, ) -> Result>, HttpError> @@ -294,18 +311,18 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn instance_register( + async fn vmm_register( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let propolis_id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); Ok(HttpResponseOk( sa.instance_ensure_registered( - instance_id, - body_args.propolis_id, + body_args.instance_id, + propolis_id, body_args.hardware, body_args.instance_runtime, body_args.vmm_runtime, @@ -316,58 +333,56 @@ impl SledAgentApi for SledAgentImpl { )) } - async fn instance_unregister( + async fn vmm_unregister( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_ensure_unregistered(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_ensure_unregistered(id).await?)) } - async fn instance_put_state( + async fn vmm_put_state( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result, HttpError> { + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) + Ok(HttpResponseOk(sa.instance_ensure_state(id, body_args.state).await?)) } - async fn instance_get_state( + async fn vmm_get_state( rqctx: RequestContext, - path_params: Path, - ) -> Result, HttpError> { + path_params: Path, + ) -> Result, HttpError> { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_get_state(id).await?)) } - async fn instance_put_external_ip( + async 
fn vmm_put_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; + sa.instance_put_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } - async fn instance_delete_external_ip( + async fn vmm_delete_external_ip( rqctx: RequestContext, - path_params: Path, + path_params: Path, body: TypedBody, ) -> Result { let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; + let id = path_params.into_inner().propolis_id; let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; + sa.instance_delete_external_ip(id, &body_args).await?; Ok(HttpResponseUpdatedNoContent()) } @@ -399,26 +414,24 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseUpdatedNoContent()) } - async fn instance_issue_disk_snapshot_request( + async fn vmm_issue_disk_snapshot_request( rqctx: RequestContext, - path_params: Path, - body: TypedBody, - ) -> Result< - HttpResponseOk, - HttpError, - > { + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> + { let sa = rqctx.context(); let path_params = path_params.into_inner(); let body = body.into_inner(); - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), + sa.vmm_issue_disk_snapshot_request( + path_params.propolis_id, path_params.disk_id, body.snapshot_id, ) .await?; - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + Ok(HttpResponseOk(VmmIssueDiskSnapshotRequestResponse { snapshot_id: body.snapshot_id, })) } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 0bcbc97fd28..33b2d0cf673 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -25,14 +25,13 @@ use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; -use omicron_common::api::internal::nexus::{ - SledInstanceState, VmmRuntimeState, -}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmRuntimeState}; use omicron_common::api::internal::shared::{ NetworkInterface, ResolvedVpcFirewallRule, SledIdentifiers, SourceNatConfig, }; use omicron_common::backoff; use omicron_common::zpool_name::ZpoolName; +use omicron_common::NoDebug; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid}; use propolis_client::Client as PropolisClient; use rand::prelude::IteratorRandom; @@ -104,11 +103,11 @@ pub enum Error { #[error("Error resolving DNS name: {0}")] ResolveError(#[from] internal_dns::resolver::ResolveError), - #[error("Instance {0} not running!")] - InstanceNotRunning(InstanceUuid), + #[error("Propolis job with ID {0} is registered but not running")] + VmNotRunning(PropolisUuid), - #[error("Instance already registered with Propolis ID {0}")] - InstanceAlreadyRegistered(PropolisUuid), + #[error("Propolis job with ID {0} already registered")] + PropolisAlreadyRegistered(PropolisUuid), #[error("No U.2 devices found")] U2NotFound, @@ -217,15 +216,15 @@ enum InstanceRequest { tx: oneshot::Sender>, }, CurrentState { - tx: oneshot::Sender, + tx: oneshot::Sender, }, PutState { - state: InstanceStateRequested, - tx: oneshot::Sender>, + state: VmmStateRequested, + tx: 
oneshot::Sender>, }, Terminate { mark_failed: bool, - tx: oneshot::Sender>, + tx: oneshot::Sender>, }, IssueSnapshotRequest { disk_id: Uuid, @@ -337,7 +336,7 @@ struct InstanceRunner { // Disk related properties requested_disks: Vec, - cloud_init_bytes: Option, + cloud_init_bytes: Option>, // Internal State management state: InstanceStates, @@ -414,12 +413,12 @@ impl InstanceRunner { }, Some(PutState{ state, tx }) => { tx.send(self.put_state(state).await - .map(|r| InstancePutStateResponse { updated_runtime: Some(r) }) + .map(|r| VmmPutStateResponse { updated_runtime: Some(r) }) .map_err(|e| e.into())) .map_err(|_| Error::FailedSendClientClosed) }, Some(Terminate { mark_failed, tx }) => { - tx.send(Ok(InstanceUnregisterResponse { + tx.send(Ok(VmmUnregisterResponse { updated_runtime: Some(self.terminate(mark_failed).await) })) .map_err(|_| Error::FailedSendClientClosed) @@ -499,15 +498,10 @@ impl InstanceRunner { } /// Yields this instance's ID. - fn id(&self) -> InstanceUuid { + fn instance_id(&self) -> InstanceUuid { InstanceUuid::from_untyped_uuid(self.properties.id) } - /// Yields this instance's Propolis's ID. - fn propolis_id(&self) -> &PropolisUuid { - &self.propolis_id - } - async fn publish_state_to_nexus(&self) { // Retry until Nexus acknowledges that it has applied this state update. // Note that Nexus may receive this call but then fail while reacting @@ -518,15 +512,13 @@ impl InstanceRunner { || async { let state = self.state.sled_instance_state(); info!(self.log, "Publishing instance state update to Nexus"; - "instance_id" => %self.id(), + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, "state" => ?state, ); self.nexus_client - .cpapi_instances_put( - &self.id().into_untyped_uuid(), - &state.into(), - ) + .cpapi_instances_put(&self.propolis_id, &state.into()) .await .map_err(|err| -> backoff::BackoffError { match &err { @@ -576,7 +568,8 @@ impl InstanceRunner { warn!(self.log, "Failed to publish instance state to Nexus: {}", err.to_string(); - "instance_id" => %self.id(), + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, "retry_after" => ?delay); }, ) @@ -586,7 +579,8 @@ impl InstanceRunner { error!( self.log, "Failed to publish state to Nexus, will not retry: {:?}", e; - "instance_id" => %self.id() + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id, ); } } @@ -622,7 +616,7 @@ impl InstanceRunner { info!( self.log, "updated state after observing Propolis state change"; - "propolis_id" => %self.state.propolis_id(), + "propolis_id" => %self.propolis_id, "new_vmm_state" => ?self.state.vmm() ); @@ -634,7 +628,8 @@ impl InstanceRunner { match action { Some(InstanceAction::Destroy) => { info!(self.log, "terminating VMM that has exited"; - "instance_id" => %self.id()); + "instance_id" => %self.instance_id(), + "propolis_id" => %self.propolis_id); let mark_failed = false; self.terminate(mark_failed).await; Reaction::Terminate @@ -724,10 +719,10 @@ impl InstanceRunner { .map(Into::into) .collect(), migrate, - cloud_init_bytes: self.cloud_init_bytes.clone(), + cloud_init_bytes: self.cloud_init_bytes.clone().map(|x| x.0), }; - info!(self.log, "Sending ensure request to propolis: {:?}", request); + debug!(self.log, "Sending ensure request to propolis: {:?}", request); let result = client.instance_ensure().body(request).send().await; info!(self.log, "result of instance_ensure call is {:?}", result); result?; @@ -780,7 +775,7 @@ impl InstanceRunner { /// This routine is safe to call even if the instance's 
zone was never /// started. It is also safe to call multiple times on a single instance. async fn terminate_inner(&mut self) { - let zname = propolis_zone_name(self.propolis_id()); + let zname = propolis_zone_name(&self.propolis_id); // First fetch the running state. // @@ -948,8 +943,10 @@ impl InstanceRunner { } } -/// A reference to a single instance running a running Propolis server. +/// Describes a single Propolis server that incarnates a specific instance. pub struct Instance { + id: InstanceUuid, + tx: mpsc::Sender, #[allow(dead_code)] @@ -1091,7 +1088,7 @@ impl Instance { dhcp_config, requested_disks: hardware.disks, cloud_init_bytes: hardware.cloud_init_bytes, - state: InstanceStates::new(vmm_runtime, propolis_id, migration_id), + state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, storage, @@ -1104,7 +1101,11 @@ impl Instance { let runner_handle = tokio::task::spawn(async move { runner.run().await }); - Ok(Instance { tx, runner_handle }) + Ok(Instance { id, tx, runner_handle }) + } + + pub fn id(&self) -> InstanceUuid { + self.id } /// Create bundle from an instance zone. @@ -1130,7 +1131,7 @@ impl Instance { Ok(rx.await?) } - pub async fn current_state(&self) -> Result { + pub async fn current_state(&self) -> Result { let (tx, rx) = oneshot::channel(); self.tx .send(InstanceRequest::CurrentState { tx }) @@ -1152,8 +1153,8 @@ impl Instance { /// Rebooting to Running to Stopping to Stopped. pub async fn put_state( &self, - tx: oneshot::Sender>, - state: InstanceStateRequested, + tx: oneshot::Sender>, + state: VmmStateRequested, ) -> Result<(), Error> { self.tx .send(InstanceRequest::PutState { state, tx }) @@ -1166,7 +1167,7 @@ impl Instance { /// immediately transitions the instance to the Destroyed state. pub async fn terminate( &self, - tx: oneshot::Sender>, + tx: oneshot::Sender>, mark_failed: bool, ) -> Result<(), Error> { self.tx @@ -1224,7 +1225,7 @@ impl InstanceRunner { async fn request_zone_bundle( &self, ) -> Result { - let name = propolis_zone_name(self.propolis_id()); + let name = propolis_zone_name(&self.propolis_id); match &self.running_state { None => Err(BundleError::Unavailable { name }), Some(RunningState { ref running_zone, .. }) => { @@ -1242,7 +1243,7 @@ impl InstanceRunner { run_state.running_zone.root_zpool().map(|p| p.clone()) } - fn current_state(&self) -> SledInstanceState { + fn current_state(&self) -> SledVmmState { self.state.sled_instance_state() } @@ -1300,19 +1301,19 @@ impl InstanceRunner { async fn put_state( &mut self, - state: InstanceStateRequested, - ) -> Result { + state: VmmStateRequested, + ) -> Result { use propolis_client::types::InstanceStateRequested as PropolisRequest; let (propolis_state, next_published) = match state { - InstanceStateRequested::MigrationTarget(migration_params) => { + VmmStateRequested::MigrationTarget(migration_params) => { self.propolis_ensure(Some(migration_params)).await?; (None, None) } - InstanceStateRequested::Running => { + VmmStateRequested::Running => { self.propolis_ensure(None).await?; (Some(PropolisRequest::Run), None) } - InstanceStateRequested::Stopped => { + VmmStateRequested::Stopped => { // If the instance has not started yet, unregister it // immediately. 
@@ -1300,19 +1301,19 @@ impl InstanceRunner {
     async fn put_state(
         &mut self,
-        state: InstanceStateRequested,
-    ) -> Result<SledInstanceState, Error> {
+        state: VmmStateRequested,
+    ) -> Result<SledVmmState, Error> {
         use propolis_client::types::InstanceStateRequested as PropolisRequest;
         let (propolis_state, next_published) = match state {
-            InstanceStateRequested::MigrationTarget(migration_params) => {
+            VmmStateRequested::MigrationTarget(migration_params) => {
                 self.propolis_ensure(Some(migration_params)).await?;
                 (None, None)
             }
-            InstanceStateRequested::Running => {
+            VmmStateRequested::Running => {
                 self.propolis_ensure(None).await?;
                 (Some(PropolisRequest::Run), None)
             }
-            InstanceStateRequested::Stopped => {
+            VmmStateRequested::Stopped => {
                 // If the instance has not started yet, unregister it
                 // immediately. Since there is no Propolis to push updates when
                 // this happens, generate an instance record bearing the
@@ -1328,9 +1329,9 @@ impl InstanceRunner {
                     )
                 }
             }
-            InstanceStateRequested::Reboot => {
+            VmmStateRequested::Reboot => {
                 if self.running_state.is_none() {
-                    return Err(Error::InstanceNotRunning(self.id()));
+                    return Err(Error::VmNotRunning(self.propolis_id));
                 }
                 (
                     Some(PropolisRequest::Reboot),
@@ -1379,7 +1380,7 @@ impl InstanceRunner {

         // Create a zone for the propolis instance, using the previously
         // configured VNICs.
-        let zname = propolis_zone_name(self.propolis_id());
+        let zname = propolis_zone_name(&self.propolis_id);
         let mut rng = rand::rngs::StdRng::from_entropy();
         let latest_disks = self
             .storage
@@ -1399,7 +1400,7 @@ impl InstanceRunner {
             .with_zone_root_path(root)
             .with_zone_image_paths(&["/opt/oxide".into()])
             .with_zone_type("propolis-server")
-            .with_unique_name(self.propolis_id().into_untyped_uuid())
+            .with_unique_name(self.propolis_id.into_untyped_uuid())
             .with_datasets(&[])
             .with_filesystems(&[])
             .with_data_links(&[])
@@ -1483,7 +1484,7 @@ impl InstanceRunner {
         Ok(PropolisSetup { client, running_zone })
     }

-    async fn terminate(&mut self, mark_failed: bool) -> SledInstanceState {
+    async fn terminate(&mut self, mark_failed: bool) -> SledVmmState {
         self.terminate_inner().await;
         self.state.terminate_rudely(mark_failed);

@@ -1508,9 +1509,7 @@ impl InstanceRunner {
             Ok(())
         } else {
-            Err(Error::InstanceNotRunning(InstanceUuid::from_untyped_uuid(
-                self.properties.id,
-            )))
+            Err(Error::VmNotRunning(self.propolis_id))
         }
     }

@@ -1604,7 +1603,7 @@ mod tests {
     enum ReceivedInstanceState {
         #[default]
         None,
-        InstancePut(SledInstanceState),
+        InstancePut(SledVmmState),
     }

     struct NexusServer {
@@ -1614,8 +1613,8 @@ mod tests {
     impl FakeNexusServer for NexusServer {
         fn cpapi_instances_put(
             &self,
-            _instance_id: Uuid,
-            new_runtime_state: SledInstanceState,
+            _propolis_id: PropolisUuid,
+            new_runtime_state: SledVmmState,
         ) -> Result<(), omicron_common::api::external::Error> {
             self.observed_runtime_state
                 .send(ReceivedInstanceState::InstancePut(new_runtime_state))
@@ -1760,7 +1759,7 @@ mod tests {
         let id = InstanceUuid::new_v4();
         let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID);

-        let ticket = InstanceTicket::new_without_manager_for_test(id);
+        let ticket = InstanceTicket::new_without_manager_for_test(propolis_id);

         let initial_state = fake_instance_initial_state(propolis_addr);

@@ -1917,7 +1916,7 @@ mod tests {

         // pretending we're InstanceManager::ensure_state, start our "instance"
         // (backed by fakes and propolis_mock_server)
-        inst.put_state(put_tx, InstanceStateRequested::Running)
+        inst.put_state(put_tx, VmmStateRequested::Running)
             .await
             .expect("failed to send Instance::put_state");

@@ -2011,7 +2010,7 @@ mod tests {

         // pretending we're InstanceManager::ensure_state, try in vain to start
         // our "instance", but no propolis server is running
-        inst.put_state(put_tx, InstanceStateRequested::Running)
+        inst.put_state(put_tx, VmmStateRequested::Running)
             .await
             .expect("failed to send Instance::put_state");

@@ -2025,7 +2024,7 @@
             .await
             .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?");

-        if let ReceivedInstanceState::InstancePut(SledInstanceState {
+        if let ReceivedInstanceState::InstancePut(SledVmmState {
             vmm_state: VmmRuntimeState { state: VmmState::Running, .. },
             ..
         }) = state_rx.borrow().to_owned()
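The tests above observe state updates through a channel: the fake Nexus publishes every state it receives, and the test inspects the most recent value. A rough sketch of that pattern with invented state names:

```rust
use tokio::sync::watch;

#[derive(Clone, Debug, Default, PartialEq)]
enum Observed {
    #[default]
    None,
    Put(String),
}

#[tokio::main]
async fn main() {
    let (state_tx, state_rx) = watch::channel(Observed::None);

    // Stand-in for the fake Nexus server's `cpapi_instances_put`.
    state_tx.send(Observed::Put("Running".into())).unwrap();

    // The test side only ever looks at the latest published state.
    assert_eq!(*state_rx.borrow(), Observed::Put("Running".into()));
}
```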
@@ -2118,7 +2117,7 @@ mod tests {

         // pretending we're InstanceManager::ensure_state, try in vain to start
         // our "instance", but the zone never finishes installing
-        inst.put_state(put_tx, InstanceStateRequested::Running)
+        inst.put_state(put_tx, VmmStateRequested::Running)
             .await
             .expect("failed to send Instance::put_state");

@@ -2133,7 +2132,7 @@
             .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?");
         debug!(log, "Zone-boot timeout awaited");

-        if let ReceivedInstanceState::InstancePut(SledInstanceState {
+        if let ReceivedInstanceState::InstancePut(SledVmmState {
             vmm_state: VmmRuntimeState { state: VmmState::Running, .. },
             ..
         }) = state_rx.borrow().to_owned()
@@ -2256,7 +2255,7 @@ mod tests {
             .await
             .unwrap();

-        mgr.ensure_state(instance_id, InstanceStateRequested::Running)
+        mgr.ensure_state(propolis_id, VmmStateRequested::Running)
             .await
             .unwrap();

diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index 63164ed290c..24be8be89fe 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -4,13 +4,13 @@

 //! API for controlling multiple instances on a sled.

-use crate::instance::propolis_zone_name;
 use crate::instance::Instance;
 use crate::metrics::MetricsRequestQueue;
 use crate::nexus::NexusClient;
 use crate::vmm_reservoir::VmmReservoirManagerHandle;
 use crate::zone_bundle::BundleError;
 use crate::zone_bundle::ZoneBundler;
+use illumos_utils::zone::PROPOLIS_ZONE_PREFIX;
 use omicron_common::api::external::ByteCount;

 use anyhow::anyhow;
@@ -20,7 +20,7 @@ use illumos_utils::opte::PortManager;
 use illumos_utils::running_zone::ZoneBuilderFactory;
 use omicron_common::api::external::Generation;
 use omicron_common::api::internal::nexus::InstanceRuntimeState;
-use omicron_common::api::internal::nexus::SledInstanceState;
+use omicron_common::api::internal::nexus::SledVmmState;
 use omicron_common::api::internal::nexus::VmmRuntimeState;
 use omicron_common::api::internal::shared::SledIdentifiers;
 use omicron_uuid_kinds::InstanceUuid;
@@ -44,8 +44,8 @@ pub enum Error {
     #[error("Instance error: {0}")]
     Instance(#[from] crate::instance::Error),

-    #[error("No such instance ID: {0}")]
-    NoSuchInstance(InstanceUuid),
+    #[error("VMM with ID {0} not found")]
+    NoSuchVmm(PropolisUuid),

     #[error("OPTE port management error: {0}")]
     Opte(#[from] illumos_utils::opte::Error),
@@ -117,7 +117,7 @@ impl InstanceManager {
                 terminate_tx,
                 terminate_rx,
                 nexus_client,
-                instances: BTreeMap::new(),
+                jobs: BTreeMap::new(),
                 vnic_allocator: VnicAllocator::new("Instance", etherstub),
                 port_manager,
                 storage_generation: None,
@@ -150,7 +150,7 @@ impl InstanceManager {
         propolis_addr: SocketAddr,
         sled_identifiers: SledIdentifiers,
         metadata: InstanceMetadata,
-    ) -> Result<SledInstanceState, Error> {
+    ) -> Result<SledVmmState, Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
@@ -172,13 +172,13 @@ impl InstanceManager {

     pub async fn ensure_unregistered(
         &self,
-        instance_id: InstanceUuid,
-    ) -> Result<InstanceUnregisterResponse, Error> {
+        propolis_id: PropolisUuid,
+    ) -> Result<VmmUnregisterResponse, Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
             .send(InstanceManagerRequest::EnsureUnregistered {
-                instance_id,
+                propolis_id,
                 tx,
             })
             .await
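A toy illustration of why the API now takes `PropolisUuid` instead of `InstanceUuid`: distinct newtype wrappers make it a compile error to pass an instance ID where a VMM (Propolis) ID is expected, even though both wrap a UUID. The newtypes below are hypothetical stand-ins, not the `omicron-uuid-kinds` types.

```rust
use uuid::Uuid;

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct InstanceUuid(Uuid);

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct PropolisUuid(Uuid);

fn ensure_unregistered(propolis_id: PropolisUuid) {
    println!("unregistering VMM {:?}", propolis_id);
}

fn main() {
    let _instance_id = InstanceUuid(Uuid::new_v4());
    let propolis_id = PropolisUuid(Uuid::new_v4());

    ensure_unregistered(propolis_id);
    // ensure_unregistered(_instance_id); // <- would not compile
}
```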
@@ -188,14 +188,14 @@ impl InstanceManager {

     pub async fn ensure_state(
         &self,
-        instance_id: InstanceUuid,
-        target: InstanceStateRequested,
-    ) -> Result<InstancePutStateResponse, Error> {
+        propolis_id: PropolisUuid,
+        target: VmmStateRequested,
+    ) -> Result<VmmPutStateResponse, Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
             .send(InstanceManagerRequest::EnsureState {
-                instance_id,
+                propolis_id,
                 target,
                 tx,
             })
@@ -206,31 +206,32 @@ impl InstanceManager {
             // these may involve a long-running zone creation, so avoid HTTP
             // request timeouts by decoupling the response
             // (see InstanceRunner::put_state)
-            InstanceStateRequested::MigrationTarget(_)
-            | InstanceStateRequested::Running => {
+            VmmStateRequested::MigrationTarget(_)
+            | VmmStateRequested::Running => {
                 // We don't want the sending side of the channel to see an
                 // error if we drop rx without awaiting it.
                 // Since we don't care about the response here, we spawn rx
                 // into a task which will await it for us in the background.
                 tokio::spawn(rx);
-                Ok(InstancePutStateResponse { updated_runtime: None })
+                Ok(VmmPutStateResponse { updated_runtime: None })
+            }
+            VmmStateRequested::Stopped | VmmStateRequested::Reboot => {
+                rx.await?
             }
-            InstanceStateRequested::Stopped
-            | InstanceStateRequested::Reboot => rx.await?,
         }
     }

-    pub async fn instance_issue_disk_snapshot_request(
+    pub async fn issue_disk_snapshot_request(
         &self,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         disk_id: Uuid,
         snapshot_id: Uuid,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::InstanceIssueDiskSnapshot {
-                instance_id,
+            .send(InstanceManagerRequest::IssueDiskSnapshot {
+                propolis_id,
                 disk_id,
                 snapshot_id,
                 tx,
@@ -259,14 +260,14 @@ impl InstanceManager {

     pub async fn add_external_ip(
         &self,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         ip: &InstanceExternalIpBody,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::InstanceAddExternalIp {
-                instance_id,
+            .send(InstanceManagerRequest::AddExternalIp {
+                propolis_id,
                 ip: *ip,
                 tx,
             })
@@ -277,14 +278,14 @@ impl InstanceManager {

     pub async fn delete_external_ip(
         &self,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         ip: &InstanceExternalIpBody,
     ) -> Result<(), Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::InstanceDeleteExternalIp {
-                instance_id,
+            .send(InstanceManagerRequest::DeleteExternalIp {
+                propolis_id,
                 ip: *ip,
                 tx,
             })
@@ -300,12 +301,12 @@ impl InstanceManager {

     pub async fn get_instance_state(
         &self,
-        instance_id: InstanceUuid,
-    ) -> Result<SledInstanceState, Error> {
+        propolis_id: PropolisUuid,
+    ) -> Result<SledVmmState, Error> {
         let (tx, rx) = oneshot::channel();
         self.inner
             .tx
-            .send(InstanceManagerRequest::GetState { instance_id, tx })
+            .send(InstanceManagerRequest::GetState { propolis_id, tx })
             .await
             .map_err(|_| Error::FailedSendInstanceManagerClosed)?;
         rx.await?
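A condensed sketch of the `tokio::spawn(rx)` decoupling used in `ensure_state` above, with stand-in payloads: for long-running transitions the caller returns immediately, but the oneshot receiver is parked in a background task so the responder never sees a "receiver dropped" error.

```rust
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let (tx, rx) = oneshot::channel::<&'static str>();

    // Responder side: may finish much later (e.g. after zone creation).
    tokio::spawn(async move {
        tokio::time::sleep(std::time::Duration::from_millis(50)).await;
        // Would return a SendError if `rx` had simply been dropped.
        let _ = tx.send("started");
    });

    // Caller side: don't await the result, but keep `rx` alive somewhere.
    tokio::spawn(rx);

    println!("returned to the HTTP client without waiting");
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
}
```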
@@ -351,20 +352,20 @@ enum InstanceManagerRequest {
         // reasonable choice...
         sled_identifiers: Box<SledIdentifiers>,
         metadata: InstanceMetadata,
-        tx: oneshot::Sender<Result<SledInstanceState, Error>>,
+        tx: oneshot::Sender<Result<SledVmmState, Error>>,
     },
     EnsureUnregistered {
-        instance_id: InstanceUuid,
-        tx: oneshot::Sender<Result<InstanceUnregisterResponse, Error>>,
+        propolis_id: PropolisUuid,
+        tx: oneshot::Sender<Result<VmmUnregisterResponse, Error>>,
     },
     EnsureState {
-        instance_id: InstanceUuid,
-        target: InstanceStateRequested,
-        tx: oneshot::Sender<Result<InstancePutStateResponse, Error>>,
+        propolis_id: PropolisUuid,
+        target: VmmStateRequested,
+        tx: oneshot::Sender<Result<VmmPutStateResponse, Error>>,
     },
-    InstanceIssueDiskSnapshot {
-        instance_id: InstanceUuid,
+    IssueDiskSnapshot {
+        propolis_id: PropolisUuid,
         disk_id: Uuid,
         snapshot_id: Uuid,
         tx: oneshot::Sender<Result<(), Error>>,
@@ -373,19 +374,19 @@ enum InstanceManagerRequest {
         name: String,
         tx: oneshot::Sender<Result<ZoneBundleMetadata, BundleError>>,
     },
-    InstanceAddExternalIp {
-        instance_id: InstanceUuid,
+    AddExternalIp {
+        propolis_id: PropolisUuid,
         ip: InstanceExternalIpBody,
         tx: oneshot::Sender<Result<(), Error>>,
     },
-    InstanceDeleteExternalIp {
-        instance_id: InstanceUuid,
+    DeleteExternalIp {
+        propolis_id: PropolisUuid,
         ip: InstanceExternalIpBody,
         tx: oneshot::Sender<Result<(), Error>>,
     },
     GetState {
-        instance_id: InstanceUuid,
-        tx: oneshot::Sender<Result<SledInstanceState, Error>>,
+        propolis_id: PropolisUuid,
+        tx: oneshot::Sender<Result<SledVmmState, Error>>,
     },
     OnlyUseDisks {
         disks: AllDisks,
@@ -396,7 +397,7 @@ enum InstanceManagerRequest {

 // Requests that the instance manager stop processing information about a
 // particular instance.
 struct InstanceDeregisterRequest {
-    id: InstanceUuid,
+    id: PropolisUuid,
 }

 struct InstanceManagerRunner {
@@ -422,8 +423,8 @@ struct InstanceManagerRunner {
     // TODO: If we held an object representing an enum of "Created OR Running"
     // instance, we could avoid the methods within "instance.rs" that panic
     // if the Propolis client hasn't been initialized.
-    /// A mapping from a Sled Agent "Instance ID" to ("Propolis ID", [Instance]).
-    instances: BTreeMap<InstanceUuid, (PropolisUuid, Instance)>,
+    /// A mapping from a Propolis ID to the [Instance] that Propolis incarnates.
+    jobs: BTreeMap<PropolisUuid, Instance>,

     vnic_allocator: VnicAllocator<Etherstub>,
     port_manager: PortManager,
@@ -451,7 +452,7 @@ impl InstanceManagerRunner {
                 request = self.terminate_rx.recv() => {
                     match request {
                         Some(request) => {
-                            self.instances.remove(&request.id);
+                            self.jobs.remove(&request.id);
                         },
                         None => {
                             warn!(self.log, "InstanceManager's 'instance terminate' channel closed; shutting down");
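A hypothetical sketch of the deregistration flow implied by `terminate_rx` above: a ticket handed to each job sends its ID back to the manager when it is dropped, so the `jobs` map cannot leak entries. The `Drop` impl and types here are illustrative, not a copy of `InstanceTicket`.

```rust
use tokio::sync::mpsc;

struct DeregisterRequest {
    id: u64, // stand-in for PropolisUuid
}

struct Ticket {
    id: u64,
    terminate_tx: Option<mpsc::UnboundedSender<DeregisterRequest>>,
}

impl Drop for Ticket {
    fn drop(&mut self) {
        if let Some(tx) = self.terminate_tx.take() {
            // The manager may already be gone at shutdown; ignore send errors.
            let _ = tx.send(DeregisterRequest { id: self.id });
        }
    }
}

#[tokio::main]
async fn main() {
    let (terminate_tx, mut terminate_rx) = mpsc::unbounded_channel();
    drop(Ticket { id: 7, terminate_tx: Some(terminate_tx) });
    assert_eq!(terminate_rx.recv().await.map(|r| r.id), Some(7));
}
```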
@@ -484,31 +485,31 @@ impl InstanceManagerRunner {
                         metadata
                     ).await).map_err(|_| Error::FailedSendClientClosed)
                 },
-                Some(EnsureUnregistered { instance_id, tx }) => {
-                    self.ensure_unregistered(tx, instance_id).await
+                Some(EnsureUnregistered { propolis_id, tx }) => {
+                    self.ensure_unregistered(tx, propolis_id).await
                 },
-                Some(EnsureState { instance_id, target, tx }) => {
-                    self.ensure_state(tx, instance_id, target).await
+                Some(EnsureState { propolis_id, target, tx }) => {
+                    self.ensure_state(tx, propolis_id, target).await
                 },
-                Some(InstanceIssueDiskSnapshot { instance_id, disk_id, snapshot_id, tx }) => {
-                    self.instance_issue_disk_snapshot_request(tx, instance_id, disk_id, snapshot_id).await
+                Some(IssueDiskSnapshot { propolis_id, disk_id, snapshot_id, tx }) => {
+                    self.issue_disk_snapshot_request(tx, propolis_id, disk_id, snapshot_id).await
                 },
                 Some(CreateZoneBundle { name, tx }) => {
                     self.create_zone_bundle(tx, &name).await.map_err(Error::from)
                 },
-                Some(InstanceAddExternalIp { instance_id, ip, tx }) => {
-                    self.add_external_ip(tx, instance_id, &ip).await
+                Some(AddExternalIp { propolis_id, ip, tx }) => {
+                    self.add_external_ip(tx, propolis_id, &ip).await
                 },
-                Some(InstanceDeleteExternalIp { instance_id, ip, tx }) => {
-                    self.delete_external_ip(tx, instance_id, &ip).await
+                Some(DeleteExternalIp { propolis_id, ip, tx }) => {
+                    self.delete_external_ip(tx, propolis_id, &ip).await
                 },
-                Some(GetState { instance_id, tx }) => {
+                Some(GetState { propolis_id, tx }) => {
                     // TODO(eliza): it could potentially be nice to
                     // refactor this to use `tokio::sync::watch`, rather
                     // than having to force `GetState` requests to
                     // serialize with the requests that actually update
                     // the state...
-                    self.get_instance_state(tx, instance_id).await
+                    self.get_instance_state(tx, propolis_id).await
                 },
                 Some(OnlyUseDisks { disks, tx } ) => {
                     self.use_only_these_disks(disks).await;
@@ -533,8 +534,8 @@ impl InstanceManagerRunner {
         }
     }

-    fn get_instance(&self, instance_id: InstanceUuid) -> Option<&Instance> {
-        self.instances.get(&instance_id).map(|(_id, v)| v)
+    fn get_propolis(&self, propolis_id: PropolisUuid) -> Option<&Instance> {
+        self.jobs.get(&propolis_id)
     }

     /// Ensures that the instance manager contains a registered instance with
@@ -565,7 +566,7 @@ impl InstanceManagerRunner {
         propolis_addr: SocketAddr,
         sled_identifiers: SledIdentifiers,
         metadata: InstanceMetadata,
-    ) -> Result<SledInstanceState, Error> {
+    ) -> Result<SledVmmState, Error> {
         info!(
             &self.log,
             "ensuring instance is registered";
@@ -579,17 +580,16 @@ impl InstanceManagerRunner {
         );

         let instance = {
-            if let Some((existing_propolis_id, existing_instance)) =
-                self.instances.get(&instance_id)
-            {
-                if propolis_id != *existing_propolis_id {
+            if let Some(existing_instance) = self.jobs.get(&propolis_id) {
+                if instance_id != existing_instance.id() {
                     info!(&self.log,
-                          "instance already registered with another Propolis ID";
-                          "instance_id" => %instance_id,
-                          "existing_propolis_id" => %*existing_propolis_id);
+                          "Propolis ID already used by another instance";
+                          "propolis_id" => %propolis_id,
+                          "existing_instanceId" => %existing_instance.id());
+
                     return Err(Error::Instance(
-                        crate::instance::Error::InstanceAlreadyRegistered(
-                            *existing_propolis_id,
+                        crate::instance::Error::PropolisAlreadyRegistered(
+                            propolis_id,
                         ),
                     ));
                 } else {
@@ -602,11 +602,16 @@ impl InstanceManagerRunner {
             } else {
                 info!(&self.log,
                       "registering new instance";
-                      "instance_id" => ?instance_id);
-                let instance_log =
-                    self.log.new(o!("instance_id" => format!("{instance_id}")));
+                      "instance_id" => %instance_id,
+                      "propolis_id" => %propolis_id);
+
+                let instance_log = self.log.new(o!(
+                    "instance_id" => instance_id.to_string(),
+                    "propolis_id" => propolis_id.to_string(),
+                ));
+
                 let ticket =
-                    InstanceTicket::new(instance_id, self.terminate_tx.clone());
+                    InstanceTicket::new(propolis_id, self.terminate_tx.clone());

                 let services = InstanceManagerServices {
                     nexus_client: self.nexus_client.clone(),
@@ -635,27 +640,26 @@ impl InstanceManagerRunner {
                     sled_identifiers,
                     metadata,
                 )?;
-                let _old =
-                    self.instances.insert(instance_id, (propolis_id, instance));
+                let _old = self.jobs.insert(propolis_id, instance);
                 assert!(_old.is_none());
-                &self.instances.get(&instance_id).unwrap().1
+                &self.jobs.get(&propolis_id).unwrap()
             }
         };

         Ok(instance.current_state().await?)
     }
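The registration rule enforced above, restated as a toy function over plain integers standing in for the typed UUIDs: re-registering the same Propolis ID for the same instance is an idempotent success, while reusing a Propolis ID for a different instance is an error.

```rust
use std::collections::BTreeMap;

#[derive(Debug, PartialEq)]
enum RegisterError {
    PropolisAlreadyRegistered(u64),
}

fn ensure_registered(
    jobs: &mut BTreeMap<u64, u64>, // propolis_id -> instance_id
    propolis_id: u64,
    instance_id: u64,
) -> Result<(), RegisterError> {
    match jobs.get(&propolis_id) {
        Some(&existing) if existing != instance_id => {
            Err(RegisterError::PropolisAlreadyRegistered(propolis_id))
        }
        Some(_) => Ok(()), // already registered; idempotent success
        None => {
            jobs.insert(propolis_id, instance_id);
            Ok(())
        }
    }
}

fn main() {
    let mut jobs = BTreeMap::new();
    assert_eq!(ensure_registered(&mut jobs, 1, 10), Ok(()));
    assert_eq!(ensure_registered(&mut jobs, 1, 10), Ok(()));
    assert_eq!(
        ensure_registered(&mut jobs, 1, 11),
        Err(RegisterError::PropolisAlreadyRegistered(1))
    );
}
```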
-    /// Idempotently ensures the instance is not registered with this instance
-    /// manager. If the instance exists and has a running Propolis, that
-    /// Propolis is rudely terminated.
+    /// Idempotently ensures this VM is not registered with this instance
+    /// manager. If this Propolis job is registered and has a running zone, the
+    /// zone is rudely terminated.
     async fn ensure_unregistered(
         &mut self,
-        tx: oneshot::Sender<Result<InstanceUnregisterResponse, Error>>,
-        instance_id: InstanceUuid,
+        tx: oneshot::Sender<Result<VmmUnregisterResponse, Error>>,
+        propolis_id: PropolisUuid,
     ) -> Result<(), Error> {
         // If the instance does not exist, we response immediately.
-        let Some(instance) = self.get_instance(instance_id) else {
-            tx.send(Ok(InstanceUnregisterResponse { updated_runtime: None }))
+        let Some(instance) = self.get_propolis(propolis_id) else {
+            tx.send(Ok(VmmUnregisterResponse { updated_runtime: None }))
                 .map_err(|_| Error::FailedSendClientClosed)?;
             return Ok(());
         };
@@ -667,15 +671,15 @@ impl InstanceManagerRunner {
         Ok(())
     }

-    /// Idempotently attempts to drive the supplied instance into the supplied
+    /// Idempotently attempts to drive the supplied Propolis into the supplied
     /// runtime state.
     async fn ensure_state(
         &mut self,
-        tx: oneshot::Sender<Result<InstancePutStateResponse, Error>>,
-        instance_id: InstanceUuid,
-        target: InstanceStateRequested,
+        tx: oneshot::Sender<Result<VmmPutStateResponse, Error>>,
+        propolis_id: PropolisUuid,
+        target: VmmStateRequested,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_instance(instance_id) else {
+        let Some(instance) = self.get_propolis(propolis_id) else {
             match target {
                 // If the instance isn't registered, then by definition it
                 // isn't running here. Allow requests to stop or destroy the
@@ -685,14 +689,12 @@ impl InstanceManagerRunner {
                 // Propolis handled it, sled agent unregistered the
                 // instance, and only then did a second stop request
                 // arrive.
-                InstanceStateRequested::Stopped => {
-                    tx.send(Ok(InstancePutStateResponse {
-                        updated_runtime: None,
-                    }))
-                    .map_err(|_| Error::FailedSendClientClosed)?;
+                VmmStateRequested::Stopped => {
+                    tx.send(Ok(VmmPutStateResponse { updated_runtime: None }))
+                        .map_err(|_| Error::FailedSendClientClosed)?;
                 }
                 _ => {
-                    tx.send(Err(Error::NoSuchInstance(instance_id)))
+                    tx.send(Err(Error::NoSuchVmm(propolis_id)))
                         .map_err(|_| Error::FailedSendClientClosed)?;
                 }
             }
@@ -702,20 +704,15 @@ impl InstanceManagerRunner {
         Ok(())
     }

-    async fn instance_issue_disk_snapshot_request(
+    async fn issue_disk_snapshot_request(
         &self,
         tx: oneshot::Sender<Result<(), Error>>,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         disk_id: Uuid,
         snapshot_id: Uuid,
     ) -> Result<(), Error> {
-        let instance = {
-            let (_, instance) = self
-                .instances
-                .get(&instance_id)
-                .ok_or(Error::NoSuchInstance(instance_id))?;
-            instance
-        };
+        let instance =
+            self.jobs.get(&propolis_id).ok_or(Error::NoSuchVmm(propolis_id))?;

         instance
             .issue_snapshot_request(tx, disk_id, snapshot_id)
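The unregistered-VMM handling above boils down to a small decision table: a stop request for an unknown VMM reports success with no updated runtime, while any other transition for an unknown VMM is an error. A toy model with invented types:

```rust
#[derive(Debug, PartialEq)]
enum Target {
    Stopped,
    Running,
}

#[derive(Debug, PartialEq)]
enum Response {
    Ok { updated_runtime: Option<&'static str> },
    NoSuchVmm,
}

fn ensure_state(registered: bool, target: Target) -> Response {
    if registered {
        return Response::Ok { updated_runtime: Some("new runtime") };
    }
    match target {
        // The VMM is already gone, so a stop request has nothing to do.
        Target::Stopped => Response::Ok { updated_runtime: None },
        _ => Response::NoSuchVmm,
    }
}

fn main() {
    assert_eq!(
        ensure_state(false, Target::Stopped),
        Response::Ok { updated_runtime: None }
    );
    assert_eq!(ensure_state(false, Target::Running), Response::NoSuchVmm);
}
```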
@@ -729,11 +726,19 @@ impl InstanceManagerRunner {
         tx: oneshot::Sender<Result<ZoneBundleMetadata, BundleError>>,
         name: &str,
     ) -> Result<(), BundleError> {
-        let Some((_propolis_id, instance)) =
-            self.instances.values().find(|(propolis_id, _instance)| {
-                name == propolis_zone_name(propolis_id)
-            })
-        else {
+        // A well-formed Propolis zone name must consist of
+        // `PROPOLIS_ZONE_PREFIX` and the Propolis ID. If the prefix is not
+        // present or the Propolis ID portion of the supplied zone name isn't
+        // parseable as a UUID, there is no Propolis zone with the specified
+        // name to capture into a bundle, so return a `NoSuchZone` error.
+        let vmm_id: PropolisUuid = name
+            .strip_prefix(PROPOLIS_ZONE_PREFIX)
+            .and_then(|uuid_str| uuid_str.parse::<PropolisUuid>().ok())
+            .ok_or_else(|| BundleError::NoSuchZone {
+                name: name.to_string(),
+            })?;
+
+        let Some(instance) = self.jobs.get(&vmm_id) else {
             return Err(BundleError::NoSuchZone { name: name.to_string() });
         };
         instance.request_zone_bundle(tx).await
@@ -742,11 +747,11 @@ impl InstanceManagerRunner {
     async fn add_external_ip(
         &self,
         tx: oneshot::Sender<Result<(), Error>>,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         ip: &InstanceExternalIpBody,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_instance(instance_id) else {
-            return Err(Error::NoSuchInstance(instance_id));
+        let Some(instance) = self.get_propolis(propolis_id) else {
+            return Err(Error::NoSuchVmm(propolis_id));
         };
         instance.add_external_ip(tx, ip).await?;
         Ok(())
@@ -755,11 +760,11 @@ impl InstanceManagerRunner {
     async fn delete_external_ip(
         &self,
         tx: oneshot::Sender<Result<(), Error>>,
-        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
         ip: &InstanceExternalIpBody,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_instance(instance_id) else {
-            return Err(Error::NoSuchInstance(instance_id));
+        let Some(instance) = self.get_propolis(propolis_id) else {
+            return Err(Error::NoSuchVmm(propolis_id));
         };

         instance.delete_external_ip(tx, ip).await?;
@@ -768,12 +773,12 @@ impl InstanceManagerRunner {

     async fn get_instance_state(
         &self,
-        tx: oneshot::Sender<Result<SledInstanceState, Error>>,
-        instance_id: InstanceUuid,
+        tx: oneshot::Sender<Result<SledVmmState, Error>>,
+        propolis_id: PropolisUuid,
     ) -> Result<(), Error> {
-        let Some(instance) = self.get_instance(instance_id) else {
+        let Some(instance) = self.get_propolis(propolis_id) else {
             return tx
-                .send(Err(Error::NoSuchInstance(instance_id)))
+                .send(Err(Error::NoSuchVmm(propolis_id)))
                 .map_err(|_| Error::FailedSendClientClosed);
         };

@@ -801,7 +806,7 @@ impl InstanceManagerRunner {
         let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect();
         let mut to_remove = vec![];

-        for (id, (_, instance)) in self.instances.iter() {
+        for (id, instance) in self.jobs.iter() {
             // If we can read the filesystem pool, consider it. Otherwise, move
             // on, to prevent blocking the cleanup of other instances.
             let Ok(Some(filesystem_pool)) =
@@ -817,7 +822,7 @@ impl InstanceManagerRunner {
         for id in to_remove {
             info!(self.log, "use_only_these_disks: Removing instance";
                   "instance_id" => ?id);
-            if let Some((_, instance)) = self.instances.remove(&id) {
+            if let Some(instance) = self.jobs.remove(&id) {
                 let (tx, rx) = oneshot::channel();
                 let mark_failed = true;
                 if let Err(e) = instance.terminate(tx, mark_failed).await {
@@ -835,22 +840,22 @@ impl InstanceManagerRunner {

 /// Represents membership of an instance in the [`InstanceManager`].
 pub struct InstanceTicket {
-    id: InstanceUuid,
+    id: PropolisUuid,
     terminate_tx: Option<mpsc::UnboundedSender<InstanceDeregisterRequest>>,
 }

 impl InstanceTicket {
-    // Creates a new instance ticket for instance "id" to be removed
-    // from the manger on destruction.
+    // Creates a new instance ticket for the Propolis job with the supplied `id`
+    // to be removed from the manager on destruction.
fn new( - id: InstanceUuid, + id: PropolisUuid, terminate_tx: mpsc::UnboundedSender, ) -> Self { InstanceTicket { id, terminate_tx: Some(terminate_tx) } } #[cfg(all(test, target_os = "illumos"))] - pub(crate) fn new_without_manager_for_test(id: InstanceUuid) -> Self { + pub(crate) fn new_without_manager_for_test(id: PropolisUuid) -> Self { Self { id, terminate_tx: None } } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 419e897d756..de0b0867520 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -3,9 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; +use omicron_common::disk::{DatasetKind, DatasetName}; pub use sled_hardware::DendriteAsic; -use sled_storage::dataset::DatasetName; -use sled_storage::dataset::DatasetType; use std::net::SocketAddrV6; /// Extension trait for `OmicronZoneConfig`. @@ -49,25 +48,25 @@ pub(crate) trait OmicronZoneTypeExt { | OmicronZoneType::Oximeter { .. } | OmicronZoneType::CruciblePantry { .. } => None, OmicronZoneType::Clickhouse { dataset, address, .. } => { - Some((dataset, DatasetType::Clickhouse, address)) + Some((dataset, DatasetKind::Clickhouse, address)) } OmicronZoneType::ClickhouseKeeper { dataset, address, .. } => { - Some((dataset, DatasetType::ClickhouseKeeper, address)) + Some((dataset, DatasetKind::ClickhouseKeeper, address)) } OmicronZoneType::ClickhouseServer { dataset, address, .. } => { - Some((dataset, DatasetType::ClickhouseServer, address)) + Some((dataset, DatasetKind::ClickhouseServer, address)) } OmicronZoneType::CockroachDb { dataset, address, .. } => { - Some((dataset, DatasetType::CockroachDb, address)) + Some((dataset, DatasetKind::Cockroach, address)) } OmicronZoneType::Crucible { dataset, address, .. } => { - Some((dataset, DatasetType::Crucible, address)) + Some((dataset, DatasetKind::Crucible, address)) } OmicronZoneType::ExternalDns { dataset, http_address, .. } => { - Some((dataset, DatasetType::ExternalDns, http_address)) + Some((dataset, DatasetKind::ExternalDns, http_address)) } OmicronZoneType::InternalDns { dataset, http_address, .. 
} => { - Some((dataset, DatasetType::InternalDns, http_address)) + Some((dataset, DatasetKind::InternalDns, http_address)) } }?; diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index 0ec14138fce..e1b12d6b2bf 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -9,3 +9,8 @@ mod plan; pub mod service; pub use plan::service::SledConfig; +pub use plan::service::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, + from_source_nat_config_to_external_snat_ip, +}; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index ff137f131fb..7ca2b295a04 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -10,14 +10,18 @@ use illumos_utils::zpool::ZpoolName; use internal_dns::config::{Host, Zone}; use internal_dns::ServiceName; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, SledRole, + Inventory, OmicronZoneDataset, SledRole, +}; +use nexus_types::deployment::{ + blueprint_zone_type, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, + BlueprintZoneDisposition, BlueprintZoneType, + OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, + OmicronZoneExternalSnatIp, }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, - BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DENDRITE_PORT, - DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, - MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, - RSS_RESERVED_ADDRESSES, SLED_PREFIX, + DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, MGD_PORT, MGS_PORT, NTP_PORT, + NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ @@ -28,10 +32,17 @@ use omicron_common::backoff::{ retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError, }; use omicron_common::disk::{ - DiskVariant, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, + DatasetKind, DatasetName, DiskVariant, OmicronPhysicalDiskConfig, + OmicronPhysicalDisksConfig, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; -use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; +use omicron_common::policy::{ + BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, INTERNAL_DNS_REDUNDANCY, + MAX_INTERNAL_DNS_REDUNDANCY, NEXUS_REDUNDANCY, +}; +use omicron_uuid_kinds::{ + ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid, +}; use rand::prelude::SliceRandom; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -40,7 +51,7 @@ use sled_agent_client::{ }; use sled_agent_types::rack_init::RackInitializeRequest as Config; use sled_agent_types::sled::StartSledAgentRequest; -use sled_storage::dataset::{DatasetName, DatasetType, CONFIG_DATASET}; +use sled_storage::dataset::CONFIG_DATASET; use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; @@ -54,12 +65,25 @@ use uuid::Uuid; const OXIMETER_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse. 
-// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 2 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 0 when testing replicated ClickHouse. const CLICKHOUSE_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse keeper. -// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 3 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_KEEPER_REDUNDANCY once we enable +// replicated ClickHouse +// Set to 3 when testing replicated ClickHouse. const CLICKHOUSE_KEEPER_COUNT: usize = 0; +// TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove +// when Nexus provisions Clickhouse server. +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 2 when testing replicated ClickHouse +const CLICKHOUSE_SERVER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. const MINIMUM_U2_COUNT: usize = 3; @@ -105,10 +129,10 @@ pub enum PlanError { #[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] pub struct SledConfig { /// Control plane disks configured for this sled - pub disks: OmicronPhysicalDisksConfig, + pub disks: BlueprintPhysicalDisksConfig, /// zones configured for this sled - pub zones: Vec, + pub zones: Vec, } #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] @@ -125,7 +149,53 @@ impl Ledgerable for Plan { } const RSS_SERVICE_PLAN_V1_FILENAME: &str = "rss-service-plan.json"; const RSS_SERVICE_PLAN_V2_FILENAME: &str = "rss-service-plan-v2.json"; -const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v3.json"; +const RSS_SERVICE_PLAN_V3_FILENAME: &str = "rss-service-plan-v3.json"; +const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v4.json"; + +pub fn from_sockaddr_to_external_floating_addr( + addr: SocketAddr, +) -> OmicronZoneExternalFloatingAddr { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingAddr { id: ExternalIpUuid::new_v4(), addr } +} + +pub fn from_ipaddr_to_external_floating_ip( + ip: IpAddr, +) -> OmicronZoneExternalFloatingIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. 
+ // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingIp { id: ExternalIpUuid::new_v4(), ip } +} + +pub fn from_source_nat_config_to_external_snat_ip( + snat_cfg: SourceNatConfig, +) -> OmicronZoneExternalSnatIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalSnatIp { id: ExternalIpUuid::new_v4(), snat_cfg } +} impl Plan { pub async fn load( @@ -185,6 +255,14 @@ impl Plan { } })? { Err(PlanError::FoundV2) + } else if Self::has_v3(storage_manager).await.map_err(|err| { + // Same as the comment above, but for version 3. + PlanError::Io { + message: String::from("looking for v3 RSS plan"), + err, + } + })? { + Err(PlanError::FoundV2) } else { Ok(None) } @@ -228,6 +306,25 @@ impl Plan { Ok(false) } + async fn has_v3( + storage_manager: &StorageHandle, + ) -> Result { + let paths = storage_manager + .get_latest_disks() + .await + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(RSS_SERVICE_PLAN_V3_FILENAME)); + + for p in paths { + if p.try_exists()? { + return Ok(true); + } + } + + Ok(false) + } + async fn is_sled_scrimlet( log: &Logger, address: SocketAddrV6, @@ -373,9 +470,11 @@ impl Plan { // Provision internal DNS zones, striping across Sleds. 
let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - static_assertions::const_assert!(DNS_REDUNDANCY <= MAX_DNS_REDUNDANCY,); + static_assertions::const_assert!( + INTERNAL_DNS_REDUNDANCY <= MAX_INTERNAL_DNS_REDUNDANCY + ); let dns_subnets = - &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY]; + &reserved_rack_subnet.get_dns_subnets()[0..INTERNAL_DNS_REDUNDANCY]; let rack_dns_servers = dns_subnets .into_iter() .map(|dns_subnet| dns_subnet.dns_address().into()) @@ -401,23 +500,25 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::InternalDns)?; + sled.alloc_dataset_from_u2s(DatasetKind::InternalDns)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), - }, - http_address, - dns_address, - gz_address: dns_subnet.gz_address(), - gz_address_index: i.try_into().expect("Giant indices?"), - }, filesystem_pool, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + gz_address: dns_subnet.gz_address(), + gz_address_index: i.try_into().expect("Giant indices?"), + }, + ), }); } @@ -441,18 +542,20 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::CockroachDb)?; + sled.alloc_dataset_from_u2s(DatasetKind::Cockroach)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::CockroachDb { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - address, - }, + ), filesystem_pool, }); } @@ -484,23 +587,27 @@ impl Plan { ) .unwrap(); let dns_port = omicron_common::address::DNS_PORT; - let dns_address = SocketAddr::new(external_ip, dns_port); - let dataset_kind = DatasetType::ExternalDns; + let dns_address = from_sockaddr_to_external_floating_addr( + SocketAddr::new(external_ip, dns_port), + ); + let dataset_kind = DatasetKind::ExternalDns; let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: *http_address.ip(), - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + nic, }, - http_address, - dns_address, - nic, - }, + ), filesystem_pool, }); } @@ -524,28 +631,32 @@ impl Plan { 
.unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Nexus { - internal_address: SocketAddrV6::new( - address, - omicron_common::address::NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_ip, - nic, - // Tell Nexus to use TLS if and only if the caller - // provided TLS certificates. This effectively - // determines the status of TLS for the lifetime of - // the rack. In production-like deployments, we'd - // always expect TLS to be enabled. It's only in - // development that it might not be. - external_tls: !config.external_certificates.is_empty(), - external_dns_servers: config.dns_servers.clone(), - }, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: SocketAddrV6::new( + address, + omicron_common::address::NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_ip: from_ipaddr_to_external_floating_ip( + external_ip, + ), + nic, + // Tell Nexus to use TLS if and only if the caller + // provided TLS certificates. This effectively + // determines the status of TLS for the lifetime of + // the rack. In production-like deployments, we'd + // always expect TLS to be enabled. It's only in + // development that it might not be. + external_tls: !config.external_certificates.is_empty(), + external_dns_servers: config.dns_servers.clone(), + }, + ), filesystem_pool, }); } @@ -569,18 +680,20 @@ impl Plan { ) .unwrap(); let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Oximeter { - address: SocketAddrV6::new( - address, - omicron_common::address::OXIMETER_PORT, - 0, - 0, - ), - }, + zone_type: BlueprintZoneType::Oximeter( + blueprint_zone_type::Oximeter { + address: SocketAddrV6::new( + address, + omicron_common::address::OXIMETER_PORT, + 0, + 0, + ), + }, + ), filesystem_pool, }) } @@ -595,7 +708,7 @@ impl Plan { }; let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); - let port = omicron_common::address::CLICKHOUSE_PORT; + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( @@ -606,18 +719,63 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::Clickhouse)?; + sled.alloc_dataset_from_u2s(DatasetKind::Clickhouse)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Clickhouse { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), + filesystem_pool, + }); + } + + // Provision 
Clickhouse server zones, continuing to stripe across sleds. + // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove + // Temporary linter rule until replicated Clickhouse is enabled + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..CLICKHOUSE_SERVER_COUNT { + let sled = { + let which_sled = + sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; + &mut sled_info[which_sled] + }; + let id = OmicronZoneUuid::new_v4(); + let ip = sled.addr_alloc.next().expect("Not enough addrs"); + // TODO: This may need to be a different port if/when to have single node + // and replicated running side by side as per stage 1 of RFD 468. + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; + let address = SocketAddrV6::new(ip, port, 0, 0); + dns_builder + .host_zone_with_one_backend( + id, + ip, + ServiceName::ClickhouseServer, + port, + ) + .unwrap(); + let dataset_name = + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseServer)?; + let filesystem_pool = Some(dataset_name.pool().clone()); + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, + underlay_address: ip, + zone_type: BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, + ), filesystem_pool, }); } @@ -634,7 +792,7 @@ impl Plan { }; let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); - let port = omicron_common::address::CLICKHOUSE_KEEPER_PORT; + let port = omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( @@ -645,18 +803,20 @@ impl Plan { ) .unwrap(); let dataset_name = - sled.alloc_dataset_from_u2s(DatasetType::ClickhouseKeeper)?; + sled.alloc_dataset_from_u2s(DatasetKind::ClickhouseKeeper)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::ClickhouseKeeper { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), filesystem_pool, }); } @@ -681,13 +841,15 @@ impl Plan { port, ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::CruciblePantry { - address: SocketAddrV6::new(address, port, 0, 0), - }, + zone_type: BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { + address: SocketAddrV6::new(address, port, 0, 0), + }, + ), filesystem_pool, }); } @@ -709,14 +871,18 @@ impl Plan { ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Crucible { - address, - dataset: OmicronZoneDataset { pool_name: pool.clone() }, - }, + 
zone_type: BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { + address, + dataset: OmicronZoneDataset { + pool_name: pool.clone(), + }, + }, + ), filesystem_pool: Some(pool.clone()), }); } @@ -737,24 +903,31 @@ impl Plan { .push(Host::for_zone(Zone::Other(id)).fqdn()); let (nic, snat_cfg) = svc_port_builder.next_snat(id)?; ( - OmicronZoneType::BoundaryNtp { - address: ntp_address, - ntp_servers: config.ntp_servers.clone(), - dns_servers: config.dns_servers.clone(), - domain: None, - nic, - snat_cfg, - }, + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: ntp_address, + ntp_servers: config.ntp_servers.clone(), + dns_servers: config.dns_servers.clone(), + domain: None, + nic, + external_ip: + from_source_nat_config_to_external_snat_ip( + snat_cfg, + ), + }, + ), ServiceName::BoundaryNtp, ) } else { ( - OmicronZoneType::InternalNtp { - address: ntp_address, - ntp_servers: boundary_ntp_servers.clone(), - dns_servers: rack_dns_servers.clone(), - domain: None, - }, + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: ntp_address, + ntp_servers: boundary_ntp_servers.clone(), + dns_servers: rack_dns_servers.clone(), + domain: None, + }, + ), ServiceName::InternalNtp, ) }; @@ -763,9 +936,9 @@ impl Plan { .host_zone_with_one_backend(id, address, svcname, NTP_PORT) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, zone_type, filesystem_pool, @@ -864,7 +1037,7 @@ pub struct SledInfo { u2_zpools: Vec, /// spreads components across a Sled's zpools u2_zpool_allocators: - HashMap + Send + Sync>>, + HashMap + Send + Sync>>, /// whether this Sled is a scrimlet is_scrimlet: bool, /// allocator for addresses in this Sled's subnet @@ -905,7 +1078,7 @@ impl SledInfo { /// this Sled fn alloc_dataset_from_u2s( &mut self, - kind: DatasetType, + kind: DatasetKind, ) -> Result { // We have two goals here: // @@ -1323,10 +1496,10 @@ mod tests { } #[test] - fn test_rss_service_plan_v3_schema() { + fn test_rss_service_plan_v4_schema() { let schema = schemars::schema_for!(Plan); expectorate::assert_contents( - "../schema/rss-service-plan-v3.json", + "../schema/rss-service-plan-v4.json", &serde_json::to_string_pretty(&schema).unwrap(), ); } diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index c511cf14478..32906d01959 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -172,7 +172,7 @@ impl Plan { let mut ledger = Ledger::::new_with(log, paths, plan.clone()); ledger.commit().await?; - info!(log, "Sled plan written to storage"); + info!(log, "Sled plan written to storage: {plan:#?}"); Ok(plan) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a66f66a0e38..64afb8848af 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -71,7 +71,6 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; -use crate::params::OmicronZoneTypeExt; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, }; @@ -91,14 +90,14 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ - Blueprint, 
BlueprintPhysicalDisksConfig, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, - CockroachDbPreserveDowngrade, InvalidOmicronZoneType, + blueprint_zone_type, Blueprint, BlueprintZoneType, BlueprintZonesConfig, + CockroachDbPreserveDowngrade, }; use nexus_types::external_api::views::SledState; use omicron_common::address::get_sled_address; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; @@ -107,8 +106,8 @@ use omicron_common::disk::{ }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; +use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use omicron_uuid_kinds::{ExternalIpUuid, GenericUuid}; use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -532,7 +531,7 @@ impl ServiceInner { .iter() .filter_map(|zone_config| { match &zone_config.zone_type { - OmicronZoneType::InternalDns { http_address, .. } + BlueprintZoneType::InternalDns(blueprint_zone_type::InternalDns{ http_address, .. }) => { Some(*http_address) }, @@ -718,15 +717,17 @@ impl ServiceInner { let mut datasets: Vec = vec![]; for sled_config in service_plan.services.values() { for zone in &sled_config.zones { - if let Some((dataset_name, dataset_address)) = - zone.dataset_name_and_address() - { + if let Some(dataset) = zone.zone_type.durable_dataset() { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset_name.pool().id().into_untyped_uuid(), - dataset_id: zone.id, + zpool_id: dataset + .dataset + .pool_name + .id() + .into_untyped_uuid(), + dataset_id: zone.id.into_untyped_uuid(), request: NexusTypes::DatasetPutRequest { - address: dataset_address.to_string(), - kind: dataset_name.dataset().kind(), + address: dataset.address.to_string(), + kind: dataset.kind, }, }) } @@ -750,23 +751,24 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::PortConfigV2 { port: config.port.clone(), - routes: config + routes: config .routes .iter() .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, + }) + .collect(), + addresses: config + .addresses + .iter() + .map(|a| NexusTypes::UplinkAddressConfig { + address: a.address, + vlan_id: a.vlan_id, }) .collect(), - addresses: config - .addresses - .iter() - .map(|a| NexusTypes::UplinkAddressConfig { - address: a.address, - vlan_id: a.vlan_id - }) - .collect(), switch: config.switch.into(), uplink_port_speed: config.uplink_port_speed.into(), uplink_port_fec: config.uplink_port_fec.into(), @@ -786,7 +788,8 @@ impl ServiceInner { remote_asn: b.remote_asn, min_ttl: b.min_ttl, md5_auth_key: b.md5_auth_key.clone(), - multi_exit_discriminator: b.multi_exit_discriminator, + multi_exit_discriminator: b + .multi_exit_discriminator, local_pref: b.local_pref, enforce_first_as: b.enforce_first_as, communities: b.communities.clone(), @@ -795,6 +798,32 @@ impl ServiceInner { vlan_id: b.vlan_id, }) .collect(), + lldp: config.lldp.as_ref().map(|lp| { + NexusTypes::LldpPortConfig { + status: match lp.status { + LldpAdminStatus::Enabled => { + NexusTypes::LldpAdminStatus::Enabled + } + LldpAdminStatus::Disabled => { + NexusTypes::LldpAdminStatus::Disabled + } + LldpAdminStatus::TxOnly => { + 
NexusTypes::LldpAdminStatus::TxOnly + } + LldpAdminStatus::RxOnly => { + NexusTypes::LldpAdminStatus::RxOnly + } + }, + chassis_id: lp.chassis_id.clone(), + port_id: lp.port_id.clone(), + system_name: lp.system_name.clone(), + system_description: lp + .system_description + .clone(), + port_description: lp.port_description.clone(), + management_addrs: lp.management_addrs.clone(), + } + }), }) .collect(), bgp: config @@ -802,7 +831,12 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::BgpConfig { asn: config.asn, - originate: config.originate.iter().cloned().map(Into::into).collect(), + originate: config + .originate + .iter() + .cloned() + .map(Into::into) + .collect(), shaper: config.shaper.clone(), checker: config.checker.clone(), }) @@ -810,25 +844,26 @@ impl ServiceInner { bfd: config .bfd .iter() - .map(|spec| NexusTypes::BfdPeerConfig { - detection_threshold: spec.detection_threshold, - local: spec.local, - mode: match spec.mode { - omicron_common::api::external::BfdMode::SingleHop => { - nexus_client::types::BfdMode::SingleHop - } - omicron_common::api::external::BfdMode::MultiHop => { - nexus_client::types::BfdMode::MultiHop - } - }, - remote: spec.remote, - required_rx: spec.required_rx, - switch: spec.switch.into(), + .map(|spec| { + NexusTypes::BfdPeerConfig { + detection_threshold: spec.detection_threshold, + local: spec.local, + mode: match spec.mode { + omicron_common::api::external::BfdMode::SingleHop => { + nexus_client::types::BfdMode::SingleHop + } + omicron_common::api::external::BfdMode::MultiHop => { + nexus_client::types::BfdMode::MultiHop + } + }, + remote: spec.remote, + required_rx: spec.required_rx, + switch: spec.switch.into(), + } }) .collect(), } }; - info!(self.log, "rack_network_config: {:#?}", rack_network_config); let physical_disks: Vec<_> = sled_configs_by_id @@ -946,7 +981,7 @@ impl ServiceInner { if sled_config.zones.iter().any(|zone_config| { matches!( &zone_config.zone_type, - OmicronZoneType::CockroachDb { .. } + BlueprintZoneType::CockroachDb(_) ) }) { Some(sled_address) @@ -1363,7 +1398,7 @@ fn build_initial_blueprint_from_plan( let blueprint = build_initial_blueprint_from_sled_configs( sled_configs_by_id, internal_dns_version, - )?; + ); Ok(blueprint) } @@ -1371,47 +1406,11 @@ fn build_initial_blueprint_from_plan( pub(crate) fn build_initial_blueprint_from_sled_configs( sled_configs_by_id: &BTreeMap, internal_dns_version: Generation, -) -> Result { - // Helper to convert an `OmicronZoneConfig` into a `BlueprintZoneConfig`. - // This is separate primarily so rustfmt doesn't lose its mind. - let to_bp_zone_config = |z: &OmicronZoneConfig| { - // All initial zones are in-service. - let disposition = BlueprintZoneDisposition::InService; - BlueprintZoneConfig::from_omicron_zone_config( - z.clone(), - disposition, - // This is pretty weird: IP IDs don't exist yet, so it's fine for us - // to make them up (Nexus will record them as a part of the - // handoff). We could pass `None` here for some zone types, but it's - // a little simpler to just always pass a new ID, which will only be - // used if the zone type has an external IP. 
- // - // This should all go away once RSS starts using blueprints more - // directly (instead of this conversion after the fact): - // https://github.com/oxidecomputer/omicron/issues/5272 - Some(ExternalIpUuid::new_v4()), - ) - }; - - let mut blueprint_disks = BTreeMap::new(); - for (sled_id, sled_config) in sled_configs_by_id { - blueprint_disks.insert( - *sled_id, - BlueprintPhysicalDisksConfig { - generation: sled_config.disks.generation, - disks: sled_config - .disks - .disks - .iter() - .map(|d| OmicronPhysicalDiskConfig { - identity: d.identity.clone(), - id: d.id, - pool_id: d.pool_id, - }) - .collect(), - }, - ); - } +) -> Blueprint { + let blueprint_disks: BTreeMap<_, _> = sled_configs_by_id + .iter() + .map(|(sled_id, sled_config)| (*sled_id, sled_config.disks.clone())) + .collect(); let mut blueprint_zones = BTreeMap::new(); let mut sled_state = BTreeMap::new(); @@ -1428,18 +1427,14 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( // value, we will need to revisit storing this in the serialized // RSS plan. generation: DeployStepVersion::V5_EVERYTHING, - zones: sled_config - .zones - .iter() - .map(to_bp_zone_config) - .collect::>()?, + zones: sled_config.zones.clone(), }; blueprint_zones.insert(*sled_id, zones_config); sled_state.insert(*sled_id, SledState::Active); } - Ok(Blueprint { + Blueprint { id: Uuid::new_v4(), blueprint_zones, blueprint_disks, @@ -1457,7 +1452,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), - }) + } } /// Facilitates creating a sequence of OmicronZonesConfig objects for each sled @@ -1535,11 +1530,14 @@ impl<'a> OmicronZonesConfigGenerator<'a> { sled_config .zones .iter() + .cloned() + .map(|bp_zone_config| { + OmicronZoneConfig::from(bp_zone_config) + }) .filter(|z| { !zones_already.contains(&z.id) && zone_filter(&z.zone_type) - }) - .cloned(), + }), ); let config = OmicronZonesConfig { generation: version, zones }; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b822ae2963c..7677dfbd8ad 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -65,8 +65,9 @@ use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, ZoneKind, }; -use omicron_common::address::CLICKHOUSE_KEEPER_PORT; -use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::CLICKHOUSE_ADMIN_PORT; +use omicron_common::address::CLICKHOUSE_HTTP_PORT; +use omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; use omicron_common::address::COCKROACH_PORT; use omicron_common::address::CRUCIBLE_PANTRY_PORT; use omicron_common::address::CRUCIBLE_PORT; @@ -89,6 +90,7 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; +use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use once_cell::sync::OnceCell; @@ -102,9 +104,7 @@ use sled_hardware::underlay; use sled_hardware::SledMode; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{ - DatasetName, DatasetType, CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET, -}; +use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET}; use sled_storage::manager::StorageHandle; use 
slog::Logger; use std::collections::BTreeMap; @@ -1549,7 +1549,7 @@ impl ServiceManager { }; let listen_addr = *underlay_address; - let listen_port = &CLICKHOUSE_PORT.to_string(); + let listen_port = &CLICKHOUSE_HTTP_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( Some(&info.underlay_address), @@ -1573,12 +1573,39 @@ impl ServiceManager { .add_property_group(config), ); + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -1592,18 +1619,84 @@ impl ServiceManager { zone: OmicronZoneConfig { zone_type: OmicronZoneType::ClickhouseServer { .. }, - underlay_address: _, + underlay_address, .. }, .. }) => { - // We aren't yet deploying this service - error!( - &self.inner.log, - "Deploying ClickhouseServer zones is not yet supported" - ); + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + let listen_addr = *underlay_address; + let listen_port = CLICKHOUSE_HTTP_PORT.to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + Some(&info.underlay_address), + &installed_zone, + &[listen_addr], + )?; + + let dns_service = Self::dns_install(info, None, &None).await?; + + let config = PropertyGroupBuilder::new("config") + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + let clickhouse_server_service = + ServiceBuilder::new("oxide/clickhouse_server") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ); + + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); - todo!() + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(clickhouse_server_service) + .add_service(dns_service) + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io( + "Failed to setup clickhouse server profile", + err, + ) + 
})?; + RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -1620,7 +1713,7 @@ impl ServiceManager { }; let listen_addr = *underlay_address; - let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string(); + let listen_port = &CLICKHOUSE_KEEPER_TCP_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( Some(&info.underlay_address), @@ -1644,12 +1737,40 @@ impl ServiceManager { ServiceInstanceBuilder::new("default") .add_property_group(config), ); + + let ch_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_HTTP_PORT, + ) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_keeper_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -1759,7 +1880,7 @@ impl ServiceManager { let dataset_name = DatasetName::new( dataset.pool_name.clone(), - DatasetType::Crucible, + DatasetKind::Crucible, ) .full_name(); let uuid = &Uuid::new_v4().to_string(); @@ -3879,6 +4000,19 @@ impl ServiceManager { &self, our_ports: Vec, ) -> Result<(), Error> { + // Helper function to add a property-value pair + // if the config actually has a value set. + fn apv( + smfh: &SmfHelper, + prop: &str, + val: &Option, + ) -> Result<(), Error> { + if let Some(v) = val { + smfh.addpropvalue_type(prop, v, "astring")? + } + Ok(()) + } + // We expect the switch zone to be running, as we're called immediately // after `ensure_zone()` above and we just successfully configured // uplinks via DPD running in our switch zone. If somehow we're in any @@ -3901,26 +4035,76 @@ impl ServiceManager { } }; - info!(self.inner.log, "Setting up uplinkd service"); - let smfh = SmfHelper::new(&zone, &SwitchService::Uplink); + info!(self.inner.log, "ensuring scrimlet uplinks"); + let usmfh = SmfHelper::new(&zone, &SwitchService::Uplink); + let lsmfh = SmfHelper::new( + &zone, + &SwitchService::Lldpd { baseboard: Baseboard::Unknown }, + ); // We want to delete all the properties in the `uplinks` group, but we // don't know their names, so instead we'll delete and recreate the // group, then add all our properties. 
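Restated in isolation, the delete-and-recreate pattern described in the comment above looks roughly like the sketch below. The `SmfHelper` type and its `delpropgroup`, `addpropgroup`, `addpropvalue_type`, and `refresh` calls are taken from this change; `reset_uplinks` and its signature are illustrative assumptions, not code in this patch.

```
// Sketch only: property names under "uplinks" aren't known ahead of time,
// so the whole group is dropped and recreated before values are re-added.
fn reset_uplinks(
    smfh: &SmfHelper,
    ports: &[(String, Vec<String>)],
) -> Result<(), Error> {
    // Deletion may fail if the group doesn't exist yet; that's fine.
    let _ = smfh.delpropgroup("uplinks");
    smfh.addpropgroup("uplinks", "application")?;
    for (port, addrs) in ports {
        for addr in addrs {
            smfh.addpropvalue_type(
                &format!("uplinks/{port}_0"),
                addr,
                "astring",
            )?;
        }
    }
    // Refresh so the running service sees the new property snapshot.
    smfh.refresh()?;
    Ok(())
}
```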
- smfh.delpropgroup("uplinks")?; - smfh.addpropgroup("uplinks", "application")?; + let _ = usmfh.delpropgroup("uplinks"); + usmfh.addpropgroup("uplinks", "application")?; for port_config in &our_ports { for addr in &port_config.addrs { - info!(self.inner.log, "configuring port: {port_config:?}"); - smfh.addpropvalue_type( + usmfh.addpropvalue_type( &format!("uplinks/{}_0", port_config.port,), &addr.to_string(), "astring", )?; } + + if let Some(lldp_config) = &port_config.lldp { + let group_name = format!("port_{}", port_config.port); + info!(self.inner.log, "setting up {group_name}"); + let _ = lsmfh.delpropgroup(&group_name); + lsmfh.addpropgroup(&group_name, "application")?; + apv( + &lsmfh, + &format!("{group_name}/status"), + &Some(lldp_config.status.to_string()), + )?; + apv( + &lsmfh, + &format!("{group_name}/chassis_id"), + &lldp_config.chassis_id, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_name"), + &lldp_config.system_name, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_description"), + &lldp_config.system_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_description"), + &lldp_config.port_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_id"), + &lldp_config.port_id, + )?; + if let Some(a) = &lldp_config.management_addrs { + for address in a { + apv( + &lsmfh, + &format!("{group_name}/management_addrs"), + &Some(address.to_string()), + )?; + } + } + } } - smfh.refresh()?; + usmfh.refresh()?; + lsmfh.refresh()?; Ok(()) } diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index 6057d03f70c..d75081f1e4c 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -364,35 +364,6 @@ impl SimCollection { pub async fn contains_key(self: &Arc, id: &Uuid) -> bool { self.objects.lock().await.contains_key(id) } - - /// Iterates over all of the existing objects in the collection and, for any - /// that meet `condition`, asks to transition them into the supplied target - /// state. - /// - /// If any such transition fails, this routine short-circuits and does not - /// attempt to transition any other objects. - // - // TODO: It's likely more idiomatic to have an `iter_mut` routine that - // returns a struct that impls Iterator and yields &mut S references. The - // tricky bit is that the struct must hold the objects lock during the - // iteration. Figure out if there's a better way to arrange all this. 
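The TODO removed above asks for an `iter_mut`-style API whose iterator holds the collection lock. One minimal shape for that idea, sketched with a tokio-style async mutex; the `Objects` and `ObjectsGuard` names are hypothetical, and the real `SimCollection` stores richer `SimObject` values:

```
use std::collections::BTreeMap;
use tokio::sync::{Mutex, MutexGuard};
use uuid::Uuid;

pub struct Objects<S> {
    inner: Mutex<BTreeMap<Uuid, S>>,
}

/// Holds the lock for as long as the caller iterates.
pub struct ObjectsGuard<'a, S>(MutexGuard<'a, BTreeMap<Uuid, S>>);

impl<'a, S> ObjectsGuard<'a, S> {
    /// Yielding `&mut S` is sound because the map stays locked while the
    /// returned iterator borrows from this guard.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut S> + '_ {
        self.0.values_mut()
    }
}

impl<S> Objects<S> {
    /// The lock is released when the returned guard is dropped.
    pub async fn lock_iter_mut(&self) -> ObjectsGuard<'_, S> {
        ObjectsGuard(self.inner.lock().await)
    }
}
```

A caller would bind the guard first (`let mut g = objects.lock_iter_mut().await;`) and then loop over `g.iter_mut()`, which is essentially what the deleted helper did inline.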
- pub async fn sim_ensure_for_each_where( - self: &Arc, - condition: C, - target: &S::RequestedState, - ) -> Result<(), Error> - where - C: Fn(&S) -> bool, - { - let mut objects = self.objects.lock().await; - for o in objects.values_mut() { - if condition(&o.object) { - o.transition(target.clone())?; - } - } - - Ok(()) - } } impl SimCollection { @@ -421,30 +392,24 @@ mod test { use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::DiskRuntimeState; - use omicron_common::api::internal::nexus::SledInstanceState; + use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::nexus::VmmRuntimeState; use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::PropolisUuid; use sled_agent_types::disk::DiskStateRequested; - use sled_agent_types::instance::InstanceStateRequested; + use sled_agent_types::instance::VmmStateRequested; fn make_instance( logctx: &LogContext, ) -> (SimObject, Receiver<()>) { - let propolis_id = PropolisUuid::new_v4(); let vmm_state = VmmRuntimeState { state: VmmState::Starting, gen: Generation::new(), time_updated: Utc::now(), }; - let state = SledInstanceState { - vmm_state, - propolis_id, - migration_in: None, - migration_out: None, - }; + let state = + SledVmmState { vmm_state, migration_in: None, migration_out: None }; SimObject::new_simulated_auto(&state, logctx.log.new(o!())) } @@ -488,8 +453,7 @@ mod test { // Stopping an instance that was never started synchronously destroys // its VMM. let rprev = r1; - let dropped = - instance.transition(InstanceStateRequested::Stopped).unwrap(); + let dropped = instance.transition(VmmStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); let rnext = instance.object.current(); @@ -529,8 +493,7 @@ mod test { // simulated instance's state, but it does queue up a transition. let mut rprev = r1; assert!(rx.try_next().is_err()); - let dropped = - instance.transition(InstanceStateRequested::Running).unwrap(); + let dropped = instance.transition(VmmStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); assert!(rx.try_next().is_err()); @@ -562,8 +525,7 @@ mod test { // If we transition again to "Running", the process should complete // immediately. - let dropped = - instance.transition(InstanceStateRequested::Running).unwrap(); + let dropped = instance.transition(VmmStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); @@ -576,8 +538,7 @@ mod test { // If we go back to any stopped state, we go through the async process // again. assert!(rx.try_next().is_err()); - let dropped = - instance.transition(InstanceStateRequested::Stopped).unwrap(); + let dropped = instance.transition(VmmStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); let rnext = instance.object.current(); @@ -634,7 +595,7 @@ mod test { assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); assert!(instance - .transition(InstanceStateRequested::Running) + .transition(VmmStateRequested::Running) .unwrap() .is_none()); instance.transition_finish(); @@ -650,7 +611,7 @@ mod test { // Now reboot the instance. This is dispatched to Propolis, which will // move to the Rebooting state and then back to Running. 
assert!(instance - .transition(InstanceStateRequested::Reboot) + .transition(VmmStateRequested::Reboot) .unwrap() .is_none()); let (rprev, rnext) = (rnext, instance.object.current()); diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index c219a747cee..ac583a1a74e 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,38 +5,58 @@ //! HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; +use camino::Utf8PathBuf; +use dropshot::endpoint; use dropshot::ApiDescription; +use dropshot::FreeformBody; use dropshot::HttpError; +use dropshot::HttpResponseCreated; +use dropshot::HttpResponseDeleted; +use dropshot::HttpResponseHeaders; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; +use dropshot::Query; use dropshot::RequestContext; +use dropshot::StreamingBody; use dropshot::TypedBody; -use dropshot::{endpoint, ApiDescriptionRegisterError}; +use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::api::internal::shared::SledIdentifiers; use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; +use sled_agent_types::boot_disk::BootDiskPathParams; +use sled_agent_types::boot_disk::BootDiskUpdatePathParams; +use sled_agent_types::boot_disk::BootDiskWriteStartQueryParams; +use sled_agent_types::bootstore::BootstoreStatus; use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; use sled_agent_types::instance::InstanceEnsureBody; use sled_agent_types::instance::InstanceExternalIpBody; -use sled_agent_types::instance::InstancePutStateBody; -use sled_agent_types::instance::InstancePutStateResponse; -use sled_agent_types::instance::InstanceUnregisterResponse; +use sled_agent_types::instance::VmmPutStateBody; +use sled_agent_types::instance::VmmPutStateResponse; +use sled_agent_types::instance::VmmUnregisterResponse; use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::BundleUtilization; +use sled_agent_types::zone_bundle::CleanupContext; +use sled_agent_types::zone_bundle::CleanupCount; +use sled_agent_types::zone_bundle::ZoneBundleId; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; +use std::collections::BTreeMap; use std::sync::Arc; -use uuid::Uuid; use super::sled_agent::SledAgent; @@ -44,510 +64,519 @@ type SledApiDescription = ApiDescription>; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), 
ApiDescriptionRegisterError> { - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; + fn register_endpoints() -> Result { + let mut api = sled_agent_api::sled_agent_api_mod::api_description::< + SledAgentSimImpl, + >()?; api.register(instance_poke_post)?; api.register(instance_poke_single_step_post)?; api.register(instance_post_sim_migration_source)?; - api.register(disk_put)?; api.register(disk_poke_post)?; - api.register(update_artifact)?; - api.register(instance_issue_disk_snapshot_request)?; - api.register(vpc_firewall_rules_put)?; - api.register(set_v2p)?; - api.register(del_v2p)?; - api.register(list_v2p)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config)?; - api.register(write_network_bootstore_config)?; - api.register(inventory)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(sled_add)?; - api.register(list_vpc_routes)?; - api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api -} -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + Ok(api) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_register( - instance_id, - body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.metadata, + register_endpoints().expect("failed to register entrypoints") +} + +enum SledAgentSimImpl {} + +impl SledAgentApi for SledAgentSimImpl { + type Context = Arc; + + async fn vmm_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let propolis_id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_register( + body_args.instance_id, + propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.metadata, + ) + .await?, + )) + } + + async fn vmm_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_unregister(id).await?)) + } + + async fn vmm_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk(sa.instance_ensure_state(id, body_args.state).await?)) + } + + async fn vmm_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + Ok(HttpResponseOk(sa.instance_get_state(id).await?)) + } + + async fn vmm_put_external_ip( + rqctx: RequestContext, + path_params: Path, + 
body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn vmm_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await?, + )) + } + + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.updates() + .download_artifact( + artifact.into_inner(), + rqctx.context().nexus_client.as_ref(), + ) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn vmm_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> + { + let sa = rqctx.context(); + let path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + path_params.propolis_id, + path_params.disk_id, + body.snapshot_id, ) - .await?, - )) -} + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) -} + Ok(HttpResponseOk(VmmIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let _sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let _body_args = body.into_inner(); -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - 
sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.set_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke", -}] -async fn instance_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::Drain).await; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke-single-step", -}] -async fn instance_poke_single_step_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::SingleStep).await; - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = POST, - path = "/instances/{instance_id}/sim-migration-source", -}] -async fn instance_post_sim_migration_source( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_simulate_migration_source(instance_id, body.into_inner()) - .await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), - ) - .await?, - )) -} + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); -#[endpoint { - method = POST, - path = "/disks/{disk_id}/poke", -}] -async fn disk_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - sa.disk_poke(disk_id).await; - Ok(HttpResponseUpdatedNoContent()) -} + let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext>, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.updates() - .download_artifact( - 
artifact.into_inner(), - rqctx.context().nexus_client.as_ref(), - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseOk(vnics)) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - disk_id: Uuid, -} + async fn uplink_ensure( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, -} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let config = + rqctx.context().bootstore_network_config.lock().await.clone(); + Ok(HttpResponseOk(config)) + } -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let mut config = rqctx.context().bootstore_network_config.lock().await; + *config = body.into_inner(); + Ok(HttpResponseUpdatedNoContent()) + } -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + /// Fetch basic information about this sled + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.inventory(rqctx.server.local_addr).await.map_err(|e| { + HttpError::for_internal_error(format!("{:#}", e)) + })?, + )) + } -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: Uuid, -} + async fn datasets_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.datasets_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let _sa = rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let _body_args = body.into_inner(); + async fn datasets_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.datasets_config_list().await?)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -/// Create a mapping from a virtual NIC to a physical host -#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result 
{ - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } - sa.set_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await; + Ok(HttpResponseUpdatedNoContent()) + } -/// Delete a mapping from a virtual NIC to a physical host -#[endpoint { - method = DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn sled_add( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } - sa.unset_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_vpc_routes( + rqctx: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) + } -/// List v2p mappings present on sled -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext>, -) -> Result>, HttpError> { - let sa = rqctx.context(); + // --- Unimplemented endpoints --- - let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + async fn zone_bundle_list_all( + _rqctx: RequestContext, + _query: Query, + ) -> Result>, HttpError> { + method_unimplemented() + } - Ok(HttpResponseOk(vnics)) -} + async fn zone_bundle_list( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> { + method_unimplemented() + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - _rqctx: RequestContext>, - _body: TypedBody, -) -> Result { - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_create( + _rqctx: RequestContext, + _params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config( - rqctx: RequestContext>, -) -> Result, HttpError> { - let config = rqctx.context().bootstore_network_config.lock().await.clone(); - Ok(HttpResponseOk(config)) -} + async fn zone_bundle_get( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let mut config = rqctx.context().bootstore_network_config.lock().await; - *config = body.into_inner(); - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_delete( + _rqctx: RequestContext, + _params: Path, + ) -> Result { + method_unimplemented() + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = 
"/inventory", -}] -async fn inventory( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.inventory(rqctx.server.local_addr) - .await - .map_err(|e| HttpError::for_internal_error(format!("{:#}", e)))?, - )) -} + async fn zone_bundle_utilization( + _rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn zone_bundle_cleanup_context( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + async fn zone_bundle_cleanup_context_update( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + method_unimplemented() + } + + async fn zone_bundle_cleanup( + _rqctx: RequestContext, + ) -> Result>, HttpError> + { + method_unimplemented() + } + + async fn zones_list( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn zpools_get( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn sled_role_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn cockroachdb_init( + _rqctx: RequestContext, + ) -> Result { + method_unimplemented() + } + + async fn timesync_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_start( + _rqctx: RequestContext, + _path_params: Path, + _query_params: Query, + _body: StreamingBody, + ) -> Result { + method_unimplemented() + } + + async fn host_os_write_status_get( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_status_delete( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result { + method_unimplemented() + } + + async fn sled_identifiers( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn bootstore_status( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) +fn method_unimplemented() -> Result { + Err(HttpError { + // Use a client error here (405 Method Not Allowed vs 501 Not + // Implemented) even though it isn't strictly accurate here, so tests + // get to see the error message. 
+ status_code: http::StatusCode::METHOD_NOT_ALLOWED, + error_code: None, + external_message: "Method not implemented in sled-agent-sim" + .to_string(), + internal_message: "Method not implemented in sled-agent-sim" + .to_string(), + }) } +// --- Extra endpoints only available in the sim implementation --- + #[endpoint { - method = PUT, - path = "/omicron-zones", + method = POST, + path = "/vmms/{propolis_id}/poke", }] -async fn omicron_zones_put( +async fn instance_poke_post( rqctx: RequestContext>, - body: TypedBody, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await; + let id = path_params.into_inner().propolis_id; + sa.vmm_poke(id, PokeMode::Drain).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/sleds" + method = POST, + path = "/vmms/{propolis_id}/poke-single-step", }] -async fn sled_add( - _rqctx: RequestContext>, - _body: TypedBody, +async fn instance_poke_single_step_post( + rqctx: RequestContext>, + path_params: Path, ) -> Result { + let sa = rqctx.context(); + let id = path_params.into_inner().propolis_id; + sa.vmm_poke(id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = GET, - path = "/vpc-routes", + method = POST, + path = "/vmms/{propolis_id}/sim-migration-source", }] -async fn list_vpc_routes( +async fn instance_post_sim_migration_source( rqctx: RequestContext>, -) -> Result>, HttpError> { + path_params: Path, + body: TypedBody, +) -> Result { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.list_vpc_routes().await)) + let id = path_params.into_inner().propolis_id; + sa.instance_simulate_migration_source(id, body.into_inner()).await?; + Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/vpc-routes", + method = POST, + path = "/disks/{disk_id}/poke", }] -async fn set_vpc_routes( +async fn disk_poke_post( rqctx: RequestContext>, - body: TypedBody>, + path_params: Path, ) -> Result { let sa = rqctx.context(); - sa.set_vpc_routes(body.into_inner()).await; + let disk_id = path_params.into_inner().disk_id; + sa.disk_poke(disk_id).await; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 33bc1c40c1e..eb7ea0ca794 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -14,13 +14,14 @@ use nexus_client; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::ResourceType; -use omicron_common::api::internal::nexus::{SledInstanceState, VmmState}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmState}; +use omicron_uuid_kinds::{GenericUuid, PropolisUuid}; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateResponse, InstanceMigrationStatus as PropolisMigrationStatus, InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, }; -use sled_agent_types::instance::InstanceStateRequested; +use sled_agent_types::instance::VmmStateRequested; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Mutex; @@ -170,13 +171,13 @@ impl SimInstanceInner { /// returning an action for the caller to simulate. 
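Context for the hunk below: a transition request only queues Propolis states, and nothing advances until the caller steps the simulation. A hypothetical driver in the style of the `sim/collection.rs` tests; `SimObject`, `VmmStateRequested`, `desired`, and `transition_finish` all appear in this diff, while `stop_and_settle` is made up:

```
// Sketch modeled on the collection tests; not code in this change.
fn stop_and_settle(instance: &mut SimObject<SimInstance>) {
    // Phase 1: queue the transition; the externally visible state is
    // unchanged until the queue is drained.
    instance.transition(VmmStateRequested::Stopped).unwrap();
    // Phase 2: step through each queued Propolis state.
    while instance.object.desired().is_some() {
        instance.transition_finish();
    }
    assert_eq!(
        instance.object.current().vmm_state.state,
        VmmState::Destroyed
    );
}
```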
    fn request_transition(
        &mut self,
-        target: &InstanceStateRequested,
+        target: &VmmStateRequested,
    ) -> Result<Option<InstanceAction>, Error> {
        match target {
            // When Nexus intends to migrate into a VMM, it should create that
            // VMM in the Migrating state and shouldn't request anything else
            // from it before asking to migrate in.
-            InstanceStateRequested::MigrationTarget(_) => {
+            VmmStateRequested::MigrationTarget(_) => {
                if !self.queue.is_empty() {
                    return Err(Error::invalid_request(&format!(
                        "can't request migration in with a non-empty state
@@ -207,7 +208,7 @@ impl SimInstanceInner {
                        SimulatedMigrationResult::Success,
                    );
                }
-            InstanceStateRequested::Running => {
+            VmmStateRequested::Running => {
                match self.next_resting_state() {
                    VmmState::Starting => {
                        self.queue_propolis_state(
@@ -234,7 +235,7 @@
                    }
                }
            }
-            InstanceStateRequested::Stopped => {
+            VmmStateRequested::Stopped => {
                match self.next_resting_state() {
                    VmmState::Starting => {
                        let mark_failed = false;
@@ -256,7 +257,7 @@
                    }
                }
            }
-            InstanceStateRequested::Reboot => match self.next_resting_state() {
+            VmmStateRequested::Reboot => match self.next_resting_state() {
                VmmState::Running => {
                    // Further requests to reboot are ignored if the instance
                    // is currently rebooting or about to reboot.
@@ -315,7 +316,7 @@
    /// If the state change queue contains at least one instance state change,
    /// returns the requested instance state associated with the last instance
    /// state on the queue. Returns None otherwise.
-    fn desired(&self) -> Option<InstanceStateRequested> {
+    fn desired(&self) -> Option<VmmStateRequested> {
        self.last_queued_instance_state().map(|terminal| match terminal {
            // State change requests may queue these states as intermediate
            // states, but the simulation (and the tests that rely on it) is
                "pending resting state {:?} doesn't map to a requested state",
                terminal
            ),
-            PropolisInstanceState::Running => InstanceStateRequested::Running,
+            PropolisInstanceState::Running => VmmStateRequested::Running,
            PropolisInstanceState::Stopping
            | PropolisInstanceState::Stopped
-            | PropolisInstanceState::Destroyed => {
-                InstanceStateRequested::Stopped
-            }
-            PropolisInstanceState::Rebooting => InstanceStateRequested::Reboot,
+            | PropolisInstanceState::Destroyed => VmmStateRequested::Stopped,
+            PropolisInstanceState::Rebooting => VmmStateRequested::Reboot,
        })
    }
@@ -388,7 +387,7 @@
    /// Simulates rude termination by moving the instance to the Destroyed state
    /// immediately and clearing the queue of pending state transitions.
- fn terminate(&mut self) -> SledInstanceState { + fn terminate(&mut self) -> SledVmmState { let mark_failed = false; self.state.terminate_rudely(mark_failed); self.queue.clear(); @@ -418,7 +417,7 @@ pub struct SimInstance { } impl SimInstance { - pub fn terminate(&self) -> SledInstanceState { + pub fn terminate(&self) -> SledVmmState { self.inner.lock().unwrap().terminate() } @@ -435,12 +434,12 @@ impl SimInstance { #[async_trait] impl Simulatable for SimInstance { - type CurrentState = SledInstanceState; - type RequestedState = InstanceStateRequested; + type CurrentState = SledVmmState; + type RequestedState = VmmStateRequested; type ProducerArgs = (); type Action = InstanceAction; - fn new(current: SledInstanceState) -> Self { + fn new(current: SledVmmState) -> Self { assert!(matches!( current.vmm_state.state, VmmState::Starting | VmmState::Migrating), @@ -453,7 +452,6 @@ impl Simulatable for SimInstance { inner: Arc::new(Mutex::new(SimInstanceInner { state: InstanceStates::new( current.vmm_state, - current.propolis_id, current.migration_in.map(|m| m.migration_id), ), last_response: InstanceStateMonitorResponse { @@ -480,7 +478,7 @@ impl Simulatable for SimInstance { fn request_transition( &mut self, - target: &InstanceStateRequested, + target: &VmmStateRequested, ) -> Result, Error> { self.inner.lock().unwrap().request_transition(target) } @@ -512,8 +510,8 @@ impl Simulatable for SimInstance { ) -> Result<(), Error> { nexus_client .cpapi_instances_put( - id, - &nexus_client::types::SledInstanceState::from(current), + &PropolisUuid::from_untyped_uuid(*id), + &nexus_client::types::SledVmmState::from(current), ) .await .map(|_| ()) diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 189f775adb7..b5460256540 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -12,6 +12,10 @@ use crate::nexus::d2n_params; use crate::nexus::NexusClient; use crate::rack_setup::service::build_initial_blueprint_from_sled_configs; use crate::rack_setup::SledConfig; +use crate::rack_setup::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, +}; use anyhow::anyhow; use crucible_agent_client::types::State as RegionState; use illumos_utils::zpool::ZpoolName; @@ -19,9 +23,11 @@ use internal_dns::ServiceName; use nexus_client::types as NexusTypes; use nexus_client::types::{IpRange, Ipv4Range, Ipv6Range}; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; -use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use nexus_sled_agent_shared::inventory::OmicronZoneType; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::{ + BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneType, +}; use nexus_types::inventory::NetworkInterfaceKind; use omicron_common::address::DNS_OPTE_IPV4_SUBNET; use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; @@ -36,6 +42,7 @@ use omicron_common::backoff::{ use omicron_common::disk::DiskIdentity; use omicron_common::FileKv; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use oxnet::Ipv6Net; @@ -375,19 +382,22 @@ pub async fn run_standalone_server( SocketAddr::V6(a) => a, }; let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - let mut zones = vec![OmicronZoneConfig { - id: Uuid::new_v4(), + let mut zones = vec![BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: 
OmicronZoneUuid::new_v4(), underlay_address: *http_bound.ip(), - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: http_bound, - dns_address: match dns.dns_server.local_address() { - SocketAddr::V4(_) => panic!("did not expect v4 address"), - SocketAddr::V6(a) => a, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, + http_address: http_bound, + dns_address: match dns.dns_server.local_address() { + SocketAddr::V4(_) => panic!("did not expect v4 address"), + SocketAddr::V6(a) => a, + }, + gz_address: Ipv6Addr::LOCALHOST, + gz_address_index: 0, }, - gz_address: Ipv6Addr::LOCALHOST, - gz_address_index: 0, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }]; @@ -396,23 +406,26 @@ pub async fn run_standalone_server( let mut macs = MacAddr::iter_system(); if let Some(nexus_external_addr) = rss_args.nexus_external_addr { let ip = nexus_external_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: match ip { IpAddr::V4(_) => panic!("did not expect v4 address"), IpAddr::V6(a) => a, }, - zone_type: OmicronZoneType::Nexus { + zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { internal_address: match config.nexus_address { SocketAddr::V4(_) => panic!("did not expect v4 address"), SocketAddr::V6(a) => a, }, - external_ip: ip, + external_ip: from_ipaddr_to_external_floating_ip(ip), nic: nexus_types::inventory::NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, + kind: NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, name: "nexus".parse().unwrap(), ip: NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) @@ -427,7 +440,7 @@ pub async fn run_standalone_server( }, external_tls: false, external_dns_servers: vec![], - }, + }), filesystem_pool: Some(get_random_zpool()), }); @@ -445,31 +458,40 @@ pub async fn run_standalone_server( rss_args.external_dns_internal_addr { let ip = *external_dns_internal_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: ip, - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: external_dns_internal_addr, - dns_address: SocketAddr::V6(external_dns_internal_addr), - nic: nexus_types::inventory::NetworkInterface { - id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, - name: "external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) - .unwrap() - .into(), - mac: macs.next().unwrap(), - subnet: (*DNS_OPTE_IPV4_SUBNET).into(), - vni: Vni::SERVICES_VNI, - primary: true, - slot: 0, - transit_ips: vec![], + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + http_address: external_dns_internal_addr, + dns_address: from_sockaddr_to_external_floating_addr( + SocketAddr::V6(external_dns_internal_addr), + ), + nic: nexus_types::inventory::NetworkInterface { + id: Uuid::new_v4(), + kind: 
NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, + name: "external-dns".parse().unwrap(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) + .unwrap() + .into(), + mac: macs.next().unwrap(), + subnet: (*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + transit_ips: vec![], + }, }, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }); @@ -530,8 +552,7 @@ pub async fn run_standalone_server( blueprint: build_initial_blueprint_from_sled_configs( &sled_configs, internal_dns_version, - ) - .expect("failed to construct initial blueprint"), + ), physical_disks, zpools, datasets, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index a4ff57ac72b..3e1cb2ff99e 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -24,7 +24,7 @@ use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, }; use omicron_common::api::internal::nexus::{ - DiskRuntimeState, MigrationRuntimeState, MigrationState, SledInstanceState, + DiskRuntimeState, MigrationRuntimeState, MigrationState, SledVmmState, }; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, @@ -35,8 +35,8 @@ use omicron_common::api::internal::shared::{ VirtualNetworkInterfaceHost, }; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, - OmicronPhysicalDisksConfig, + DatasetsConfig, DatasetsManagementResult, DiskIdentity, DiskVariant, + DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -50,8 +50,7 @@ use sled_agent_types::early_networking::{ }; use sled_agent_types::instance::{ InstanceExternalIpBody, InstanceHardware, InstanceMetadata, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, + VmmPutStateResponse, VmmStateRequested, VmmUnregisterResponse, }; use slog::Logger; use std::collections::{HashMap, HashSet, VecDeque}; @@ -71,8 +70,8 @@ use uuid::Uuid; pub struct SledAgent { pub id: Uuid, pub ip: IpAddr, - /// collection of simulated instances, indexed by instance uuid - instances: Arc>, + /// collection of simulated VMMs, indexed by Propolis uuid + vmms: Arc>, /// collection of simulated disks, indexed by disk uuid disks: Arc>, storage: Mutex, @@ -84,7 +83,8 @@ pub struct SledAgent { mock_propolis: Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances - pub external_ips: Mutex>>, + pub external_ips: + Mutex>>, pub vpc_routes: Mutex>, config: Config, fake_zones: Mutex, @@ -170,7 +170,7 @@ impl SledAgent { Arc::new(SledAgent { id, ip: config.dropshot.bind_address.ip(), - instances: Arc::new(SimCollection::new( + vmms: Arc::new(SimCollection::new( Arc::clone(&nexus_client), instance_log, sim_mode, @@ -269,7 +269,7 @@ impl SledAgent { instance_runtime: InstanceRuntimeState, vmm_runtime: VmmRuntimeState, metadata: InstanceMetadata, - ) -> Result { + ) -> Result { // respond with a fake 500 level failure if asked to ensure an instance // with more than 16 CPUs. let ncpus: i64 = (&hardware.properties.ncpus).into(); @@ -317,11 +317,7 @@ impl SledAgent { // point to the correct address. 
let mock_lock = self.mock_propolis.lock().await; if let Some((_srv, client)) = mock_lock.as_ref() { - if !self - .instances - .contains_key(&instance_id.into_untyped_uuid()) - .await - { + if !self.vmms.contains_key(&instance_id.into_untyped_uuid()).await { let metadata = propolis_client::types::InstanceMetadata { project_id: metadata.project_id, silo_id: metadata.silo_id, @@ -379,12 +375,11 @@ impl SledAgent { }); let instance_run_time_state = self - .instances + .vmms .sim_ensure( - &instance_id.into_untyped_uuid(), - SledInstanceState { + &propolis_id.into_untyped_uuid(), + SledVmmState { vmm_state: vmm_runtime, - propolis_id, migration_in, migration_out: None, }, @@ -417,56 +412,53 @@ impl SledAgent { /// not notified. pub async fn instance_unregister( self: &Arc, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let instance = match self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await { Ok(instance) => instance, Err(Error::ObjectNotFound { .. }) => { - return Ok(InstanceUnregisterResponse { updated_runtime: None }) + return Ok(VmmUnregisterResponse { updated_runtime: None }) } Err(e) => return Err(e), }; - self.detach_disks_from_instance(instance_id).await?; - let response = InstanceUnregisterResponse { + let response = VmmUnregisterResponse { updated_runtime: Some(instance.terminate()), }; - self.instances.sim_force_remove(instance_id.into_untyped_uuid()).await; + self.vmms.sim_force_remove(propolis_id.into_untyped_uuid()).await; Ok(response) } /// Asks the supplied instance to transition to the requested state. pub async fn instance_ensure_state( self: &Arc, - instance_id: InstanceUuid, - state: InstanceStateRequested, - ) -> Result { + propolis_id: PropolisUuid, + state: VmmStateRequested, + ) -> Result { if let Some(e) = self.instance_ensure_state_error.lock().await.as_ref() { return Err(e.clone()); } let current = match self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await { Ok(i) => i.current().clone(), Err(_) => match state { - InstanceStateRequested::Stopped => { - return Ok(InstancePutStateResponse { - updated_runtime: None, - }); + VmmStateRequested::Stopped => { + return Ok(VmmPutStateResponse { updated_runtime: None }); } _ => { return Err(Error::invalid_request(&format!( - "instance {} not registered on sled", - instance_id, + "Propolis {} not registered on sled", + propolis_id, ))); } }, @@ -475,43 +467,41 @@ impl SledAgent { let mock_lock = self.mock_propolis.lock().await; if let Some((_srv, client)) = mock_lock.as_ref() { let body = match state { - InstanceStateRequested::MigrationTarget(_) => { + VmmStateRequested::MigrationTarget(_) => { return Err(Error::internal_error( "migration not implemented for mock Propolis", )); } - InstanceStateRequested::Running => { - let instances = self.instances.clone(); + VmmStateRequested::Running => { + let vmms = self.vmms.clone(); let log = self.log.new( o!("component" => "SledAgent-insure_instance_state"), ); tokio::spawn(async move { tokio::time::sleep(Duration::from_secs(10)).await; - match instances + match vmms .sim_ensure( - &instance_id.into_untyped_uuid(), + &propolis_id.into_untyped_uuid(), current, Some(state), ) .await { Ok(state) => { - let instance_state: nexus_client::types::SledInstanceState = state.into(); - info!(log, "sim_ensure success"; "instance_state" => #?instance_state); + let 
vmm_state: nexus_client::types::SledVmmState = state.into(); + info!(log, "sim_ensure success"; "vmm_state" => #?vmm_state); } Err(instance_put_error) => { error!(log, "sim_ensure failure"; "error" => #?instance_put_error); } } }); - return Ok(InstancePutStateResponse { - updated_runtime: None, - }); + return Ok(VmmPutStateResponse { updated_runtime: None }); } - InstanceStateRequested::Stopped => { + VmmStateRequested::Stopped => { propolis_client::types::InstanceStateRequested::Stop } - InstanceStateRequested::Reboot => { + VmmStateRequested::Reboot => { propolis_client::types::InstanceStateRequested::Reboot } }; @@ -521,30 +511,24 @@ impl SledAgent { } let new_state = self - .instances - .sim_ensure(&instance_id.into_untyped_uuid(), current, Some(state)) + .vmms + .sim_ensure(&propolis_id.into_untyped_uuid(), current, Some(state)) .await?; - // If this request will shut down the simulated instance, look for any - // disks that are attached to it and drive them to the Detached state. - if matches!(state, InstanceStateRequested::Stopped) { - self.detach_disks_from_instance(instance_id).await?; - } - - Ok(InstancePutStateResponse { updated_runtime: Some(new_state) }) + Ok(VmmPutStateResponse { updated_runtime: Some(new_state) }) } pub async fn instance_get_state( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { let instance = self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await .map_err(|_| { crate::sled_agent::Error::Instance( - crate::instance_manager::Error::NoSuchInstance(instance_id), + crate::instance_manager::Error::NoSuchVmm(propolis_id), ) })?; Ok(instance.current()) @@ -552,16 +536,16 @@ impl SledAgent { pub async fn instance_simulate_migration_source( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, migration: instance::SimulateMigrationSource, ) -> Result<(), HttpError> { let instance = self - .instances - .sim_get_cloned_object(&instance_id.into_untyped_uuid()) + .vmms + .sim_get_cloned_object(&propolis_id.into_untyped_uuid()) .await .map_err(|_| { crate::sled_agent::Error::Instance( - crate::instance_manager::Error::NoSuchInstance(instance_id), + crate::instance_manager::Error::NoSuchVmm(propolis_id), ) })?; instance.set_simulated_migration_source(migration); @@ -572,25 +556,6 @@ impl SledAgent { *self.instance_ensure_state_error.lock().await = error; } - async fn detach_disks_from_instance( - &self, - instance_id: InstanceUuid, - ) -> Result<(), Error> { - self.disks - .sim_ensure_for_each_where( - |disk| match disk.current().disk_state { - DiskState::Attached(id) | DiskState::Attaching(id) => { - id == instance_id.into_untyped_uuid() - } - _ => false, - }, - &DiskStateRequested::Detached, - ) - .await?; - - Ok(()) - } - /// Idempotently ensures that the given API Disk (described by `api_disk`) /// is attached (or not) as specified. This simulates disk attach and /// detach, similar to instance boot and halt. 
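For reference, the auto-detach just removed (`detach_disks_from_instance` together with `sim_ensure_for_each_where`) amounted to driving every disk attached to the stopped VMM to `Detached`. Restated against the surviving `disk_ensure` API, with a hypothetical helper and a caller-supplied disk list:

```
// Sketch only: `detach_all` and its inputs are illustrative. `disk_ensure`
// and `DiskStateRequested::Detached` appear elsewhere in this change.
async fn detach_all(
    sa: &SledAgent,
    attached: Vec<(Uuid, DiskRuntimeState)>,
) -> Result<(), Error> {
    for (disk_id, initial_runtime) in attached {
        // Idempotently request the Detached state for each disk.
        sa.disk_ensure(disk_id, initial_runtime, DiskStateRequested::Detached)
            .await?;
    }
    Ok(())
}
```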
@@ -607,16 +572,16 @@ impl SledAgent { &self.updates } - pub async fn instance_count(&self) -> usize { - self.instances.size().await + pub async fn vmm_count(&self) -> usize { + self.vmms.size().await } pub async fn disk_count(&self) -> usize { self.disks.size().await } - pub async fn instance_poke(&self, id: InstanceUuid, mode: PokeMode) { - self.instances.sim_poke(id.into_untyped_uuid(), mode).await; + pub async fn vmm_poke(&self, id: PropolisUuid, mode: PokeMode) { + self.vmms.sim_poke(id.into_untyped_uuid(), mode).await; } pub async fn disk_poke(&self, id: Uuid) { @@ -699,7 +664,7 @@ impl SledAgent { /// snapshot here. pub async fn instance_issue_disk_snapshot_request( &self, - _instance_id: InstanceUuid, + _propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { @@ -760,18 +725,17 @@ impl SledAgent { pub async fn instance_put_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, body_args: &InstanceExternalIpBody, ) -> Result<(), Error> { - if !self.instances.contains_key(&instance_id.into_untyped_uuid()).await - { + if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await { return Err(Error::internal_error( - "can't alter IP state for nonexistent instance", + "can't alter IP state for VMM that's not registered", )); } let mut eips = self.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(propolis_id).or_default(); // High-level behaviour: this should always succeed UNLESS // trying to add a double ephemeral. @@ -794,18 +758,17 @@ impl SledAgent { pub async fn instance_delete_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, body_args: &InstanceExternalIpBody, ) -> Result<(), Error> { - if !self.instances.contains_key(&instance_id.into_untyped_uuid()).await - { + if !self.vmms.contains_key(&propolis_id.into_untyped_uuid()).await { return Err(Error::internal_error( - "can't alter IP state for nonexistent instance", + "can't alter IP state for VMM that's not registered", )); } let mut eips = self.external_ips.lock().await; - let my_eips = eips.entry(instance_id.into_untyped_uuid()).or_default(); + let my_eips = eips.entry(propolis_id).or_default(); my_eips.remove(&body_args); @@ -910,6 +873,19 @@ impl SledAgent { }) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result { + self.storage.lock().await.datasets_ensure(config).await + } + + pub async fn datasets_config_list( + &self, + ) -> Result { + self.storage.lock().await.datasets_config_list().await + } + pub async fn omicron_physical_disks_list( &self, ) -> Result { diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 556388ce93b..144fb48aa9f 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -18,14 +18,17 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; +use omicron_common::disk::DatasetManagementStatus; +use omicron_common::disk::DatasetsConfig; +use omicron_common::disk::DatasetsManagementResult; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskManagementStatus; use omicron_common::disk::DiskVariant; use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::ZpoolUuid; 
use propolis_client::types::VolumeConstructionRequest; use slog::Logger; @@ -555,6 +558,7 @@ pub struct Storage { sled_id: Uuid, log: Logger, config: Option, + dataset_config: Option<DatasetsConfig>, physical_disks: HashMap, next_disk_slot: i64, zpools: HashMap, @@ -568,6 +572,7 @@ impl Storage { sled_id, log, config: None, + dataset_config: None, physical_disks: HashMap::new(), next_disk_slot: 0, zpools: HashMap::new(), @@ -581,6 +586,45 @@ impl Storage { &self.physical_disks } + pub async fn datasets_config_list( + &self, + ) -> Result<DatasetsConfig, HttpError> { + let Some(config) = self.dataset_config.as_ref() else { + return Err(HttpError::for_not_found( + None, + "No control plane datasets".into(), + )); + }; + Ok(config.clone()) + } + + pub async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, HttpError> { + if let Some(stored_config) = self.dataset_config.as_ref() { + if stored_config.generation < config.generation { + return Err(HttpError::for_client_error( + None, + http::StatusCode::BAD_REQUEST, + "Generation number too old".to_string(), + )); + } + } + self.dataset_config.replace(config.clone()); + + Ok(DatasetsManagementResult { + status: config + .datasets + .values() + .map(|config| DatasetManagementStatus { + dataset_name: config.name.clone(), + err: None, + }) + .collect(), + }) + } + pub async fn omicron_physical_disks_list( + &mut self, + ) -> Result { @@ -869,7 +913,7 @@ impl Pantry { self.sled_agent .instance_issue_disk_snapshot_request( - InstanceUuid::new_v4(), // instance id, not used by function + PropolisUuid::new_v4(), // propolis id, not used by function volume_id.parse().unwrap(), snapshot_id.parse().unwrap(), ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index ba9104d3a1e..f0ea303f142 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -38,9 +38,7 @@ use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, }; use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; -use omicron_common::api::internal::nexus::{ - SledInstanceState, VmmRuntimeState, -}; +use omicron_common::api::internal::nexus::{SledVmmState, VmmRuntimeState}; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, ResolvedVpcFirewallRule, ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, @@ -53,7 +51,10 @@ use omicron_common::api::{ use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; -use omicron_common::disk::{DisksManagementResult, OmicronPhysicalDisksConfig}; +use omicron_common::disk::{ + DatasetsConfig, DatasetsManagementResult, DisksManagementResult, + OmicronPhysicalDisksConfig, +}; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{InstanceUuid, PropolisUuid}; use sled_agent_api::Zpool; @@ -61,8 +62,7 @@ use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ InstanceExternalIpBody, InstanceHardware, InstanceMetadata, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, + VmmPutStateResponse, VmmStateRequested, VmmUnregisterResponse, }; use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; use sled_agent_types::time_sync::TimeSync; @@ -227,7 +227,7 @@ impl From for dropshot::HttpError { } } Error::Instance( - e @ crate::instance_manager::Error::NoSuchInstance(_), + e @ crate::instance_manager::Error::NoSuchVmm(_), ) => HttpError::for_not_found(
Some(NO_SUCH_INSTANCE.to_string()), e.to_string(), @@ -811,6 +811,29 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } + pub async fn datasets_config_list(&self) -> Result<DatasetsConfig, Error> { + Ok(self.storage().datasets_config_list().await?) + } + + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + info!(self.log, "datasets ensure"); + let datasets_result = self.storage().datasets_ensure(config).await?; + info!(self.log, "datasets ensure: Updated storage"); + + // TODO(https://github.com/oxidecomputer/omicron/issues/6177): + // At the moment, we don't actually remove any datasets -- this function + // just adds new datasets. + // + // Once we start removing old datasets, we should probably ensure that + // they are no longer in-use before returning (similar to + // omicron_physical_disks_ensure). + + Ok(datasets_result) + } + /// Requests the set of physical disks currently managed by the Sled Agent. /// /// This should be contrasted with the set of disks in the inventory, which @@ -899,7 +922,7 @@ impl SledAgent { &self, requested_zones: OmicronZonesConfig, ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/6043): // - If these are the set of filesystems, we should also consider // removing the ones which are not listed here. // - It's probably worth sending a bulk request to the storage system, @@ -966,7 +989,7 @@ impl SledAgent { vmm_runtime: VmmRuntimeState, propolis_addr: SocketAddr, metadata: InstanceMetadata, - ) -> Result<SledInstanceState, Error> { + ) -> Result<SledVmmState, Error> { self.inner .instances .ensure_registered( @@ -990,11 +1013,11 @@ impl SledAgent { /// rudely terminates the instance. pub async fn instance_ensure_unregistered( &self, - instance_id: InstanceUuid, - ) -> Result<InstanceUnregisterResponse, Error> { + propolis_id: PropolisUuid, + ) -> Result<VmmUnregisterResponse, Error> { self.inner .instances - .ensure_unregistered(instance_id) + .ensure_unregistered(propolis_id) .await .map_err(|e| Error::Instance(e)) } @@ -1003,12 +1026,12 @@ impl SledAgent { /// state. pub async fn instance_ensure_state( &self, - instance_id: InstanceUuid, - target: InstanceStateRequested, - ) -> Result<InstancePutStateResponse, Error> { + propolis_id: PropolisUuid, + target: VmmStateRequested, + ) -> Result<VmmPutStateResponse, Error> { self.inner .instances - .ensure_state(instance_id, target) + .ensure_state(propolis_id, target) .await .map_err(|e| Error::Instance(e)) } @@ -1020,12 +1043,12 @@ impl SledAgent { /// does not match the current ephemeral IP. pub async fn instance_put_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, external_ip: &InstanceExternalIpBody, ) -> Result<(), Error> { self.inner .instances - .add_external_ip(instance_id, external_ip) + .add_external_ip(propolis_id, external_ip) .await .map_err(|e| Error::Instance(e)) } @@ -1034,12 +1057,12 @@ impl SledAgent { /// specified external IP address in either its ephemeral or floating IP set. pub async fn instance_delete_external_ip( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, external_ip: &InstanceExternalIpBody, ) -> Result<(), Error> { self.inner .instances - .delete_external_ip(instance_id, external_ip) + .delete_external_ip(propolis_id, external_ip) .await .map_err(|e| Error::Instance(e)) } @@ -1047,11 +1070,11 @@ impl SledAgent { /// Returns the state of the instance with the provided ID.
pub async fn instance_get_state( &self, - instance_id: InstanceUuid, - ) -> Result { + propolis_id: PropolisUuid, + ) -> Result { self.inner .instances - .get_instance_state(instance_id) + .get_instance_state(propolis_id) .await .map_err(|e| Error::Instance(e)) } @@ -1082,19 +1105,15 @@ impl SledAgent { } /// Issue a snapshot request for a Crucible disk attached to an instance - pub async fn instance_issue_disk_snapshot_request( + pub async fn vmm_issue_disk_snapshot_request( &self, - instance_id: InstanceUuid, + propolis_id: PropolisUuid, disk_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { self.inner .instances - .instance_issue_disk_snapshot_request( - instance_id, - disk_id, - snapshot_id, - ) + .issue_disk_snapshot_request(propolis_id, disk_id, snapshot_id) .await .map_err(Error::from) } diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 6fa91e0e4a7..9b69975054b 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -126,6 +126,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { destination: "10.1.9.32/16".parse().unwrap(), nexthop: "10.1.9.32".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["2001:db8::/96".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -153,6 +154,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { vlan_id: None, }], autoneg: true, + lldp: None, }], bgp: vec![BgpConfig { asn: 20000, diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json index efd1a3c1671..2da814042d2 100644 --- a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -128,7 +128,8 @@ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -142,14 +143,16 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null }, { "routes": [ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -163,7 +166,8 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null } ], "bgp": [], diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs index dc93aa13009..755033dc236 100644 --- a/sled-agent/types/src/early_networking.rs +++ b/sled-agent/types/src/early_networking.rs @@ -299,6 +299,7 @@ pub mod back_compat { uplink_port_fec: v1.uplink_port_fec, bgp_peers: v1.bgp_peers.clone(), autoneg: v1.autoneg, + lldp: None, } } } @@ -322,6 +323,8 @@ pub mod back_compat { pub uplink_cidr: Ipv4Net, /// VLAN id to use for uplink pub uplink_vid: Option, + /// Local preference + pub local_pref: Option, } impl From for PortConfigV2 { @@ -331,6 +334,7 @@ pub mod back_compat { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), vlan_id: value.uplink_vid, + local_pref: value.local_pref, }], addresses: vec![UplinkAddressConfig { address: value.uplink_cidr.into(), @@ -342,6 +346,7 @@ pub mod back_compat { uplink_port_fec: value.uplink_port_fec, bgp_peers: vec![], autoneg: false, + lldp: None, } } } @@ -472,6 +477,7 @@ mod tests { uplink_port_fec: 
PortFec::None, uplink_cidr: "192.168.0.1/16".parse().unwrap(), uplink_vid: None, + local_pref: None, }], }), }; @@ -501,6 +507,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), vlan_id: None, + local_pref: None, }], addresses: vec![UplinkAddressConfig { address: uplink.uplink_cidr.into(), @@ -512,6 +519,7 @@ mod tests { uplink_port_fec: uplink.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], @@ -545,6 +553,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "192.168.0.2".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["192.168.0.1/16".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -592,6 +601,7 @@ mod tests { uplink_port_fec: port.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], diff --git a/sled-agent/types/src/instance.rs b/sled-agent/types/src/instance.rs index 0753e273dc3..a39fae414b2 100644 --- a/sled-agent/types/src/instance.rs +++ b/sled-agent/types/src/instance.rs @@ -11,14 +11,14 @@ use std::{ use omicron_common::api::internal::{ nexus::{ - InstanceProperties, InstanceRuntimeState, SledInstanceState, - VmmRuntimeState, + InstanceProperties, InstanceRuntimeState, SledVmmState, VmmRuntimeState, }, shared::{ DhcpConfig, NetworkInterface, ResolvedVpcFirewallRule, SourceNatConfig, }, }; -use omicron_uuid_kinds::PropolisUuid; +use omicron_common::NoDebug; +use omicron_uuid_kinds::InstanceUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -37,10 +37,8 @@ pub struct InstanceEnsureBody { /// The initial VMM runtime state for the VMM being registered. pub vmm_runtime: VmmRuntimeState, - /// The ID of the VMM being registered. This may not be the active VMM ID in - /// the instance runtime state (e.g. if the new VMM is going to be a - /// migration target). - pub propolis_id: PropolisUuid, + /// The ID of the instance for which this VMM is being created. + pub instance_id: InstanceUuid, /// The address at which this VMM should serve a Propolis server API. pub propolis_addr: SocketAddr, @@ -63,7 +61,7 @@ pub struct InstanceHardware { pub dhcp_config: DhcpConfig, // TODO: replace `propolis_client::*` with locally-modeled request type pub disks: Vec, - pub cloud_init_bytes: Option, + pub cloud_init_bytes: Option>, } /// Metadata used to track statistics about an instance. @@ -80,19 +78,19 @@ pub struct InstanceMetadata { /// The body of a request to move a previously-ensured instance into a specific /// runtime state. #[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateBody { +pub struct VmmPutStateBody { /// The state into which the instance should be driven. - pub state: InstanceStateRequested, + pub state: VmmStateRequested, } /// The response sent from a request to move an instance into a specific runtime /// state. #[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct InstancePutStateResponse { +pub struct VmmPutStateResponse { /// The current runtime state of the instance after handling the request to /// change its state. If the instance's state did not change, this field is /// `None`. - pub updated_runtime: Option, + pub updated_runtime: Option, } /// Requestable running state of an Instance. @@ -100,7 +98,7 @@ pub struct InstancePutStateResponse { /// A subset of [`omicron_common::api::external::InstanceState`]. 
#[derive(Copy, Clone, Debug, Deserialize, Serialize, JsonSchema)] #[serde(rename_all = "snake_case", tag = "type", content = "value")] -pub enum InstanceStateRequested { +pub enum VmmStateRequested { /// Run this instance by migrating in from a previous running incarnation of /// the instance. MigrationTarget(InstanceMigrationTargetParams), @@ -113,40 +111,40 @@ pub enum InstanceStateRequested { Reboot, } -impl fmt::Display for InstanceStateRequested { +impl fmt::Display for VmmStateRequested { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.label()) } } -impl InstanceStateRequested { +impl VmmStateRequested { fn label(&self) -> &str { match self { - InstanceStateRequested::MigrationTarget(_) => "migrating in", - InstanceStateRequested::Running => "running", - InstanceStateRequested::Stopped => "stopped", - InstanceStateRequested::Reboot => "reboot", + VmmStateRequested::MigrationTarget(_) => "migrating in", + VmmStateRequested::Running => "running", + VmmStateRequested::Stopped => "stopped", + VmmStateRequested::Reboot => "reboot", } } /// Returns true if the state represents a stopped Instance. pub fn is_stopped(&self) -> bool { match self { - InstanceStateRequested::MigrationTarget(_) => false, - InstanceStateRequested::Running => false, - InstanceStateRequested::Stopped => true, - InstanceStateRequested::Reboot => false, + VmmStateRequested::MigrationTarget(_) => false, + VmmStateRequested::Running => false, + VmmStateRequested::Stopped => true, + VmmStateRequested::Reboot => false, } } } /// The response sent from a request to unregister an instance. #[derive(Serialize, Deserialize, JsonSchema)] -pub struct InstanceUnregisterResponse { +pub struct VmmUnregisterResponse { /// The current state of the instance after handling the request to /// unregister it. If the instance's state did not change, this field is /// `None`. - pub updated_runtime: Option, + pub updated_runtime: Option, } /// Parameters used when directing Propolis to initialize itself via live diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 74f2be782fc..e2b024db110 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -15,10 +15,10 @@ use illumos_utils::zfs::{ use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::api::internal::shared::DatasetKind; -use omicron_common::disk::{DiskIdentity, DiskVariant}; +use omicron_common::disk::{ + CompressionAlgorithm, DatasetName, DiskIdentity, DiskVariant, GzipLevel, +}; use rand::distributions::{Alphanumeric, DistString}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; use slog::{debug, info, Logger}; use std::process::Stdio; use std::str::FromStr; @@ -45,7 +45,8 @@ cfg_if! { // tuned as needed. 
pub const DUMP_DATASET_QUOTA: usize = 100 * (1 << 30); // passed to zfs create -o compression= -pub const DUMP_DATASET_COMPRESSION: &'static str = "gzip-9"; +pub const DUMP_DATASET_COMPRESSION: CompressionAlgorithm = + CompressionAlgorithm::GzipN { level: GzipLevel::new::<9>() }; // U.2 datasets live under the encrypted dataset and inherit encryption pub const ZONE_DATASET: &'static str = "crypt/zone"; @@ -102,12 +103,17 @@ struct ExpectedDataset { // Identifies if the dataset should be deleted on boot wipe: bool, // Optional compression mode - compression: Option<&'static str>, + compression: CompressionAlgorithm, } impl ExpectedDataset { const fn new(name: &'static str) -> Self { - ExpectedDataset { name, quota: None, wipe: false, compression: None } + ExpectedDataset { + name, + quota: None, + wipe: false, + compression: CompressionAlgorithm::Off, + } } const fn quota(mut self, quota: usize) -> Self { @@ -120,151 +126,12 @@ impl ExpectedDataset { self } - const fn compression(mut self, compression: &'static str) -> Self { - self.compression = Some(compression); + const fn compression(mut self, compression: CompressionAlgorithm) -> Self { + self.compression = compression; self } } -/// The type of a dataset, and an auxiliary information necessary to -/// successfully launch a zone managing the associated data. -/// -/// There is currently no auxiliary data here, but there's a separation from -/// omicron-common's `DatasetKind` in case there might be some in the future. -#[derive( - Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum DatasetType { - // TODO: `DatasetKind` uses `Cockroach`, not `CockroachDb`, for historical - // reasons. It may be worth using the same name for both. - CockroachDb, - Crucible, - Clickhouse, - ClickhouseKeeper, - ClickhouseServer, - ExternalDns, - InternalDns, -} - -impl DatasetType { - pub fn dataset_should_be_encrypted(&self) -> bool { - match self { - // We encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption. 
- DatasetType::Crucible => false, - _ => true, - } - } - - pub fn kind(&self) -> DatasetKind { - match self { - Self::Crucible => DatasetKind::Crucible, - Self::CockroachDb => DatasetKind::Cockroach, - Self::Clickhouse => DatasetKind::Clickhouse, - Self::ClickhouseKeeper => DatasetKind::ClickhouseKeeper, - Self::ClickhouseServer => DatasetKind::ClickhouseServer, - Self::ExternalDns => DatasetKind::ExternalDns, - Self::InternalDns => DatasetKind::InternalDns, - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum DatasetKindParseError { - #[error("Dataset unknown: {0}")] - UnknownDataset(String), -} - -impl FromStr for DatasetType { - type Err = DatasetKindParseError; - - fn from_str(s: &str) -> Result { - use DatasetType::*; - let kind = match s { - "crucible" => Crucible, - "cockroachdb" => CockroachDb, - "clickhouse" => Clickhouse, - "clickhouse_keeper" => ClickhouseKeeper, - "external_dns" => ExternalDns, - "internal_dns" => InternalDns, - _ => { - return Err(DatasetKindParseError::UnknownDataset( - s.to_string(), - )) - } - }; - Ok(kind) - } -} - -impl std::fmt::Display for DatasetType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DatasetType::*; - let s = match self { - Crucible => "crucible", - CockroachDb => "cockroachdb", - Clickhouse => "clickhouse", - ClickhouseKeeper => "clickhouse_keeper", - ClickhouseServer => "clickhouse_server", - ExternalDns => "external_dns", - InternalDns => "internal_dns", - }; - write!(f, "{}", s) - } -} - -#[derive( - Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone, JsonSchema, -)] -pub struct DatasetName { - // A unique identifier for the Zpool on which the dataset is stored. - pool_name: ZpoolName, - // A name for the dataset within the Zpool. - kind: DatasetType, -} - -impl DatasetName { - pub fn new(pool_name: ZpoolName, kind: DatasetType) -> Self { - Self { pool_name, kind } - } - - pub fn pool(&self) -> &ZpoolName { - &self.pool_name - } - - pub fn dataset(&self) -> &DatasetType { - &self.kind - } - - /// Returns the full name of the dataset, as would be returned from - /// "zfs get" or "zfs list". - /// - /// If this dataset should be encrypted, this automatically adds the - /// "crypt" dataset component. - pub fn full_name(&self) -> String { - // Currently, we encrypt all datasets except Crucible. - // - // Crucible already performs encryption internally, and we - // avoid double-encryption. 
- if self.kind.dataset_should_be_encrypted() { - self.full_encrypted_name() - } else { - self.full_unencrypted_name() - } - } - - fn full_encrypted_name(&self) -> String { - format!("{}/crypt/{}", self.pool_name, self.kind) - } - - fn full_unencrypted_name(&self) -> String { - format!("{}/{}", self.pool_name, self.kind) - } -} - #[derive(Debug, thiserror::Error)] pub enum DatasetError { #[error("Cannot open {path} due to {error}")] @@ -431,6 +298,7 @@ pub(crate) async fn ensure_zpool_has_datasets( let encryption_details = None; let size_details = Some(SizeDetails { quota: dataset.quota, + reservation: None, compression: dataset.compression, }); Zfs::ensure_filesystem( @@ -577,7 +445,7 @@ async fn ensure_zpool_dataset_is_encrypted( zpool_name: &ZpoolName, unencrypted_dataset: &str, ) -> Result<(), DatasetEncryptionMigrationError> { - let Ok(kind) = DatasetType::from_str(&unencrypted_dataset) else { + let Ok(kind) = DatasetKind::from_str(&unencrypted_dataset) else { info!(log, "Unrecognized dataset kind"); return Ok(()); }; @@ -818,7 +686,7 @@ mod test { #[test] fn serialize_dataset_name() { let pool = ZpoolName::new_internal(ZpoolUuid::new_v4()); - let kind = DatasetType::Crucible; + let kind = DatasetKind::Crucible; let name = DatasetName::new(pool, kind); serde_json::to_string(&name).unwrap(); } diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 4c5582fd795..988f7f363a2 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -4,11 +4,12 @@ //! Storage related errors -use crate::dataset::{DatasetError, DatasetName}; +use crate::dataset::DatasetError; use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; use omicron_common::api::external::Generation; +use omicron_common::disk::DatasetName; use uuid::Uuid; #[derive(thiserror::Error, Debug)] @@ -83,6 +84,15 @@ pub enum Error { current: Generation, }, + #[error("Invalid configuration (UUID mismatch in arguments)")] + ConfigUuidMismatch, + + #[error("Dataset configuration out-of-date (asked for {requested}, but latest is {current})")] + DatasetConfigurationOutdated { requested: Generation, current: Generation }, + + #[error("Dataset configuration changed for the same generation number: {generation}")] + DatasetConfigurationChanged { generation: Generation }, + #[error("Failed to update ledger in internal storage")] Ledger(#[from] omicron_common::ledger::Error), diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index ca470eaeb3d..8baae911ce3 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -7,7 +7,7 @@ use std::collections::HashSet; use crate::config::MountConfig; -use crate::dataset::{DatasetName, CONFIG_DATASET}; +use crate::dataset::CONFIG_DATASET; use crate::disk::RawDisk; use crate::error::Error; use crate::resources::{AllDisks, StorageResources}; @@ -18,11 +18,14 @@ use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::{ - DiskIdentity, DiskVariant, DisksManagementResult, + DatasetConfig, DatasetManagementStatus, DatasetName, DatasetsConfig, + DatasetsManagementResult, DiskIdentity, DiskVariant, DisksManagementResult, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::Ledger; -use slog::{info, o, warn, Logger}; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::GenericUuid; +use slog::{error, info, o, warn, Logger}; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use 
tokio::time::{interval, Duration, MissedTickBehavior}; @@ -62,6 +65,9 @@ const SYNCHRONIZE_INTERVAL: Duration = Duration::from_secs(10); // The filename of the ledger storing physical disk info const DISKS_LEDGER_FILENAME: &str = "omicron-physical-disks.json"; +// The filename of the ledger storing dataset info +const DATASETS_LEDGER_FILENAME: &str = "omicron-datasets.json"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum StorageManagerState { // We know that any attempts to manage disks will fail, as the key manager @@ -114,6 +120,16 @@ pub(crate) enum StorageRequest { tx: DebugIgnore>>, }, + DatasetsEnsure { + config: DatasetsConfig, + tx: DebugIgnore< + oneshot::Sender>, + >, + }, + DatasetsList { + tx: DebugIgnore>>, + }, + // Requests to explicitly manage or stop managing a set of devices OmicronPhysicalDisksEnsure { config: OmicronPhysicalDisksConfig, @@ -240,6 +256,31 @@ impl StorageHandle { rx.map(|result| result.unwrap()) } + pub async fn datasets_ensure( + &self, + config: DatasetsConfig, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsEnsure { config, tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + + /// Reads the last value written to storage by + /// [Self::datasets_ensure]. + pub async fn datasets_config_list(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DatasetsList { tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() + } + pub async fn omicron_physical_disks_ensure( &self, config: OmicronPhysicalDisksConfig, @@ -322,6 +363,10 @@ impl StorageHandle { rx.await.unwrap() } + // TODO(https://github.com/oxidecomputer/omicron/issues/6043): + // + // Deprecate usage of this function, prefer to call "datasets_ensure" + // and ask for the set of all datasets from Nexus. pub async fn upsert_filesystem( &self, dataset_id: Uuid, @@ -428,6 +473,12 @@ impl StorageManager { self.ensure_using_exactly_these_disks(raw_disks).await; let _ = tx.0.send(Ok(())); } + StorageRequest::DatasetsEnsure { config, tx } => { + let _ = tx.0.send(self.datasets_ensure(config).await); + } + StorageRequest::DatasetsList { tx } => { + let _ = tx.0.send(self.datasets_config_list().await); + } StorageRequest::OmicronPhysicalDisksEnsure { config, tx } => { let _ = tx.0.send(self.omicron_physical_disks_ensure(config).await); @@ -485,6 +536,10 @@ impl StorageManager { ); } + // Sled Agents can remember which disks they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. async fn all_omicron_disk_ledgers(&self) -> Vec { self.resources .disks() @@ -494,6 +549,19 @@ impl StorageManager { .collect() } + // Sled Agents can remember which datasets they need to manage by reading + // a configuration file from the M.2s. + // + // This function returns the paths to those configuration files. + async fn all_omicron_dataset_ledgers(&self) -> Vec { + self.resources + .disks() + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(DATASETS_LEDGER_FILENAME)) + .collect() + } + // Manages a newly detected disk that has been attached to this sled. // // For U.2s: we update our inventory. 
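(For orientation, the new `DatasetsEnsure` and `DatasetsList` requests above follow the manager's existing request/response shape: the caller packs a `oneshot` sender into the request, and the manager task replies on it. A self-contained sketch of that shape, with all names invented for illustration:)

    use tokio::sync::{mpsc, oneshot};

    enum Request {
        // Each request carries the channel on which its reply is sent.
        Get { tx: oneshot::Sender<u64> },
    }

    // The manager owns the receiving side and answers each request in turn.
    async fn manager(mut rx: mpsc::Receiver<Request>) {
        while let Some(req) = rx.recv().await {
            match req {
                Request::Get { tx } => {
                    let _ = tx.send(42); // ignore callers that gave up waiting
                }
            }
        }
    }

    // A handle hides the channel plumbing behind an ordinary async method.
    async fn get(handle: &mpsc::Sender<Request>) -> u64 {
        let (tx, rx) = oneshot::channel();
        handle.send(Request::Get { tx }).await.expect("manager is running");
        rx.await.expect("manager always replies")
    }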
@@ -545,9 +613,11 @@ impl StorageManager { self.resources.insert_or_update_disk(raw_disk).await } - async fn load_ledger(&self) -> Option<Ledger<OmicronPhysicalDisksConfig>> { + async fn load_disks_ledger( + &self, + ) -> Option<Ledger<OmicronPhysicalDisksConfig>> { let ledger_paths = self.all_omicron_disk_ledgers().await; - let log = self.log.new(o!("request" => "load_ledger")); + let log = self.log.new(o!("request" => "load_disks_ledger")); let maybe_ledger = Ledger::<OmicronPhysicalDisksConfig>::new( &log, ledger_paths.clone(), @@ -579,7 +649,7 @@ impl StorageManager { // Now that we're actually able to unpack U.2s, attempt to load the // set of disks which we previously stored in the ledger, if one // existed. - let ledger = self.load_ledger().await; + let ledger = self.load_disks_ledger().await; if let Some(ledger) = ledger { info!(self.log, "Setting StorageResources state to match ledger"); @@ -591,9 +661,160 @@ impl StorageManager { info!(self.log, "KeyManager ready, but no ledger detected"); } + // We don't load any configuration for datasets, since we aren't + // currently storing any dataset information in-memory. + // + // If we ever wanted to do so, however, we could load that information + // here. + Ok(()) } + async fn datasets_ensure( + &mut self, + config: DatasetsConfig, + ) -> Result<DatasetsManagementResult, Error> { + let log = self.log.new(o!("request" => "datasets_ensure")); + + // As a small input check, confirm that each UUID key in the input map + // matches the ID of its DatasetConfig. + // + // The dataset configs are sorted by UUID so they always appear in the + // same order, but this check prevents adding an entry of: + // - (UUID: X, Config(UUID: Y)), for X != Y + if !config.datasets.iter().all(|(id, config)| *id == config.id) { + return Err(Error::ConfigUuidMismatch); + } + + // We rely on the schema being stable across reboots -- observe + // "test_datasets_schema" below for that property guarantee. + let ledger_paths = self.all_omicron_dataset_ledgers().await; + let maybe_ledger = + Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await; + + let mut ledger = match maybe_ledger { + Some(ledger) => { + info!( + log, + "Comparing 'requested datasets' to ledger on internal storage" + ); + let ledger_data = ledger.data(); + if config.generation < ledger_data.generation { + warn!( + log, + "Request looks out-of-date compared to prior request"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + return Err(Error::DatasetConfigurationOutdated { + requested: config.generation, + current: ledger_data.generation, + }); + } else if config.generation == ledger_data.generation { + info!( + log, + "Requested generation number matches prior request", + ); + + if ledger_data != &config { + error!( + log, + "Requested configuration changed (with the same generation)"; + "generation" => ?config.generation + ); + return Err(Error::DatasetConfigurationChanged { + generation: config.generation, + }); + } + } else { + info!( + log, + "Request looks newer than prior requests"; + "requested_generation" => ?config.generation, + "ledger_generation" => ?ledger_data.generation, + ); + } + ledger + } + None => { + info!(log, "No previously-stored 'requested datasets', creating new ledger"); + Ledger::<DatasetsConfig>::new_with( + &log, + ledger_paths.clone(), + DatasetsConfig::default(), + ) + } + }; + + let result = self.datasets_ensure_internal(&log, &config).await; + + let ledger_data = ledger.data_mut(); + if *ledger_data == config { + return Ok(result); + } + *ledger_data = config; + ledger.commit().await?; + + Ok(result) + } + + // Attempts to ensure that each dataset exists.
+ // + // Does not return an error, because the [DatasetsManagementResult] type + // includes details about all possible errors that may occur at + // a per-dataset granularity. + async fn datasets_ensure_internal( + &mut self, + log: &Logger, + config: &DatasetsConfig, + ) -> DatasetsManagementResult { + let mut status = vec![]; + for dataset in config.datasets.values() { + status.push(self.dataset_ensure_internal(log, dataset).await); + } + DatasetsManagementResult { status } + } + + async fn dataset_ensure_internal( + &mut self, + log: &Logger, + config: &DatasetConfig, + ) -> DatasetManagementStatus { + let log = log.new(o!("name" => config.name.full_name())); + info!(log, "Ensuring dataset"); + let mut status = DatasetManagementStatus { + dataset_name: config.name.clone(), + err: None, + }; + + if let Err(err) = self.ensure_dataset(config).await { + warn!(log, "Failed to ensure dataset"; "dataset" => ?status.dataset_name, "err" => ?err); + status.err = Some(err.to_string()); + }; + + status + } + + // Lists datasets that this sled is configured to use. + async fn datasets_config_list(&mut self) -> Result<DatasetsConfig, Error> { + let log = self.log.new(o!("request" => "datasets_config_list")); + + let ledger_paths = self.all_omicron_dataset_ledgers().await; + let maybe_ledger = + Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await; + + match maybe_ledger { + Some(ledger) => { + info!(log, "Found ledger on internal storage"); + return Ok(ledger.data().clone()); + } + None => { + info!(log, "No ledger detected on internal storage"); + return Err(Error::LedgerNotFound); + } + } + } + // Makes a U.2 disk managed by the control plane within [`StorageResources`]. async fn omicron_physical_disks_ensure( &mut self, @@ -765,6 +986,77 @@ impl StorageManager { } } + // Ensures a dataset exists within a zpool, according to `config`. + async fn ensure_dataset( + &mut self, + config: &DatasetConfig, + ) -> Result<(), Error> { + info!(self.log, "ensure_dataset"; "config" => ?config); + + // We can only place datasets within managed disks. + // If a disk is attached to this sled, but not a part of the Control + // Plane, it is treated as "not found" for dataset placement. + if !self + .resources + .disks() + .iter_managed() + .any(|(_, disk)| disk.zpool_name() == config.name.pool()) + { + return Err(Error::ZpoolNotFound(format!( + "{}", + config.name.pool(), + ))); + } + + let zoned = config.name.dataset().zoned(); + let mountpoint_path = if zoned { + Utf8PathBuf::from("/data") + } else { + config.name.pool().dataset_mountpoint( + &Utf8PathBuf::from("/"), + &config.name.dataset().to_string(), + ) + }; + let mountpoint = Mountpoint::Path(mountpoint_path); + + let fs_name = &config.name.full_name(); + let do_format = true; + + // The "crypt" dataset needs these details, but should already exist + // by the time we're creating datasets inside. + let encryption_details = None; + let size_details = Some(illumos_utils::zfs::SizeDetails { + quota: config.quota, + reservation: config.reservation, + compression: config.compression, + }); + Zfs::ensure_filesystem( + fs_name, + mountpoint, + zoned, + do_format, + encryption_details, + size_details, + None, + )?; + // Ensure the dataset has a usable UUID.
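(A condensed restatement of the UUID check that follows, using the same `Zfs` helpers; error construction is elided:)

    // The labeling rule: the Oxide-scoped "uuid" ZFS property, once set, must
    // match the configured ID; a mismatch is an error, never a relabel.
    match Zfs::get_oxide_value(&fs_name, "uuid")
        .ok()
        .and_then(|s| s.parse::<DatasetUuid>().ok())
    {
        Some(id) if id == config.id => { /* already labeled; nothing to do */ }
        Some(_other) => { /* refuse with Error::UuidMismatch */ }
        None => Zfs::set_oxide_value(&fs_name, "uuid", &config.id.to_string())?,
    }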
+ if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { + if let Ok(id) = id_str.parse::() { + if id != config.id { + return Err(Error::UuidMismatch { + name: Box::new(config.name.clone()), + old: id.into_untyped_uuid(), + new: config.id.into_untyped_uuid(), + }); + } + return Ok(()); + } + } + Zfs::set_oxide_value(&fs_name, "uuid", &config.id.to_string())?; + + Ok(()) + } + // Attempts to add a dataset within a zpool, according to `request`. async fn add_dataset( &mut self, @@ -824,16 +1116,19 @@ impl StorageManager { /// systems. #[cfg(all(test, target_os = "illumos"))] mod tests { - use crate::dataset::DatasetType; use crate::disk::RawSyntheticDisk; use crate::manager_test_harness::StorageManagerTestHarness; use super::*; use camino_tempfile::tempdir_in; + use omicron_common::api::external::Generation; + use omicron_common::disk::CompressionAlgorithm; + use omicron_common::disk::DatasetKind; use omicron_common::disk::DiskManagementError; use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; use sled_hardware::DiskFirmware; + use std::collections::BTreeMap; use std::sync::atomic::Ordering; use uuid::Uuid; @@ -1300,7 +1595,7 @@ mod tests { let dataset_id = Uuid::new_v4(); let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); let dataset_name = - DatasetName::new(zpool_name.clone(), DatasetType::Crucible); + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); harness .handle() .upsert_filesystem(dataset_id, dataset_name) @@ -1310,6 +1605,86 @@ mod tests { harness.cleanup().await; logctx.cleanup_successful(); } + + #[tokio::test] + async fn ensure_datasets() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_datasets"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create a dataset on the newly formatted U.2 + let id = DatasetUuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let name = DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + let datasets = BTreeMap::from([( + id, + DatasetConfig { + id, + name, + compression: CompressionAlgorithm::Off, + quota: None, + reservation: None, + }, + )]); + // "Generation = 1" is reserved as "no requests seen yet", so we jump + // past it. + let generation = Generation::new().next(); + let mut config = DatasetsConfig { generation, datasets }; + + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // List datasets, expect to see what we just created + let observed_config = + harness.handle().datasets_config_list().await.unwrap(); + assert_eq!(config, observed_config); + + // Calling "datasets_ensure" with the same input should succeed. 
+ let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + let current_config_generation = config.generation; + let next_config_generation = config.generation.next(); + + // Calling "datasets_ensure" with an old generation should fail + config.generation = Generation::new(); + let err = + harness.handle().datasets_ensure(config.clone()).await.unwrap_err(); + assert!(matches!(err, Error::DatasetConfigurationOutdated { .. })); + + // However, calling it with a different input and the same generation + // number should fail. + config.generation = current_config_generation; + config.datasets.values_mut().next().unwrap().reservation = Some(1024); + let err = + harness.handle().datasets_ensure(config.clone()).await.unwrap_err(); + assert!(matches!(err, Error::DatasetConfigurationChanged { .. })); + + // If we bump the generation number while making a change, updated + // configs will work. + config.generation = next_config_generation; + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + harness.cleanup().await; + logctx.cleanup_successful(); + } } #[cfg(test)] @@ -1323,4 +1698,13 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } + + #[test] + fn test_datasets_schema() { + let schema = schemars::schema_for!(DatasetsConfig); + expectorate::assert_contents( + "../schema/omicron-datasets.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } } diff --git a/smf/clickhouse-admin/config.toml b/smf/clickhouse-admin/config.toml new file mode 100644 index 00000000000..86ee2c5d4b3 --- /dev/null +++ b/smf/clickhouse-admin/config.toml @@ -0,0 +1,10 @@ +[dropshot] +# 1 MiB; we don't expect any requests of more than nominal size. 
+request_body_max_bytes = 1048576 + +[log] +# Show log messages of this level and more severe +level = "info" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/clickhouse-admin/manifest.xml b/smf/clickhouse-admin/manifest.xml new file mode 100644 index 00000000000..435f8a86acf --- /dev/null +++ b/smf/clickhouse-admin/manifest.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse/method_script.sh b/smf/clickhouse/method_script.sh index 224d759cf3f..bb5dd960a16 100755 --- a/smf/clickhouse/method_script.sh +++ b/smf/clickhouse/method_script.sh @@ -10,136 +10,13 @@ LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" -# TEMPORARY: Racks will be set up with single node ClickHouse until -# Nexus provisions services so there is no divergence between racks -# https://github.com/oxidecomputer/omicron/issues/732 -single_node=true +args=( +"--log-file" "/var/tmp/clickhouse-server.log" +"--errorlog-file" "/var/tmp/clickhouse-server.errlog" +"--" +"--path" "${DATASTORE}" +"--listen_host" "$LISTEN_ADDR" +"--http_port" "$LISTEN_PORT" +) -command=() -# TODO((https://github.com/oxidecomputer/omicron/issues/4000)): Remove single node mode once all racks are running in replicated mode -if $single_node -then - command+=( - "/opt/oxide/clickhouse/clickhouse" "server" - "--log-file" "/var/tmp/clickhouse-server.log" - "--errorlog-file" "/var/tmp/clickhouse-server.errlog" - "--" - "--path" "${DATASTORE}" - "--listen_host" "$LISTEN_ADDR" - "--http_port" "$LISTEN_PORT" - ) -else - # Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. - CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse -H)" - - if [[ -z "$CH_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a nodes=($CH_ADDRS) - - for i in "${nodes[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - # Assign hostnames to replicas - REPLICA_HOST_01="${nodes[0]}" - REPLICA_HOST_02="${nodes[1]}" - - # Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. - K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" - - if [[ -z "$K_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a keepers=($K_ADDRS) - - for i in "${keepers[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - if [[ "${#keepers[@]}" != 3 ]] - then - printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Identify the node type this is as this will influence how the config is constructed - # TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much - # better ways to do this service discovery, but this works for now. - # The services contain the same IDs as the hostnames. 
- CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" - REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" - REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" - if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 1" - REPLICA_NUMBER="01" - elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 2" - REPLICA_NUMBER="02" - else - printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Setting environment variables this way is best practice, but has the downside of - # obscuring the field values to anyone ssh-ing into the zone. To mitigate this, - # we will be saving them to ${DATASTORE}/config_env_vars - export CH_LOG="${DATASTORE}/clickhouse-server.log" - export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" - export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} - export CH_LISTEN_ADDR=${LISTEN_ADDR} - export CH_LISTEN_PORT=${LISTEN_PORT} - export CH_DATASTORE=${DATASTORE} - export CH_TMP_PATH="${DATASTORE}/tmp/" - export CH_USER_FILES_PATH="${DATASTORE}/user_files/" - export CH_USER_LOCAL_DIR="${DATASTORE}/access/" - export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" - export CH_REPLICA_NUMBER=${REPLICA_NUMBER} - export CH_REPLICA_HOST_01=${REPLICA_HOST_01} - export CH_REPLICA_HOST_02=${REPLICA_HOST_02} - export CH_KEEPER_HOST_01="${keepers[0]}" - export CH_KEEPER_HOST_02="${keepers[1]}" - export CH_KEEPER_HOST_03="${keepers[2]}" - - content="CH_LOG="${CH_LOG}"\n\ - CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ - CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ - CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ - CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ - CH_DATASTORE="${CH_DATASTORE}"\n\ - CH_TMP_PATH="${CH_TMP_PATH}"\n\ - CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ - CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ - CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ - CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ - CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ - CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ - CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ - CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ - CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" - - echo $content >> "${DATASTORE}/config_env_vars" - - - # The clickhouse binary must be run from within the directory that contains it. - # Otherwise, it does not automatically detect the configuration files, nor does - # it append them when necessary - cd /opt/oxide/clickhouse/ - command+=("./clickhouse" "server") -fi - -exec "${command[@]}" & \ No newline at end of file +exec /opt/oxide/clickhouse/clickhouse server "${args[@]}" & \ No newline at end of file diff --git a/smf/clickhouse/config_replica.xml b/smf/clickhouse_server/config_replica.xml similarity index 100% rename from smf/clickhouse/config_replica.xml rename to smf/clickhouse_server/config_replica.xml diff --git a/smf/clickhouse_server/manifest.xml b/smf/clickhouse_server/manifest.xml new file mode 100644 index 00000000000..8ab4f78bcb9 --- /dev/null +++ b/smf/clickhouse_server/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse_server/method_script.sh b/smf/clickhouse_server/method_script.sh new file mode 100755 index 00000000000..a0d61072acf --- /dev/null +++ b/smf/clickhouse_server/method_script.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -x +set -o errexit +set -o pipefail + +. 
/lib/svc/share/smf_include.sh + +LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" +LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" +DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" + +# Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. +CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-server -H)" + +if [[ -z "$CH_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse server nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a nodes=($CH_ADDRS) + +for i in "${nodes[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +# Assign hostnames to replicas +REPLICA_HOST_01="${nodes[0]}" +REPLICA_HOST_02="${nodes[1]}" + +# Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. +K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" + +if [[ -z "$K_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a keepers=($K_ADDRS) + +for i in "${keepers[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +if [[ "${#keepers[@]}" != 3 ]] +then + printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Identify the node type this is as this will influence how the config is constructed +# TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much +# better ways to do this service discovery, but this works for now. +# The services contain the same IDs as the hostnames. +CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" +REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" +REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" +if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 1" + REPLICA_NUMBER="01" +elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 2" + REPLICA_NUMBER="02" +else + printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Setting environment variables this way is best practice, but has the downside of +# obscuring the field values to anyone ssh-ing into the zone. 
To mitigate this, +# we will be saving them to ${DATASTORE}/config_env_vars +export CH_LOG="${DATASTORE}/clickhouse-server.log" +export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" +export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} +export CH_LISTEN_ADDR=${LISTEN_ADDR} +export CH_LISTEN_PORT=${LISTEN_PORT} +export CH_DATASTORE=${DATASTORE} +export CH_TMP_PATH="${DATASTORE}/tmp/" +export CH_USER_FILES_PATH="${DATASTORE}/user_files/" +export CH_USER_LOCAL_DIR="${DATASTORE}/access/" +export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" +export CH_REPLICA_NUMBER=${REPLICA_NUMBER} +export CH_REPLICA_HOST_01=${REPLICA_HOST_01} +export CH_REPLICA_HOST_02=${REPLICA_HOST_02} +export CH_KEEPER_HOST_01="${keepers[0]}" +export CH_KEEPER_HOST_02="${keepers[1]}" +export CH_KEEPER_HOST_03="${keepers[2]}" + +content="CH_LOG="${CH_LOG}"\n\ +CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ +CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ +CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ +CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ +CH_DATASTORE="${CH_DATASTORE}"\n\ +CH_TMP_PATH="${CH_TMP_PATH}"\n\ +CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ +CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ +CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ +CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ +CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ +CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ +CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ +CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ +CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" + +echo $content >> "${DATASTORE}/config_env_vars" + + +# The clickhouse binary must be run from within the directory that contains it. +# Otherwise, it does not automatically detect the configuration files, nor does +# it append them when necessary +cd /opt/oxide/clickhouse_server/ + +exec ./clickhouse server & \ No newline at end of file diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index c502c20b1ba..30b86767856 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -65,6 +65,10 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 +region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 30a02431229..1761d416982 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -65,6 +65,10 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 +region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 +region_snapshot_replacement_step.period_secs = 30 +region_snapshot_replacement_finish.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. 
diff --git a/smf/oximeter/config.toml b/smf/oximeter/replicated-cluster/config.toml similarity index 91% rename from smf/oximeter/config.toml rename to smf/oximeter/replicated-cluster/config.toml index ca14fe6ec8b..f7958e5eb11 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/replicated-cluster/config.toml @@ -3,6 +3,7 @@ [db] batch_size = 1000 batch_interval = 5 # In seconds +replicated = true [log] level = "debug" diff --git a/smf/oximeter/single-node/config.toml b/smf/oximeter/single-node/config.toml new file mode 100644 index 00000000000..bc0418159ca --- /dev/null +++ b/smf/oximeter/single-node/config.toml @@ -0,0 +1,12 @@ +# Example configuration file for running an oximeter collector server + +[db] +batch_size = 1000 +batch_interval = 5 # In seconds +replicated = false + +[log] +level = "debug" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 90f5339e849..a61ac81d910 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -118,6 +118,22 @@ switch = "switch0" # Neighbors we expect to peer with over BGP on this port. bgp_peers = [] +# LLDP settings for this port +#[rack_network_config.switch0.qsfp0.lldp] +#status = "Enabled" +# Optional Port ID, overriding default of qsfpX/0 +#port_id = "" +# Optional port description +#port_description = "uplink 0" +# Optional chassis ID, overriding the switch-level setting +#chassis_id = "" +# Optional system name, overriding the switch-level setting +#system_name = "" +# Optional system description, overriding the switch-level setting +#system_description = "" +# Optional management addresses to advertise, overriding switch-level setting +#management_addrs = [] + # An allowlist of source IPs that can make requests to user-facing services can # be specified here. It can be written as the string "any" ... [allowed_source_ips] diff --git a/sp-sim/examples/config.toml b/sp-sim/examples/config.toml index cf338ecf2e8..f53ea7cfd87 100644 --- a/sp-sim/examples/config.toml +++ b/sp-sim/examples/config.toml @@ -24,6 +24,16 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33312" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "ff15:0:1de::2" bind_addrs = ["[::]:33320", "[::]:33321"] @@ -39,6 +49,17 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33322" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + + [log] # Show log messages of this level and more severe level = "debug" diff --git a/sp-sim/src/config.rs b/sp-sim/src/config.rs index b64953e5ed8..d45e956dee6 100644 --- a/sp-sim/src/config.rs +++ b/sp-sim/src/config.rs @@ -5,6 +5,7 @@ //! Interfaces for parsing configuration files and working with a simulated SP //!
configuration +use crate::sensors; use dropshot::ConfigLogging; use gateway_messages::DeviceCapabilities; use gateway_messages::DevicePresence; @@ -59,6 +60,9 @@ pub struct SpComponentConfig { /// /// Only supported for components inside a [`GimletConfig`]. pub serial_console: Option<SocketAddrV6>, + + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub sensors: Vec<SensorConfig>, } /// Configuration of a simulated sidecar SP @@ -93,6 +97,16 @@ pub struct Config { pub log: ConfigLogging, } +/// Configuration for a component's sensor readings. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct SensorConfig { + #[serde(flatten)] + pub def: sensors::SensorDef, + + #[serde(flatten)] + pub state: sensors::SensorState, +} + impl Config { /// Load a `Config` from the given TOML file /// diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index e980a4b67d4..70c2e72fcb4 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -6,6 +6,7 @@ use crate::config::GimletConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -630,6 +631,7 @@ struct Handler { startup_options: StartupOptions, update_state: SimSpUpdate, reset_pending: Option<SpComponent>, + sensors: Sensors, last_request_handled: Option, @@ -665,9 +667,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -1206,13 +1211,16 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result<u32, SpError> { + let num_details = + self.sensors.num_component_details(&component).unwrap_or(0); debug!( - &self.log, "asked for component details (returning 0 details)"; + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_details ); - Ok(0) + Ok(num_details) } fn component_details( &mut self, component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0).
- unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + unreachable!( + "this is a gimlet, so it should have no port status details" + ); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1445,9 +1464,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result<gateway_messages::SensorResponse, SpError> { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result<u64, SpError> { diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 0f340ed6423..15f2034aa8b 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; mod gimlet; mod helpers; +mod sensors; mod server; mod sidecar; mod update; diff --git a/sp-sim/src/sensors.rs b/sp-sim/src/sensors.rs new file mode 100644 index 00000000000..fc684af01b9 --- /dev/null +++ b/sp-sim/src/sensors.rs @@ -0,0 +1,218 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::config::SpComponentConfig; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::sp_impl::BoundsChecked; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_messages::Measurement; +use gateway_messages::SensorDataMissing; +use gateway_messages::SensorError; +use gateway_messages::SensorReading; +use gateway_messages::SensorRequest; +use gateway_messages::SensorRequestKind; +use gateway_messages::SensorResponse; +use gateway_messages::SpComponent; + +use std::collections::HashMap; + +pub(crate) struct Sensors { + by_component: HashMap<SpComponent, Vec<u32>>, + sensors: Vec<Sensor>, +} + +#[derive(Debug)] +struct Sensor { + def: SensorDef, + state: SensorState, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, PartialEq)] +pub struct SensorDef { + pub name: String, + pub kind: MeasurementKind, +} + +// TODO(eliza): note that currently, we just hardcode these in +// `MeasurementConfig`. Eventually, it would be neat to allow the sensor to be +// changed dynamically as part of a simulation.
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] +pub struct SensorState { + #[serde(default)] + pub last_error: Option<LastError>, + + #[serde(default)] + pub last_data: Option<LastData>, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastError { + pub timestamp: u64, + pub value: SensorDataMissing, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastData { + pub timestamp: u64, + pub value: f32, +} + +impl SensorState { + fn last_reading(&self) -> SensorReading { + match self { + Self { last_data: Some(data), last_error: Some(error) } => { + if data.timestamp >= error.timestamp { + SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + } + } else { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + } + Self { last_data: Some(data), last_error: None } => SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + }, + Self { last_data: None, last_error: Some(error) } => { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + Self { last_data: None, last_error: None } => SensorReading { + value: Err(SensorDataMissing::DeviceNotPresent), + timestamp: 0, // TODO(eliza): what do? + }, + } + } +} + +impl Sensors { + pub(crate) fn from_component_configs<'a>( + cfgs: impl IntoIterator<Item = &'a SpComponentConfig>, + ) -> Self { + let mut sensors = Vec::new(); + let mut by_component = HashMap::new(); + for cfg in cfgs { + if cfg.sensors.is_empty() { + continue; + } + if !cfg + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + panic!( + "invalid component config: a device with sensors should \ have the `HAS_MEASUREMENT_CHANNELS` capability:{cfg:#?}" + ); + } + + let mut ids = Vec::with_capacity(cfg.sensors.len()); + for sensor in &cfg.sensors { + let sensor_id = sensors.len() as u32; + sensors.push(Sensor { + def: sensor.def.clone(), + state: sensor.state.clone(), + }); + ids.push(sensor_id) + } + + let component = SpComponent::try_from(cfg.id.as_str()).unwrap(); + let prev = by_component.insert(component, ids); + assert!(prev.is_none(), "component ID {component} already exists!"); + } + Self { sensors, by_component } + } + + fn sensor_for_component<'sensors>( + &'sensors self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option<&'sensors Sensor> { + let &id = self.by_component.get(component)?.get(index.0 as usize)?; + self.sensors.get(id as usize) + } + + pub(crate) fn num_component_details( + &self, + component: &SpComponent, + ) -> Option<u32> { + let len = self + .by_component + .get(component)? + .len() + .try_into() + .expect("why would you have more than `u32::MAX` sensors?"); + Some(len) + } + + /// This method returns an `Option` because the component's details might + /// be a port status rather than a measurement, if we eventually decide to + /// implement port statuses in the simulated sidecar...
+ pub(crate) fn component_details( + &self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option<ComponentDetails> { + let sensor = self.sensor_for_component(component, index)?; + let value = + sensor.state.last_reading().value.map_err(|err| match err { + SensorDataMissing::DeviceError => MeasurementError::DeviceError, + SensorDataMissing::DeviceNotPresent => { + MeasurementError::NotPresent + } + SensorDataMissing::DeviceOff => MeasurementError::DeviceOff, + SensorDataMissing::DeviceTimeout => { + MeasurementError::DeviceTimeout + } + SensorDataMissing::DeviceUnavailable => { + MeasurementError::DeviceUnavailable + } + }); + Some(ComponentDetails::Measurement(Measurement { + name: sensor.def.name.clone(), + kind: sensor.def.kind, + value, + })) + } + + pub(crate) fn read_sensor( + &self, + SensorRequest { id, kind }: SensorRequest, + ) -> Result<SensorResponse, SensorError> { + let sensor = + self.sensors.get(id as usize).ok_or(SensorError::InvalidSensor)?; + match kind { + SensorRequestKind::LastReading => { + Ok(SensorResponse::LastReading(sensor.state.last_reading())) + } + SensorRequestKind::ErrorCount => { + let count = + // TODO(eliza): simulate more than one error... + if sensor.state.last_error.is_some() { 1 } else { 0 }; + Ok(SensorResponse::ErrorCount(count)) + } + SensorRequestKind::LastData => { + let LastData { timestamp, value } = + sensor.state.last_data.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastData { value, timestamp }) + } + SensorRequestKind::LastError => { + let LastError { timestamp, value } = + sensor.state.last_error.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastError { value, timestamp }) + } + } + } +} diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index c2fb2467d83..bef1d26c78c 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -8,6 +8,7 @@ use crate::config::SimulatedSpsConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -377,6 +378,7 @@ struct Handler { // our life as a simulator. leaked_component_device_strings: Vec<&'static str>, leaked_component_description_strings: Vec<&'static str>, + sensors: Sensors, serial_number: String, ignition: FakeIgnition, @@ -417,9 +419,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -929,13 +934,18 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result<u32, SpError> { - warn!( - &self.log, "asked for component details (returning 0 details)"; + let num_sensor_details = + self.sensors.num_component_details(&component).unwrap_or(0); + // TODO: here is where we might also handle port statuses, if we decide + // to simulate that later... + debug!( + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_sensor_details ); - Ok(0) + Ok(num_sensor_details) } fn component_details( &mut self, component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0).
- unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + todo!("simulate port status details..."); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1163,9 +1182,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result<gateway_messages::SensorResponse, SpError> { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result<u64, SpError> { diff --git a/tools/console_version b/tools/console_version index 4f670647332..b2fc99daf37 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="17ae890c68a5277fbefe773694e790a8f1b178b4" -SHA2="273a31ba14546305bfafeb9aedb2d9a7530328a0359cda363380c9ca3240b948" +COMMIT="771276573549dd255c6749980636aa7140e8bab8" +SHA2="4d441de0784bb0d775e0a7f4067758fd6c37fbf050ed76b744cd37d6e81af3d3" diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 652ebc31eb2..a9e13c083aa 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="8293f28df659c070b48e13f87a51b836238b406e" +COMMIT="76c735d472e3badaeca08982e22496fccb1ce210" SHA2="3a54305ab4b1270c9a5fb0603f481fce199f3767c174a03559ff642f7f44687e" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index cd8eb65a3e1..075ead47521 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="7400e4b0942b33af64a9aad1a429b0e2446e126f58a780328cf10eb46c63b7f8" -CIDL_SHA256_LINUX_DPD="290edfc4076d31d6f70aa7cc16ce758e10d14777d8542b688fa2880fdfde398c" +CIDL_SHA256_ILLUMOS="3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" +CIDL_SHA256_LINUX_DPD="5c70318c6feb7595bdbf41d8b33827100d28fcdf34ad738a5af10e0411463f64" CIDL_SHA256_LINUX_SWADM="e1e35784538a4fdd76dc257cc636ac3f43f7ef2842dabfe981f17f8ce6b8e1a2" diff --git a/tools/generate-nexus-api.sh b/tools/generate-nexus-api.sh index a0c7d131651..9e3f8d63f69 100755 --- a/tools/generate-nexus-api.sh +++ b/tools/generate-nexus-api.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash ./target/debug/nexus nexus/examples/config.toml -O > openapi/nexus.json -./target/debug/nexus nexus/examples/config.toml -I > openapi/nexus-internal.json diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index c1e011e38d7..0c223c85a86 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 1184f6e4fda..0db6a3b63de 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" -SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" +SHA2="5b327f213f8f341cf9072d428980f53757b2c6383f684ac80bbccfb1984ffe5f" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 7ca642fa709..2e180a83db6 100644 ---
a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" -MGD_LINUX_SHA256="915e7b5cac8ff1deb6549b86e4ba49fd5c6adbdcc56ae5dc3c7b3e69555a7c2c" \ No newline at end of file +CIDL_SHA256="e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" +MGD_LINUX_SHA256="1c3d93bbfbe4ce97af7cb81c13e42a2eea464e18de6827794a55d5bfd971b66c" \ No newline at end of file diff --git a/tools/opte_version b/tools/opte_version index dfbb589f244..0e2023666fe 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.33.277 +0.33.293 diff --git a/tools/permslip_staging b/tools/permslip_staging index d886cc42460..d2ddc45f202 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,5 +1,5 @@ -34cf117633f82cc8f665dc3b6c78dc2aff61ca87d2b2687290605080265dda30 manifest-gimlet-v1.0.23.toml +6ea87b554882860f1a9b1cf97b2f4a9c61fadf3d69e6ea1bdcd781d306d6ca9c manifest-gimlet-v1.0.24.toml 85553dd164933a9b9e4f22409abd1190b1d632d192b5f7527129acaa778a671a manifest-oxide-rot-1-v1.0.13.toml -db995edfe91959df3cb20ea8156f75b9dcff5ec5e77f98a28766617a8ed2e0c5 manifest-psc-v1.0.22.toml -26b6096a377edb3d7da50b1b499af104e6195bc7c7c6eb1b2751b32434d7ac9e manifest-sidecar-v1.0.23.toml +11bc0684155119f494a6e21810e4dc97b9efadb8154d570f67143dae98a45060 manifest-psc-v1.0.23.toml +60205852109f1584d29e2b086eae5a72d7f61b2e1f64d958e6326312ed2b0d66 manifest-sidecar-v1.0.24.toml c0fecaefac7674138337f3bd4ce4ce5b884053dead5ec27b575701471631ea2f manifest-bootleby-v1.3.0.toml diff --git a/tools/update_lldp.sh b/tools/update_lldp.sh index bf7f19eb028..2a9d1d6bae0 100755 --- a/tools/update_lldp.sh +++ b/tools/update_lldp.sh @@ -47,7 +47,9 @@ function main { esac done - TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_COMMIT") + if [[ -z "$TARGET_COMMIT" ]]; then + TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_BRANCH") + fi install_toml2json do_update_packages "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${PACKAGES[@]}" do_update_crates "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${CRATES[@]}" diff --git a/update-engine/src/display/group_display.rs b/update-engine/src/display/group_display.rs index 0e04361ce4b..9e75b64757a 100644 --- a/update-engine/src/display/group_display.rs +++ b/update-engine/src/display/group_display.rs @@ -12,8 +12,9 @@ use swrite::{swrite, SWrite}; use unicode_width::UnicodeWidthStr; use crate::{ - errors::UnknownReportKey, events::EventReport, EventBuffer, - ExecutionStatus, ExecutionTerminalInfo, StepSpec, TerminalKind, + display::ProgressRatioDisplay, errors::UnknownReportKey, + events::EventReport, EventBuffer, ExecutionStatus, ExecutionTerminalInfo, + StepSpec, TerminalKind, }; use super::{ @@ -309,11 +310,13 @@ impl GroupDisplayStats { }; swrite!(line, "{:>HEADER_WIDTH$} ", header.style(header_style)); - let terminal_count = self.terminal_count(); swrite!( line, - "{terminal_count}/{}: {} running, {} {}", - self.total, + "{}: {} running, {} {}", + ProgressRatioDisplay::current_and_total( + self.terminal_count(), + self.total + ), self.running.style(formatter.styles().meta_style), self.completed.style(formatter.styles().meta_style), "completed".style(formatter.styles().progress_style), diff --git a/update-engine/src/display/line_display_shared.rs b/update-engine/src/display/line_display_shared.rs index 99b03b13f76..e31d36dcd7c 100644 --- a/update-engine/src/display/line_display_shared.rs +++ b/update-engine/src/display/line_display_shared.rs @@ -16,6 +16,7 @@ use 
owo_colors::OwoColorize; use swrite::{swrite, SWrite as _}; use crate::{ + display::ProgressRatioDisplay, events::{ ProgressCounter, ProgressEvent, ProgressEventKind, StepEvent, StepEventKind, StepInfo, StepOutcome, @@ -633,10 +634,12 @@ fn format_progress_counter(counter: &ProgressCounter) -> String { let percent = (counter.current as f64 / total as f64) * 100.0; // <12.34> is 5 characters wide. let percent_width = 5; - let counter_width = total.to_string().len(); format!( - "{:>percent_width$.2}% ({:>counter_width$}/{} {})", - percent, counter.current, total, counter.units, + "{:>percent_width$.2}% ({} {})", + percent, + ProgressRatioDisplay::current_and_total(counter.current, total) + .padded(true), + counter.units, ) } None => format!("{} {}", counter.current, counter.units), @@ -716,17 +719,16 @@ impl LineDisplayFormatter { ) { ld_step_info.nest_data.add_prefix(line); - // Print out "/)". Leave space such that we - // print out e.g. "1/8)" and " 3/14)". - // Add 1 to the index to make it 1-based. - let step_index = ld_step_info.step_info.index + 1; - let step_index_width = ld_step_info.total_steps.to_string().len(); + // Print out "(/)" in a padded way, so that successive + // steps are vertically aligned. swrite!( line, - "{:width$}/{:width$}) ", - step_index, - ld_step_info.total_steps, - width = step_index_width + "({}) ", + ProgressRatioDisplay::index_and_total( + ld_step_info.step_info.index, + ld_step_info.total_steps + ) + .padded(true), ); swrite!( diff --git a/update-engine/src/display/mod.rs b/update-engine/src/display/mod.rs index c58a4535a08..f6775dd37be 100644 --- a/update-engine/src/display/mod.rs +++ b/update-engine/src/display/mod.rs @@ -11,11 +11,14 @@ //! * [`LineDisplay`]: a line-oriented display suitable for the command line. //! * [`GroupDisplay`]: manages state and shows the results of several //! [`LineDisplay`]s at once. +//! * Some utility displayers which can be used to build custom displayers. mod group_display; mod line_display; mod line_display_shared; +mod utils; pub use group_display::GroupDisplay; pub use line_display::{LineDisplay, LineDisplayStyles}; use line_display_shared::*; +pub use utils::*; diff --git a/update-engine/src/display/utils.rs b/update-engine/src/display/utils.rs new file mode 100644 index 00000000000..08790f352b3 --- /dev/null +++ b/update-engine/src/display/utils.rs @@ -0,0 +1,108 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utility displayers. + +use std::fmt; + +/// Given current and total, displays `{current}/{total}`. +/// +/// * If the `index_and_total` constructor is called, then `current` is `index +/// + 1`. +/// * If `padded` is `true`, `current` is right-aligned and padded with spaces +/// to the width of `total`. +/// +/// # Examples +/// +/// ``` +/// use update_engine::display::ProgressRatioDisplay; +/// +/// // 0-based index and total. +/// let display = ProgressRatioDisplay::index_and_total(0 as u64, 8 as u64); +/// assert_eq!(display.to_string(), "1/8"); +/// +/// // 1-based current and total. +/// let display = ProgressRatioDisplay::current_and_total(82 as u64, 230 as u64); +/// assert_eq!(display.to_string(), "82/230"); +/// +/// // With padding. 
+/// let display = display.padded(true); +/// assert_eq!(display.to_string(), " 82/230"); +/// ``` +#[derive(Debug)] +pub struct ProgressRatioDisplay { + current: u64, + total: u64, + padded: bool, +} + +impl ProgressRatioDisplay { + /// Create a new `ProgressRatioDisplay` with current and total values. + /// + /// `current` is considered to be 1-based. For example, "20/80 jobs done". + pub fn current_and_total<T: ToU64>(current: T, total: T) -> Self { + Self { current: current.to_u64(), total: total.to_u64(), padded: false } + } + + /// Create a new `ProgressRatioDisplay` with index and total values. + /// + /// The index is 0-based (i.e. 1 is added to it). For example, step index 0 + /// out of 8 total steps is shown as "1/8". + pub fn index_and_total<T: ToU64>(index: T, total: T) -> Self { + Self { + current: index + .to_u64() + .checked_add(1) + .expect("index can't be u64::MAX"), + total: total.to_u64(), + padded: false, + } + } + + /// If set to true, the current value is padded to the same width as the + /// total. + pub fn padded(self, padded: bool) -> Self { + Self { padded, ..self } + } +} + +impl fmt::Display for ProgressRatioDisplay { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.padded { + let width = self.total.to_string().len(); + write!(f, "{:>width$}/{}", self.current, self.total) + } else { + write!(f, "{}/{}", self.current, self.total) + } + } +} + +/// Trait that abstracts over `usize` and `u64`. +/// +/// There are no `From` implementations between `usize` and `u64`, but we +/// assert below that all the architectures we support are 64-bit. +pub trait ToU64 { + fn to_u64(self) -> u64; +} + +const _: () = { + assert!( + std::mem::size_of::<usize>() == std::mem::size_of::<u64>(), + "usize and u64 are the same size" + ); +}; + +impl ToU64 for usize { + #[inline] + fn to_u64(self) -> u64 { + self as u64 + } +} + +impl ToU64 for u64 { + #[inline] + fn to_u64(self) -> u64 { + self + } +} diff --git a/wicket-common/src/example.rs b/wicket-common/src/example.rs index bb70273b459..34af11e9061 100644 --- a/wicket-common/src/example.rs +++ b/wicket-common/src/example.rs @@ -12,7 +12,8 @@ use omicron_common::{ api::{ external::AllowedSourceIps, internal::shared::{ - BgpConfig, BgpPeerConfig, PortFec, PortSpeed, RouteConfig, + BgpConfig, BgpPeerConfig, LldpAdminStatus, LldpPortConfig, PortFec, + PortSpeed, RouteConfig, }, }, }; @@ -166,23 +167,45 @@ impl ExampleRackSetupData { vlan_id: None, }]; + let switch0_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: None, + }); + + let switch1_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: Some(vec!["172.32.0.4".parse().unwrap()]), + }); + let rack_network_config = UserSpecifiedRackNetworkConfig { infra_ip_first: "172.30.0.1".parse().unwrap(), infra_ip_last: "172.30.0.10".parse().unwrap(), switch0: btreemap!
{ "port0".to_owned() => UserSpecifiedPortConfig { - addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![RouteConfig { + addresses: vec!["172.30.0.1/24".parse().unwrap()], + routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.30.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch0_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch0_port0_lldp, autoneg: true, }, - }, + }, switch1: btreemap! { // Use the same port name as in switch0 to test that it doesn't // collide. @@ -192,10 +215,12 @@ impl ExampleRackSetupData { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.33.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch1_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch1_port0_lldp, autoneg: true, }, }, diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs index 7fd83e522a4..cb6b13422ba 100644 --- a/wicket-common/src/rack_setup.rs +++ b/wicket-common/src/rack_setup.rs @@ -11,6 +11,7 @@ use omicron_common::api::external::SwitchLocation; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; use omicron_common::api::internal::shared::BgpPeerConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; use omicron_common::api::internal::shared::RouteConfig; @@ -185,6 +186,8 @@ pub struct UserSpecifiedPortConfig { pub autoneg: bool, #[serde(default)] pub bgp_peers: Vec, + #[serde(default)] + pub lldp: Option, } /// User-specified version of [`BgpPeerConfig`]. 
diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 68485815a8c..17b31e77306 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -8,6 +8,7 @@ use omicron_common::address::IpRange; use omicron_common::api::external::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use omicron_common::api::internal::shared::UplinkAddressConfig; use serde::Serialize; @@ -320,6 +321,7 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink_port_fec, autoneg, bgp_peers, + lldp, } = cfg; let mut uplink = Table::new(); @@ -327,13 +329,16 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { // routes = [] let mut routes_out = Array::new(); for r in routes { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = r; let mut route = InlineTable::new(); route.insert("nexthop", string_value(nexthop)); route.insert("destination", string_value(destination)); if let Some(vlan_id) = vlan_id { route.insert("vlan_id", i64_value(i64::from(*vlan_id))); } + if let Some(local_pref) = local_pref { + route.insert("local_pref", i64_value(i64::from(*local_pref))); + } routes_out.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes_out))); @@ -488,6 +493,46 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink.insert("bgp_peers", Item::ArrayOfTables(peers)); + if let Some(l) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = l; + let mut lldp = Table::new(); + lldp.insert("status", string_item(status)); + if let Some(x) = chassis_id { + lldp.insert("chassis_id", string_item(x)); + } + if let Some(x) = port_id { + lldp.insert("port_id", string_item(x)); + } + if let Some(x) = system_name { + lldp.insert("system_name", string_item(x)); + } + if let Some(x) = system_description { + lldp.insert("system_description", string_item(x)); + } + if let Some(x) = port_description { + lldp.insert("port_description", string_item(x)); + } + if let Some(addrs) = management_addrs { + let mut addresses_out = Array::new(); + for a in addrs { + addresses_out.push(string_value(a)); + } + lldp.insert( + "management_addrs", + Item::Value(Value::Array(addresses_out)), + ); + } + uplink.insert("lldp", Item::Table(lldp)); + } + uplink } diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 7bb63b6b1b7..cc6a2c5621a 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -21,6 +21,7 @@ use itertools::Itertools; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use ratatui::layout::Constraint; use ratatui::layout::Direction; @@ -284,10 +285,29 @@ fn draw_rack_reset_popup( "Rack Reset (DESTRUCTIVE!)", style::header(true), )]); - let body = Text::from(vec![Line::from(vec![Span::styled( + let mut body = Text::from(vec![Line::from(vec![Span::styled( "Would you like to reset the rack to an uninitialized state?", style::plain_text(), )])]); + // One might see this warning and ask "why is 
this feature even + // here, then?" We do eventually want "rack reset" to work as a + // sort of factory reset, and the current implementation is a good + // starting point, so there's no sense in removing it (this is + // certainly not the only feature currently in this state). + // + // The warning is intended to remove the speed bump where someone + // has to find out the hard way that this doesn't work, without + // removing the speed bump where we're reminded of the feature that + // doesn't work yet. + body.lines.push(Line::from("")); + body.lines.push(Line::from(vec![ + Span::styled("WARNING: ", style::warning()), + Span::styled( + "This does not work yet and will leave the rack \ + in an unknown state (see omicron#3820)", + style::plain_text(), + ), + ])); let buttons = vec![ButtonText::new("Yes", "Y"), ButtonText::new("No", "N")]; @@ -740,6 +760,7 @@ fn rss_config_text<'a>( uplink_port_fec, autoneg, bgp_peers, + lldp, } = uplink; let mut items = vec![ @@ -771,7 +792,8 @@ fn rss_config_text<'a>( ]; let routes = routes.iter().map(|r| { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = + r; let mut items = vec![ Span::styled(" • Route : ", label_style), @@ -787,6 +809,13 @@ fn rss_config_text<'a>( Span::styled(")", label_style), ]); } + if let Some(local_pref) = local_pref { + items.extend([ + Span::styled(" (local_pref=", label_style), + Span::styled(local_pref.to_string(), ok_style), + Span::styled(")", label_style), + ]); + } items }); @@ -1027,6 +1056,68 @@ fn rss_config_text<'a>( items.extend(addresses); items.extend(peers); + if let Some(lp) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = lp; + + let mut lldp = vec![ + vec![Span::styled(" • LLDP port settings: ", label_style)], + vec![ + Span::styled(" • Admin status : ", label_style), + Span::styled(status.to_string(), ok_style), + ], + ]; + + if let Some(c) = chassis_id { + lldp.push(vec![ + Span::styled(" • Chassis ID : ", label_style), + Span::styled(c.to_string(), ok_style), + ]) + } + if let Some(s) = system_name { + lldp.push(vec![ + Span::styled(" • System name : ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(s) = system_description { + lldp.push(vec![ + Span::styled(" • System description: ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(p) = port_id { + lldp.push(vec![ + Span::styled(" • Port ID : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(p) = port_description { + lldp.push(vec![ + Span::styled(" • Port description : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(addrs) = management_addrs { + let mut label = " • Management addrs : "; + for a in addrs { + lldp.push(vec![ + Span::styled(label, label_style), + Span::styled(a.to_string(), ok_style), + ]); + label = " : "; + } + } + items.extend(lldp); + } + append_list( &mut spans, Cow::from(format!("Uplink {}: ", i + 1)), diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs index 3a61e25a3aa..de34391fccb 100644 --- a/wicket/src/ui/panes/update.rs +++ b/wicket/src/ui/panes/update.rs @@ -29,6 +29,7 @@ use ratatui::widgets::{ use ratatui::Frame; use slog::{info, o, Logger}; use tui_tree_widget::{Tree, TreeItem, TreeState}; +use update_engine::display::ProgressRatioDisplay; use update_engine::{ AbortReason, CompletionReason, ExecutionStatus, 
FailureReason, StepKey, TerminalKind, WillNotBeRunReason, @@ -1984,9 +1985,11 @@ impl ComponentUpdateListState { )); status_text.push(Span::styled( format!( - " (step {}/{})", - step_key.index + 1, - summary.total_steps, + " (step {})", + ProgressRatioDisplay::index_and_total( + step_key.index, + summary.total_steps, + ) ), style::plain_text(), )); @@ -2015,9 +2018,11 @@ impl ComponentUpdateListState { )); status_text.push(Span::styled( format!( - " at step {}/{}", - info.step_key.index + 1, - summary.total_steps, + " at step {}", + ProgressRatioDisplay::index_and_total( + info.step_key.index, + summary.total_steps, + ) ), style::plain_text(), )); @@ -2033,9 +2038,11 @@ impl ComponentUpdateListState { )); status_text.push(Span::styled( format!( - " at step {}/{}", - info.step_key.index + 1, - summary.total_steps, + " at step {}", + ProgressRatioDisplay::index_and_total( + info.step_key.index, + summary.total_steps, + ) ), style::plain_text(), )); diff --git a/wicket/tests/output/example_non_empty.toml b/wicket/tests/output/example_non_empty.toml index 717e940ca5a..fafb31048d4 100644 --- a/wicket/tests/output/example_non_empty.toml +++ b/wicket/tests/output/example_non_empty.toml @@ -111,6 +111,14 @@ allowed_export = [] local_pref = 80 enforce_first_as = true +[rack_network_config.switch0.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name override" +system_description = "system description override" +port_description = "port description override" + [rack_network_config.switch1.port0] routes = [{ nexthop = "172.33.0.10", destination = "0.0.0.0/0", vlan_id = 1 }] addresses = [{ address = "172.32.0.1/24" }] @@ -131,6 +139,15 @@ auth_key_id = "bgp-key-1" allowed_import = ["224.0.0.0/4"] enforce_first_as = false +[rack_network_config.switch1.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name override" +system_description = "system description override" +port_description = "port description override" +management_addrs = ["172.32.0.4"] + [[rack_network_config.bgp]] asn = 47 originate = ["10.0.0.0/16"] diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 324ae01b42a..6e2c27a97ef 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -25,6 +25,7 @@ flume.workspace = true futures.workspace = true gateway-messages.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hubtools.workspace = true hyper.workspace = true @@ -46,7 +47,6 @@ tokio-stream.workspace = true tokio-util.workspace = true toml.workspace = true tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true bootstrap-agent-client.workspace = true diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs index 55b4d61c9a2..3f460f1e376 100644 --- a/wicketd/src/http_entrypoints.rs +++ b/wicketd/src/http_entrypoints.rs @@ -82,6 +82,7 @@ impl WicketdApi for WicketdApiImpl { config.update_with_inventory_and_bootstrap_peers( &inventory, &ctx.bootstrap_peers, + &ctx.log, ); Ok(HttpResponseOk((&*config).into())) @@ -101,6 +102,7 @@ impl WicketdApi for WicketdApiImpl { config.update_with_inventory_and_bootstrap_peers( &inventory, &ctx.bootstrap_peers, + &ctx.log, ); config .update(body.into_inner(), ctx.baseboard.as_ref()) diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 36a4f617791..fb0914e8368 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ 
b/wicketd/src/preflight_check/uplink.rs @@ -14,6 +14,11 @@ use dpd_client::types::PortSpeed as DpdPortSpeed; use dpd_client::Client as DpdClient; use dpd_client::ClientState as DpdClientState; use either::Either; +use hickory_resolver::config::NameServerConfigGroup; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; use illumos_utils::zone::SVCCFG; use illumos_utils::PFEXEC; use omicron_common::address::DENDRITE_PORT; @@ -35,12 +40,6 @@ use std::time::Duration; use std::time::Instant; use tokio::process::Command; use tokio::sync::mpsc; -use trust_dns_resolver::config::NameServerConfigGroup; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::error::ResolveError; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; use wicket_common::preflight_check::EventBuffer; use wicket_common::preflight_check::StepContext; use wicket_common::preflight_check::StepProgress; @@ -930,16 +929,7 @@ impl DnsLookupStep { }; 'dns_servers: for &dns_ip in dns_servers { - let resolver = match self.build_resolver(dns_ip) { - Ok(resolver) => resolver, - Err(err) => { - self.warnings.push(format!( - "failed to create resolver for {dns_ip}: {}", - DisplayErrorChain::new(&err) - )); - continue; - } - }; + let resolver = self.build_resolver(dns_ip); // Attempt to resolve any NTP servers that aren't IP addresses. for &ntp_name in &ntp_names_to_resolve { @@ -1052,14 +1042,18 @@ impl DnsLookupStep { ( "A", resolver.ipv4_lookup(name).await.map(|records| { - Either::Left(records.into_iter().map(IpAddr::V4)) + Either::Left( + records.into_iter().map(|x| IpAddr::V4(x.into())), + ) }), ) } else { ( "AAAA", resolver.ipv6_lookup(name).await.map(|records| { - Either::Right(records.into_iter().map(IpAddr::V6)) + Either::Right( + records.into_iter().map(|x| IpAddr::V6(x.into())), + ) }), ) }; @@ -1175,12 +1169,12 @@ impl DnsLookupStep { /// /// If building it fails, we'll append to our internal `warnings` and return /// `None`. - fn build_resolver( - &mut self, - dns_ip: IpAddr, - ) -> Result<TokioAsyncResolver, ResolveError> { + fn build_resolver(&mut self, dns_ip: IpAddr) -> TokioAsyncResolver { let mut options = ResolverOpts::default(); + // Enable edns for potentially larger records + options.edns0 = true; + // We will retry ourselves; we don't want the resolver // retrying internally too.
options.attempts = 1; diff --git a/wicketd/src/rss_config.rs index c6f2dd58920..46ede25eaa0 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -26,6 +26,7 @@ use omicron_common::api::external::AllowedSourceIps; use omicron_common::api::external::SwitchLocation; use once_cell::sync::Lazy; use sled_hardware_types::Baseboard; +use slog::debug; use slog::warn; use std::collections::btree_map; use std::collections::BTreeMap; @@ -115,6 +116,7 @@ impl CurrentRssConfig { &mut self, inventory: &RackV1Inventory, bootstrap_peers: &BootstrapPeers, + log: &slog::Logger, ) { let bootstrap_sleds = bootstrap_peers.sleds(); @@ -126,7 +128,15 @@ impl CurrentRssConfig { return None; } - let state = sp.state.as_ref()?; + let Some(state) = sp.state.as_ref() else { + debug!( + log, + "in update_with_inventory_and_bootstrap_peers, \ + filtering out SP with no state"; + "sp" => ?sp, + ); + return None; + }; let baseboard = Baseboard::new_gimlet( state.serial_number.clone(), state.model.clone(), @@ -686,11 +696,14 @@ fn build_port_config( bgp_auth_keys: &BTreeMap<BgpAuthKeyId, Option<BgpAuthKey>>, ) -> BaPortConfigV2 { use bootstrap_agent_client::types::BgpPeerConfig as BaBgpPeerConfig; + use bootstrap_agent_client::types::LldpAdminStatus as BaLldpAdminStatus; + use bootstrap_agent_client::types::LldpPortConfig as BaLldpPortConfig; use bootstrap_agent_client::types::PortFec as BaPortFec; use bootstrap_agent_client::types::PortSpeed as BaPortSpeed; use bootstrap_agent_client::types::RouteConfig as BaRouteConfig; use bootstrap_agent_client::types::SwitchLocation as BaSwitchLocation; use bootstrap_agent_client::types::UplinkAddressConfig as BaUplinkAddressConfig; + use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; @@ -703,6 +716,7 @@ fn build_port_config( destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, }) .collect(), addresses: config @@ -779,6 +793,20 @@ fn build_port_config( PortFec::Rs => BaPortFec::Rs, }, autoneg: config.autoneg, + lldp: config.lldp.as_ref().map(|c| BaLldpPortConfig { + status: match c.status { + LldpAdminStatus::Enabled => BaLldpAdminStatus::Enabled, + LldpAdminStatus::Disabled => BaLldpAdminStatus::Disabled, + LldpAdminStatus::TxOnly => BaLldpAdminStatus::TxOnly, + LldpAdminStatus::RxOnly => BaLldpAdminStatus::RxOnly, + }, + chassis_id: c.chassis_id.clone(), + port_id: c.port_id.clone(), + system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + port_description: c.port_description.clone(), + management_addrs: c.management_addrs.clone(), + }), } } diff --git a/wicketd/tests/integration_tests/inventory.rs index ed5ad22d5d5..c7057e3adcf 100644 --- a/wicketd/tests/integration_tests/inventory.rs +++ b/wicketd/tests/integration_tests/inventory.rs @@ -10,6 +10,7 @@ use super::setup::WicketdTestContext; use gateway_messages::SpPort; use gateway_test_utils::setup as gateway_setup; use sled_hardware_types::Baseboard; +use slog::{info, warn}; use wicket::OutputKind; use wicket_common::inventory::{SpIdentifier, SpType}; use wicket_common::rack_setup::BootstrapSledDescription; @@ -32,13 +33,29 @@ async fn test_inventory() { .into_inner(); match response { GetInventoryResponse::Response { inventory, .. } => { - break inventory - } - GetInventoryResponse::Unavailable => { - // Keep polling wicketd until it receives its first results from MGS.
- tokio::time::sleep(Duration::from_millis(100)).await; + // Ensure that the SP state is populated -- if it's not, + // then the `configured-bootstrap-sleds` command below + // might return an empty list. + let sp_state_none: Vec<_> = inventory + .sps + .iter() + .filter(|sp| sp.state.is_none()) + .collect(); + if sp_state_none.is_empty() { + break inventory; + } + + warn!( + wicketd_testctx.log(), + "SP state not yet populated for some SPs, retrying"; + "sps" => ?sp_state_none + ) } + GetInventoryResponse::Unavailable => {} } + + // Keep polling wicketd until it receives its first results from MGS. + tokio::time::sleep(Duration::from_millis(100)).await; } }; let inventory = @@ -46,6 +63,8 @@ async fn test_inventory() { .await .expect("get_inventory completed within 10 seconds"); + info!(wicketd_testctx.log(), "inventory returned"; "inventory" => ?inventory); + // 4 SPs attached to the inventory. assert_eq!(inventory.sps.len(), 4); @@ -70,17 +89,17 @@ async fn test_inventory() { serde_json::from_slice(&stdout).expect("stdout is valid JSON"); // This only tests the case that we get sleds back with no current - // bootstrap IP. This does provide svalue: it check that the command - // exists, accesses data within wicket, and returns it in the schema we - // expect. But it does not test the case where a sled does have a - // bootstrap IP. + // bootstrap IP. This does provide some value: it checks that the + // command exists, accesses data within wicket, and returns it in the + // schema we expect. But it does not test the case where a sled does + // have a bootstrap IP. // // Unfortunately, that's a difficult thing to test today. Wicket gets // that information by enumerating the IPs on the bootstrap network and // reaching out to the bootstrap_agent on them directly to ask them who // they are. Our testing setup does not have a way to provide such an // IP, or run a bootstrap_agent on an IP to respond. We should update - // this test when we do have that capabilitiy. + // this test when we do have that capability. 
assert_eq!( response, vec![ diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 854a0201678..ab1f8b971ea 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -22,17 +22,17 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { version = "1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } @@ -59,20 +59,21 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.158", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits = { version = "0.2.19", features = ["i128", "libm"] } @@ -80,21 +81,25 @@ openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } 
petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } spin = { version = "0.9.8" } @@ -102,20 +107,19 @@ string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } -tokio-stream = { version = "0.1.15", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net", "sync"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -125,17 +129,18 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { 
version = "1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } +cc = { version = "1.0.97", default-features = false, features = ["parallel"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } @@ -162,20 +167,21 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.158", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits = { version = "0.2.19", features = ["i128", "libm"] } @@ -183,21 +189,25 @@ openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } 
+quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } spin = { version = "0.9.8" } @@ -207,28 +217,28 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.3", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } -tokio-stream = { version = "0.1.15", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net", "sync"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } unicode-xid = { version = "0.2.4" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } [target.x86_64-unknown-linux-gnu.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", 
"signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } @@ -236,51 +246,56 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-linux-gnu.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.build-dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.build-dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-unknown-illumos.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", 
default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } +nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } ### END HAKARI SECTION