diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md new file mode 100644 index 0000000000..eb1ac2c6e9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md @@ -0,0 +1,65 @@ +--- +name: Test flake from buildomat +about: Report a test failure from a CI run either on "main" or on a PR where you don't + think the PR changes caused the failure +title: 'test failed in CI: NAME_OF_TEST' +labels: Test Flake +assignees: '' + +--- + + + +This test failed on a CI run on **"main" (or pull request XXX)**: + + Link here to the GitHub page showing the test failure. + If it's from a PR, this might look like: + https://github.com/oxidecomputer/omicron/pull/4588/checks?check_run_id=19198066410 + It could also be a link to a failure on "main", which would look like: + https://github.com/oxidecomputer/omicron/runs/20589829185 + This is useful because it shows which commit failed and all the surrounding context. + +Log showing the specific test failure: + + + Link here to the specific line of output from the buildomat log showing the failure: + https://buildomat.eng.oxide.computer/wg/0/details/01HGH32FQYKZJNX9J62HNABKPA/31C5jyox8tyHUIuDDevKkXlDZCyNw143z4nOq8wLl3xtjKzT/01HGH32V3P0HH6B56S46AJAT63#S4455 + This is useful because it shows all the details about the test failure. + +Excerpt from the log showing the failure: + +``` +Paste here an excerpt from the log. +This is redundant with the log above but helps people searching for the error message +or test name. It also works if the link above becomes unavailable. +Here's an example: + +------ + +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. +thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... +``` diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md new file mode 100644 index 0000000000..e963c83926 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md @@ -0,0 +1,42 @@ +--- +name: Test flake from local failure +about: Report a test failure that happened locally (not CI) that you believe is not + related to local changes +title: 'test failure: TEST_NAME' +labels: Test Flake +assignees: '' + +--- + +On branch **BRANCH** commit **COMMIT**, I saw this test failure: + +``` +Include the trimmed, relevant output from `cargo nextest`. Here's an example: + +------- +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 
0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. +thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... +``` + +**NOTE: Consider attaching any log files produced by the test.** diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index ddc4ffc021..63752880d6 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@4f13fb62448d53782828736cd5b0fd395b5f0c06 # v2 + uses: taiki-e/install-action@37129d5de13e9122cce55a7a5e7e49981cef514c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date diff --git a/Cargo.lock b/Cargo.lock index 544097d1e5..249b7c5cea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -815,9 +815,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.7" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] @@ -1074,9 +1074,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.15" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -1786,7 +1786,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "oxnet", "percent-encoding", @@ -2106,6 +2106,10 @@ dependencies = [ "dns-service-client", "dropshot", "expectorate", + "hickory-client", + "hickory-proto", + "hickory-resolver", + "hickory-server", "http 0.2.12", "omicron-test-utils", "omicron-workspace-hack", @@ -2125,10 +2129,6 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.19", - "trust-dns-client", - "trust-dns-proto", - "trust-dns-resolver", - "trust-dns-server", "uuid", ] @@ -2213,7 +2213,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = 
[ "async-stream", "async-trait", @@ -2259,7 +2259,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -2408,6 +2408,7 @@ dependencies = [ "clap", "colored", "dhcproto", + "hickory-resolver", "http 0.2.12", "humantime", "hyper 0.14.30", @@ -2428,7 +2429,6 @@ dependencies = [ "socket2 0.5.7", "tokio", "toml 0.8.19", - "trust-dns-resolver", "uuid", ] @@ -2450,6 +2450,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -3318,6 +3330,90 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "hickory-client" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab9683b08d8f8957a857b0236455d80e1886eaa8c6178af556aa7871fb61b55" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + "futures-util", + "hickory-proto", + "once_cell", + "radix_trie", + "rand", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-proto" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07698b8420e2f0d6447a436ba999ec85d8fbf2a398bbd737b82cac4a2e96e512" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner 0.6.0", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand", + "thiserror", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28757f23aa75c98f254cf0405e6d8c25b831b32921b050a66692427679b1f243" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.2", + "rand", + "resolv-conf", + "smallvec 1.13.2", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-server" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be0e43c556b9b3fdb6c7c71a9a32153a2275d02419e3de809e520bfcfe40c37" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner 0.6.0", + "futures-util", + "hickory-proto", + "serde", + "thiserror", + "time", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "highway" version = "1.2.0" @@ -3520,7 +3616,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -3681,6 +3777,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -3704,7 +3810,7 @@ dependencies = [ [[package]] name 
= "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" [[package]] name = "illumos-utils" @@ -3947,6 +4053,7 @@ dependencies = [ "dropshot", "expectorate", "futures", + "hickory-resolver", "hyper 0.14.30", "omicron-common", "omicron-test-utils", @@ -3961,7 +4068,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -3972,12 +4078,12 @@ dependencies = [ "anyhow", "clap", "dropshot", + "hickory-resolver", "internal-dns", "omicron-common", "omicron-workspace-hack", "slog", "tokio", - "trust-dns-resolver", ] [[package]] @@ -4140,7 +4246,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "quote", "syn 2.0.74", @@ -4201,9 +4307,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "libdlpi-sys" @@ -4591,7 +4697,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "anyhow", "chrono", @@ -4614,6 +4720,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "libc", + "omicron-gateway", "omicron-workspace-hack", "signal-hook-tokio", "tokio", @@ -5139,6 +5246,7 @@ dependencies = [ "httptest", "internal-dns", "ipnet", + "newtype-uuid", "nexus-config", "nexus-db-model", "nexus-db-queries", @@ -5292,6 +5400,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "headers", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "illumos-utils", @@ -5318,7 +5427,6 @@ dependencies = [ "slog", "tokio", "tokio-util", - "trust-dns-resolver", "uuid", ] @@ -5337,17 +5445,22 @@ version = "0.1.0" dependencies = [ "anyhow", "api_identity", + "async-trait", "base64 0.22.1", "chrono", "clap", + "cookie 0.18.1", "derive-where", "derive_more", "dns-service-client", + "dropshot", "futures", "gateway-client", + "http 0.2.12", "humantime", "ipnetwork", "newtype-uuid", + "newtype_derive", "nexus-sled-agent-shared", "omicron-common", "omicron-passwords", @@ -5355,6 +5468,7 @@ dependencies = [ "omicron-workspace-hack", "openssl", "oxnet", + "oxql-types", "parse-display", "proptest", "schemars", @@ -5567,7 +5681,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "syn 1.0.109", @@ -5849,6 +5963,7 @@ dependencies = [ "anyhow", "base64 
0.22.1", "camino", + "chrono", "clap", "dropshot", "expectorate", @@ -5867,6 +5982,8 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "once_cell", + "oximeter", + "oximeter-producer", "schemars", "serde", "serde_json", @@ -5920,6 +6037,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", + "hickory-resolver", "http 0.2.12", "httptest", "hubtools", @@ -5970,6 +6088,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "oxnet", + "oxql-types", "parse-display", "paste", "pem", @@ -6012,7 +6131,6 @@ dependencies = [ "tokio-postgres", "tokio-util", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", @@ -6040,6 +6158,7 @@ dependencies = [ "gateway-client", "gateway-messages", "gateway-test-utils", + "http 0.2.12", "humantime", "indicatif", "internal-dns", @@ -6333,7 +6452,6 @@ dependencies = [ "bit-vec", "bitflags 1.3.2", "bitflags 2.6.0", - "bstr 0.2.17", "bstr 1.9.1", "byteorder", "bytes", @@ -6368,6 +6486,7 @@ dependencies = [ "group", "hashbrown 0.14.5", "hex", + "hickory-proto", "hmac", "hyper 0.14.30", "indexmap 2.4.0", @@ -6397,6 +6516,7 @@ dependencies = [ "postgres-types", "predicates", "proc-macro2", + "quote", "regex", "regex-automata 0.4.6", "regex-syntax 0.8.4", @@ -6415,6 +6535,7 @@ dependencies = [ "similar", "slog", "smallvec 1.13.2", + "socket2 0.5.7", "spin 0.9.8", "string_cache", "subtle", @@ -6431,7 +6552,6 @@ dependencies = [ "toml_edit 0.19.15", "toml_edit 0.22.20", "tracing", - "trust-dns-proto", "unicode-bidi", "unicode-normalization", "unicode-xid", @@ -6524,6 +6644,7 @@ dependencies = [ "nexus-internal-api", "omicron-workspace-hack", "openapi-lint", + "openapi-manager-types", "openapiv3", "owo-colors", "oximeter-api", @@ -6534,6 +6655,15 @@ dependencies = [ "wicketd-api", ] +[[package]] +name = "openapi-manager-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "omicron-workspace-hack", +] + [[package]] name = "openapiv3" version = "2.0.0" @@ -6592,7 +6722,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "dyn-clone", @@ -6609,7 +6739,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6621,7 +6751,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6677,6 +6807,7 @@ dependencies = [ "base64 0.22.1", "chrono", "futures", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "omicron-workspace-hack", @@ -6688,14 +6819,13 @@ dependencies = [ "serde_json", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] [[package]] name = 
"oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d#3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" +source = "git+https://github.com/oxidecomputer/opte?rev=76878de67229ea113d70503c441eab47ac5dc653#76878de67229ea113d70503c441eab47ac5dc653" dependencies = [ "cfg-if", "illumos-sys-hdrs", @@ -6715,9 +6845,10 @@ dependencies = [ "chrono", "clap", "omicron-workspace-hack", - "oximeter-impl", "oximeter-macro-impl", + "oximeter-schema", "oximeter-timeseries-macro", + "oximeter-types", "prettyplease", "syn 2.0.74", "toml 0.8.19", @@ -6819,6 +6950,8 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "oximeter-test-utils", + "oxql-types", "peg", "reedline", "regex", @@ -6842,39 +6975,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "oximeter-impl" -version = "0.1.0" -dependencies = [ - "approx", - "bytes", - "chrono", - "criterion", - "float-ord", - "heck 0.5.0", - "num", - "omicron-common", - "omicron-workspace-hack", - "oximeter-macro-impl", - "prettyplease", - "proc-macro2", - "quote", - "rand", - "rand_distr", - "regex", - "rstest", - "schemars", - "serde", - "serde_json", - "slog-error-chain", - "strum", - "syn 2.0.74", - "thiserror", - "toml 0.8.19", - "trybuild", - "uuid", -] - [[package]] name = "oximeter-instruments" version = "0.1.0" @@ -6935,17 +7035,75 @@ dependencies = [ "uuid", ] +[[package]] +name = "oximeter-schema" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "heck 0.5.0", + "omicron-workspace-hack", + "oximeter-types", + "prettyplease", + "proc-macro2", + "quote", + "schemars", + "serde", + "slog-error-chain", + "syn 2.0.74", + "toml 0.8.19", +] + +[[package]] +name = "oximeter-test-utils" +version = "0.1.0" +dependencies = [ + "chrono", + "omicron-workspace-hack", + "oximeter-macro-impl", + "oximeter-types", + "uuid", +] + [[package]] name = "oximeter-timeseries-macro" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "oximeter-impl", + "oximeter-schema", + "oximeter-types", "proc-macro2", "quote", "syn 2.0.74", ] +[[package]] +name = "oximeter-types" +version = "0.1.0" +dependencies = [ + "approx", + "bytes", + "chrono", + "criterion", + "float-ord", + "num", + "omicron-common", + "omicron-workspace-hack", + "oximeter-macro-impl", + "rand", + "rand_distr", + "regex", + "rstest", + "schemars", + "serde", + "serde_json", + "strum", + "thiserror", + "trybuild", + "uuid", +] + [[package]] name = "oxlog" version = "0.1.0" @@ -6970,6 +7128,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "oxql-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "highway", + "num", + "omicron-workspace-hack", + "oximeter-types", + "schemars", + "serde", +] + [[package]] name = "p256" version = "0.13.2" @@ -7653,6 +7825,15 @@ dependencies = [ "toml_edit 0.19.15", ] +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -8429,9 +8610,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" +checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" dependencies = [ "futures", "futures-timer", 
@@ -8441,12 +8622,13 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" +checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" dependencies = [ "cfg-if", "glob", + "proc-macro-crate 3.1.0", "proc-macro2", "quote", "regex", @@ -8999,9 +9181,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] @@ -9037,9 +9219,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -9143,9 +9325,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73139bc5ec2d45e6c5fd85be5a46949c1c39a4c18e56915f5eb4c12f975e377" +checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" dependencies = [ "base64 0.22.1", "chrono", @@ -9161,9 +9343,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b80d3d6b56b64335c0180e5ffde23b3c5e08c14c585b51a15bd0e95393f46703" +checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" dependencies = [ "darling", "proc-macro2", @@ -9292,11 +9474,11 @@ dependencies = [ [[package]] name = "similar" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" dependencies = [ - "bstr 0.2.17", + "bstr 1.9.1", "unicode-segmentation", ] @@ -10573,15 +10755,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.8" @@ -10628,6 +10801,17 @@ dependencies = [ "winnow 0.5.40", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.4.0", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.20" @@ -10754,26 +10938,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "trust-dns-client" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c408c32e6a9dbb38037cece35740f2cf23c875d8ca134d33631cec83f74d3fe" -dependencies = [ - "cfg-if", - "data-encoding", - "futures-channel", - "futures-util", - "lazy_static", - "radix_trie", - "rand", - "thiserror", - "time", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = 
"trust-dns-proto" version = "0.22.0" @@ -10783,7 +10947,7 @@ dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.5.1", "futures-channel", "futures-io", "futures-util", @@ -10794,53 +10958,10 @@ dependencies = [ "smallvec 1.13.2", "thiserror", "tinyvec", - "tokio", "tracing", "url", ] -[[package]] -name = "trust-dns-resolver" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aff21aa4dcefb0a1afbfac26deb0adc93888c7d295fb63ab273ef276ba2b7cfe" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lazy_static", - "lru-cache", - "parking_lot 0.12.2", - "resolv-conf", - "smallvec 1.13.2", - "thiserror", - "tokio", - "tracing", - "trust-dns-proto", -] - -[[package]] -name = "trust-dns-server" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99022f9befa6daec2a860be68ac28b1f0d9d7ccf441d8c5a695e35a58d88840d" -dependencies = [ - "async-trait", - "bytes", - "cfg-if", - "enum-as-inner", - "futures-executor", - "futures-util", - "serde", - "thiserror", - "time", - "tokio", - "toml 0.5.11", - "tracing", - "trust-dns-client", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -11707,6 +11828,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", + "hickory-resolver", "http 0.2.12", "hubtools", "hyper 0.14.30", @@ -11746,7 +11868,6 @@ dependencies = [ "tokio-util", "toml 0.8.19", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", diff --git a/Cargo.toml b/Cargo.toml index a1ae9858ab..cbb0216d5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -77,12 +78,15 @@ members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -142,6 +146,7 @@ default-members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -191,12 +196,15 @@ default-members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -362,6 +370,10 @@ headers = "0.3.9" heck = "0.5" hex = "0.4.3" hex-literal = "0.4.1" +hickory-client = "0.24.1" +hickory-proto = "0.24.1" +hickory-resolver = "0.24.1" +hickory-server = "0.24.1" highway = "1.2.0" hkdf = "0.12.4" http = "0.2.12" @@ -388,7 +400,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } kstat-rs = "0.2.4" -libc = "0.2.155" +libc = "0.2.156" libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" } linear-map 
= "1.2.0" @@ -396,8 +408,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } @@ -438,16 +450,17 @@ omicron-test-utils = { path = "test-utils" } omicron-workspace-hack = "0.1.0" omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653", features = [ "api", "std" ] } oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +openapi-manager-types = { path = "dev-tools/openapi-manager/types" } openapiv3 = "2.0.0" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "76878de67229ea113d70503c441eab47ac5dc653" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } @@ -455,11 +468,14 @@ oximeter-api = { path = "oximeter/api" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/", default-features = false } oximeter-collector = { path = "oximeter/collector" } -oximeter-impl = { path = "oximeter/impl" } oximeter-instruments = { path = "oximeter/instruments" } oximeter-macro-impl = { path = "oximeter/oximeter-macro-impl" } oximeter-producer = { path = "oximeter/producer" } +oximeter-schema = { path = "oximeter/schema" } +oximeter-test-utils = { path = "oximeter/test-utils" } oximeter-timeseries-macro = { path = "oximeter/timeseries-macro" } +oximeter-types = { path = "oximeter/types" } +oxql-types = { path = "oximeter/oxql-types" } p256 = "0.13" parse-display = "0.10.0" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } @@ -496,7 +512,7 @@ regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" -rstest = "0.19.0" +rstest = "0.22.0" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.3" @@ -511,14 +527,14 @@ serde_json = "1.0.125" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.8.3" +serde_with = "3.9.0" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } sigpipe = "0.1.3" -similar = { version = "2.5.0", features = ["bytes"] } +similar = { version = "2.6.0", features = ["bytes"] } similar-asserts = "1.5.0" # Don't change sled's version on accident; sled's on-disk format is not yet # stable and requires manual migrations. 
In the limit this won't matter because @@ -572,10 +588,6 @@ tokio-util = { version = "0.7.11", features = ["io", "io-util"] } toml = "0.8.19" toml_edit = "0.22.20" tough = { version = "0.17.1", features = [ "http" ] } -trust-dns-client = "0.22" -trust-dns-proto = "0.22" -trust-dns-resolver = "0.22" -trust-dns-server = "0.22" trybuild = "1.0.99" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } @@ -734,8 +746,6 @@ opt-level = 3 opt-level = 3 [profile.dev.package.rand_core] opt-level = 3 -[profile.dev.package.rand_hc] -opt-level = 3 [profile.dev.package.rand_xorshift] opt-level = 3 [profile.dev.package.rsa] diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 62366c45e1..a55c5d4013 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -213,6 +213,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + ProducerKind::ManagementGateway => Self::ManagementGateway, ProducerKind::SledAgent => Self::SledAgent, ProducerKind::Service => Self::Service, ProducerKind::Instance => Self::Instance, @@ -390,6 +391,9 @@ impl From fn from(kind: types::ProducerKind) -> Self { use omicron_common::api::internal::nexus::ProducerKind; match kind { + types::ProducerKind::ManagementGateway => { + ProducerKind::ManagementGateway + } types::ProducerKind::SledAgent => ProducerKind::SledAgent, types::ProducerKind::Instance => ProducerKind::Instance, types::ProducerKind::Service => ProducerKind::Service, diff --git a/clients/oxide-client/Cargo.toml b/clients/oxide-client/Cargo.toml index f2adcacb1b..183640946f 100644 --- a/clients/oxide-client/Cargo.toml +++ b/clients/oxide-client/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true base64.workspace = true chrono.workspace = true futures.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true progenitor.workspace = true @@ -22,6 +23,5 @@ serde.workspace = true serde_json.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "net" ] } -trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/oxide-client/src/lib.rs b/clients/oxide-client/src/lib.rs index 07a190c38e..249ea18146 100644 --- a/clients/oxide-client/src/lib.rs +++ b/clients/oxide-client/src/lib.rs @@ -7,13 +7,13 @@ use anyhow::anyhow; use anyhow::Context; use futures::FutureExt; +use hickory_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::TokioAsyncResolver; use std::net::SocketAddr; use std::sync::Arc; use thiserror::Error; -use trust_dns_resolver::config::{ - NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::TokioAsyncResolver; progenitor::generate_api!( spec = "../../openapi/nexus.json", @@ -46,14 +46,15 @@ impl CustomDnsResolver { socket_addr: dns_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = Arc::new( - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("failed to create resolver")?, - ); + let resolver = + Arc::new(TokioAsyncResolver::tokio(resolver_config, resolver_opts)); Ok(CustomDnsResolver { dns_addr, resolver }) } diff 
--git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs index 74fc6968e8..c23e5177a0 100644 --- a/clients/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -26,6 +26,7 @@ impl From fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self { use omicron_common::api::internal::nexus; match kind { + nexus::ProducerKind::ManagementGateway => Self::ManagementGateway, nexus::ProducerKind::Service => Self::Service, nexus::ProducerKind::SledAgent => Self::SledAgent, nexus::ProducerKind::Instance => Self::Instance, diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 4ed5aaa1cb..ed96d762dc 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -29,6 +29,7 @@ progenitor::generate_api!( BfdPeerConfig = { derives = [Eq, Hash] }, BgpConfig = { derives = [Eq, Hash] }, BgpPeerConfig = { derives = [Eq, Hash] }, + LldpPortConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, OmicronPhysicalDiskConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, PortConfigV2 = { derives = [Eq, Hash] }, RouteConfig = { derives = [Eq, Hash] }, diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index c7421aa5ee..07e4fd0b83 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -23,6 +23,7 @@ pub use dropshot::PaginationOrder; pub use error::*; use futures::stream::BoxStream; use oxnet::IpNet; +use oxnet::Ipv4Net; use parse_display::Display; use parse_display::FromStr; use rand::thread_rng; @@ -2228,7 +2229,7 @@ pub struct SwitchPortSettingsView { pub links: Vec, /// Link-layer discovery protocol (LLDP) settings. - pub link_lldp: Vec, + pub link_lldp: Vec, /// Layer 3 interface settings. pub interfaces: Vec, @@ -2370,7 +2371,7 @@ pub struct SwitchPortLinkConfig { /// The link-layer discovery protocol service configuration id for this /// link. - pub lldp_service_config_id: Uuid, + pub lldp_link_config_id: Uuid, /// The name of this link. pub link_name: String, @@ -2390,34 +2391,30 @@ pub struct SwitchPortLinkConfig { /// A link layer discovery protocol (LLDP) service configuration. #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpServiceConfig { +pub struct LldpLinkConfig { /// The id of this LLDP service instance. pub id: Uuid, - /// The link-layer discovery protocol configuration for this service. - pub lldp_config_id: Option, - /// Whether or not the LLDP service is enabled. pub enabled: bool, -} -/// A link layer discovery protocol (LLDP) base configuration. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq)] -pub struct LldpConfig { - #[serde(flatten)] - pub identity: IdentityMetadata, + /// The LLDP link name TLV. + pub link_name: Option, + + /// The LLDP link description TLV. + pub link_description: Option, /// The LLDP chassis identifier TLV. - pub chassis_id: String, + pub chassis_id: Option, - /// THE LLDP system name TLV. - pub system_name: String, + /// The LLDP system name TLV. + pub system_name: Option, - /// THE LLDP system description TLV. - pub system_description: String, + /// The LLDP system description TLV. + pub system_description: Option, - /// THE LLDP management IP TLV. - pub management_ip: oxnet::IpNet, + /// The LLDP management IP TLV. + pub management_ip: Option, } /// Describes the kind of an switch interface. @@ -2492,6 +2489,9 @@ pub struct SwitchPortRouteConfig { /// The VLAN identifier for the route. 
Use this if the gateway is reachable /// over an 802.1Q tagged L2 segment. pub vlan_id: Option, + + /// Local preference indicating priority within and across protocols. + pub local_pref: Option, } /* @@ -2705,6 +2705,15 @@ pub struct BgpPeerStatus { pub switch: SwitchLocation, } +/// The current status of a BGP peer. +#[derive( + Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq, Default, +)] +pub struct BgpExported { + /// Exported routes indexed by peer address. + pub exports: HashMap>, +} + /// Opaque object representing BGP message history for a given BGP peer. The /// contents of this object are not yet stable. #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 7f4eb358a4..4daea6a198 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -223,6 +223,8 @@ pub enum ProducerKind { Service, /// The producer is a Propolis VMM managing a guest instance. Instance, + /// The producer is a management gateway service. + ManagementGateway, } /// Information announced by a metric server, used so that clients can contact it and collect diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 089ff9b324..5945efe16d 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -305,6 +305,9 @@ pub struct RouteConfig { /// The VLAN id associated with this route. #[serde(default)] pub vlan_id: Option, + /// The local preference associated with this route. + #[serde(default)] + pub local_pref: Option, } #[derive( @@ -376,6 +379,84 @@ impl FromStr for UplinkAddressConfig { } } +#[derive( + Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +/// To what extent should this port participate in LLDP +pub enum LldpAdminStatus { + #[default] + Enabled, + Disabled, + RxOnly, + TxOnly, +} + +impl fmt::Display for LldpAdminStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LldpAdminStatus::Enabled => write!(f, "enabled"), + LldpAdminStatus::Disabled => write!(f, "disabled"), + LldpAdminStatus::RxOnly => write!(f, "rx_only"), + LldpAdminStatus::TxOnly => write!(f, "tx_only"), + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct ParseLldpAdminStatusError(String); + +impl std::fmt::Display for ParseLldpAdminStatusError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LLDP admin status error: {}", self.0) + } +} + +impl FromStr for LldpAdminStatus { + type Err = ParseLldpAdminStatusError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "enabled" => Ok(Self::Enabled), + "disabled" => Ok(Self::Disabled), + "rxonly" | "rx_only" => Ok(Self::RxOnly), + "txonly" | "tx_only" => Ok(Self::TxOnly), + _ => Err(ParseLldpAdminStatusError(format!( + "not a valid admin status: {s}" + ))), + } + } +} + +/// Per-port LLDP configuration settings. Only the "status" setting is +/// mandatory. All other fields have natural defaults or may be inherited from +/// the switch. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] +pub struct LldpPortConfig { + /// To what extent should this port participate in LLDP + pub status: LldpAdminStatus, + /// Chassis ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be + /// inherited from the switch-level settings. 
+ pub chassis_id: Option, + /// Port ID to advertise. If this is set, it will be advertised as a + /// LocallyAssigned ID type. If this is not set, it will be set to + /// the port name. e.g., qsfp0/0. + pub port_id: Option, + /// Port description to advertise. If this is not set, no + /// description will be advertised. + pub port_description: Option, + /// System name to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_name: Option, + /// System description to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub system_description: Option, + /// Management IP addresses to advertise. If this is not set, it will be + /// inherited from the switch-level settings. + pub management_addrs: Option>, +} + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] pub struct PortConfigV2 { /// The set of routes associated with this port. @@ -395,6 +476,8 @@ pub struct PortConfigV2 { /// Whether or not to set autonegotiation #[serde(default)] pub autoneg: bool, + /// LLDP configuration for this port + pub lldp: Option, } /// A set of switch uplinks. @@ -411,11 +494,13 @@ pub struct HostPortConfig { /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport /// (must be in infra_ip pool). May also include an optional VLAN ID. pub addrs: Vec, + + pub lldp: Option, } impl From for HostPortConfig { fn from(x: PortConfigV2) -> Self { - Self { port: x.port, addrs: x.addresses } + Self { port: x.port, addrs: x.addresses, lldp: x.lldp.clone() } } } diff --git a/dev-tools/downloader/src/lib.rs b/dev-tools/downloader/src/lib.rs index d5b436244c..c3d6e165ff 100644 --- a/dev-tools/downloader/src/lib.rs +++ b/dev-tools/downloader/src/lib.rs @@ -586,7 +586,10 @@ impl<'a> Downloader<'a> { let version = version.trim(); let (url_base, suffix) = match os { - Os::Illumos => ("https://illumos.org/downloads", "tar.gz"), + Os::Illumos => ( + "https://oxide-cockroachdb-build.s3.us-west-2.amazonaws.com", + "tar.gz", + ), Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"), }; let build = match os { diff --git a/dev-tools/mgs-dev/Cargo.toml b/dev-tools/mgs-dev/Cargo.toml index d5f61f4b96..70382c0469 100644 --- a/dev-tools/mgs-dev/Cargo.toml +++ b/dev-tools/mgs-dev/Cargo.toml @@ -14,6 +14,7 @@ futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true libc.workspace = true +omicron-gateway.workspace = true omicron-workspace-hack.workspace = true signal-hook-tokio.workspace = true tokio.workspace = true diff --git a/dev-tools/mgs-dev/src/main.rs b/dev-tools/mgs-dev/src/main.rs index 85b1313d68..77947999d9 100644 --- a/dev-tools/mgs-dev/src/main.rs +++ b/dev-tools/mgs-dev/src/main.rs @@ -8,6 +8,7 @@ use clap::{Args, Parser, Subcommand}; use futures::StreamExt; use libc::SIGINT; use signal_hook_tokio::Signals; +use std::net::SocketAddr; #[tokio::main] async fn main() -> anyhow::Result<()> { @@ -36,7 +37,12 @@ enum MgsDevCmd { } #[derive(Clone, Debug, Args)] -struct MgsRunArgs {} +struct MgsRunArgs { + /// Override the address of the Nexus instance to use when registering the + /// Oximeter producer. + #[clap(long)] + nexus_address: Option, +} impl MgsRunArgs { async fn exec(&self) -> Result<(), anyhow::Error> { @@ -46,9 +52,23 @@ impl MgsRunArgs { let mut signal_stream = signals.fuse(); println!("mgs-dev: setting up MGS ... 
"); - let gwtestctx = gateway_test_utils::setup::test_setup( + let (mut mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + if let Some(addr) = self.nexus_address { + mgs_config.metrics = + Some(gateway_test_utils::setup::MetricsConfig { + disabled: false, + dev_nexus_address: Some(addr), + dev_bind_loopback: true, + }); + } + + let gwtestctx = gateway_test_utils::setup::test_setup_with_config( "mgs-dev", gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + None, ) .await; println!("mgs-dev: MGS is running."); diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index a92de1b6a9..4cc484b9a9 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -62,13 +62,14 @@ multimap.workspace = true indicatif.workspace = true [dev-dependencies] +camino-tempfile.workspace = true expectorate.workspace = true +http.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true subprocess.workspace = true -camino-tempfile.workspace = true # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 5f88778829..757b4e8888 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -141,9 +141,16 @@ SP DETAILS: type "Sled" slot 0 COMPONENTS - NAME DESCRIPTION DEVICE PRESENCE SERIAL - sp3-host-cpu FAKE host cpu sp3-host-cpu Present None - dev-0 FAKE temperature sensor fake-tmp-sensor Failed None + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor fake-tmp-sensor Failed None + dev-1 FAKE temperature sensor tmp117 Present None + dev-2 FAKE Southeast temperature sensor tmp117 Present None + dev-6 FAKE U.2 Sharkfin A VPD at24csw080 Present None + dev-7 FAKE U.2 Sharkfin A hot swap controller max5970 Present None + dev-8 FAKE U.2 A NVMe Basic Management Command nvme_bmc Present None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found @@ -167,8 +174,16 @@ SP DETAILS: type "Sled" slot 1 COMPONENTS - NAME DESCRIPTION DEVICE PRESENCE SERIAL - sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor tmp117 Present None + dev-1 FAKE temperature sensor tmp117 Present None + dev-2 FAKE Southeast temperature sensor tmp117 Present None + dev-6 FAKE U.2 Sharkfin A VPD at24csw080 Present None + dev-7 FAKE U.2 Sharkfin A hot swap controller max5970 Present None + dev-8 FAKE U.2 A NVMe Basic Management Command nvme_bmc Present None + dev-39 FAKE T6 temperature sensor tmp451 Present None + dev-53 FAKE Fan controller max31790 Present None CABOOSES: none found @@ -1160,8 +1175,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -1198,8 +1213,8 @@ WARNING: Zones exist without physical disks! 
COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -1239,8 +1254,8 @@ to: blueprint ............. COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged) - cluster.preserve_downgrade_option: "22.1" (unchanged) + state fingerprint::::::::::::::::: (unchanged) + cluster.preserve_downgrade_option: (unchanged) METADATA: internal DNS version: 1 (unchanged) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 45492c14ce..d266e59ce8 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -7,9 +7,12 @@ //! Feel free to change the tool's output. This test just makes it easy to make //! sure you're only breaking what you intend. +use dropshot::Method; use expectorate::assert_contents; +use http::StatusCode; use nexus_test_utils::{OXIMETER_UUID, PRODUCER_UUID}; use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::Blueprint; use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; @@ -56,6 +59,7 @@ fn assert_oximeter_list_producers_output( #[tokio::test] async fn test_omdb_usage_errors() { + clear_omdb_env(); let cmd_path = path_to_executable(CMD_OMDB); let mut output = String::new(); let invocations: &[&[&'static str]] = &[ @@ -111,6 +115,8 @@ async fn test_omdb_usage_errors() { #[nexus_test] async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let gwtestctx = gateway_test_utils::setup::test_setup( "test_omdb_success_case", gateway_messages::SpPort::One, @@ -131,6 +137,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let tmppath = tmpdir.path().join("reconfigurator-save.out"); let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string(); + // Get the CockroachDB metadata from the blueprint so we can redact it + let initial_blueprint: Blueprint = dropshot::test_util::read_json( + &mut cptestctx + .internal_client + .make_request_no_body( + Method::GET, + &format!("/deployment/blueprints/all/{initial_blueprint_id}"), + StatusCode::OK, + ) + .await + .unwrap(), + ) + .await; + let mut output = String::new(); let invocations: &[&[&str]] = &[ @@ -183,6 +203,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // ControlPlaneTestContext. 
]; + let mut redactions = ExtraRedactions::new(); + redactions + .variable_length("tmp_path", tmppath.as_str()) + .fixed_length("blueprint_id", &initial_blueprint_id) + .variable_length( + "cockroachdb_fingerprint", + &initial_blueprint.cockroachdb_fingerprint, + ); + let crdb_version = + initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string(); + if initial_blueprint.cockroachdb_setting_preserve_downgrade.is_set() { + redactions.variable_length("cockroachdb_version", &crdb_version); + } for args in invocations { println!("running commands with args: {:?}", args); let p = postgres_url.to_string(); @@ -201,11 +234,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { }, &cmd_path, args, - Some( - ExtraRedactions::new() - .variable_length("tmp_path", tmppath.as_str()) - .fixed_length("blueprint_id", &initial_blueprint_id), - ), + Some(&redactions), ) .await; } @@ -271,6 +300,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { /// that's covered by the success tests above. #[nexus_test] async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let cmd_path = path_to_executable(CMD_OMDB); let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = @@ -504,3 +535,22 @@ async fn do_run_extra( write!(output, "=============================================\n").unwrap(); } + +// We're testing behavior that can be affected by OMDB-related environment +// variables. Clear all of them from the current process so that all child +// processes don't have them. OMDB environment variables can affect even the +// help output provided by clap. See clap-rs/clap#5673 for an example. +fn clear_omdb_env() { + // Rust documents that it's not safe to manipulate the environment in a + // multi-threaded process outside of Windows because it's possible that + // other threads are reading or writing the environment and most systems do + // not support this. On illumos, the underlying interfaces are broadly + // thread-safe. Further, Omicron only supports running tests under `cargo + // nextest`, in which case there are no threads running concurrently here + // that may be reading or modifying the environment. 
+ for (env_var, _) in std::env::vars().filter(|(k, _)| k.starts_with("OMDB_")) + { + eprintln!("removing {:?} from environment", env_var); + std::env::remove_var(env_var); + } +} diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index fe90737d9e..2ca1bc3e4d 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -25,6 +25,7 @@ nexus-internal-api.workspace = true omicron-workspace-hack.workspace = true openapiv3.workspace = true openapi-lint.workspace = true +openapi-manager-types.workspace = true owo-colors.workspace = true oximeter-api.workspace = true serde_json.workspace = true diff --git a/dev-tools/openapi-manager/src/check.rs b/dev-tools/openapi-manager/src/check.rs index 182ed9fb19..b43e43e7e5 100644 --- a/dev-tools/openapi-manager/src/check.rs +++ b/dev-tools/openapi-manager/src/check.rs @@ -5,17 +5,16 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; use similar::TextDiff; use crate::{ output::{ - display_api_spec, display_error, display_summary, headers::*, plural, - write_diff, OutputOpts, Styles, + display_api_spec, display_api_spec_file, display_error, + display_summary, headers::*, plural, write_diff, OutputOpts, Styles, }, - spec::{all_apis, CheckStatus}, + spec::{all_apis, CheckStale, Environment}, FAILURE_EXIT_CODE, NEEDS_UPDATE_EXIT_CODE, }; @@ -37,7 +36,7 @@ impl CheckResult { } pub(crate) fn check_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -48,6 +47,7 @@ pub(crate) fn check_impl( let all_apis = all_apis(); let total = all_apis.len(); let count_width = total.to_string().len(); + let count_section_indent = count_section_indent(count_width); let continued_indent = continued_indent(count_width); eprintln!("{:>HEADER_WIDTH$}", SEPARATOR); @@ -58,57 +58,89 @@ pub(crate) fn check_impl( total.style(styles.bold), plural::documents(total), ); - let mut num_up_to_date = 0; + let mut num_fresh = 0; let mut num_stale = 0; - let mut num_missing = 0; let mut num_failed = 0; for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.check(&dir) { - Ok(status) => match status { - CheckStatus::Ok(summary) => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", - UP_TO_DATE.style(styles.success_header), - display_api_spec(spec, &styles), - display_summary(&summary, &styles), - ); + match spec.check(env) { + Ok(status) => { + let total_errors = status.total_errors(); + let total_errors_width = total_errors.to_string().len(); + + if total_errors == 0 { + // Success case. + let extra = if status.extra_files_len() > 0 { + format!( + ", {} extra files", + status.extra_files_len().style(styles.bold) + ) + } else { + "".to_string() + }; - num_up_to_date += 1; - } - CheckStatus::Stale { full_path, actual, expected } => { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - STALE.style(styles.warning_header), + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}{extra}", + FRESH.style(styles.success_header), display_api_spec(spec, &styles), + display_summary(&status.summary, &styles), ); - let diff = TextDiff::from_lines(&actual, &expected); - write_diff( - &diff, - &full_path, - &styles, - // Add an indent to align diff with the status message. 
- &mut IndentWriter::new( - &continued_indent, - std::io::stderr(), - ), - )?; - - num_stale += 1; + num_fresh += 1; + continue; } - CheckStatus::Missing => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - MISSING.style(styles.warning_header), - display_api_spec(spec, &styles), - ); - num_missing += 1; + // Out of date: print errors. + eprintln!( + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", + STALE.style(styles.warning_header), + display_api_spec(spec, &styles), + ); + num_stale += 1; + + for (error_ix, (spec_file, error)) in + status.iter_errors().enumerate() + { + let error_count = error_ix + 1; + + let display_heading = |heading: &str| { + eprintln!( + "{:>HEADER_WIDTH$}{count_section_indent}\ + ({error_count:>total_errors_width$}/{total_errors}) {}", + heading.style(styles.warning_header), + display_api_spec_file(spec, spec_file, &styles), + ); + }; + + match error { + CheckStale::Modified { + full_path, + actual, + expected, + } => { + display_heading(MODIFIED); + + let diff = + TextDiff::from_lines(&**actual, &**expected); + write_diff( + &diff, + &full_path, + &styles, + // Add an indent to align diff with the status message. + &mut IndentWriter::new( + &continued_indent, + std::io::stderr(), + ), + )?; + } + CheckStale::New => { + display_heading(NEW); + } + } } - }, + } Err(error) => { eprint!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", @@ -138,13 +170,12 @@ pub(crate) fn check_impl( }; eprintln!( - "{:>HEADER_WIDTH$} {} {} checked: {} up-to-date, {} stale, {} missing, {} failed", + "{:>HEADER_WIDTH$} {} {} checked: {} fresh, {} stale, {} failed", status_header, total.style(styles.bold), plural::documents(total), - num_up_to_date.style(styles.bold), + num_fresh.style(styles.bold), num_stale.style(styles.bold), - num_missing.style(styles.bold), num_failed.style(styles.bold), ); if num_failed > 0 { @@ -170,14 +201,14 @@ pub(crate) fn check_impl( mod tests { use std::process::ExitCode; - use crate::spec::find_openapi_dir; + use crate::spec::Environment; use super::*; #[test] fn check_apis_up_to_date() -> Result { let output = OutputOpts { color: clap::ColorChoice::Auto }; - let dir = find_openapi_dir()?; + let dir = Environment::new(None)?; let result = check_impl(&dir, &output)?; Ok(result.to_exit_code()) diff --git a/dev-tools/openapi-manager/src/dispatch.rs b/dev-tools/openapi-manager/src/dispatch.rs index 937a8b485f..ca2989396f 100644 --- a/dev-tools/openapi-manager/src/dispatch.rs +++ b/dev-tools/openapi-manager/src/dispatch.rs @@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand}; use crate::{ check::check_impl, generate::generate_impl, list::list_impl, - output::OutputOpts, spec::openapi_dir, + output::OutputOpts, spec::Environment, }; /// Manage OpenAPI specifications. 
@@ -73,7 +73,7 @@ pub struct GenerateArgs { impl GenerateArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; + let dir = Environment::new(self.dir)?; Ok(generate_impl(&dir, output)?.to_exit_code()) } } @@ -87,8 +87,8 @@ pub struct CheckArgs { impl CheckArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; - Ok(check_impl(&dir, output)?.to_exit_code()) + let env = Environment::new(self.dir)?; + Ok(check_impl(&env, output)?.to_exit_code()) } } diff --git a/dev-tools/openapi-manager/src/generate.rs b/dev-tools/openapi-manager/src/generate.rs index f776ff2709..1cf9ebbb61 100644 --- a/dev-tools/openapi-manager/src/generate.rs +++ b/dev-tools/openapi-manager/src/generate.rs @@ -5,7 +5,6 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; @@ -14,7 +13,7 @@ use crate::{ display_api_spec, display_error, display_summary, headers::*, plural, OutputOpts, Styles, }, - spec::{all_apis, OverwriteStatus}, + spec::{all_apis, Environment}, FAILURE_EXIT_CODE, }; @@ -34,7 +33,7 @@ impl GenerateResult { } pub(crate) fn generate_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -62,27 +61,30 @@ pub(crate) fn generate_impl( for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.overwrite(&dir) { - Ok((status, summary)) => match status { - OverwriteStatus::Updated => { + match spec.overwrite(env) { + Ok(status) => { + let updated_count = status.updated_count(); + + if updated_count > 0 { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {} ({} {} updated)", UPDATED.style(styles.success_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), + updated_count.style(styles.bold), + plural::files(updated_count), ); num_updated += 1; - } - OverwriteStatus::Unchanged => { + } else { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", UNCHANGED.style(styles.unchanged_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), ); num_unchanged += 1; } - }, + } Err(err) => { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", diff --git a/dev-tools/openapi-manager/src/output.rs b/dev-tools/openapi-manager/src/output.rs index 6cd578e778..fee7f0f15c 100644 --- a/dev-tools/openapi-manager/src/output.rs +++ b/dev-tools/openapi-manager/src/output.rs @@ -10,7 +10,7 @@ use indent_write::fmt::IndentWriter; use owo_colors::{OwoColorize, Style}; use similar::{ChangeTag, DiffableStr, TextDiff}; -use crate::spec::{ApiSpec, DocumentSummary}; +use crate::spec::{ApiSpec, ApiSpecFile, DocumentSummary}; #[derive(Debug, Args)] #[clap(next_help_heading = "Global options")] @@ -123,6 +123,21 @@ pub(crate) fn display_api_spec(spec: &ApiSpec, styles: &Styles) -> String { ) } +pub(crate) fn display_api_spec_file( + spec: &ApiSpec, + spec_file: ApiSpecFile<'_>, + styles: &Styles, +) -> String { + match spec_file { + ApiSpecFile::Openapi => { + format!("OpenAPI document {}", spec.filename.style(styles.filename)) + } + ApiSpecFile::Extra(path) => { + format!("Extra file {}", path.style(styles.filename)) + } + } +} + pub(crate) fn display_summary( summary: &DocumentSummary, styles: &Styles, @@ -201,9 +216,14 @@ pub(crate) mod 
headers { pub(crate) static CHECKING: &str = "Checking"; pub(crate) static GENERATING: &str = "Generating"; - pub(crate) static UP_TO_DATE: &str = "Up-to-date"; + pub(crate) static FRESH: &str = "Fresh"; + + // Stale encompasses: + // - Stale: the file on disk is different from what we generated. + // - Missing: the file on disk does not exist. pub(crate) static STALE: &str = "Stale"; - pub(crate) static MISSING: &str = "Missing"; + pub(crate) static NEW: &str = "-> New"; + pub(crate) static MODIFIED: &str = "-> Modified"; pub(crate) static UPDATED: &str = "Updated"; pub(crate) static UNCHANGED: &str = "Unchanged"; @@ -211,22 +231,38 @@ pub(crate) mod headers { pub(crate) static SUCCESS: &str = "Success"; pub(crate) static FAILURE: &str = "Failure"; - pub(crate) fn continued_indent(count_width: usize) -> String { + fn count_section_width(count_width: usize) -> usize { // Status strings are of the form: // // Generated [ 1/12] api.json: 1 path, 1 schema + // ^^^^^^^^^ // - // So the continued indent is: - // - // HEADER_WIDTH for the status string - // + (count_width * 2) for current and total counts + // So the width of the count section is: + // (count_width * 2) for current and total counts // + 3 for '[/]' // + 2 for spaces on either side. - " ".repeat(HEADER_WIDTH + count_width * 2 + 3 + 2) + count_width * 2 + 3 + 2 + } + + pub(crate) fn count_section_indent(count_width: usize) -> String { + " ".repeat(count_section_width(count_width)) + } + + pub(crate) fn continued_indent(count_width: usize) -> String { + // HEADER_WIDTH for the status string + count_section_width + " ".repeat(HEADER_WIDTH + count_section_width(count_width)) } } pub(crate) mod plural { + pub(crate) fn files(count: usize) -> &'static str { + if count == 1 { + "file" + } else { + "files" + } + } + pub(crate) fn documents(count: usize) -> &'static str { if count == 1 { "document" diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 29601a63d6..e74cf7ed7a 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -9,6 +9,7 @@ use atomicwrites::AtomicFile; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::{ApiDescription, ApiDescriptionBuildErrors, StubContext}; use fs_err as fs; +use openapi_manager_types::{ValidationBackend, ValidationContext}; use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. @@ -143,47 +144,64 @@ pub struct ApiSpec { pub filename: &'static str, /// Extra validation to perform on the OpenAPI spec, if any. 
- pub extra_validation: Option anyhow::Result<()>>, + pub extra_validation: Option)>, } impl ApiSpec { pub(crate) fn overwrite( &self, - dir: &Utf8Path, - ) -> Result<(OverwriteStatus, DocumentSummary)> { + env: &Environment, + ) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let status = overwrite_file(&full_path, &contents)?; - - Ok((status, summary)) + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = overwrite_file(&full_path, &contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + let status = overwrite_file(&full_path, &contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecOverwriteStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } - pub(crate) fn check(&self, dir: &Utf8Path) -> Result { + pub(crate) fn check(&self, env: &Environment) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let existing_contents = - read_opt(&full_path).context("failed to read contents on disk")?; - - match existing_contents { - Some(existing_contents) if existing_contents == contents => { - Ok(CheckStatus::Ok(summary)) - } - Some(existing_contents) => Ok(CheckStatus::Stale { - full_path, - actual: existing_contents, - expected: contents, - }), - None => Ok(CheckStatus::Missing), - } + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = check_file(full_path, contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + let status = check_file(full_path, contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecCheckStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } pub(crate) fn to_openapi_doc(&self) -> Result { @@ -216,7 +234,10 @@ impl ApiSpec { Ok(contents) } - fn validate_json(&self, contents: &[u8]) -> Result { + fn validate_json( + &self, + contents: &[u8], + ) -> Result<(DocumentSummary, ValidationResult)> { let openapi_doc = contents_to_openapi(contents) .context("JSON returned by ApiDescription is not valid OpenAPI")?; @@ -231,11 +252,51 @@ impl ApiSpec { return Err(anyhow::anyhow!("{}", errors.join("\n\n"))); } - if let Some(extra_validation) = self.extra_validation { - extra_validation(&openapi_doc)?; - } + let extra_files = if let Some(extra_validation) = self.extra_validation + { + let mut validation_context = + ValidationContextImpl { errors: Vec::new(), files: Vec::new() }; + extra_validation( + &openapi_doc, + ValidationContext::new(&mut validation_context), + ); + + if !validation_context.errors.is_empty() { + return Err(anyhow::anyhow!( + "OpenAPI document extended validation failed:\n{}", + validation_context + .errors + .iter() + .map(|e| e.to_string()) + .collect::>() + .join("\n") + )); + } + + validation_context.files + } else { + Vec::new() + }; + + Ok(( + DocumentSummary::new(&openapi_doc), + ValidationResult { extra_files }, + )) + } +} + +struct ValidationContextImpl { + errors: Vec, + files: Vec<(Utf8PathBuf, Vec)>, +} + +impl ValidationBackend for ValidationContextImpl { + fn 
report_error(&mut self, error: anyhow::Error) { + self.errors.push(error); + } - Ok(DocumentSummary::new(&openapi_doc)) + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec) { + self.files.push((path, contents)); } } @@ -260,6 +321,32 @@ impl fmt::Display for ApiBoundary { } } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecOverwriteStatus { + pub(crate) summary: DocumentSummary, + openapi_doc: OverwriteStatus, + extra_files: Vec<(Utf8PathBuf, OverwriteStatus)>, +} + +impl SpecOverwriteStatus { + pub(crate) fn updated_count(&self) -> usize { + self.iter() + .filter(|(_, status)| matches!(status, OverwriteStatus::Updated)) + .count() + } + + fn iter( + &self, + ) -> impl Iterator, &OverwriteStatus)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)).chain( + self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + }), + ) + } +} + #[derive(Debug)] #[must_use] pub(crate) enum OverwriteStatus { @@ -267,12 +354,58 @@ pub(crate) enum OverwriteStatus { Unchanged, } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecCheckStatus { + pub(crate) summary: DocumentSummary, + pub(crate) openapi_doc: CheckStatus, + pub(crate) extra_files: Vec<(Utf8PathBuf, CheckStatus)>, +} + +impl SpecCheckStatus { + pub(crate) fn total_errors(&self) -> usize { + self.iter_errors().count() + } + + pub(crate) fn extra_files_len(&self) -> usize { + self.extra_files.len() + } + + pub(crate) fn iter_errors( + &self, + ) -> impl Iterator, &CheckStale)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)) + .chain(self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + })) + .filter_map(|(spec_file, status)| { + if let CheckStatus::Stale(e) = status { + Some((spec_file, e)) + } else { + None + } + }) + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum ApiSpecFile<'a> { + Openapi, + Extra(&'a Utf8Path), +} + #[derive(Debug)] #[must_use] pub(crate) enum CheckStatus { - Ok(DocumentSummary), - Stale { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, - Missing, + Fresh, + Stale(CheckStale), +} + +#[derive(Debug)] +#[must_use] +pub(crate) enum CheckStale { + Modified { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, + New, } #[derive(Debug)] @@ -295,31 +428,45 @@ impl DocumentSummary { } } -pub(crate) fn openapi_dir(dir: Option) -> Result { - match dir { - Some(dir) => Ok(dir.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize directory: {}", dir) - })?), - None => find_openapi_dir().context("failed to find openapi directory"), - } +#[derive(Debug)] +#[must_use] +struct ValidationResult { + // Extra files recorded by the validation context. + extra_files: Vec<(Utf8PathBuf, Vec)>, } -pub(crate) fn find_openapi_dir() -> Result { - let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); - // This crate is two levels down from the root of omicron, so go up twice. - root.pop(); - root.pop(); +pub(crate) struct Environment { + pub(crate) workspace_root: Utf8PathBuf, + pub(crate) openapi_dir: Utf8PathBuf, +} - root.push("openapi"); - let root = root.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize openapi directory: {}", root) - })?; +impl Environment { + pub(crate) fn new(openapi_dir: Option) -> Result { + let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); + // This crate is two levels down from the root of omicron, so go up twice. 
+ root.pop(); + root.pop(); - if !root.is_dir() { - anyhow::bail!("openapi root is not a directory: {}", root); - } + let workspace_root = root.canonicalize_utf8().with_context(|| { + format!("failed to canonicalize workspace root: {}", root) + })?; - Ok(root) + let openapi_dir = + openapi_dir.unwrap_or_else(|| workspace_root.join("openapi")); + let openapi_dir = + openapi_dir.canonicalize_utf8().with_context(|| { + format!( + "failed to canonicalize openapi directory: {}", + openapi_dir + ) + })?; + + if !openapi_dir.is_dir() { + anyhow::bail!("openapi root is not a directory: {}", openapi_dir); + } + + Ok(Self { workspace_root, openapi_dir }) + } } /// Overwrite a file with new contents, if the contents are different. @@ -344,6 +491,29 @@ fn overwrite_file(path: &Utf8Path, contents: &[u8]) -> Result<OverwriteStatus> { Ok(OverwriteStatus::Updated) } +/// Check a file against expected contents. +fn check_file( + full_path: Utf8PathBuf, + contents: Vec<u8>, +) -> Result<CheckStatus> { + let existing_contents = + read_opt(&full_path).context("failed to read contents on disk")?; + + match existing_contents { + Some(existing_contents) if existing_contents == contents => { + Ok(CheckStatus::Fresh) + } + Some(existing_contents) => { + Ok(CheckStatus::Stale(CheckStale::Modified { + full_path, + actual: existing_contents, + expected: contents, + })) + } + None => Ok(CheckStatus::Stale(CheckStale::New)), + } +} + fn read_opt(path: &Utf8Path) -> std::io::Result<Option<Vec<u8>>> { match fs::read(path) { Ok(contents) => Ok(Some(contents)), diff --git a/dev-tools/openapi-manager/types/Cargo.toml b/dev-tools/openapi-manager/types/Cargo.toml new file mode 100644 index 0000000000..262529f1a9 --- /dev/null +++ b/dev-tools/openapi-manager/types/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "openapi-manager-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +omicron-workspace-hack.workspace = true diff --git a/dev-tools/openapi-manager/types/src/lib.rs b/dev-tools/openapi-manager/types/src/lib.rs new file mode 100644 index 0000000000..b48ea03e74 --- /dev/null +++ b/dev-tools/openapi-manager/types/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Shared types for the OpenAPI manager. +//! +//! API trait crates can depend on this crate to get access to interfaces +//! exposed by the OpenAPI manager. + +mod validation; + +pub use validation::*; diff --git a/dev-tools/openapi-manager/types/src/validation.rs b/dev-tools/openapi-manager/types/src/validation.rs new file mode 100644 index 0000000000..6f22228f4d --- /dev/null +++ b/dev-tools/openapi-manager/types/src/validation.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8PathBuf; + +/// Context for validation of OpenAPI specifications. +pub struct ValidationContext<'a> { + backend: &'a mut dyn ValidationBackend, +} + +impl<'a> ValidationContext<'a> { + /// Not part of the public API -- only called by the OpenAPI manager. + #[doc(hidden)] + pub fn new(backend: &'a mut dyn ValidationBackend) -> Self { + Self { backend } + } + + /// Reports a validation error.
+ pub fn report_error(&mut self, error: anyhow::Error) { + self.backend.report_error(error); + } + + /// Records that the file has the given contents. + /// + /// In check mode, if the files differ, an error is logged. + /// + /// In generate mode, the file is overwritten with the given contents. + /// + /// The path is treated as relative to the root of the repository. + pub fn record_file_contents( + &mut self, + path: impl Into, + contents: Vec, + ) { + self.backend.record_file_contents(path.into(), contents); + } +} + +/// The backend for validation. +/// +/// Not part of the public API -- only implemented by the OpenAPI manager. +#[doc(hidden)] +pub trait ValidationBackend { + fn report_error(&mut self, error: anyhow::Error); + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec); +} diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs index ee649e79b2..264eec2503 100644 --- a/dev-tools/releng/src/main.rs +++ b/dev-tools/releng/src/main.rs @@ -143,6 +143,10 @@ struct Args { /// Path to a pre-built omicron-package binary (skips building if set) #[clap(long, env = "OMICRON_PACKAGE")] omicron_package_bin: Option, + + /// Build the helios OS image from local sources. + #[clap(long)] + helios_local: bool, } impl Args { @@ -286,7 +290,7 @@ async fn main() -> Result<()> { logger, "helios checkout at {0} is out-of-date; run \ `git pull -C {0}`, or run omicron-releng with \ - --ignore-helios-origin or --helios-path", + --ignore-helios-origin or --helios-dir", shell_words::quote(args.helios_dir.as_str()) ); preflight_ok = false; @@ -496,39 +500,42 @@ async fn main() -> Result<()> { Utc::now().format("%Y-%m-%d %H:%M") ); - // helios-build experiment-image - jobs.push_command( - format!("{}-image", target), - Command::new("ptime") - .arg("-m") - .arg(args.helios_dir.join("helios-build")) - .arg("experiment-image") - .arg("-o") // output directory for image - .arg(args.output_dir.join(format!("os-{}", target))) + let mut image_cmd = Command::new("ptime") + .arg("-m") + .arg(args.helios_dir.join("helios-build")) + .arg("experiment-image") + .arg("-o") // output directory for image + .arg(args.output_dir.join(format!("os-{}", target))) + .arg("-F") // pass extra image builder features + .arg(format!("optever={}", opte_version.trim())) + .arg("-P") // include all files from extra proto area + .arg(proto_dir.join("root")) + .arg("-N") // image name + .arg(image_name) + .arg("-s") // tempdir name suffix + .arg(target.as_str()) + .args(target.image_build_args()) + .current_dir(&args.helios_dir) + .env( + "IMAGE_DATASET", + match target { + Target::Host => &args.host_dataset, + Target::Recovery => &args.recovery_dataset, + }, + ) + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"); + + if !args.helios_local { + image_cmd = image_cmd .arg("-p") // use an external package repository - .arg(format!("helios-dev={}", HELIOS_REPO)) - .arg("-F") // pass extra image builder features - .arg(format!("optever={}", opte_version.trim())) - .arg("-P") // include all files from extra proto area - .arg(proto_dir.join("root")) - .arg("-N") // image name - .arg(image_name) - .arg("-s") // tempdir name suffix - .arg(target.as_str()) - .args(target.image_build_args()) - .current_dir(&args.helios_dir) - .env( - "IMAGE_DATASET", - match target { - Target::Host => &args.host_dataset, - Target::Recovery => &args.recovery_dataset, - }, - ) - .env_remove("CARGO") - .env_remove("RUSTUP_TOOLCHAIN"), - ) - .after("helios-setup") - .after(format!("{}-proto", target)); + 
.arg(format!("helios-dev={HELIOS_REPO}")) + } + + // helios-build experiment-image + jobs.push_command(format!("{}-image", target), image_cmd) + .after("helios-setup") + .after(format!("{}-proto", target)); } // Build the recovery target after we build the host target. Only one // of these will build at a time since Cargo locks its target directory; diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index d11dabaf85..b4516b8b77 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -15,24 +15,24 @@ clap.workspace = true dns-server-api.workspace = true dns-service-client.workspace = true dropshot.workspace = true +hickory-client.workspace = true +hickory-proto.workspace = true +hickory-resolver.workspace = true +hickory-server.workspace = true http.workspace = true pretty-hex.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true sled.workspace = true -slog.workspace = true -slog-term.workspace = true slog-async.workspace = true slog-envlogger.workspace = true +slog-term.workspace = true +slog.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true -trust-dns-client.workspace = true -trust-dns-proto.workspace = true -trust-dns-resolver.workspace = true -trust-dns-server.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true @@ -44,4 +44,3 @@ openapiv3.workspace = true openapi-lint.workspace = true serde_json.workspace = true subprocess.workspace = true -trust-dns-resolver.workspace = true diff --git a/dns-server/src/bin/dns-server.rs b/dns-server/src/bin/dns-server.rs index 52a9c17c0d..9e8d098ee2 100644 --- a/dns-server/src/bin/dns-server.rs +++ b/dns-server/src/bin/dns-server.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Executable that starts the HTTP-configurable DNS server used for both -//! internal DNS (RFD 248) and extenral DNS (RFD 357) for the Oxide system +//! 
internal DNS (RFD 248) and external DNS (RFD 357) for the Oxide system use anyhow::anyhow; use anyhow::Context; diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 5c761f2aa3..4ecbe382c8 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -13,6 +13,19 @@ use crate::storage::Store; use anyhow::anyhow; use anyhow::Context; use dns_server_api::DnsRecord; +use hickory_proto::op::Header; +use hickory_proto::op::ResponseCode; +use hickory_proto::rr::rdata::SRV; +use hickory_proto::rr::RData; +use hickory_proto::rr::Record; +use hickory_proto::rr::RecordType; +use hickory_proto::serialize::binary::BinDecodable; +use hickory_proto::serialize::binary::BinDecoder; +use hickory_proto::serialize::binary::BinEncoder; +use hickory_resolver::Name; +use hickory_server::authority::MessageRequest; +use hickory_server::authority::MessageResponse; +use hickory_server::authority::MessageResponseBuilder; use pretty_hex::*; use serde::Deserialize; use slog::{debug, error, info, o, trace, Logger}; @@ -21,17 +34,6 @@ use std::str::FromStr; use std::sync::Arc; use thiserror::Error; use tokio::net::UdpSocket; -use trust_dns_proto::op::header::Header; -use trust_dns_proto::op::response_code::ResponseCode; -use trust_dns_proto::rr::rdata::SRV; -use trust_dns_proto::rr::record_data::RData; -use trust_dns_proto::rr::record_type::RecordType; -use trust_dns_proto::rr::{Name, Record}; -use trust_dns_proto::serialize::binary::{ - BinDecodable, BinDecoder, BinEncoder, -}; -use trust_dns_server::authority::MessageResponse; -use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; use uuid::Uuid; /// Configuration related to the DNS server @@ -167,7 +169,10 @@ async fn handle_dns_packet(request: Request) { Err(error) => { let header = Header::response_from_request(mr.header()); let rb_servfail = MessageResponseBuilder::from_message_request(&mr); - error!(log, "failed to handle incoming DNS message: {:#}", error); + error!( + log, + "failed to handle incoming DNS message: {:#?} {:#}", mr, error + ); match error { RequestError::NxDomain(_) => { let rb_nxdomain = @@ -222,7 +227,7 @@ fn dns_record_to_record( let mut a = Record::new(); a.set_name(name.clone()) .set_rr_type(RecordType::A) - .set_data(Some(RData::A(addr))); + .set_data(Some(RData::A(addr.into()))); Ok(a) } @@ -230,7 +235,7 @@ fn dns_record_to_record( let mut aaaa = Record::new(); aaaa.set_name(name.clone()) .set_rr_type(RecordType::AAAA) - .set_data(Some(RData::AAAA(addr))); + .set_data(Some(RData::AAAA(addr.into()))); Ok(aaaa) } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index 424159e41d..8abd3b945e 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -47,13 +47,13 @@ pub mod http_server; pub mod storage; use anyhow::{anyhow, Context}; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use slog::o; use std::net::SocketAddr; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; /// Starts both the HTTP and DNS servers over a given store. 
pub async fn start_servers( @@ -167,12 +167,14 @@ impl TransientServer { socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok(resolver) } } diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs index 85b2e79b8b..b3141f6751 100644 --- a/dns-server/src/storage.rs +++ b/dns-server/src/storage.rs @@ -95,6 +95,8 @@ use anyhow::{anyhow, Context}; use camino::Utf8PathBuf; use dns_server_api::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord}; +use hickory_proto::rr::LowerName; +use hickory_resolver::Name; use serde::{Deserialize, Serialize}; use sled::transaction::ConflictableTransactionError; use slog::{debug, error, info, o, warn}; @@ -104,8 +106,6 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use thiserror::Error; use tokio::sync::Mutex; -use trust_dns_client::rr::LowerName; -use trust_dns_client::rr::Name; const KEY_CONFIG: &'static str = "config"; @@ -586,7 +586,7 @@ impl Store { /// If the returned set would have been empty, returns `QueryError::NoName`. pub(crate) fn query( &self, - mr: &trust_dns_server::authority::MessageRequest, + mr: &hickory_server::authority::MessageRequest, ) -> Result, QueryError> { let name = mr.query().name(); let orig_name = mr.query().original().name(); @@ -784,14 +784,14 @@ mod test { use dns_server_api::DnsConfigParams; use dns_server_api::DnsConfigZone; use dns_server_api::DnsRecord; + use hickory_proto::rr::LowerName; + use hickory_resolver::Name; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; use std::collections::HashMap; use std::net::Ipv6Addr; use std::str::FromStr; use std::sync::Arc; - use trust_dns_client::rr::LowerName; - use trust_dns_client::rr::Name; /// As usual, `TestContext` groups the various pieces we need in a bunch of /// our tests and helps make sure they get cleaned up properly. 
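The dns-server changes above migrate from the `trust-dns-*` crates to their `hickory-*` successors. The API differences visible in this diff are small: `NameServerConfig::trust_nx_responses` is renamed to `trust_negative_responses`, and `TokioAsyncResolver::tokio` no longer returns a `Result`. Below is a minimal sketch of building a resolver against this server after the migration; the socket address and the `test.oxide.internal.` name are placeholders, not values taken from the change.

```rust
use hickory_resolver::config::{
    NameServerConfig, Protocol, ResolverConfig, ResolverOpts,
};
use hickory_resolver::TokioAsyncResolver;
use std::net::SocketAddr;

async fn lookup_example(dns_addr: SocketAddr) -> anyhow::Result<()> {
    let mut resolver_config = ResolverConfig::new();
    resolver_config.add_name_server(NameServerConfig {
        socket_addr: dns_addr,
        protocol: Protocol::Udp,
        tls_dns_name: None,
        // Renamed from `trust_nx_responses` in trust-dns.
        trust_negative_responses: false,
        bind_addr: None,
    });

    // Enable EDNS0 so responses larger than the classic 512-byte UDP limit
    // (e.g. answers with many SRV records) are not truncated.
    let mut resolver_opts = ResolverOpts::default();
    resolver_opts.edns0 = true;

    // Infallible in hickory-resolver; no `.context("creating DNS resolver")?`
    // is needed anymore.
    let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts);
    let answer = resolver.lookup_ip("test.oxide.internal.").await?;
    for addr in answer.iter() {
        println!("{addr}");
    }
    Ok(())
}
```

This mirrors the `TransientServer` change above, which is why its `.context("creating DNS resolver")?` call could be dropped.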
diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index b3b7f37378..fa5bfea468 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -9,6 +9,12 @@ use dns_service_client::{ Client, }; use dropshot::{test_util::LogContext, HandlerTaskMode}; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; +use hickory_resolver::{ + config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, + proto::op::ResponseCode, +}; use omicron_test_utils::dev::test_setup_log; use slog::o; use std::{ @@ -16,12 +22,6 @@ use std::{ net::Ipv6Addr, net::{IpAddr, Ipv4Addr}, }; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; -use trust_dns_resolver::{ - config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, - proto::op::ResponseCode, -}; const TEST_ZONE: &'static str = "oxide.internal"; @@ -374,17 +374,19 @@ async fn init_client_server( ) .await?; - let mut rc = ResolverConfig::new(); - rc.add_name_server(NameServerConfig { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = - TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); let client = Client::new(&format!("http://{}", dropshot_server.local_addr()), log); diff --git a/docs/crdb-upgrades.adoc b/docs/crdb-upgrades.adoc index eecfa9194e..52231ee199 100644 --- a/docs/crdb-upgrades.adoc +++ b/docs/crdb-upgrades.adoc @@ -60,13 +60,15 @@ a tick, but they must occur in that order.) . Add an enum variant for the new version to `CockroachDbClusterVersion` in `nexus/types/src/deployment/planning_input.rs`, and change the associated constant `NEWLY_INITIALIZED` to that value. -. Run the test suite, which should catch any unexpected SQL +. Regenerate the Nexus internal OpenAPI document, which contains an enum + of CockroachDB versions: ++ +.... +EXPECTORATE=overwrite cargo nextest run -p omicron-nexus -- integration_tests::commands::test_nexus_openapi_internal +.... +. Run the full test suite, which should catch any unexpected SQL compatibility issues between releases and help validate that your build works. - * You will need to run the `test_omdb_success_cases` test from - omicron-omdb with `EXPECTORATE=overwrite`; this file contains the - expected output of various omdb commands, including a fingerprint of - CockroachDB's cluster state. . Submit a PR for your changes to garbage-compactor; when merged, publish the final build to the `oxide-cockroachdb-build` S3 bucket. . Update `tools/cockroachdb_checksums`. 
For non-illumos checksums, use diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 781f3fb1c6..b2400f7603 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -26,7 +26,7 @@ serde_json.workspace = true sled-agent-types.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true ispf.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index d9a2d7027a..5363557502 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,6 +1,7 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; +use hickory_resolver::error::ResolveErrorKind; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -13,7 +14,6 @@ use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; const RSS_CONFIG_STR: &str = include_str!(concat!( diff --git a/gateway-test-utils/configs/config.test.toml b/gateway-test-utils/configs/config.test.toml index 79975f4611..4e3e9c6e6e 100644 --- a/gateway-test-utils/configs/config.test.toml +++ b/gateway-test-utils/configs/config.test.toml @@ -88,6 +88,15 @@ addr = "[::1]:0" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. +dev_bind_loopback = true + # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". 
The actual path will be generated by the test suite for each diff --git a/gateway-test-utils/configs/sp_sim_config.test.toml b/gateway-test-utils/configs/sp_sim_config.test.toml index cc08eec30b..4f370a167c 100644 --- a/gateway-test-utils/configs/sp_sim_config.test.toml +++ b/gateway-test-utils/configs/sp_sim_config.test.toml @@ -20,6 +20,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 1" capabilities = 0x2 presence = "Present" +sensors = [ + {name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] [[simulated_sps.sidecar.components]] id = "dev-1" @@ -27,6 +30,9 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor 2" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "South", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] [[simulated_sps.sidecar]] multicast_addr = "::1" @@ -56,6 +62,82 @@ device = "fake-tmp-sensor" description = "FAKE temperature sensor" capabilities = 0x2 presence = "Failed" +sensors = [ + { name = "Southwest", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.45898438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.024414063, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.03125, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.328125, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2607.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2476.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2553.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2265.0, 
last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2649.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2275.0, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "::1" @@ -72,6 +154,90 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:0" + +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.3629, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-1" +device = "tmp117" +description = "FAKE temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-2" +device = "tmp117" +description = "FAKE Southeast temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-6" +device = "at24csw080" +description = "FAKE U.2 Sharkfin A VPD" +capabilities = 0x0 +presence = "Present" + +[[simulated_sps.gimlet.components]] +id = "dev-7" +device = "max5970" +description = "FAKE U.2 Sharkfin A hot swap controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.41893438, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.025614603, last_data.timestamp = 1234 }, + { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.02914, last_data.timestamp = 1234 }, + { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.2618, last_data.timestamp = 1234 }, +] + +[[simulated_sps.gimlet.components]] +id = "dev-8" +device = "nvme_bmc" +description = "FAKE U.2 A NVMe Basic Management Command" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-39" +device = "tmp451" +description = "FAKE T6 temperature sensor" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 }, +] +[[simulated_sps.gimlet.components]] +id = "dev-53" +device = "max31790" +description = "FAKE Fan controller" +capabilities = 0x2 +presence = "Present" +sensors = [ + { name = "Southeast", kind = "Speed", last_data.value = 2510.0, last_data.timestamp = 1234 }, + { name = "Northeast", kind = "Speed", last_data.value = 2390.0, last_data.timestamp = 1234 }, + { name = "South", kind = "Speed", last_data.value = 2467.0, last_data.timestamp = 1234 }, + { name = "North", kind = "Speed", last_data.value = 2195.0, last_data.timestamp = 1234 }, + { name = "Southwest", kind = "Speed", last_data.value = 2680.0, last_data.timestamp = 1234 }, + { name = "Northwest", kind = "Speed", last_data.value = 2212.0, last_data.timestamp = 1234 }, +] + + # # NOTE: for the test suite, the [log] section is ignored; sp-sim logs are rolled # into the gateway logfile. 
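Each of the `sensors = [...]` arrays added to the simulated SP components above carries either a `last_data` reading (a value plus a timestamp) or a `last_error`. As a rough illustration of the shape of these entries only, and not the actual sp-sim configuration types (the struct and field names below are hypothetical, assuming the `serde` derive and `toml` crates), they could be deserialized along these lines:

```rust
use serde::Deserialize;

// Hypothetical types for illustration only; the real sp-sim config structs
// may be named and shaped differently.
#[derive(Debug, Deserialize)]
struct SensorConfig {
    name: String,
    kind: String, // e.g. "Temperature", "Current", "Voltage", "Speed"
    #[serde(default)]
    last_data: Option<Reading>,
    #[serde(default)]
    last_error: Option<ErrorReading>,
}

#[derive(Debug, Deserialize)]
struct Reading {
    value: f32,
    timestamp: u64,
}

#[derive(Debug, Deserialize)]
struct ErrorReading {
    value: String, // e.g. "DeviceError"
    timestamp: u64,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Dotted keys such as `last_data.value = 41.7890625` parse as a nested
    // table, so they land in the `Reading` struct.
    let sensor: SensorConfig = toml::from_str(
        r#"
            name = "Southwest"
            kind = "Temperature"
            last_data.value = 41.7890625
            last_data.timestamp = 1234
        "#,
    )?;
    println!("{sensor:?}");
    Ok(())
}
```

These simulated readings give the new MGS metrics subsystem (added later in this change) something to poll in the test suite.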
diff --git a/gateway-test-utils/src/setup.rs b/gateway-test-utils/src/setup.rs index 46bc55805a..056bb451f7 100644 --- a/gateway-test-utils/src/setup.rs +++ b/gateway-test-utils/src/setup.rs @@ -8,6 +8,7 @@ use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use dropshot::test_util::LogContext; use gateway_messages::SpPort; +pub use omicron_gateway::metrics::MetricsConfig; use omicron_gateway::MgsArguments; use omicron_gateway::SpType; use omicron_gateway::SwitchPortConfig; @@ -33,6 +34,7 @@ pub struct GatewayTestContext { pub server: omicron_gateway::Server, pub simrack: SimRack, pub logctx: LogContext, + pub gateway_id: Uuid, } impl GatewayTestContext { @@ -48,13 +50,18 @@ pub fn load_test_config() -> (omicron_gateway::Config, sp_sim::Config) { let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR")); let server_config_file_path = manifest_dir.join("configs/config.test.toml"); let server_config = - omicron_gateway::Config::from_file(&server_config_file_path) - .expect("failed to load config.test.toml"); + match omicron_gateway::Config::from_file(&server_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load MGS config: {e}"), + }; let sp_sim_config_file_path = manifest_dir.join("configs/sp_sim_config.test.toml"); - let sp_sim_config = sp_sim::Config::from_file(&sp_sim_config_file_path) - .expect("failed to load sp_sim_config.test.toml"); + let sp_sim_config = + match sp_sim::Config::from_file(&sp_sim_config_file_path) { + Ok(config) => config, + Err(e) => panic!("failed to load SP simulator config: {e}"), + }; (server_config, sp_sim_config) } @@ -143,8 +150,8 @@ pub async fn test_setup_with_config( // Start gateway server let rack_id = Some(Uuid::parse_str(RACK_UUID).unwrap()); - - let args = MgsArguments { id: Uuid::new_v4(), addresses, rack_id }; + let gateway_id = Uuid::new_v4(); + let args = MgsArguments { id: gateway_id, addresses, rack_id }; let server = omicron_gateway::Server::start( server_config.clone(), args, @@ -206,5 +213,5 @@ pub async fn test_setup_with_config( log.new(o!("component" => "client test context")), ); - GatewayTestContext { client, server, simrack, logctx } + GatewayTestContext { client, server, simrack, logctx, gateway_id } } diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 3cfd1d447b..2dce15892d 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -11,6 +11,7 @@ workspace = true anyhow.workspace = true base64.workspace = true camino.workspace = true +chrono.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -39,6 +40,8 @@ tokio-tungstenite.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +oximeter.workspace = true +oximeter-producer.workspace = true [dev-dependencies] expectorate.workspace = true diff --git a/gateway/examples/config.toml b/gateway/examples/config.toml index d29d9508b9..a76edcd7b5 100644 --- a/gateway/examples/config.toml +++ b/gateway/examples/config.toml @@ -71,6 +71,15 @@ addr = "[::1]:33320" ignition-target = 3 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] } +# +# Configuration for SP sensor metrics polling +# +[metrics] +# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is +# useful in local testing and development, when the gateway service is not +# given a "real" underlay network IP. 
+dev_bind_loopback = true + [log] # Show log messages of this level and more severe level = "debug" diff --git a/gateway/src/config.rs b/gateway/src/config.rs index afdb046881..edf895ef59 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -6,6 +6,7 @@ //! configuration use crate::management_switch::SwitchConfig; +use crate::metrics::MetricsConfig; use camino::Utf8Path; use camino::Utf8PathBuf; use dropshot::ConfigLogging; @@ -25,6 +26,8 @@ pub struct Config { pub switch: SwitchConfig, /// Server-wide logging configuration. pub log: ConfigLogging, + /// Configuration for SP sensor metrics. + pub metrics: Option, } impl Config { @@ -47,13 +50,13 @@ pub struct PartialDropshotConfig { #[derive(Debug, Error, SlogInlineError)] pub enum LoadError { - #[error("error reading \"{path}\"")] + #[error("error reading \"{path}\": {err}")] Io { path: Utf8PathBuf, #[source] err: std::io::Error, }, - #[error("error parsing \"{path}\"")] + #[error("error parsing \"{path}\": {err}")] Parse { path: Utf8PathBuf, #[source] diff --git a/gateway/src/error.rs b/gateway/src/error.rs index 5933daa340..ee148e0c98 100644 --- a/gateway/src/error.rs +++ b/gateway/src/error.rs @@ -26,12 +26,8 @@ pub enum StartupError { #[derive(Debug, Error, SlogInlineError)] pub enum SpCommsError { - #[error("discovery process not yet complete")] - DiscoveryNotYetComplete, - #[error("location discovery failed: {reason}")] - DiscoveryFailed { reason: String }, - #[error("nonexistent SP {0:?}")] - SpDoesNotExist(SpIdentifier), + #[error(transparent)] + Discovery(#[from] SpLookupError), #[error("unknown socket address for SP {0:?}")] SpAddressUnknown(SpIdentifier), #[error( @@ -52,13 +48,22 @@ pub enum SpCommsError { }, } +/// Errors returned by attempts to look up a SP in the management switch's +/// discovery map. +#[derive(Debug, Error, SlogInlineError)] +pub enum SpLookupError { + #[error("discovery process not yet complete")] + DiscoveryNotYetComplete, + #[error("location discovery failed: {reason}")] + DiscoveryFailed { reason: String }, + #[error("nonexistent SP {0:?}")] + SpDoesNotExist(SpIdentifier), +} + impl From for HttpError { fn from(error: SpCommsError) -> Self { match error { - SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request( - Some("InvalidSp".to_string()), - InlineErrorChain::new(&error).to_string(), - ), + SpCommsError::Discovery(err) => HttpError::from(err), SpCommsError::SpCommunicationFailed { err: CommunicationError::SpError( @@ -124,21 +129,11 @@ impl From for HttpError { "UpdateInProgress", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryNotYetComplete => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryNotYetComplete", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::SpAddressUnknown(_) => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "SpAddressUnknown", InlineErrorChain::new(&error).to_string(), ), - SpCommsError::DiscoveryFailed { .. } => http_err_with_message( - http::StatusCode::SERVICE_UNAVAILABLE, - "DiscoveryFailed ", - InlineErrorChain::new(&error).to_string(), - ), SpCommsError::Timeout { .. 
} => http_err_with_message( http::StatusCode::SERVICE_UNAVAILABLE, "Timeout ", @@ -160,6 +155,27 @@ impl From for HttpError { } } +impl From for HttpError { + fn from(error: SpLookupError) -> Self { + match error { + SpLookupError::SpDoesNotExist(_) => HttpError::for_bad_request( + Some("InvalidSp".to_string()), + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryNotYetComplete => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryNotYetComplete", + InlineErrorChain::new(&error).to_string(), + ), + SpLookupError::DiscoveryFailed { .. } => http_err_with_message( + http::StatusCode::SERVICE_UNAVAILABLE, + "DiscoveryFailed ", + InlineErrorChain::new(&error).to_string(), + ), + } + } +} + // Helper function to return an `HttpError` with the same internal and external // message. MGS is an "internal" service - even when we return a 500-level // status code, we want to give our caller some information about what is going diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index e1eed05334..8e764dc63f 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -6,6 +6,7 @@ mod config; mod context; mod error; mod management_switch; +pub mod metrics; mod serial_console; pub mod http_entrypoints; // TODO pub only for testing - is this right? @@ -62,6 +63,8 @@ pub struct Server { /// `http_servers` all_servers_shutdown: FuturesUnordered, request_body_max_bytes: usize, + /// handle to the SP sensor metrics subsystem + metrics: metrics::Metrics, log: Logger, } @@ -151,6 +154,9 @@ impl Server { let mut http_servers = HashMap::with_capacity(args.addresses.len()); let all_servers_shutdown = FuturesUnordered::new(); + let metrics = + metrics::Metrics::new(&log, &args, config.metrics, apictx.clone()); + for addr in args.addresses { start_dropshot_server( &apictx, @@ -167,6 +173,7 @@ impl Server { http_servers, all_servers_shutdown, request_body_max_bytes: config.dropshot.request_body_max_bytes, + metrics, log, }) } @@ -275,12 +282,14 @@ impl Server { server.close().await?; } + self.metrics.update_server_addrs(addresses).await; + Ok(()) } /// The rack_id will be set on a refresh of the SMF property when the sled /// agent starts. 
- pub fn set_rack_id(&self, rack_id: Option) { + pub fn set_rack_id(&mut self, rack_id: Option) { if let Some(rack_id) = rack_id { let val = self.apictx.rack_id.get_or_init(|| rack_id); if *val != rack_id { @@ -291,6 +300,7 @@ impl Server { "ignored_new_rack_id" => %rack_id); } else { info!(self.apictx.log, "Set rack_id"; "rack_id" => %rack_id); + self.metrics.set_rack_id(rack_id); } } else { warn!(self.apictx.log, "SMF refresh called without a rack id"); diff --git a/gateway/src/management_switch.rs b/gateway/src/management_switch.rs index a93c44d62c..23dfbe01a8 100644 --- a/gateway/src/management_switch.rs +++ b/gateway/src/management_switch.rs @@ -20,6 +20,7 @@ pub use self::location_map::SwitchPortConfig; pub use self::location_map::SwitchPortDescription; use self::location_map::ValidatedLocationConfig; use crate::error::SpCommsError; +use crate::error::SpLookupError; use crate::error::StartupError; use gateway_messages::IgnitionState; use gateway_sp_comms::default_discovery_addr; @@ -316,18 +317,18 @@ impl ManagementSwitch { self.location_map.get().is_some() } - fn location_map(&self) -> Result<&LocationMap, SpCommsError> { + fn location_map(&self) -> Result<&LocationMap, SpLookupError> { let discovery_result = self .location_map .get() - .ok_or(SpCommsError::DiscoveryNotYetComplete)?; + .ok_or(SpLookupError::DiscoveryNotYetComplete)?; discovery_result .as_ref() - .map_err(|s| SpCommsError::DiscoveryFailed { reason: s.clone() }) + .map_err(|s| SpLookupError::DiscoveryFailed { reason: s.clone() }) } /// Get the identifier of our local switch. - pub fn local_switch(&self) -> Result { + pub fn local_switch(&self) -> Result { let location_map = self.location_map()?; Ok(location_map.port_to_id(self.local_ignition_controller_port)) } @@ -347,11 +348,11 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. - fn get_port(&self, id: SpIdentifier) -> Result { + fn get_port(&self, id: SpIdentifier) -> Result { let location_map = self.location_map()?; let port = location_map .id_to_port(id) - .ok_or(SpCommsError::SpDoesNotExist(id))?; + .ok_or(SpLookupError::SpDoesNotExist(id))?; Ok(port) } @@ -362,7 +363,7 @@ impl ManagementSwitch { /// This method will fail if discovery is not yet complete (i.e., we don't /// know the logical identifiers of any SP yet!) or if `id` specifies an SP /// that doesn't exist in our discovered location map. - pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpCommsError> { + pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpLookupError> { let port = self.get_port(id)?; Ok(self.port_to_sp(port)) } @@ -377,7 +378,7 @@ impl ManagementSwitch { pub fn ignition_target( &self, id: SpIdentifier, - ) -> Result { + ) -> Result { let port = self.get_port(id)?; Ok(self.port_to_ignition_target[port.0]) } @@ -389,7 +390,7 @@ impl ManagementSwitch { /// therefore can't map our switch ports to SP identities). pub(crate) fn all_sps( &self, - ) -> Result, SpCommsError> + ) -> Result, SpLookupError> { let location_map = self.location_map()?; Ok(location_map diff --git a/gateway/src/metrics.rs b/gateway/src/metrics.rs new file mode 100644 index 0000000000..d4e0795ae0 --- /dev/null +++ b/gateway/src/metrics.rs @@ -0,0 +1,1159 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::error::CommunicationError; +use crate::management_switch::SpIdentifier; +use crate::management_switch::SpType; +use crate::MgsArguments; +use crate::ServerContext; +use anyhow::Context; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_sp_comms::SingleSp; +use gateway_sp_comms::SpComponent; +use gateway_sp_comms::VersionedSpState; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::ProducerKind; +use omicron_common::backoff; +use oximeter::types::Cumulative; +use oximeter::types::ProducerRegistry; +use oximeter::types::Sample; +use oximeter::MetricsError; +use std::borrow::Cow; +use std::collections::hash_map; +use std::collections::hash_map::HashMap; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::broadcast; +use tokio::sync::oneshot; +use tokio::sync::watch; +use tokio::task::JoinHandle; +use uuid::Uuid; + +oximeter::use_timeseries!("hardware-component.toml"); +use hardware_component as metric; + +/// Handle to the metrics tasks. +pub struct Metrics { + /// If the metrics subsystem is disabled, this is `None`. + inner: Option, +} + +struct Handles { + addrs_tx: watch::Sender>, + rack_id_tx: Option>, + server: JoinHandle>, +} + +/// Configuration for metrics. +/// +/// In order to reduce the risk of a bad config file taking down the whole +/// management network, we try to keep the metrics-specific portion of the +/// config file as minimal as possible. At present, it only includes development +/// configurations that shouldn't be present in production configs. +#[derive( + Clone, Debug, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize, +)] +#[serde(deny_unknown_fields)] +pub struct MetricsConfig { + /// Completely disable the metrics subsystem. + /// + /// If `disabled = true`, sensor data metrics will not be collected, and the + /// metrics polling tasks will not be started. + #[serde(default)] + pub disabled: bool, + + /// Override the Nexus address used to register the SP metrics Oximeter + /// producer. This is intended for use in development and testing. + /// + /// If this argument is not present, Nexus is discovered through DNS. + #[serde(default)] + pub dev_nexus_address: Option, + + /// Allow the metrics producer endpoint to bind on loopback. + /// + /// This should be disabled in production, as Nexus will not be able to + /// reach the loopback interface, but is necessary for local development and + /// test purposes. + #[serde(default)] + pub dev_bind_loopback: bool, +} + +/// Polls sensor readings from an individual SP. +struct SpPoller { + spid: SpIdentifier, + known_state: Option, + components: HashMap, + log: slog::Logger, + rack_id: Uuid, + mgs_id: Uuid, + sample_tx: broadcast::Sender>, +} + +struct ComponentMetrics { + target: metric::HardwareComponent, + /// Counts of errors reported by sensors on this component. + sensor_errors: HashMap>, + /// Counts of errors that occurred whilst polling the SP for measurements + /// from this component. 
+ poll_errors: HashMap<&'static str, Cumulative>, +} + +#[derive(Eq, PartialEq, Hash)] +struct SensorErrorKey { + name: Cow<'static, str>, + kind: &'static str, + error: &'static str, +} + +/// Manages a metrics server and stuff. +struct ServerManager { + log: slog::Logger, + addrs: watch::Receiver>, + registry: ProducerRegistry, +} + +#[derive(Debug)] +struct Producer { + /// Receiver for samples produced by SP pollers. + sample_rx: broadcast::Receiver>, + /// Logging context. + /// + /// We stick this on the producer because we would like to be able to log + /// when stale samples are dropped. + log: slog::Logger, +} + +/// The maximum Dropshot request size for the metrics server. +const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024; + +/// Poll interval for requesting sensor readings from SPs. +/// +/// Bryan wants to try polling at 1Hz, so let's do that for now. +const SP_POLL_INTERVAL: Duration = Duration::from_secs(1); + +///The interval at which we will ask Oximeter to collect our metric samples. +/// +/// Every ten seconds seems good. +const OXIMETER_COLLECTION_INTERVAL: Duration = Duration::from_secs(10); + +/// The expected number of SPs in a fully-loaded rack. +/// +/// N.B. that there *might* be more than this; we shouldn't ever panic or +/// otherwise misbehave if we see more than this number. This is just intended +/// for sizing buffers/map allocations and so forth; we can always realloc if we +/// see a bonus SP or two. That's why it's called "normal number of SPs" and not +/// "MAX_SPS" or similar. +/// +/// Additionally, note that we always determine the channel capacity based on +/// the assumption that *someday*, the rack might be fully loaded with compute +/// sleds, even if it isn't *right now*. A rack with 16 sleds could always grow +/// another 16 later! +const NORMAL_NUMBER_OF_SPS: usize = + 32 // 32 compute sleds + + 2 // two switches + + 2 // two power shelves, someday. + ; + +/// What size should we make the +const MAX_BUFFERED_SAMPLE_CHUNKS: usize = { + // Roughly how many times will we poll SPs for each metrics collection + // interval? + let polls_per_metrics_interval = { + let collection_interval_secs: usize = + OXIMETER_COLLECTION_INTERVAL.as_secs() as usize; + let poll_interval_secs: usize = SP_POLL_INTERVAL.as_secs() as usize; + + collection_interval_secs / poll_interval_secs + }; + + // How many sample collection intervals do we want to allow to elapse before + // we start putting stuff on the floor? + // + // Let's say 16. Chosen totally arbitrarily but seems reasonable-ish. + let sloppiness = 16; + let capacity = + NORMAL_NUMBER_OF_SPS * polls_per_metrics_interval * sloppiness; + // Finally, the buffer capacity will probably be allocated in a power of two + // anyway, so let's make sure our thing is a power of two so we don't waste + // the allocation we're gonna get anyway. + capacity.next_power_of_two() +}; + +impl Metrics { + pub fn new( + log: &slog::Logger, + args: &MgsArguments, + cfg: Option, + apictx: Arc, + ) -> Self { + let &MgsArguments { id, rack_id, ref addresses } = args; + + if cfg.as_ref().map(|c| c.disabled).unwrap_or(false) { + slog::warn!(&log, "metrics subsystem disabled by config"); + return Self { inner: None }; + } + + // Create a channel for the SP poller tasks to send samples to the + // Oximeter producer endpoint. 
+ // + // A broadcast channel is used here, not because we are actually + // multi-consumer (`Producer::produce` is never called concurrently), + // but because the broadcast channel has properly ring-buffer-like + // behavior, where earlier messages are discarded, rather than exerting + // backpressure on senders (as Tokio's MPSC channel does). This + // is what we want, as we would prefer a full buffer to result in + // clobbering the oldest measurements, rather than leaving the newest + // ones on the floor. + let (sample_tx, sample_rx) = + broadcast::channel(MAX_BUFFERED_SAMPLE_CHUNKS); + + // Using a channel for this is, admittedly, a bit of an end-run around + // the `OnceLock` on the `ServerContext` that *also* stores the rack ID, + // but it has the nice benefit of allowing the `PollerManager` task to _await_ + // the rack ID being set...we might want to change other code to use a + // similar approach in the future. + let (rack_id_tx, rack_id_rx) = oneshot::channel(); + let rack_id_tx = if let Some(rack_id) = rack_id { + rack_id_tx.send(rack_id).expect( + "we just created the channel; it therefore will not be \ + closed", + ); + None + } else { + Some(rack_id_tx) + }; + + tokio::spawn(start_pollers( + log.new(slog::o!("component" => "sensor-poller")), + apictx.clone(), + rack_id_rx, + id, + sample_tx, + )); + + let (addrs_tx, addrs_rx) = + tokio::sync::watch::channel(addresses.clone()); + let server = { + let log = log.new(slog::o!("component" => "producer-server")); + let registry = ProducerRegistry::with_id(id); + registry + .register_producer(Producer { sample_rx, log: log.clone() }) + // TODO(ben): when you change `register_producer` to not return + // a `Result`, delete this `expect`. thanks in advance! :) + .expect( + "`ProducerRegistry::register_producer()` will never \ + actually return an `Err`, so this shouldn't ever \ + happen...", + ); + + tokio::spawn( + ServerManager { log, addrs: addrs_rx, registry }.run(cfg), + ) + }; + Self { inner: Some(Handles { addrs_tx, rack_id_tx, server }) } + } + + pub fn set_rack_id(&mut self, rack_id: Uuid) { + let tx = self.inner.as_mut().and_then(|i| i.rack_id_tx.take()); + if let Some(tx) = tx { + // If the task that starts sensor pollers has gone away already, + // we're probably shutting down, and shouldn't panic. + let _ = tx.send(rack_id); + } + // Ignoring duplicate attempt to set the rack ID... + } + + pub async fn update_server_addrs(&self, new_addrs: &[SocketAddrV6]) { + if let Some(ref inner) = self.inner { + inner.addrs_tx.send_if_modified(|current_addrs| { + if current_addrs.len() == new_addrs.len() + // N.B. that we could make this "faster" with a `HashSet`, + // but...the size of this Vec of addresses is probably going to + // two or three items, max, so the linear scan actually probably + // outperforms it... + && current_addrs.iter().all(|addr| new_addrs.contains(addr)) + { + return false; + } + + // Reuse existing `Vec` capacity if possible.This is almost + // certainly not performance-critical, but it makes me feel happy. + current_addrs.clear(); + current_addrs.extend_from_slice(new_addrs); + true + }); + } + } +} + +impl Drop for Metrics { + fn drop(&mut self) { + // Clean up our children on drop. + if let Some(ref mut inner) = self.inner { + inner.server.abort(); + } + } +} + +impl oximeter::Producer for Producer { + fn produce( + &mut self, + ) -> Result>, MetricsError> { + // Drain all samples currently in the queue into a `Vec`. + // + // N.B. 
it may be tempting to pursue an alternative design where we + // implement `Iterator` for a `broadcast::Receiver>` and + // just return that using `Receiver::resubscribe`...DON'T DO THAT! The + // `resubscribe` function creates a receiver at the current *tail* of + // the ringbuffer, so it won't see any samples produced *before* now. + // Which is the opposite of what we want! + let mut samples = Vec::with_capacity(self.sample_rx.len()); + // Because we receive the individual samples in a `Vec` of all samples + // produced by a poller, let's also sum the length of each of those + // `Vec`s here, so we can log it later. + let mut total_samples = 0; + // Also, track whether any sample chunks were dropped off the end of the + // ring buffer. + let mut dropped_chunks = 0; + + use broadcast::error::TryRecvError; + loop { + match self.sample_rx.try_recv() { + Ok(sample_chunk) => { + total_samples += sample_chunk.len(); + samples.push(sample_chunk) + } + // This error indicates that an old ringbuffer entry was + // overwritten. That's fine, just get the next one. + Err(TryRecvError::Lagged(dropped)) => { + dropped_chunks += dropped; + } + // We've drained all currently available samples! We're done here! + Err(TryRecvError::Empty) => break, + // This should only happen when shutting down. + Err(TryRecvError::Closed) => { + slog::debug!(&self.log, "sample producer channel closed"); + break; + } + } + } + + if dropped_chunks > 0 { + slog::info!( + &self.log, + "produced metric samples. some old sample chunks were dropped!"; + "samples" => total_samples, + "sample_chunks" => samples.len(), + "dropped_chunks" => dropped_chunks, + ); + } else { + slog::debug!( + &self.log, + "produced metric samples"; + "samples" => total_samples, + "sample_chunks" => samples.len(), + ); + } + + // There you go, that's all I've got. + Ok(Box::new(samples.into_iter().flatten())) + } +} + +async fn start_pollers( + log: slog::Logger, + apictx: Arc, + rack_id: oneshot::Receiver, + mgs_id: Uuid, + sample_tx: broadcast::Sender>, +) -> anyhow::Result<()> { + let switch = &apictx.mgmt_switch; + + // First, wait until we know what the rack ID is known... + let rack_id = rack_id + .await + .context("rack ID sender has gone away...we must be shutting down")?; + + // Wait for SP discovery to complete, if it hasn't already. + // TODO(eliza): presently, we busy-poll here. 
It would be nicer to + // replace the `OnceLock` in `ManagementSwitch` + // with a `tokio::sync::watch` + let sps = backoff::retry_notify_ext( + backoff::retry_policy_local(), + || async { switch.all_sps().map_err(backoff::BackoffError::transient) }, + |err, _, elapsed| { + let secs = elapsed.as_secs(); + if secs < 30 { + slog::debug!( + &log, + "waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ); + } else if secs < 180 { + slog::info!( + &log, + "still waiting for SP discovery to complete..."; + "elapsed" => ?elapsed, + "error" => err, + ) + } else { + slog::warn!( + &log, + "we have been waiting for SP discovery to complete \ + for a pretty long time!"; + "elapsed" => ?elapsed, + "error" => err, + ) + } + }, + ) + .await + .context("we should never return a fatal error here")?; + + slog::info!( + &log, + "starting to poll SP sensor data every {SP_POLL_INTERVAL:?}" + ); + + for (spid, _) in sps { + slog::info!( + &log, + "found a new little friend!"; + "sp_slot" => ?spid.slot, + "chassis_type" => ?spid.typ, + ); + + let poller = SpPoller { + spid, + rack_id, + mgs_id, + log: log.new(slog::o!( + "sp_slot" => spid.slot, + "chassis_type" => format!("{:?}", spid.typ), + )), + components: HashMap::new(), + known_state: None, + sample_tx: sample_tx.clone(), + }; + tokio::spawn(poller.run(apictx.clone())); + } + + Ok(()) +} + +impl SpPoller { + async fn run(mut self, apictx: Arc) { + let mut interval = tokio::time::interval(SP_POLL_INTERVAL); + let switch = &apictx.mgmt_switch; + let sp = match switch.sp(self.spid) { + Ok(sp) => sp, + Err(e) => { + // This should never happen, but it's not worth taking down the + // entire management network over that... + const MSG: &'static str = + "the `SpPoller::run` function is only called after \ + discovery completes successfully, and the `SpIdentifier` \ + used was returned by the management switch, \ + so it should be valid."; + if cfg!(debug_assertions) { + unreachable!( + "{MSG} nonetheless, we saw a {e:?} error when looking \ + up {:?}", + self.spid + ); + } else { + slog::error!( + &self.log, + "THIS SHOULDN'T HAPPEN: {MSG}"; + "error" => e, + "sp" => ?self.spid, + ); + return; + } + } + }; + loop { + interval.tick().await; + slog::trace!(&self.log, "interval elapsed, polling SP..."); + + match self.poll(sp).await { + // No sense cluttering the ringbuffer with empty vecs... + Ok(samples) if samples.is_empty() => { + slog::trace!( + &self.log, + "polled SP, no samples returned"; + "num_samples" => 0usize + ); + } + Ok(samples) => { + slog::trace!( + &self.log, + "polled SP successfully"; + "num_samples" => samples.len(), + ); + + if let Err(_) = self.sample_tx.send(samples) { + slog::debug!( + &self.log, + "all sample receiver handles have been dropped! \ + presumably we are shutting down..."; + ); + return; + } + } + // No SP is currently present for this ID. This may change in + // the future: a cubby that is not populated at present may have + // a sled added to it in the future. So, let's wait until it + // changes. + Err(CommunicationError::NoSpDiscovered) => { + slog::info!( + &self.log, + "no SP is present for this slot. waiting for a \ + little buddy to appear..."; + ); + let mut watch = sp.sp_addr_watch().clone(); + loop { + if let Some((addr, port)) = *watch.borrow_and_update() { + // Ladies and gentlemen...we got him! + slog::info!( + &self.log, + "found a SP, resuming polling."; + "sp_addr" => ?addr, + "sp_port" => ?port, + ); + break; + } + + // Wait for an address to be discovered. 
+ slog::debug!(&self.log, "waiting for a SP to appear."); + if watch.changed().await.is_err() { + slog::debug!( + &self.log, + "SP address watch has been closed, presumably \ + we are shutting down"; + ); + return; + } + } + } + Err(error) => { + slog::warn!( + &self.log, + "failed to poll SP, will try again momentarily..."; + "error" => %error, + ); + // TODO(eliza): we should probably have a metric for failed + // SP polls. + } + } + } + } + + async fn poll( + &mut self, + sp: &SingleSp, + ) -> Result, CommunicationError> { + let mut current_state = SpUnderstanding::from(sp.state().await?); + let mut samples = Vec::new(); + // If the SP's state changes dramatically *during* a poll, it may be + // necessary to re-do the metrics scrape, thus the loop. Normally, we + // will only loop a single time, but may retry if necessary. + loop { + // Check if the SP's state has changed. If it has, we need to make sure + // we still know what all of its sensors are. + if Some(¤t_state) != self.known_state.as_ref() { + // The SP's state appears to have changed. Time to make sure our + // understanding of its devices and identity is up to date! + + let chassis_kind = match self.spid.typ { + SpType::Sled => "sled", + SpType::Switch => "switch", + SpType::Power => "power", + }; + let model = stringify_byte_string(¤t_state.model[..]); + let serial = + stringify_byte_string(¤t_state.serial_number[..]); + let hubris_archive_id = + hex::encode(¤t_state.hubris_archive_id); + + slog::debug!( + &self.log, + "our little friend seems to have changed in some kind of way"; + "current_state" => ?current_state, + "known_state" => ?self.known_state, + "new_model" => %model, + "new_serial" => %serial, + "new_hubris_archive_id" => %hubris_archive_id, + ); + + let inv_devices = sp.inventory().await?.devices; + + // Clear out any previously-known devices, and preallocate capacity + // for all the new ones. + self.components.clear(); + self.components.reserve(inv_devices.len()); + + for dev in inv_devices { + // Skip devices which have nothing interesting for us. + if !dev + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + continue; + } + let component_id = match dev.component.as_str() { + Some(c) => Cow::Owned(c.to_string()), + None => { + // These are supposed to always be strings. But, if we + // see one that's not a string, fall back to the hex + // representation rather than panicking. + let hex = hex::encode(dev.component.id); + slog::warn!( + &self.log, + "a SP component ID was not a string! this isn't \ + supposed to happen!"; + "component" => %hex, + "device" => ?dev, + ); + Cow::Owned(hex) + } + }; + + // TODO(eliza): i hate having to clone all these strings for + // every device on the SP...it would be cool if Oximeter let us + // reference count them... + let target = metric::HardwareComponent { + rack_id: self.rack_id, + gateway_id: self.mgs_id, + chassis_model: Cow::Owned(model.clone()), + chassis_revision: current_state.revision, + chassis_kind: Cow::Borrowed(chassis_kind), + chassis_serial: Cow::Owned(serial.clone()), + hubris_archive_id: Cow::Owned( + hubris_archive_id.clone(), + ), + slot: self.spid.slot as u32, + component_kind: Cow::Owned(dev.device), + component_id, + description: Cow::Owned(dev.description), + }; + match self.components.entry(dev.component) { + // Found a new device! 
+ hash_map::Entry::Vacant(entry) => { + slog::debug!( + &self.log, + "discovered a new component!"; + "component_id" => %target.component_id, + "component_kind" => %target.component_kind, + "description" => %target.component_id, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + // We previously had a known device for this thing, but + // the metrics target has changed, so we should reset + // its cumulative metrics. + hash_map::Entry::Occupied(mut entry) + if entry.get().target != target => + { + slog::trace!( + &self.log, + "target has changed, resetting cumulative metrics \ + for component"; + "component" => ?dev.component, + ); + entry.insert(ComponentMetrics { + target, + sensor_errors: HashMap::new(), + poll_errors: HashMap::new(), + }); + } + + // The target for this device hasn't changed, don't reset it. + hash_map::Entry::Occupied(_) => {} + } + } + + self.known_state = Some(current_state); + } + + // We will need capacity for *at least* the number of components on the + // SP --- it will probably be more, as several components have multiple + // measurement channels which will produce independent samples (e.g. a + // power rail will likely have both voltage and current measurements, + // and a device may have multiple rails...) but, this way, we can avoid + // *some* amount of reallocating... + samples.reserve(self.components.len()); + for (c, metrics) in &mut self.components { + // Metrics samples *should* always be well-formed. If we ever emit a + // messed up one, this is a programmer error, and therefore should + // fail in test, but should probably *not* take down the whole + // management gateway in a real-life rack, especially because it's + // probably going to happen again if we were to get restarted. + const BAD_SAMPLE: &str = + "we emitted a bad metrics sample! this should never happen"; + macro_rules! try_sample { + ($sample:expr) => { + match $sample { + Ok(sample) => samples.push(sample), + + Err(err) => { + slog::error!( + &self.log, + "{BAD_SAMPLE}!"; + "error" => %err, + ); + #[cfg(debug_assertions)] + unreachable!("{BAD_SAMPLE}: {err}"); + } + } + } + } + let details = match sp.component_details(*c).await { + Ok(deets) => deets, + // SP seems gone! + Err(CommunicationError::NoSpDiscovered) => { + return Err(CommunicationError::NoSpDiscovered) + } + Err(error) => { + slog::warn!( + &self.log, + "failed to read details on SP component"; + "sp_component" => %c, + "error" => %error, + ); + try_sample!(metrics.poll_error(comms_error_str(error))); + continue; + } + }; + if details.entries.is_empty() { + slog::warn!( + &self.log, + "a component which claimed to have measurement channels \ + had empty details. this seems weird..."; + "sp_component" => %c, + ); + try_sample!(metrics.poll_error("no_measurement_channels")); + continue; + } + + let ComponentMetrics { sensor_errors, target, .. } = metrics; + for d in details.entries { + let ComponentDetails::Measurement(m) = d else { + // If the component details are switch port details rather + // than measurement channels, ignore it for now. + continue; + }; + let sensor: Cow<'static, str> = Cow::Owned(m.name); + + // First, if there's a measurement error, increment the + // error count metric. We will synthesize a missing sample + // for the sensor's metric as well, after we produce the + // measurement error sample. 
+ // + // We do this first so that we only have to clone the + // sensor's name if there's an error, rather than always + // cloning it in *case* there's an error. + if let Err(error) = m.value { + let kind = match m.kind { + MeasurementKind::Temperature => "temperature", + MeasurementKind::Current => "current", + MeasurementKind::Voltage => "voltage", + MeasurementKind::Power => "power", + MeasurementKind::InputCurrent => "input_current", + MeasurementKind::InputVoltage => "input_voltage", + MeasurementKind::Speed => "fan_speed", + }; + let error = match error { + MeasurementError::InvalidSensor => "invalid_sensor", + MeasurementError::NoReading => "no_reading", + MeasurementError::NotPresent => "not_present", + MeasurementError::DeviceError => "device_error", + MeasurementError::DeviceUnavailable => { + "device_unavailable" + } + MeasurementError::DeviceTimeout => "device_timeout", + MeasurementError::DeviceOff => "device_off", + }; + let datum = sensor_errors + .entry(SensorErrorKey { + name: sensor.clone(), + kind, + error, + }) + .or_insert(Cumulative::new(0)); + // TODO(eliza): perhaps we should treat this as + // "level-triggered" and only increment the counter + // when the sensor has *changed* to an errored + // state after we have seen at least one good + // measurement from it since the last time the error + // was observed? + datum.increment(); + try_sample!(Sample::new( + target, + &metric::SensorErrorCount { + error: Cow::Borrowed(error), + sensor: sensor.clone(), + datum: *datum, + sensor_kind: Cow::Borrowed(kind), + }, + )); + } + + // I don't love this massive `match`, but because the + // `Sample::new_missing` constructor is a different function + // from `Sample::new`, we need separate branches for the + // error and not-error cases, rather than just doing + // something to produce a datum from both the `Ok` and + // `Error` cases... 
+ let sample = match (m.value, m.kind) { + (Ok(datum), MeasurementKind::Temperature) => { + Sample::new( + target, + &metric::Temperature { sensor, datum }, + ) + } + (Err(_), MeasurementKind::Temperature) => { + Sample::new_missing( + target, + &metric::Temperature { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Current) => Sample::new( + target, + &metric::Current { sensor, datum }, + ), + (Err(_), MeasurementKind::Current) => { + Sample::new_missing( + target, + &metric::Current { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Voltage) => Sample::new( + target, + &metric::Voltage { sensor, datum }, + ), + + (Err(_), MeasurementKind::Voltage) => { + Sample::new_missing( + target, + &metric::Voltage { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Power) => Sample::new( + target, + &metric::Power { sensor, datum }, + ), + (Err(_), MeasurementKind::Power) => { + Sample::new_missing( + target, + &metric::Power { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::InputCurrent) => { + Sample::new( + target, + &metric::InputCurrent { sensor, datum }, + ) + } + (Err(_), MeasurementKind::InputCurrent) => { + Sample::new_missing( + target, + &metric::InputCurrent { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::InputVoltage) => { + Sample::new( + target, + &metric::InputVoltage { sensor, datum }, + ) + } + (Err(_), MeasurementKind::InputVoltage) => { + Sample::new_missing( + target, + &metric::InputVoltage { sensor, datum: 0.0 }, + ) + } + (Ok(datum), MeasurementKind::Speed) => Sample::new( + target, + &metric::FanSpeed { sensor, datum }, + ), + (Err(_), MeasurementKind::Speed) => { + Sample::new_missing( + target, + &metric::FanSpeed { sensor, datum: 0.0 }, + ) + } + }; + try_sample!(sample); + } + } + + // Now, fetch the SP's state *again*. It is possible that, while we + // were scraping the SP's samples, the SP's identity changed in some + // way: perhaps its version was updated during the poll, or it + // was removed from the rack and replaced with an entirely different + // chassis! If that's the case, some of the samples we collected may + // have a metrics target describing the wrong thing (e.g. they could + // still have the previous firmware's `hubris_archive_id`, if the SP + // was updated). In that case, we need to throw away the samples we + // collected and try again, potentially rebuilding our understanding + // of the SP's inventory. + let state = SpUnderstanding::from(sp.state().await?); + if state == current_state { + // All good, the SP is still who we thought it was! We can + // "commit" this batch of samples + return Ok(samples); + } + + slog::info!( + &self.log, + "SP's state changed mid-poll! discarding current samples and \ + starting over!"; + "new_state" => ?state, + "current_state" => ?current_state, + ); + // Let's reuse the buffer we already have for the next batch of + // samples. + samples.clear(); + //...and try again with the new state. + current_state = state; + } + } +} + +/// The fields of the `gateway_messages` `VersionedSpState` and +/// `SpStateV1`/`SpStateV2`/`SpStateV3` that we actually care about for purposes +/// of determining whether our understanding of the SP's components are still +/// valid. +/// +/// In particular, we throw out the RoT state and the SP's power state, because +/// those changing won't actually invalidate our understanding of the SP's +/// components. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct SpUnderstanding { + hubris_archive_id: [u8; 8], + serial_number: [u8; 32], + model: [u8; 32], + revision: u32, +} + +impl From for SpUnderstanding { + fn from(v: VersionedSpState) -> Self { + match v { + VersionedSpState::V1(gateway_messages::SpStateV1 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + VersionedSpState::V2(gateway_messages::SpStateV2 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + VersionedSpState::V3(gateway_messages::SpStateV3 { + hubris_archive_id, + serial_number, + model, + revision, + .. + }) => Self { hubris_archive_id, serial_number, model, revision }, + } + } +} + +// Reimplement this ourselves because we don't really care about +// reading the RoT state at present. This is unfortunately copied +// from `gateway_messages`. +fn stringify_byte_string(bytes: &[u8]) -> String { + // We expect serial and model numbers to be ASCII and 0-padded: find the first 0 + // byte and convert to a string. If that fails, hexlify the entire slice. + let first_zero = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); + + std::str::from_utf8(&bytes[..first_zero]) + .map(|s| s.to_string()) + .unwrap_or_else(|_err| hex::encode(bytes)) +} + +impl ServerManager { + async fn run(mut self, cfg: Option) -> anyhow::Result<()> { + let (registration_address, bind_loopback) = + if let Some(MetricsConfig { + dev_bind_loopback, + dev_nexus_address, + .. + }) = cfg + { + if dev_bind_loopback || dev_nexus_address.is_some() { + slog::warn!( + &self.log, + "using development metrics configuration overrides!"; + "nexus_address" => ?dev_nexus_address, + "bind_loopback" => dev_bind_loopback, + ); + } + (dev_nexus_address, dev_bind_loopback) + } else { + (None, false) + }; + let id = self.registry.producer_id(); + + let mut current_server: Option = None; + loop { + let current_ip = current_server.as_ref().map(|s| s.address().ip()); + let mut new_ip = None; + for addr in self.addrs.borrow_and_update().iter() { + let &ip = addr.ip(); + // Don't bind the metrics endpoint on ::1 + if ip.is_loopback() && !bind_loopback { + continue; + } + // If our current address is contained in the new addresses, + // no need to rebind. + if current_ip == Some(IpAddr::V6(ip)) { + new_ip = None; + break; + } else { + new_ip = Some(ip); + } + } + + if let Some(ip) = new_ip { + slog::debug!( + &self.log, + "rebinding producer server on new IP"; + "new_ip" => ?ip, + "current_ip" => ?current_ip, + "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL, + "producer_id" => ?id, + ); + let server = { + // Listen on any available socket, using the provided underlay IP. + let address = SocketAddr::new(ip.into(), 0); + + let server_info = ProducerEndpoint { + id, + kind: ProducerKind::ManagementGateway, + address, + interval: OXIMETER_COLLECTION_INTERVAL, + }; + let config = oximeter_producer::Config { + server_info, + registration_address, + request_body_max_bytes: METRIC_REQUEST_MAX_SIZE, + log: oximeter_producer::LogConfig::Logger( + self.log.clone(), + ), + }; + oximeter_producer::Server::with_registry( + self.registry.clone(), + &config, + ) + .context("failed to start producer server")? 
+ }; + + slog::info!( + &self.log, + "bound metrics producer server"; + "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL, + "producer_id" => ?id, + "address" => %server.address(), + ); + + if let Some(old_server) = current_server.replace(server) { + let old_addr = old_server.address(); + if let Err(error) = old_server.close().await { + slog::error!( + &self.log, + "failed to close old metrics producer server"; + "address" => %old_addr, + "error" => %error, + ); + } else { + slog::debug!( + &self.log, + "old metrics producer server shut down"; + "address" => %old_addr, + ) + } + } + } + + // Wait for a subsequent address change. + self.addrs.changed().await?; + } + } +} + +impl ComponentMetrics { + fn poll_error( + &mut self, + error_str: &'static str, + ) -> Result { + let datum = self + .poll_errors + .entry(error_str) + .or_insert_with(|| Cumulative::new(0)); + datum.increment(); + Sample::new( + &self.target, + &metric::PollErrorCount { + error: Cow::Borrowed(error_str), + datum: *datum, + }, + ) + } +} + +fn comms_error_str(error: CommunicationError) -> &'static str { + // TODO(eliza): a bunch of these probably can't be returned by the specific + // operations we try to do. It could be good to make the methods this code + // calls return a smaller enum of just the errors it might actually + // encounter? Figure this out later. + match error { + CommunicationError::NoSpDiscovered => "no_sp_discovered", + CommunicationError::InterfaceError(_) => "interface", + CommunicationError::ScopeIdChangingFrequently { .. } => { + "scope_id_changing_frequently" + } + CommunicationError::JoinMulticast { .. } => "join_multicast", + CommunicationError::UdpSendTo { .. } => "udp_send_to", + CommunicationError::UdpRecv(_) => "udp_recv", + CommunicationError::Deserialize { .. } => "deserialize", + CommunicationError::ExhaustedNumAttempts(_) => "exhausted_num_attempts", + CommunicationError::BadResponseType { .. } => "bad_response_type", + CommunicationError::SpError { .. } => "sp_error", + CommunicationError::BogusSerialConsoleState { .. } => { + "bogus_serial_console_state" + } + CommunicationError::VersionMismatch { .. } => { + "protocol_version_mismatch" + } + CommunicationError::TlvDeserialize { .. } => "tlv_deserialize", + CommunicationError::TlvDecode(_) => "tlv_decode", + CommunicationError::TlvPagination { .. } => "tlv_pagination", + CommunicationError::IpccKeyLookupValueTooLarge => { + "ipcc_key_lookup_value_too_large" + } + CommunicationError::UnexpectedTrailingData(_) => { + "unexpected_trailing_data" + } + CommunicationError::BadTrailingDataSize { .. 
} => { + "bad_trailing_data_size" + } + } +} diff --git a/gateway/tests/integration_tests/component_list.rs b/gateway/tests/integration_tests/component_list.rs index ec876c0783..993dcc9e93 100644 --- a/gateway/tests/integration_tests/component_list.rs +++ b/gateway/tests/integration_tests/component_list.rs @@ -57,7 +57,71 @@ async fn component_list() { capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS .bits(), presence: SpComponentPresence::Failed, - } + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-2".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE Southeast temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-6".to_string(), + device: "at24csw080".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A VPD".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-7".to_string(), + device: "max5970".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A hot swap controller" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-8".to_string(), + device: "nvme_bmc".to_string(), + serial_number: None, + description: "FAKE U.2 A NVMe Basic Management Command" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-39".to_string(), + device: "tmp451".to_string(), + serial_number: None, + description: "FAKE T6 temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-53".to_string(), + device: "max31790".to_string(), + serial_number: None, + description: "FAKE Fan controller".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, ] ); @@ -67,14 +131,89 @@ async fn component_list() { assert_eq!( resp.components, - &[SpComponentInfo { - component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), - serial_number: None, - description: "FAKE host cpu".to_string(), - capabilities: 0, - presence: SpComponentPresence::Present, - },] + &[ + SpComponentInfo { + component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(), + serial_number: None, + description: "FAKE host cpu".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-0".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-1".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE temperature sensor".to_string(), 
+ capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-2".to_string(), + device: "tmp117".to_string(), + serial_number: None, + description: "FAKE Southeast temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-6".to_string(), + device: "at24csw080".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A VPD".to_string(), + capabilities: 0, + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-7".to_string(), + device: "max5970".to_string(), + serial_number: None, + description: "FAKE U.2 Sharkfin A hot swap controller" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-8".to_string(), + device: "nvme_bmc".to_string(), + serial_number: None, + description: "FAKE U.2 A NVMe Basic Management Command" + .to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-39".to_string(), + device: "tmp451".to_string(), + serial_number: None, + description: "FAKE T6 temperature sensor".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + SpComponentInfo { + component: "dev-53".to_string(), + device: "max31790".to_string(), + serial_number: None, + description: "FAKE Fan controller".to_string(), + capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS + .bits(), + presence: SpComponentPresence::Present, + }, + ] ); // Get the component list for switch 0. 
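Aside (not part of the patch): the new `gateway/src/metrics.rs` module above leans on a property of Tokio's broadcast channel that its comments describe — when the buffer fills, the oldest entries are overwritten and a lagging receiver sees a `Lagged` error, rather than senders being back-pressured the way an MPSC channel would. With the constants in the patch (1 Hz SP polling, a 10 s Oximeter collection interval, 36 expected SPs, and a sloppiness factor of 16), `MAX_BUFFERED_SAMPLE_CHUNKS` works out to 36 × 10 × 16 = 5760, rounded up to 8192. The standalone sketch below uses a toy capacity and payload (both assumptions, chosen only for illustration) to demonstrate that ring-buffer-like behavior:

```
use tokio::sync::broadcast;
use tokio::sync::broadcast::error::TryRecvError;

#[tokio::main]
async fn main() {
    // Toy capacity standing in for MAX_BUFFERED_SAMPLE_CHUNKS.
    let (sample_tx, mut sample_rx) = broadcast::channel::<Vec<u32>>(4);

    // Pretend the SP pollers produced more chunks than the buffer holds.
    for i in 0..6u32 {
        sample_tx.send(vec![i]).expect("a receiver still exists");
    }

    // The first receive reports how many old chunks were overwritten...
    match sample_rx.try_recv() {
        Err(TryRecvError::Lagged(dropped)) => {
            println!("dropped {dropped} old sample chunks"); // prints 2
        }
        other => println!("unexpected: {other:?}"),
    }

    // ...and the newest chunks are still buffered and can be drained.
    while let Ok(chunk) = sample_rx.try_recv() {
        println!("kept chunk {chunk:?}"); // [2], [3], [4], [5]
    }
}
```

This is why the patch's `Producer::produce` treats `TryRecvError::Lagged` as "some old sample chunks were dropped" (worth logging, but not an error) instead of as a failure.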
diff --git a/illumos-utils/src/smf_helper.rs b/illumos-utils/src/smf_helper.rs index 2c24ceaa4d..2d29376950 100644 --- a/illumos-utils/src/smf_helper.rs +++ b/illumos-utils/src/smf_helper.rs @@ -77,7 +77,7 @@ impl<'t> SmfHelper<'t> { "addpropvalue", &prop.to_string(), &format!("{}:", valtype.to_string()), - &val.to_string(), + &format!("\"{}\"", val.to_string()), ]) .map_err(|err| Error::ZoneCommand { intent: format!("add {} smf property value", prop.to_string()), diff --git a/internal-dns-cli/Cargo.toml b/internal-dns-cli/Cargo.toml index dae0af0280..3e34c21622 100644 --- a/internal-dns-cli/Cargo.toml +++ b/internal-dns-cli/Cargo.toml @@ -11,9 +11,9 @@ workspace = true anyhow.workspace = true clap.workspace = true dropshot.workspace = true +hickory-resolver.workspace = true internal-dns.workspace = true omicron-common.workspace = true slog.workspace = true tokio.workspace = true -trust-dns-resolver.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index 9e003ed14f..f9875e71a0 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -36,15 +36,17 @@ struct Opt { #[value(rename_all = "kebab-case")] enum ServiceName { Cockroach, - Clickhouse, ClickhouseKeeper, + ClickhouseServer, } impl From for internal_dns::ServiceName { fn from(value: ServiceName) -> Self { match value { ServiceName::Cockroach => internal_dns::ServiceName::Cockroach, - ServiceName::Clickhouse => internal_dns::ServiceName::Clickhouse, + ServiceName::ClickhouseServer => { + internal_dns::ServiceName::ClickhouseServer + } ServiceName::ClickhouseKeeper => { internal_dns::ServiceName::ClickhouseKeeper } @@ -65,10 +67,8 @@ async fn main() -> Result<()> { let resolver = if opt.nameserver_addresses.is_empty() { info!(&log, "using system configuration"); - let async_resolver = - trust_dns_resolver::AsyncResolver::tokio_from_system_conf() - .context("initializing resolver from system configuration")?; - Resolver::new_with_resolver(log.clone(), async_resolver) + Resolver::new_from_system_conf(log.clone()) + .context("initializing resolver from system configuration")? } else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index c08cc012c1..c12035e2cb 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -18,7 +18,7 @@ omicron-uuid-kinds.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index a9ff664030..e9d7ed873d 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -510,6 +510,10 @@ mod test { ServiceName::ClickhouseKeeper.dns_name(), "_clickhouse-keeper._tcp", ); + assert_eq!( + ServiceName::ClickhouseServer.dns_name(), + "_clickhouse-server._tcp", + ); assert_eq!(ServiceName::Cockroach.dns_name(), "_cockroach._tcp",); assert_eq!(ServiceName::InternalDns.dns_name(), "_nameservice._tcp",); assert_eq!(ServiceName::Nexus.dns_name(), "_nexus._tcp",); diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index fdd5dce428..5d3832a417 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -2,24 +2,24 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use hickory_resolver::config::{ + LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::lookup::SrvLookup; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, error, info, trace}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; -use trust_dns_resolver::config::{ - LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::lookup::SrvLookup; -use trust_dns_resolver::TokioAsyncResolver; pub type DnsError = dns_service_client::Error; #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { #[error(transparent)] - Resolve(#[from] trust_dns_resolver::error::ResolveError), + Resolve(#[from] hickory_resolver::error::ResolveError), #[error("Record not found for SRV key: {}", .0.dns_name())] NotFound(crate::ServiceName), @@ -52,6 +52,19 @@ impl reqwest::dns::Resolve for Resolver { } impl Resolver { + /// Construct a new DNS resolver from the system configuration. + pub fn new_from_system_conf( + log: slog::Logger, + ) -> Result { + let (rc, mut opts) = hickory_resolver::system_conf::read_system_conf()?; + // Enable edns for potentially larger records + opts.edns0 = true; + + let resolver = TokioAsyncResolver::tokio(rc, opts); + + Ok(Self { log, resolver }) + } + /// Construct a new DNS resolver from specific DNS server addresses. pub fn new_from_addrs( log: slog::Logger, @@ -66,18 +79,20 @@ impl Resolver { socket_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; opts.num_concurrent_reqs = dns_server_count; // The underlay is IPv6 only, so this helps avoid needless lookups of // the IPv4 variant. opts.ip_strategy = LookupIpStrategy::Ipv6Only; opts.negative_max_ttl = Some(std::time::Duration::from_secs(15)); - let resolver = TokioAsyncResolver::tokio(rc, opts)?; + let resolver = TokioAsyncResolver::tokio(rc, opts); Ok(Self { log, resolver }) } @@ -145,27 +160,6 @@ impl Resolver { self.resolver.clear_cache(); } - /// Looks up a single [`Ipv6Addr`] based on the SRV name. - /// Returns an error if the record does not exist. - // TODO: There are lots of ways this API can expand: Caching, - // actually respecting TTL, looking up ports, etc. - // - // For now, however, it serves as a very simple "get everyone using DNS" - // API that can be improved upon later. - pub async fn lookup_ipv6( - &self, - srv: crate::ServiceName, - ) -> Result { - let name = srv.srv_name(); - debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name); - let response = self.resolver.ipv6_lookup(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(*address) - } - /// Returns the targets of the SRV records for a DNS name /// /// The returned values are generally other DNS names that themselves would @@ -220,6 +214,12 @@ impl Resolver { // TODO-robustness: any callers of this should probably be using // all the targets for a given SRV and not just the first one // we get, see [`Resolver::lookup_all_socket_v6`]. 
+ // + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. pub async fn lookup_socket_v6( &self, service: crate::ServiceName, @@ -313,7 +313,7 @@ impl Resolver { // (1) it returns `IpAddr`'s rather than `SocketAddr`'s // (2) it doesn't actually return all the addresses from the Additional // section of the DNS server's response. - // See bluejekyll/trust-dns#1980 + // See hickory-dns/hickory-dns#1980 // // (1) is not a huge deal as we can try to match up the targets ourselves // to grab the port for creating a `SocketAddr` but (2) means we need to do @@ -350,10 +350,9 @@ impl Resolver { .await .into_iter() .flat_map(move |target| match target { - Ok((ips, port)) => Some( - ips.into_iter() - .map(move |ip| SocketAddrV6::new(ip, port, 0, 0)), - ), + Ok((ips, port)) => Some(ips.into_iter().map(move |aaaa| { + SocketAddrV6::new(aaaa.into(), port, 0, 0) + })), Err((target, err)) => { error!( log, @@ -511,7 +510,7 @@ mod test { assert!( matches!( dns_error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, ), "Saw error: {dns_error}", ); @@ -535,11 +534,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); let resolver = dns_server.resolver().unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip,); + assert_eq!(found_addr.ip(), &ip,); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -617,11 +616,13 @@ mod test { // Look up Cockroach let resolver = dns_server.resolver().unwrap(); - let ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let resolved_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + assert!(cockroach_addrs + .iter() + .any(|addr| addr.ip() == resolved_addr.ip())); // Look up all the Cockroach addresses. let mut ips = @@ -635,18 +636,18 @@ mod test { ); // Look up Clickhouse - let ip = resolver - .lookup_ipv6(ServiceName::Clickhouse) + let addr = resolver + .lookup_socket_v6(ServiceName::Clickhouse) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, clickhouse_addr.ip()); + assert_eq!(addr.ip(), clickhouse_addr.ip()); // Look up Backend Service - let ip = resolver - .lookup_ipv6(srv_backend) + let addr = resolver + .lookup_socket_v6(srv_backend) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, crucible_addr.ip()); + assert_eq!(addr.ip(), crucible_addr.ip()); // If we deploy a new generation that removes all records, then we don't // find anything any more. @@ -657,14 +658,14 @@ mod test { // If we remove the records for all services, we won't find them any // more. (e.g., there's no hidden caching going on) let error = resolver - .lookup_ipv6(ServiceName::Cockroach) + .lookup_socket_v6(ServiceName::Cockroach) .await .expect_err("unexpectedly found records"); assert_matches!( error, ResolveError::Resolve(error) if matches!(error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. } + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. 
} ) ); @@ -694,11 +695,11 @@ mod test { dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip1); + assert_eq!(found_addr.ip(), &ip1); // If we insert the same record with a new address, it should be // updated. @@ -712,11 +713,11 @@ mod test { dns_builder.build_full_config_for_initial_generation(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip2); + assert_eq!(found_addr.ip(), &ip2); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -847,11 +848,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); // Confirm that we can access this record manually. - let found_ip = resolver - .lookup_ipv6(ServiceName::Nexus) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Nexus) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip); + assert_eq!(found_addr.ip(), &ip); // Confirm that the progenitor client can access this record too. let value = client.test_endpoint().await.unwrap(); diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 1128cd8f0f..5b181c7fa0 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -35,6 +35,7 @@ futures.workspace = true gateway-client.workspace = true headers.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true @@ -56,6 +57,7 @@ openssl.workspace = true oximeter-client.workspace = true oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] } oxnet.workspace = true +oxql-types.workspace = true parse-display.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. @@ -87,7 +89,6 @@ tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } tokio-util = { workspace = true, features = ["codec"] } tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true nexus-auth.workspace = true @@ -143,7 +144,7 @@ sp-sim.workspace = true rustls.workspace = true subprocess.workspace = true term.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true tufaceous.workspace = true tufaceous-lib.workspace = true httptest.workspace = true diff --git a/nexus/auth/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs index ccb7218285..5c7fc7af05 100644 --- a/nexus/auth/src/authn/external/mod.rs +++ b/nexus/auth/src/authn/external/mod.rs @@ -13,7 +13,6 @@ use slog::trace; use std::borrow::Borrow; use uuid::Uuid; -pub mod cookies; pub mod session_cookie; pub mod spoof; pub mod token; diff --git a/nexus/auth/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs index 7811bf2826..f6b23308a0 100644 --- a/nexus/auth/src/authn/external/session_cookie.rs +++ b/nexus/auth/src/authn/external/session_cookie.rs @@ -4,7 +4,6 @@ //! 
authn scheme for console that looks up cookie values in a session table -use super::cookies::parse_cookies; use super::{HttpAuthnScheme, Reason, SchemeResult}; use crate::authn; use crate::authn::{Actor, Details}; @@ -13,6 +12,7 @@ use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; use dropshot::HttpError; use http::HeaderValue; +use nexus_types::authn::cookies::parse_cookies; use slog::debug; use uuid::Uuid; diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 74a7356adb..c2fab2de5a 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -22,6 +22,7 @@ impl_enum_type!( #[diesel(sql_type = ProducerKindEnum)] pub enum ProducerKind; + ManagementGateway => b"management_gateway" SledAgent => b"sled_agent" Service => b"service" Instance => b"instance" @@ -30,6 +31,9 @@ impl_enum_type!( impl From for ProducerKind { fn from(kind: internal::nexus::ProducerKind) -> Self { match kind { + internal::nexus::ProducerKind::ManagementGateway => { + ProducerKind::ManagementGateway + } internal::nexus::ProducerKind::SledAgent => ProducerKind::SledAgent, internal::nexus::ProducerKind::Service => ProducerKind::Service, internal::nexus::ProducerKind::Instance => ProducerKind::Instance, @@ -40,6 +44,9 @@ impl From for ProducerKind { impl From for internal::nexus::ProducerKind { fn from(kind: ProducerKind) -> Self { match kind { + ProducerKind::ManagementGateway => { + internal::nexus::ProducerKind::ManagementGateway + } ProducerKind::SledAgent => internal::nexus::ProducerKind::SledAgent, ProducerKind::Service => internal::nexus::ProducerKind::Service, ProducerKind::Instance => internal::nexus::ProducerKind::Instance, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 845da13a44..f630bbbeac 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -139,35 +139,28 @@ table! { table! { switch_port_settings_link_config (port_settings_id, link_name) { port_settings_id -> Uuid, - lldp_service_config_id -> Uuid, link_name -> Text, mtu -> Int4, fec -> crate::SwitchLinkFecEnum, speed -> crate::SwitchLinkSpeedEnum, autoneg -> Bool, + lldp_link_config_id -> Uuid, } } table! { - lldp_service_config (id) { + lldp_link_config (id) { id -> Uuid, enabled -> Bool, - lldp_config_id -> Nullable, - } -} - -table! { - lldp_config (id) { - id -> Uuid, - name -> Text, - description -> Text, + link_name -> Nullable, + link_description -> Nullable, + chassis_id -> Nullable, + system_name -> Nullable, + system_description -> Nullable, + management_ip -> Nullable, time_created -> Timestamptz, time_modified -> Timestamptz, time_deleted -> Nullable, - chassis_id -> Text, - system_name -> Text, - system_description -> Text, - management_ip -> Inet, } } @@ -195,6 +188,7 @@ table! { dst -> Inet, gw -> Inet, vid -> Nullable, + local_pref -> Nullable, } } @@ -1895,7 +1889,8 @@ allow_tables_to_appear_in_same_query!( allow_tables_to_appear_in_same_query!( switch_port, - switch_port_settings_bgp_peer_config + switch_port_settings_bgp_peer_config, + bgp_config ); allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource); diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 1e0caabb02..aef95e6d53 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. 
Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(87, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(91, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,10 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(91, "add-management-gateway-producer-kind"), + KnownVersion::new(90, "lookup-bgp-config-by-asn"), + KnownVersion::new(89, "collapse_lldp_settings"), + KnownVersion::new(88, "route-local-pref"), KnownVersion::new(87, "add-clickhouse-server-enum-variants"), KnownVersion::new(86, "snapshot-replacement"), KnownVersion::new(85, "add-migrations-by-time-created-index"), diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index f790d7d527..09f1327be2 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::schema::{ - lldp_config, lldp_service_config, switch_port, switch_port_settings, + lldp_link_config, switch_port, switch_port_settings, switch_port_settings_address_config, switch_port_settings_bgp_peer_config, switch_port_settings_bgp_peer_config_allow_export, switch_port_settings_bgp_peer_config_allow_import, @@ -14,6 +14,7 @@ use crate::schema::{ }; use crate::{impl_enum_type, SqlU32}; use crate::{SqlU16, SqlU8}; +use chrono::{DateTime, Utc}; use db_macros::Resource; use diesel::AsChangeset; use ipnetwork::IpNetwork; @@ -380,7 +381,7 @@ impl Into for SwitchPortConfig { #[diesel(table_name = switch_port_settings_link_config)] pub struct SwitchPortLinkConfig { pub port_settings_id: Uuid, - pub lldp_service_config_id: Uuid, + pub lldp_link_config_id: Uuid, pub link_name: String, pub mtu: SqlU16, pub fec: SwitchLinkFec, @@ -391,7 +392,7 @@ pub struct SwitchPortLinkConfig { impl SwitchPortLinkConfig { pub fn new( port_settings_id: Uuid, - lldp_service_config_id: Uuid, + lldp_link_config_id: Uuid, link_name: String, mtu: u16, fec: SwitchLinkFec, @@ -400,7 +401,7 @@ impl SwitchPortLinkConfig { ) -> Self { Self { port_settings_id, - lldp_service_config_id, + lldp_link_config_id, link_name, fec, speed, @@ -414,7 +415,7 @@ impl Into for SwitchPortLinkConfig { fn into(self) -> external::SwitchPortLinkConfig { external::SwitchPortLinkConfig { port_settings_id: self.port_settings_id, - lldp_service_config_id: self.lldp_service_config_id, + lldp_link_config_id: self.lldp_link_config_id, link_name: self.link_name.clone(), mtu: self.mtu.into(), fec: self.fec.into(), @@ -434,57 +435,61 @@ impl Into for SwitchPortLinkConfig { Deserialize, AsChangeset, )] -#[diesel(table_name = lldp_service_config)] -pub struct LldpServiceConfig { +#[diesel(table_name = lldp_link_config)] +pub struct LldpLinkConfig { pub id: Uuid, pub enabled: bool, - pub lldp_config_id: Option, -} - -impl LldpServiceConfig { - pub fn new(enabled: bool, lldp_config_id: Option) -> Self { - Self { id: Uuid::new_v4(), enabled, lldp_config_id } + pub link_name: Option, + pub link_description: Option, + pub chassis_id: Option, + pub system_name: Option, + pub system_description: Option, + pub management_ip: Option, + pub time_created: DateTime, + pub time_modified: DateTime, + pub time_deleted: Option>, +} + +impl LldpLinkConfig { + pub fn new( + enabled: bool, + link_name: Option, + 
link_description: Option, + chassis_id: Option, + system_name: Option, + system_description: Option, + management_ip: Option, + ) -> Self { + let now = Utc::now(); + Self { + id: Uuid::new_v4(), + enabled, + link_name, + link_description, + chassis_id, + system_name, + system_description, + management_ip, + time_created: now, + time_modified: now, + time_deleted: None, + } } } -impl Into for LldpServiceConfig { - fn into(self) -> external::LldpServiceConfig { - external::LldpServiceConfig { +// This converts the internal database version of the config into the +// user-facing version. +impl Into for LldpLinkConfig { + fn into(self) -> external::LldpLinkConfig { + external::LldpLinkConfig { id: self.id, - lldp_config_id: self.lldp_config_id, enabled: self.enabled, - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Resource, - Serialize, - Deserialize, -)] -#[diesel(table_name = lldp_config)] -pub struct LldpConfig { - #[diesel(embed)] - pub identity: LldpConfigIdentity, - pub chassis_id: String, - pub system_name: String, - pub system_description: String, - pub management_ip: IpNetwork, -} - -impl Into for LldpConfig { - fn into(self) -> external::LldpConfig { - external::LldpConfig { - identity: self.identity(), + link_name: self.link_name.clone(), + link_description: self.link_description.clone(), chassis_id: self.chassis_id.clone(), system_name: self.system_name.clone(), system_description: self.system_description.clone(), - management_ip: self.management_ip.into(), + management_ip: self.management_ip.map(|a| a.into()), } } } @@ -554,6 +559,7 @@ pub struct SwitchPortRouteConfig { pub dst: IpNetwork, pub gw: IpNetwork, pub vid: Option, + pub local_pref: Option, } impl SwitchPortRouteConfig { @@ -563,8 +569,9 @@ impl SwitchPortRouteConfig { dst: IpNetwork, gw: IpNetwork, vid: Option, + local_pref: Option, ) -> Self { - Self { port_settings_id, interface_name, dst, gw, vid } + Self { port_settings_id, interface_name, dst, gw, vid, local_pref } } } @@ -576,6 +583,7 @@ impl Into for SwitchPortRouteConfig { dst: self.dst.into(), gw: self.gw.into(), vlan_id: self.vid.map(Into::into), + local_pref: self.local_pref.map(Into::into), } } } diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index f4bea0f605..fdb9629543 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -28,7 +28,7 @@ use ref_cast::RefCast; use uuid::Uuid; impl DataStore { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: &OpContext, config: ¶ms::BgpConfigCreate, @@ -37,80 +37,187 @@ impl DataStore { use db::schema::{ bgp_announce_set, bgp_announce_set::dsl as announce_set_dsl, }; - use diesel::sql_types; - use diesel::IntoSql; let conn = self.pool_connection_authorized(opctx).await?; - self.transaction_retry_wrapper("bgp_config_set") - .transaction(&conn, |conn| async move { - let announce_set_id: Uuid = match &config.bgp_announce_set_id { - NameOrId::Name(name) => { - announce_set_dsl::bgp_announce_set + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_config_create") + .transaction(&conn, |conn| { + + let err = err.clone(); + async move { + let announce_set_id = match config.bgp_announce_set_id.clone() { + // Resolve Name to UUID + NameOrId::Name(name) => announce_set_dsl::bgp_announce_set .filter(bgp_announce_set::time_deleted.is_null()) .filter(bgp_announce_set::name.eq(name.to_string())) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) 
- .await? + .await + .map_err(|e| { + let msg = "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + + // We cannot assume that the provided UUID is actually real. + // Lookup the parent record by UUID to verify that it is valid. + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; + + let config = + BgpConfig::from_config_create(config, announce_set_id); + + // Idempotency: + // Check to see if an exact match for the config already exists + let query = dsl::bgp_config + .filter(dsl::name.eq(config.name().to_string())) + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::bgp_announce_set_id.eq(config.bgp_announce_set_id)) + .into_boxed(); + + let query = match config.vrf.clone() { + Some(v) => query.filter(dsl::vrf.eq(v)), + None => query.filter(dsl::vrf.is_null()), + }; + + let query = match config.shaper.clone() { + Some(v) => query.filter(dsl::shaper.eq(v)), + None => query.filter(dsl::shaper.is_null()), + }; + + let query = match config.checker.clone() { + Some(v) => query.filter(dsl::checker.eq(v)), + None => query.filter(dsl::checker.is_null()), + }; + + let matching_config = match query + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + match e { + diesel::result::Error::NotFound => { + info!(opctx.log, "no matching bgp config found"); + Ok(None) + } + _ => { + let msg = "error while checking if bgp config exists"; + error!(opctx.log, "{msg}"; "error" => ?e); + Err(err.bail(Error::internal_error(msg))) + } + } + } + }?; + + // If so, we're done! + if let Some(existing_config) = matching_config { + return Ok(existing_config); } - NameOrId::Id(id) => *id, - }; - let config = - BgpConfig::from_config_create(config, announce_set_id); - - let matching_entry_subquery = dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(dsl::name); - - // SELECT exactly the values we're trying to INSERT, but only - // if it does not already exist. 
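The idempotency check above relies on `into_boxed()` so that the nullable `vrf`, `shaper`, and `checker` columns can be matched exactly: a `None` has to become an `IS NULL` filter, since an SQL `= NULL` comparison never matches. A minimal sketch of the same pattern against a hypothetical `widget` table (names are illustrative, not part of this change):

```rust
use diesel::pg::Pg;
use diesel::prelude::*;

diesel::table! {
    widget (id) {
        id -> Int4,
        name -> Text,
        vrf -> Nullable<Text>,
    }
}

// Build an exact-match query, adding the nullable-column filter dynamically.
fn exact_match(name: &str, vrf: Option<String>) -> widget::BoxedQuery<'static, Pg> {
    use widget::dsl;
    let query = dsl::widget
        .filter(dsl::name.eq(name.to_string()))
        .into_boxed();
    match vrf {
        // A concrete value can be compared directly ...
        Some(v) => query.filter(dsl::vrf.eq(v)),
        // ... but absence must be expressed as IS NULL.
        None => query.filter(dsl::vrf.is_null()),
    }
}
```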
- let new_entry_subquery = diesel::dsl::select(( - config.id().into_sql::(), - config.name().to_string().into_sql::(), - config - .description() - .to_string() - .into_sql::(), - config.asn.into_sql::(), - config.bgp_announce_set_id.into_sql::(), - config - .vrf - .clone() - .into_sql::>(), - Utc::now().into_sql::(), - Utc::now().into_sql::(), - )) - .filter(diesel::dsl::not(diesel::dsl::exists( - matching_entry_subquery, - ))); - - diesel::insert_into(dsl::bgp_config) - .values(new_entry_subquery) - .into_columns(( - dsl::id, - dsl::name, - dsl::description, - dsl::asn, - dsl::bgp_announce_set_id, - dsl::vrf, - dsl::time_created, - dsl::time_modified, - )) - .execute_async(&conn) - .await?; + // TODO: remove once per-switch-multi-asn support is added + // Bail if a conflicting config for this ASN already exists. + // This is a temporary measure until multi-asn-per-switch is supported. + let configs_with_asn: Vec = dsl::bgp_config + .filter(dsl::asn.eq(config.asn)) + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !configs_with_asn.is_empty() { + error!( + opctx.log, + "different config for asn already exists"; + "asn" => ?config.asn, + "requested_config" => ?config, + "conflicting_configs" => ?configs_with_asn + ); + return Err(err.bail(Error::conflict("cannot have more than one configuration per ASN"))); + } - dsl::bgp_config - .filter(dsl::name.eq(Name::from(config.name().clone()))) - .filter(dsl::time_deleted.is_null()) - .select(BgpConfig::as_select()) - .limit(1) - .first_async(&conn) - .await + diesel::insert_into(dsl::bgp_config) + .values(config.clone()) + .returning(BgpConfig::as_returning()) + .get_result_async(&conn) + .await + .map_err(|e | { + let msg = "failed to insert bgp config"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::DatabaseError(kind, _) => { + match kind { + diesel::result::DatabaseErrorKind::UniqueViolation => { + err.bail(Error::conflict("a field that must be unique conflicts with an existing record")) + }, + // technically we don't use Foreign Keys but it doesn't hurt to match on them + // instead of returning a 500 by default in the event that we do switch to Foreign Keys + diesel::result::DatabaseErrorKind::ForeignKeyViolation => { + err.bail(Error::conflict("an id field references an object that does not exist")) + } + diesel::result::DatabaseErrorKind::NotNullViolation => { + err.bail(Error::invalid_request("a required field was not provided")) + } + diesel::result::DatabaseErrorKind::CheckViolation => { + err.bail(Error::invalid_request("one or more fields are not valid values")) + }, + _ => err.bail(Error::internal_error(msg)), + } + } + _ => err.bail(Error::internal_error(msg)), + } + }) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e|{ + let msg = "bgp_config_create failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + // The transaction handler errors along with any errors emitted via "?" + // will fall through to here. These errors should truly be 500s + // because they are an internal hiccup that likely was not triggered by + // user input. 
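The `map_err` above folds database-level failures into the public API error space. The same mapping, pulled out into a hypothetical free-standing helper for illustration (the message strings mirror the ones used in this change):

```rust
use omicron_common::api::external::Error;

// Translate a Diesel insert failure into a user-facing error, keeping the
// common constraint violations as request-level errors and everything else
// as a generic internal error.
fn public_insert_error(e: diesel::result::Error) -> Error {
    use diesel::result::{DatabaseErrorKind, Error as DieselError};
    match e {
        DieselError::DatabaseError(DatabaseErrorKind::UniqueViolation, _) => {
            Error::conflict("a field that must be unique conflicts with an existing record")
        }
        DieselError::DatabaseError(DatabaseErrorKind::NotNullViolation, _) => {
            Error::invalid_request("a required field was not provided")
        }
        DieselError::DatabaseError(DatabaseErrorKind::CheckViolation, _) => {
            Error::invalid_request("one or more fields are not valid values")
        }
        _ => Error::internal_error("failed to insert bgp config"),
    }
}
```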
+ error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn bgp_config_delete( @@ -124,11 +231,6 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config as sps_bgp_peer_config; use db::schema::switch_port_settings_bgp_peer_config::dsl as sps_bgp_peer_config_dsl; - #[derive(Debug)] - enum BgpConfigDeleteError { - ConfigInUse, - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_config_delete") @@ -138,26 +240,60 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let id: Uuid = match name_or_id { - NameOrId::Id(id) => id, - NameOrId::Name(name) => { + NameOrId::Id(id) => bgp_config_dsl::bgp_config + .filter(bgp_config::id.eq(id)) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::BgpConfig, + &id, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + NameOrId::Name(name) => bgp_config_dsl::bgp_config - .filter(bgp_config::name.eq(name.to_string())) - .select(bgp_config::id) - .limit(1) - .first_async::(&conn) - .await? - } - }; + .filter(bgp_config::name.eq(name.to_string())) + .select(bgp_config::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + )) + } + _ => err.bail(Error::internal_error(msg)), + + } + }), + }?; let count = sps_bgp_peer_config_dsl::switch_port_settings_bgp_peer_config - .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) - .count() - .execute_async(&conn) - .await?; + .filter(sps_bgp_peer_config::bgp_config_id.eq(id)) + .count() + .execute_async(&conn) + .await?; if count > 0 { - return Err(err.bail(BgpConfigDeleteError::ConfigInUse)); + return Err(err.bail(Error::conflict("BGP Config is in use and cannot be deleted"))); } diesel::update(bgp_config_dsl::bgp_config) @@ -171,13 +307,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_config_delete failed"; if let Some(err) = err.take() { - match err { - BgpConfigDeleteError::ConfigInUse => { - Error::invalid_request("BGP config in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -194,24 +329,45 @@ impl DataStore { let name_or_id = name_or_id.clone(); - let config = match name_or_id { + match name_or_id { NameOrId::Name(name) => dsl::bgp_config .filter(bgp_config::name.eq(name.to_string())) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), + .map_err(|e| { + let msg = "failed to lookup bgp config by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_name( + ResourceType::BgpConfig, + &name, + ) + } + _ => Error::internal_error(msg), + } + }), NameOrId::Id(id) => dsl::bgp_config .filter(bgp_config::id.eq(id)) .select(BgpConfig::as_select()) .limit(1) .first_async::(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)), - }?; + .map_err(|e| { + let msg = 
"failed to lookup bgp config by id"; + error!(opctx.log, "{msg}"; "error" => ?e); - Ok(config) + match e { + diesel::result::Error::NotFound => { + Error::not_found_by_id(ResourceType::BgpConfig, &id) + } + _ => Error::internal_error(msg), + } + }), + } } pub async fn bgp_config_list( @@ -237,10 +393,42 @@ impl DataStore { .select(BgpConfig::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + error!(opctx.log, "bgp_config_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) + } + + pub async fn bgp_announce_set_list( + &self, + opctx: &OpContext, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + use db::schema::bgp_announce_set::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::bgp_announce_set, dsl::id, &pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::bgp_announce_set, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::time_deleted.is_null()) + .select(BgpAnnounceSet::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + error!(opctx.log, "bgp_announce_set_list failed"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } - pub async fn bgp_announce_list( + pub async fn bgp_announcement_list( &self, opctx: &OpContext, sel: ¶ms::BgpAnnounceSetSelector, @@ -250,11 +438,6 @@ impl DataStore { bgp_announcement::dsl as announce_dsl, }; - #[derive(Debug)] - enum BgpAnnounceListError { - AnnounceSetNotFound(Name), - } - let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_announce_list") @@ -264,7 +447,26 @@ impl DataStore { let name_or_id = sel.name_or_id.clone(); let announce_id: Uuid = match name_or_id { - NameOrId::Id(id) => id, + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set .filter( @@ -278,15 +480,23 @@ impl DataStore { .first_async::(&conn) .await .map_err(|e| { - err.bail_retryable_or( - e, - BgpAnnounceListError::AnnounceSetNotFound( - Name::from(name.clone()), - ) - ) - })? 
+ let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - }; + }?; let result = announce_dsl::bgp_announcement .filter(announce_dsl::announce_set_id.eq(announce_id)) @@ -299,21 +509,18 @@ impl DataStore { }) .await .map_err(|e| { + error!(opctx.log, "bgp_announce_list failed"; "error" => ?e); if let Some(err) = err.take() { - match err { - BgpAnnounceListError::AnnounceSetNotFound(name) => { - Error::not_found_by_name( - ResourceType::BgpAnnounceSet, - &name, - ) - } - } + err } else { public_error_from_diesel(e, ErrorHandler::Server) } }) } + // TODO: it seems this logic actually performs a find OR create for an announce set, and then replaces its child announcements. + // This might be changed in omicron#6016 to an api that creates an announce set then allows adding / removal of announcements + // to match how our other APIs work. pub async fn bgp_update_announce_set( &self, opctx: &OpContext, @@ -383,9 +590,16 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_update_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } + // TODO: it seems this logic actually performs a create OR update of an announce set and its child announcements + // (for example, it will add missing announcements). This might be changed in omicron#6016 to an api that creates an announce set + // then allows adding / removal of announcements to match how our other APIs work. pub async fn bgp_create_announce_set( &self, opctx: &OpContext, @@ -466,7 +680,11 @@ impl DataStore { Ok((db_as, db_annoucements)) }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| { + let msg = "bgp_create_announce_set failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + }) } pub async fn bgp_delete_announce_set( @@ -481,11 +699,6 @@ impl DataStore { use db::schema::bgp_config; use db::schema::bgp_config::dsl as bgp_config_dsl; - #[derive(Debug)] - enum BgpAnnounceSetDeleteError { - AnnounceSetInUse, - } - let conn = self.pool_connection_authorized(opctx).await?; let name_or_id = sel.name_or_id.clone(); @@ -496,18 +709,56 @@ impl DataStore { let name_or_id = name_or_id.clone(); async move { let id: Uuid = match name_or_id { + NameOrId::Id(id) => announce_set_dsl::bgp_announce_set + .filter(bgp_announce_set::time_deleted.is_null()) + .filter(bgp_announce_set::id.eq(id)) + .select(bgp_announce_set::id) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup announce set by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_id( + ResourceType::BgpAnnounceSet, + &id, + )), + _ => err.bail(Error::internal_error(msg)), + } + }), NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set + .filter( + bgp_announce_set::time_deleted.is_null(), + ) .filter( bgp_announce_set::name.eq(name.to_string()), ) .select(bgp_announce_set::id) .limit(1) .first_async::(&conn) - .await? 
+ .await + .map_err(|e| { + let msg = + "failed to lookup announce set by name"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => err + .bail(Error::not_found_by_name( + ResourceType::BgpAnnounceSet, + &name, + )), + _ => { + err.bail(Error::internal_error(msg)) + } + } + }) } - NameOrId::Id(id) => id, - }; + }?; let count = bgp_config_dsl::bgp_config .filter(bgp_config::bgp_announce_set_id.eq(id)) @@ -516,9 +767,9 @@ impl DataStore { .await?; if count > 0 { - return Err(err.bail( - BgpAnnounceSetDeleteError::AnnounceSetInUse, - )); + return Err( + err.bail(Error::conflict("announce set in use")) + ); } diesel::update(announce_set_dsl::bgp_announce_set) @@ -537,13 +788,12 @@ impl DataStore { }) .await .map_err(|e| { + let msg = "bgp_delete_announce_set failed"; if let Some(err) = err.take() { - match err { - BgpAnnounceSetDeleteError::AnnounceSetInUse => { - Error::invalid_request("BGP announce set in use") - } - } + error!(opctx.log, "{msg}"; "error" => ?err); + err } else { + error!(opctx.log, "{msg}"; "error" => ?e); public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -563,7 +813,11 @@ impl DataStore { .select(BgpPeerView::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "bgp_peer_configs failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -583,7 +837,11 @@ impl DataStore { .filter(dsl::addr.eq(addr)) .load_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + let msg = "communities_for_peer failed"; + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + })?; Ok(results) } @@ -601,24 +859,40 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_export::dsl; let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .filter(db_peer::addr.eq(addr)) - .select(db_peer::allow_export_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("bgp_allow_export_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_export_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup export settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_export - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_export + .filter( + 
db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -627,12 +901,20 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_export_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } pub async fn allow_import_for_peer( @@ -647,25 +929,42 @@ impl DataStore { use db::schema::switch_port_settings_bgp_peer_config_allow_import as db_allow; use db::schema::switch_port_settings_bgp_peer_config_allow_import::dsl; + let err = OptionalError::new(); let conn = self.pool_connection_authorized(opctx).await?; - let result = self - .transaction_retry_wrapper("bgp_allow_export_for_peer") - .transaction(&conn, |conn| async move { - let active = peer_dsl::switch_port_settings_bgp_peer_config - .filter(db_peer::port_settings_id.eq(port_settings_id)) - .filter(db_peer::addr.eq(addr)) - .select(db_peer::allow_import_list_active) - .limit(1) - .first_async::(&conn) - .await?; - - if !active { - return Ok(None); - } + self + .transaction_retry_wrapper("bgp_allow_import_for_peer") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + let active = peer_dsl::switch_port_settings_bgp_peer_config + .filter(db_peer::port_settings_id.eq(port_settings_id)) + .filter(db_peer::addr.eq(addr)) + .select(db_peer::allow_import_list_active) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e| { + let msg = "failed to lookup import settings for peer"; + error!(opctx.log, "{msg}"; "error" => ?e); + + match e { + diesel::result::Error::NotFound => { + let not_found_msg = format!("peer with {addr} not found for port settings {port_settings_id}"); + err.bail(Error::non_resourcetype_not_found(not_found_msg)) + }, + _ => err.bail(Error::internal_error(msg)), + } + })?; + + if !active { + return Ok(None); + } - let list = - dsl::switch_port_settings_bgp_peer_config_allow_import - .filter(db_allow::port_settings_id.eq(port_settings_id)) + let list = + dsl::switch_port_settings_bgp_peer_config_allow_import + .filter( + db_allow::port_settings_id.eq(port_settings_id), + ) .filter( db_allow::interface_name .eq(interface_name.to_owned()), @@ -674,11 +973,19 @@ impl DataStore { .load_async(&conn) .await?; - Ok(Some(list)) + Ok(Some(list)) + } }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(result) + .map_err(|e| { + let msg = "allow_import_for_peer failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + }) } } diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs index e7a975fa69..a38cfb8935 100644 --- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs +++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs @@ -153,10 +153,22 @@ mod test { ); let settings = datastore.cockroachdb_settings(&opctx).await.unwrap(); - // With a fresh cluster, this is the expected state - let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(); - assert_eq!(settings.version, version); - assert_eq!(settings.preserve_downgrade, ""); + let version: 
CockroachDbClusterVersion = + settings.version.parse().expect("unexpected cluster version"); + if settings.preserve_downgrade == "" { + // This is the expected value while running tests normally. + assert_eq!(version, CockroachDbClusterVersion::NEWLY_INITIALIZED); + } else if settings.preserve_downgrade == version.to_string() { + // This is the expected value if the cluster was created on a + // previous version and `cluster.preserve_downgrade_option` was set. + assert_eq!(version, CockroachDbClusterVersion::POLICY); + } else { + panic!( + "`cluster.preserve_downgrade_option` is {:?}, + but it should be empty or \"{}\"", + settings.preserve_downgrade, version + ); + } // Verify that if a fingerprint is wrong, we get the expected SQL error // back. @@ -165,7 +177,7 @@ mod test { &opctx, String::new(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await else { @@ -190,7 +202,7 @@ mod test { &opctx, settings.state_fingerprint.clone(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await .unwrap(); @@ -198,8 +210,8 @@ mod test { datastore.cockroachdb_settings(&opctx).await.unwrap(), CockroachDbSettings { state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: version.clone(), + version: version.to_string(), + preserve_downgrade: version.to_string(), } ); } @@ -215,14 +227,24 @@ mod test { ) .await .unwrap(); - assert_eq!( - datastore.cockroachdb_settings(&opctx).await.unwrap(), - CockroachDbSettings { - state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: String::new(), - } - ); + let settings = + datastore.cockroachdb_settings(&opctx).await.unwrap(); + if version == CockroachDbClusterVersion::NEWLY_INITIALIZED { + assert_eq!( + settings, + CockroachDbSettings { + state_fingerprint: settings.state_fingerprint.clone(), + version: version.to_string(), + preserve_downgrade: String::new(), + } + ); + } else { + // Resetting it permits auto-finalization, so the state + // fingerprint and version are not predictable until that + // completes, but we can still verify that the variable was + // reset. 
+ assert!(settings.preserve_downgrade.is_empty()); + } } db.cleanup().await.unwrap(); diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 13c3708e4a..2cd21754f8 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -366,6 +366,7 @@ impl DataStore { } } +#[derive(Clone, Copy, Debug)] pub enum UpdatePrecondition { DontCare, Null, diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 939929e665..0b626804e1 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -9,7 +9,6 @@ use super::SQL_BATCH_SIZE; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::model::Generation; use crate::db::pagination::paginated; use crate::db::pagination::paginated_multicolumn; use crate::db::pagination::Paginator; @@ -17,10 +16,12 @@ use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_auth::authz; use nexus_auth::context::OpContext; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::ops::Add; impl DataStore { pub async fn saga_create( @@ -80,21 +81,15 @@ impl DataStore { /// now, we're implementing saga adoption only in cases where the original /// SEC/Nexus has been expunged.) /// - /// However, in the future, it may be possible for multiple SECs to try and - /// update the same saga, and overwrite each other's state. For example, - /// one SEC might try and update the state to Running while the other one - /// updates it to Done. That case would have to be carefully considered and - /// tested here, probably using the (currently unused) - /// `current_adopt_generation` field to enable optimistic concurrency. - /// - /// To reiterate, we are *not* considering the case where several SECs try - /// to update the same saga. That will be a future enhancement. + /// It's conceivable that multiple SECs do try to udpate the same saga + /// concurrently. That would be a bug. This is noticed and prevented by + /// making this query conditional on current_sec and failing with a conflict + /// if the current SEC has changed. pub async fn saga_update_state( &self, saga_id: steno::SagaId, new_state: steno::SagaCachedState, current_sec: db::saga_types::SecId, - current_adopt_generation: Generation, ) -> Result<(), Error> { use db::schema::saga::dsl; @@ -102,7 +97,6 @@ impl DataStore { let result = diesel::update(dsl::saga) .filter(dsl::id.eq(saga_id)) .filter(dsl::current_sec.eq(current_sec)) - .filter(dsl::adopt_generation.eq(current_adopt_generation)) .set(dsl::saga_state.eq(db::saga_types::SagaCachedState(new_state))) .check_if_exists::(saga_id) .execute_and_check(&*self.pool_connection_unauthorized().await?) 
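A usage-level illustration of the race the comment above describes, assuming two SEC ids `sec_a` and `sec_b` and a saga that has just been reassigned from A to B (the surrounding setup is hypothetical; the calls mirror the new three-argument `saga_update_state` signature):

```rust
// SEC B, the new owner, records its state change; current_sec matches, so
// the conditional UPDATE applies.
datastore
    .saga_update_state(saga_id, steno::SagaCachedState::Done, sec_b)
    .await?;

// SEC A, the old owner, still believes it owns the saga. Its UPDATE is
// filtered on current_sec = sec_a, which no longer matches, so the query
// reports NotUpdatedButExists and the call fails with an invalid_request
// error instead of silently overwriting B's state.
datastore
    .saga_update_state(saga_id, steno::SagaCachedState::Running, sec_a)
    .await
    .expect_err("update from a stale SEC should be rejected");
```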
@@ -119,20 +113,19 @@ impl DataStore { match result.status { UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => Err(Error::invalid_request( - format!( - "failed to update saga {:?} with state {:?}: preconditions not met: \ - expected current_sec = {:?}, adopt_generation = {:?}, \ - but found current_sec = {:?}, adopt_generation = {:?}, state = {:?}", + UpdateStatus::NotUpdatedButExists => { + Err(Error::invalid_request(format!( + "failed to update saga {:?} with state {:?}:\ + preconditions not met: \ + expected current_sec = {:?}, \ + but found current_sec = {:?}, state = {:?}", saga_id, new_state, current_sec, - current_adopt_generation, result.found.current_sec, - result.found.adopt_generation, result.found.saga_state, - ) - )), + ))) + } } } @@ -207,16 +200,75 @@ impl DataStore { Ok(events) } + + /// Updates all sagas that are currently assigned to any of the SEC ids in + /// `sec_ids`, assigning them to `new_sec_id` instead. + /// + /// Generally, an SEC id corresponds to a Nexus id. This change causes the + /// Nexus instance `new_sec_id` to discover these sagas and resume executing + /// them the next time it performs saga recovery (which is normally on + /// startup and periodically). Generally, `new_sec_id` is the _current_ + /// Nexus instance and the caller should activate the saga recovery + /// background task after calling this function to immediately resume the + /// newly-assigned sagas. + /// + /// **Warning:** This operation is only safe if the other SECs `sec_ids` are + /// not currently running. If those SECs are still running, then two (or + /// more) SECs may wind up running the same saga concurrently. This would + /// likely violate implicit assumptions made by various saga actions, + /// leading to hard-to-debug errors and state corruption. + pub async fn sagas_reassign_sec( + &self, + opctx: &OpContext, + sec_ids: &[db::saga_types::SecId], + new_sec_id: db::saga_types::SecId, + ) -> Result { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let now = chrono::Utc::now(); + let conn = self.pool_connection_authorized(opctx).await?; + + // It would be more robust to do this in batches. However, Diesel does + // not appear to support the UPDATE ... LIMIT syntax using the normal + // builder. In practice, it's extremely unlikely we'd have so many + // in-progress sagas that this would be a problem. 
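As the doc comment notes, the intended caller-side flow is to reassign the sagas and then immediately trigger saga recovery on the adopting Nexus. A hypothetical sketch of that flow (the `saga_recovery_task` handle and the surrounding ids are illustrative, not APIs introduced by this change):

```rust
// Reassign all in-progress sagas owned by the expunged Nexus instances to
// this Nexus' SEC id.
let nreassigned = datastore
    .sagas_reassign_sec(&opctx, &expunged_sec_ids, my_sec_id)
    .await?;

if nreassigned > 0 {
    // Activate the saga recovery background task so the newly adopted sagas
    // are picked up now, rather than on the next periodic activation.
    // (Hypothetical handle; the real activation mechanism is outside this diff.)
    saga_recovery_task.activate();
}
```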
+ use db::schema::saga::dsl; + diesel::update( + dsl::saga + .filter(dsl::current_sec.is_not_null()) + .filter( + dsl::current_sec.eq_any( + sec_ids.into_iter().cloned().collect::>(), + ), + ) + .filter(dsl::saga_state.ne(db::saga_types::SagaCachedState( + steno::SagaCachedState::Done, + ))), + ) + .set(( + dsl::current_sec.eq(Some(new_sec_id)), + dsl::adopt_generation.eq(dsl::adopt_generation.add(1)), + dsl::adopt_time.eq(now), + )) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] mod test { use super::*; use crate::db::datastore::test_utils::datastore_test; + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncSimpleConnection; + use db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_model::{SagaNodeEvent, SecId}; use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::Generation; use omicron_test_utils::dev; use rand::seq::SliceRandom; + use std::collections::BTreeSet; use uuid::Uuid; // Tests pagination in listing sagas that are candidates for recovery @@ -440,7 +492,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Running, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Running again"); @@ -451,7 +502,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Done"); @@ -463,7 +513,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Done again"); @@ -509,4 +558,156 @@ mod test { SagaNodeEvent::new(event, self.sec_id) } } + + #[tokio::test] + async fn test_saga_reassignment() { + // Test setup + let logctx = dev::test_setup_log("test_saga_reassignment"); + let mut db = test_setup_database(&logctx.log).await; + let (_, datastore) = datastore_test(&logctx, &db).await; + let opctx = OpContext::for_tests(logctx.log.clone(), datastore.clone()); + + // Populate the database with a few different sagas: + // + // - assigned to SEC A: done, running, and unwinding + // - assigned to SEC B: done, running, and unwinding + // - assigned to SEC C: done, running, and unwinding + // - assigned to SEC D: done, running, and unwinding + // + // Then we'll reassign SECs B's and C's sagas to SEC A and check exactly + // which sagas were changed by this. This exercises: + // - that we don't touch A's sagas (the one we're assigning *to*) + // - that we do touch both B's and C's sagas (the ones we're assigning + // *from*) + // - that we don't touch D's sagas (some other SEC) + // - that we don't touch any "done" sagas + // - that we do touch both running and unwinding sagas + let mut sagas_to_insert = Vec::new(); + let sec_a = SecId(Uuid::new_v4()); + let sec_b = SecId(Uuid::new_v4()); + let sec_c = SecId(Uuid::new_v4()); + let sec_d = SecId(Uuid::new_v4()); + + for sec_id in [sec_a, sec_b, sec_c, sec_d] { + for state in [ + steno::SagaCachedState::Running, + steno::SagaCachedState::Unwinding, + steno::SagaCachedState::Done, + ] { + let params = steno::SagaCreateParams { + id: steno::SagaId(Uuid::new_v4()), + name: steno::SagaName::new("tewst saga"), + dag: serde_json::value::Value::Null, + state, + }; + + sagas_to_insert + .push(db::model::saga_types::Saga::new(sec_id, params)); + } + } + println!("sagas to insert: {:?}", sagas_to_insert); + + // These two sets are complements, but we write out the conditions to + // double-check that we've got it right. 
+ let sagas_affected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + ((saga.creator == sec_b || saga.creator == sec_c) + && (saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding)) + .then(|| saga.id) + }) + .collect(); + let sagas_unaffected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + (saga.creator == sec_a + || saga.creator == sec_d + || saga.saga_state.0 == steno::SagaCachedState::Done) + .then(|| saga.id) + }) + .collect(); + println!("sagas affected: {:?}", sagas_affected); + println!("sagas UNaffected: {:?}", sagas_unaffected); + assert_eq!(sagas_affected.intersection(&sagas_unaffected).count(), 0); + assert_eq!( + sagas_affected.len() + sagas_unaffected.len(), + sagas_to_insert.len() + ); + + // Insert the sagas. + let count = { + use db::schema::saga::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + diesel::insert_into(dsl::saga) + .values(sagas_to_insert) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .expect("successful insertion") + }; + assert_eq!(count, sagas_affected.len() + sagas_unaffected.len()); + + // Reassign uncompleted sagas from SECs B and C to SEC A. + let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + + // Fetch all the sagas and check their states. + let all_sagas: Vec<_> = datastore + .pool_connection_for_tests() + .await + .unwrap() + .transaction_async(|conn| async move { + use db::schema::saga::dsl; + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + dsl::saga + .select(nexus_db_model::Saga::as_select()) + .load_async(&conn) + .await + }) + .await + .unwrap(); + + for saga in all_sagas { + println!("checking saga: {:?}", saga); + let current_sec = saga.current_sec.unwrap(); + if sagas_affected.contains(&saga.id) { + assert!(saga.creator == sec_b || saga.creator == sec_c); + assert_eq!(current_sec, sec_a); + assert_eq!(*saga.adopt_generation, Generation::from(2)); + assert!( + saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding + ); + } else if sagas_unaffected.contains(&saga.id) { + assert_eq!(current_sec, saga.creator); + assert_eq!(*saga.adopt_generation, Generation::from(1)); + // Its SEC and state could be anything since we've deliberately + // included sagas with various states and SECs that should not + // be affected by the reassignment. + } else { + println!( + "ignoring saga that was not created by this test: {:?}", + saga + ); + } + } + + assert_eq!(nreassigned, sagas_affected.len()); + + // If we do it again, we should make no changes. 
+ let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + assert_eq!(nreassigned, 0); + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 159933dce0..2e09c1ac13 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -15,7 +15,7 @@ use crate::db::datastore::UpdatePrecondition; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::{ - LldpServiceConfig, Name, SwitchInterfaceConfig, SwitchPort, + LldpLinkConfig, Name, SwitchInterfaceConfig, SwitchPort, SwitchPortAddressConfig, SwitchPortBgpPeerConfig, SwitchPortConfig, SwitchPortLinkConfig, SwitchPortRouteConfig, SwitchPortSettings, SwitchPortSettingsGroup, SwitchPortSettingsGroups, @@ -31,7 +31,7 @@ use diesel::{ use diesel_dtrace::DTraceConnection; use ipnetwork::IpNetwork; use nexus_db_model::{ - SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, + BgpConfig, SqlU16, SqlU32, SqlU8, SwitchPortBgpPeerConfigAllowExport, SwitchPortBgpPeerConfigAllowImport, SwitchPortBgpPeerConfigCommunity, }; use nexus_types::external_api::params; @@ -101,7 +101,7 @@ pub struct SwitchPortSettingsCombinedResult { pub groups: Vec, pub port: SwitchPortConfig, pub links: Vec, - pub link_lldp: Vec, + pub link_lldp: Vec, pub interfaces: Vec, pub vlan_interfaces: Vec, pub routes: Vec, @@ -333,6 +333,7 @@ impl DataStore { SwitchPortSettingsCreateError::ReserveBlock( ReserveBlockError::AddressNotInLot, ) => Error::invalid_request("address not in lot"), + } } else { @@ -451,19 +452,18 @@ impl DataStore { .load_async::(&conn) .await?; - let lldp_svc_ids: Vec = result + let lldp_link_ids: Vec = result .links .iter() - .map(|link| link.lldp_service_config_id) + .map(|link| link.lldp_link_config_id) .collect(); - use db::schema::lldp_service_config as lldp_config; - use db::schema::lldp_service_config::dsl as lldp_dsl; - result.link_lldp = lldp_dsl::lldp_service_config - .filter(lldp_config::id.eq_any(lldp_svc_ids)) - .select(LldpServiceConfig::as_select()) + use db::schema::lldp_link_config; + result.link_lldp = lldp_link_config::dsl::lldp_link_config + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) + .select(LldpLinkConfig::as_select()) .limit(1) - .load_async::(&conn) + .load_async::(&conn) .await?; // get the interface configs @@ -829,45 +829,158 @@ impl DataStore { port_settings_id: Option, current: UpdatePrecondition, ) -> UpdateResult<()> { + use db::schema::bgp_config::dsl as bgp_config_dsl; use db::schema::switch_port; use db::schema::switch_port::dsl as switch_port_dsl; + use db::schema::switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl; let conn = self.pool_connection_authorized(opctx).await?; - match current { - UpdatePrecondition::DontCare => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Null => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.is_null()) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - 
public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - UpdatePrecondition::Value(current_id) => { - diesel::update(switch_port_dsl::switch_port) - .filter(switch_port::id.eq(switch_port_id)) - .filter(switch_port::port_settings_id.eq(current_id)) - .set(switch_port::port_settings_id.eq(port_settings_id)) - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - } - } + let err = OptionalError::new(); + self.transaction_retry_wrapper("switch_port_set_settings_id") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // TODO: remove once per-switch-multi-asn support is added + // Bail if user attempts to assign multiple ASNs to a switch via switch port settings + // This is a temporary measure until multi-asn-per-switch is supported. + + // what switch are we adding a configuration to? + let switch = switch_port_dsl::switch_port + .filter(switch_port_dsl::id.eq(switch_port_id)) + .select(switch_port_dsl::switch_location) + .limit(1) + .first_async::(&conn) + .await + .map_err(|e: diesel::result::Error| { + let msg = "failed to look up switch port by id"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + err.bail(Error::not_found_by_id( + ResourceType::SwitchPort, + &switch_port_id, + )) + } + _ => err.bail(Error::internal_error(msg)), + } + })?; + + // if we're setting a port settings id (and therefore activating a configuration + // on a port) we need to make sure there aren't any conflicting bgp configurations + if let Some(psid) = port_settings_id { + let bgp_config: Option = + match bgp_peer_dsl::switch_port_settings_bgp_peer_config + .inner_join( + bgp_config_dsl::bgp_config + .on(bgp_peer_dsl::bgp_config_id + .eq(bgp_config_dsl::id)), + ) + .filter( + bgp_peer_dsl::port_settings_id + .eq(psid), + ) + .select(BgpConfig::as_select()) + .limit(1) + .first_async::(&conn) + .await { + Ok(v) => Ok(Some(v)), + Err(e) => { + let msg = "failed to check if bgp peer exists in switch port settings"; + error!(opctx.log, "{msg}"; "error" => ?e); + match e { + diesel::result::Error::NotFound => { + Ok(None) + } + _ => Err(err.bail(Error::internal_error(msg))), + } + } + }?; + + // find all port settings for the targeted switch + // switch port + // inner join bgp peer on port settings id + // inner join bgp config on bgp config id + // filter switch location eq switch + // filter port settings id not null + // filter asn doesn't equal our asn + + if let Some(config) = bgp_config { + let conflicting_bgp_configs: Vec = switch_port_dsl::switch_port + .inner_join( + bgp_peer_dsl::switch_port_settings_bgp_peer_config + .on(bgp_peer_dsl::port_settings_id + .nullable() + .eq(switch_port_dsl::port_settings_id)), + ) + .inner_join(bgp_config_dsl::bgp_config.on( + bgp_peer_dsl::bgp_config_id.eq(bgp_config_dsl::id), + )) + .filter(switch_port_dsl::switch_location.eq(switch)) + .filter(switch_port_dsl::port_settings_id.is_not_null()) + .filter(bgp_config_dsl::asn.ne(config.asn)) + .select(BgpConfig::as_select()) + .load_async(&conn) + .await?; + + if !conflicting_bgp_configs.is_empty() { + return Err(err.bail(Error::conflict("a different asn is already configured on this switch"))); + } + } + + } + + // perform the requested update + match current { + UpdatePrecondition::DontCare => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + 
UpdatePrecondition::Null => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter(switch_port::port_settings_id.is_null()) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + UpdatePrecondition::Value(current_id) => { + diesel::update(switch_port_dsl::switch_port) + .filter(switch_port::id.eq(switch_port_id)) + .filter( + switch_port::port_settings_id + .eq(current_id), + ) + .set( + switch_port::port_settings_id + .eq(port_settings_id), + ) + .execute_async(&conn) + .await + } + } + } + }) + .await + .map_err(|e| { + let msg = "switch_port_set_settings_id failed"; + if let Some(err) = err.take() { + error!(opctx.log, "{msg}"; "error" => ?err); + err + } else { + error!(opctx.log, "{msg}"; "error" => ?e); + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; Ok(()) } @@ -946,10 +1059,10 @@ impl DataStore { .eq(route_config_dsl::port_settings_id.nullable())), ) .select(SwitchPort::as_select()) - // TODO: #3592 Correctness - // In single rack deployments there are only 64 ports. We'll need - // pagination in the future, or maybe a way to constrain the query to - // a rack? + // TODO: #3592 Correctness + // In single rack deployments there are only 64 ports. We'll need + // pagination in the future, or maybe a way to constrain the query to + // a rack? .limit(64) .union( switch_port_dsl::switch_port @@ -958,7 +1071,7 @@ impl DataStore { bgp_peer_config_dsl::switch_port_settings_bgp_peer_config .on(switch_port_dsl::port_settings_id .eq(bgp_peer_config_dsl::port_settings_id.nullable()), - ), + ), ) .select(SwitchPort::as_select()) .limit(64), @@ -987,7 +1100,7 @@ async fn do_switch_port_settings_create( ) -> Result { use db::schema::{ address_lot::dsl as address_lot_dsl, bgp_config::dsl as bgp_config_dsl, - lldp_service_config::dsl as lldp_config_dsl, + lldp_link_config::dsl as lldp_link_config_dsl, switch_port_settings::dsl as port_settings_dsl, switch_port_settings_address_config::dsl as address_config_dsl, switch_port_settings_bgp_peer_config::dsl as bgp_peer_dsl, @@ -1047,17 +1160,21 @@ async fn do_switch_port_settings_create( let mut link_config = Vec::with_capacity(params.links.len()); for (link_name, c) in ¶ms.links { - let lldp_config_id = match c.lldp.lldp_config { - Some(_) => todo!(), // TODO actual lldp support - None => None, - }; - let lldp_svc_config = - LldpServiceConfig::new(c.lldp.enabled, lldp_config_id); + let lldp_link_config = LldpLinkConfig::new( + c.lldp.enabled, + c.lldp.link_name.clone(), + c.lldp.link_description.clone(), + c.lldp.chassis_id.clone(), + c.lldp.system_name.clone(), + c.lldp.system_description.clone(), + c.lldp.management_ip.map(|a| a.into()), + ); + let lldp_config_id = lldp_link_config.id; + lldp_config.push(lldp_link_config); - lldp_config.push(lldp_svc_config.clone()); link_config.push(SwitchPortLinkConfig::new( psid, - lldp_svc_config.id, + lldp_config_id, link_name.clone(), c.mtu, c.fec.into(), @@ -1066,9 +1183,9 @@ async fn do_switch_port_settings_create( )); } result.link_lldp = - diesel::insert_into(lldp_config_dsl::lldp_service_config) + diesel::insert_into(lldp_link_config_dsl::lldp_link_config) .values(lldp_config.clone()) - .returning(LldpServiceConfig::as_returning()) + .returning(LldpLinkConfig::as_returning()) .get_results_async(conn) .await?; @@ -1120,6 +1237,7 @@ async fn do_switch_port_settings_create( route.dst.into(), route.gw.into(), route.vid.map(Into::into), + route.local_pref.map(Into::into), )); } } @@ -1144,18 
+1262,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); bgp_config_dsl::bgp_config - .filter(bgp_config::time_deleted.is_null()) - .filter(bgp_config::name.eq(name)) - .select(bgp_config::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::BgpConfigNotFound - ) - })? + .filter(bgp_config::time_deleted.is_null()) + .filter(bgp_config::name.eq(name)) + .select(bgp_config::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::BgpConfigNotFound + ) + })? } }; @@ -1173,9 +1291,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_import_dsl::switch_port_settings_bgp_peer_config_allow_import) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if let ImportExportPolicy::Allow(list) = &p.allowed_export { @@ -1192,9 +1310,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(allow_export_dsl::switch_port_settings_bgp_peer_config_allow_export) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } if !p.communities.is_empty() { @@ -1212,9 +1330,9 @@ async fn do_switch_port_settings_create( .collect(); diesel::insert_into(bgp_communities_dsl::switch_port_settings_bgp_peer_config_communities) - .values(to_insert) - .execute_async(conn) - .await?; + .values(to_insert) + .execute_async(conn) + .await?; } bgp_peer_config.push(SwitchPortBgpPeerConfig::new( @@ -1225,6 +1343,7 @@ async fn do_switch_port_settings_create( )); } } + let db_bgp_peers: Vec = diesel::insert_into(bgp_peer_dsl::switch_port_settings_bgp_peer_config) .values(bgp_peer_config) @@ -1278,18 +1397,18 @@ async fn do_switch_port_settings_create( NameOrId::Name(name) => { let name = name.to_string(); address_lot_dsl::address_lot - .filter(address_lot::time_deleted.is_null()) - .filter(address_lot::name.eq(name)) - .select(address_lot::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsCreateError::AddressLotNotFound - ) - })? + .filter(address_lot::time_deleted.is_null()) + .filter(address_lot::name.eq(name)) + .select(address_lot::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsCreateError::AddressLotNotFound + ) + })? } }; // TODO: Reduce DB round trips needed for reserving ip blocks @@ -1349,18 +1468,18 @@ async fn do_switch_port_settings_delete( NameOrId::Name(name) => { let name = name.to_string(); port_settings_dsl::switch_port_settings - .filter(switch_port_settings::time_deleted.is_null()) - .filter(switch_port_settings::name.eq(name)) - .select(switch_port_settings::id) - .limit(1) - .first_async::(conn) - .await - .map_err(|diesel_error| { - err.bail_retryable_or( - diesel_error, - SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound - ) - })? + .filter(switch_port_settings::time_deleted.is_null()) + .filter(switch_port_settings::name.eq(name)) + .select(switch_port_settings::id) + .limit(1) + .first_async::(conn) + .await + .map_err(|diesel_error| { + err.bail_retryable_or( + diesel_error, + SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound + ) + })? 
} }; @@ -1389,13 +1508,12 @@ async fn do_switch_port_settings_delete( .returning(SwitchPortLinkConfig::as_returning()) .get_results_async(conn) .await?; - // delete lldp configs - use db::schema::lldp_service_config::{self, dsl as lldp_config_dsl}; - let lldp_svc_ids: Vec = - links.iter().map(|link| link.lldp_service_config_id).collect(); - diesel::delete(lldp_config_dsl::lldp_service_config) - .filter(lldp_service_config::id.eq_any(lldp_svc_ids)) + use db::schema::lldp_link_config; + let lldp_link_ids: Vec = + links.iter().map(|link| link.lldp_link_config_id).collect(); + diesel::delete(lldp_link_config::dsl::lldp_link_config) + .filter(lldp_link_config::id.eq_any(lldp_link_ids)) .execute_async(conn) .await?; @@ -1556,7 +1674,7 @@ mod test { shaper: None, }; - datastore.bgp_config_set(&opctx, &bgp_config).await.unwrap(); + datastore.bgp_config_create(&opctx, &bgp_config).await.unwrap(); let settings = SwitchPortSettingsCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 0dcc3aa717..920ff3aee1 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -4,7 +4,7 @@ //! Implementation of [`steno::SecStore`] backed by Omicron's database -use crate::db::{self, model::Generation}; +use crate::db; use anyhow::Context; use async_trait::async_trait; use dropshot::HttpError; @@ -102,12 +102,7 @@ impl steno::SecStore for CockroachDbSecStore { &log, || { self.datastore - .saga_update_state( - id, - update, - self.sec_id, - Generation::new(), - ) + .saga_update_state(id, update, self.sec_id) .map_err(backoff::BackoffError::transient) }, "updating saga state", diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index a531b66df4..1c62e553a8 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -16,6 +16,7 @@ dns-service-client.workspace = true chrono.workspace = true futures.workspace = true internal-dns.workspace = true +newtype-uuid.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 498944598d..12ff896d9d 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -34,11 +34,13 @@ pub(crate) async fn ensure_settings( mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::CockroachDbClusterVersion; + use nexus_types::deployment::CockroachDbPreserveDowngrade; use std::sync::Arc; + use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -69,24 +71,26 @@ mod test { .await .expect("failed to get blueprint from datastore"); eprintln!("blueprint: {}", blueprint.display()); - // The initial blueprint should already have these filled in. + // The initial blueprint should already have the state fingerprint + // filled in. assert_eq!( blueprint.cockroachdb_fingerprint, settings.state_fingerprint ); - assert_eq!( - blueprint.cockroachdb_setting_preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.into() - ); - // The cluster version, preserve downgrade setting, and - // `NEWLY_INITIALIZED` should all match. 
- assert_eq!( - settings.version, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() - ); + // The initial blueprint should already have the preserve downgrade + // setting filled in. (It might be the current or previous version, but + // it should be `Set` regardless.) + let CockroachDbPreserveDowngrade::Set(bp_preserve_downgrade) = + blueprint.cockroachdb_setting_preserve_downgrade + else { + panic!("blueprint does not set preserve downgrade option"); + }; + // The cluster version, preserve downgrade setting, and the value in the + // blueprint should all match. + assert_eq!(settings.version, bp_preserve_downgrade.to_string()); assert_eq!( settings.preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() + bp_preserve_downgrade.to_string() ); // Record the zpools so we don't fail to ensure datasets (unrelated to // crdb settings) during blueprint execution. @@ -96,16 +100,17 @@ mod test { .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // The CockroachDB settings should not have changed. assert_eq!( settings, diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 846d19ead3..1c878a9ada 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -458,6 +458,7 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use crate::Sled; use dns_service_client::DnsDiff; use internal_dns::config::Host; @@ -466,6 +467,7 @@ mod test { use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use internal_dns::DNS_ZONE; + use newtype_uuid::GenericUuid; use nexus_db_model::DnsGroup; use nexus_db_model::Silo; use nexus_db_queries::authn; @@ -477,6 +479,8 @@ mod test { use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::example; use nexus_reconfigurator_preparation::PlanningInputFromDb; + use nexus_sled_agent_shared::inventory::OmicronZoneConfig; + use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_test_utils::resource_helpers::create_silo; use nexus_test_utils::resource_helpers::DiskTestBuilder; @@ -489,6 +493,9 @@ mod test { use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::CockroachDbSettings; + pub use nexus_types::deployment::OmicronZoneExternalFloatingAddr; + pub use nexus_types::deployment::OmicronZoneExternalFloatingIp; + pub use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::SledFilter; use nexus_types::external_api::params; use nexus_types::external_api::shared; @@ -538,6 +545,212 @@ mod test { } } + /// ********************************************************************** + /// DEPRECATION WARNING: + /// + /// Remove when `deprecated_omicron_zone_config_to_blueprint_zone_config` + /// is deleted. 
+ /// ********************************************************************** + /// + /// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`]. + #[derive(Debug, Clone)] + pub enum InvalidOmicronZoneType { + #[allow(unused)] + ExternalIpIdRequired { kind: ZoneKind }, + } + + /// ********************************************************************** + /// DEPRECATION WARNING: Do not call this function in new code !!! + /// ********************************************************************** + /// + /// Convert an [`OmicronZoneConfig`] to a [`BlueprintZoneConfig`]. + /// + /// A `BlueprintZoneConfig` is a superset of `OmicronZoneConfig` and + /// contains auxiliary information not present in an `OmicronZoneConfig`. + /// Therefore, the only valid direction for a real system to take is a + /// lossy conversion from `BlueprintZoneConfig` to `OmicronZoneConfig`. + /// This function, however, does the opposite. We therefore have to inject + /// fake information to fill in the unknown fields in the generated + /// `OmicronZoneConfig`. + /// + /// This is bad, and we should generally feel bad for doing it :). At + /// the time this was done we were backporting the blueprint system into + /// RSS while trying not to change too much code. This was a judicious + /// shortcut used right before a release for stability reasons. As the + /// number of zones managed by the reconfigurator has grown, the use + /// of this function has become more egregious, and so it was removed + /// from the production code path and into this test module. This move + /// itself is a judicious shortcut. We have a test in this module, + /// `test_blueprint_internal_dns_basic`, that is the last caller of this + /// function, and so we have moved this function into this module. + /// + /// Ideally, we would get rid of this function altogether and use another + /// method for generating `BlueprintZoneConfig` structures. Unfortunately, + /// there are still a few remaining zones that need to be implemented in the + /// `BlueprintBuilder`, and some of them require custom code. Until that is + /// done, we don't have a good way of generating a test representation of + /// the real system that would properly serve this test. We could generate + /// a `BlueprintZoneConfig` by hand for each zone type in this test, on + /// top of the more modern `SystemDescription` setup, but that isn't much + /// different than what we do in this test. We'd also eventually remove it + /// for better test setup when our `BlueprintBuilder` is capable of properly + /// constructing all zone types. Instead, we do the simple thing, and reuse + /// what we alreaady have. + /// + /// # Errors + /// + /// If `config.zone_type` is a zone that has an external IP address (Nexus, + /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this + /// method will return an error. 
+ pub fn deprecated_omicron_zone_config_to_blueprint_zone_config( + config: OmicronZoneConfig, + disposition: BlueprintZoneDisposition, + external_ip_id: Option, + ) -> Result { + let kind = config.zone_type.kind(); + let zone_type = match config.zone_type { + OmicronZoneType::BoundaryNtp { + address, + dns_servers, + domain, + nic, + ntp_servers, + snat_cfg, + } => { + let external_ip_id = external_ip_id.ok_or( + InvalidOmicronZoneType::ExternalIpIdRequired { kind }, + )?; + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + external_ip: OmicronZoneExternalSnatIp { + id: external_ip_id, + snat_cfg, + }, + }, + ) + } + OmicronZoneType::Clickhouse { address, dataset } => { + BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { + address, + dataset, + }) + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { address, dataset }, + ) + } + OmicronZoneType::ClickhouseServer { address, dataset } => { + BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { address, dataset }, + ) + } + OmicronZoneType::CockroachDb { address, dataset } => { + BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { address, dataset }, + ) + } + OmicronZoneType::Crucible { address, dataset } => { + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, + dataset, + }) + } + OmicronZoneType::CruciblePantry { address } => { + BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { address }, + ) + } + OmicronZoneType::ExternalDns { + dataset, + dns_address, + http_address, + nic, + } => { + let external_ip_id = external_ip_id.ok_or( + InvalidOmicronZoneType::ExternalIpIdRequired { kind }, + )?; + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset, + http_address, + dns_address: OmicronZoneExternalFloatingAddr { + id: external_ip_id, + addr: dns_address, + }, + nic, + }, + ) + } + OmicronZoneType::InternalDns { + dataset, + dns_address, + gz_address, + gz_address_index, + http_address, + } => BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + }, + ), + OmicronZoneType::InternalNtp { + address, + dns_servers, + domain, + ntp_servers, + } => BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + }, + ), + OmicronZoneType::Nexus { + external_dns_servers, + external_ip, + external_tls, + internal_address, + nic, + } => { + let external_ip_id = external_ip_id.ok_or( + InvalidOmicronZoneType::ExternalIpIdRequired { kind }, + )?; + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + external_ip: OmicronZoneExternalFloatingIp { + id: external_ip_id, + ip: external_ip, + }, + nic, + external_tls, + external_dns_servers, + }) + } + OmicronZoneType::Oximeter { address } => { + BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { + address, + }) + } + }; + Ok(BlueprintZoneConfig { + disposition, + id: OmicronZoneUuid::from_untyped_uuid(config.id), + underlay_address: config.underlay_address, + filesystem_pool: config.filesystem_pool, + zone_type, + }) + } + /// test blueprint_internal_dns_config(): trivial case of an empty blueprint #[test] fn test_blueprint_internal_dns_empty() { @@ -588,7 +801,7 @@ mod test { .zones .into_iter() .map(|config| -> 
BlueprintZoneConfig { - BlueprintZoneConfig::from_omicron_zone_config( + deprecated_omicron_zone_config_to_blueprint_zone_config( config, BlueprintZoneDisposition::InService, // We don't get external IP IDs in inventory @@ -1245,16 +1458,17 @@ mod test { // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // DNS ought not to have changed. verify_dns_unchanged( @@ -1385,16 +1599,17 @@ mod test { .await .expect("failed to set blueprint as target"); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint"); // Now fetch DNS again. Both should have changed this time. let dns_latest_internal = datastore @@ -1459,16 +1674,17 @@ mod test { } // If we execute it again, we should see no more changes. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1495,16 +1711,17 @@ mod test { // One more time, make sure that executing the blueprint does not do // anything. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1589,16 +1806,17 @@ mod test { ); // If we execute the blueprint, DNS should not be changed. 
- crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - "test-suite", - &overrides, - ) - .await - .expect("failed to execute blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute blueprint"); let dns_latest_internal = datastore .dns_config_read(&opctx, DnsGroup::Internal) .await diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index bb525b1b8b..2c70c7acbb 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -24,6 +24,7 @@ use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; +use uuid::Uuid; mod cockroachdb; mod datasets; @@ -31,6 +32,7 @@ mod dns; mod omicron_physical_disks; mod omicron_zones; mod overridables; +mod sagas; mod sled_state; pub use dns::blueprint_external_dns_config; @@ -68,43 +70,46 @@ impl From for Sled { } } +/// The result of calling [`realize_blueprint`] or +/// [`realize_blueprint_with_overrides`]. +#[derive(Debug)] +#[must_use = "the output of realize_blueprint should probably be used"] +pub struct RealizeBlueprintOutput { + /// Whether any sagas need to be reassigned to a new Nexus. + pub needs_saga_recovery: bool, +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. -pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, -) -> Result<(), Vec> -where - String: From, -{ + nexus_id: Uuid, +) -> Result> { realize_blueprint_with_overrides( opctx, datastore, resolver, blueprint, - nexus_label, + nexus_id, &Default::default(), ) .await } -pub async fn realize_blueprint_with_overrides( +pub async fn realize_blueprint_with_overrides( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, + nexus_id: Uuid, overrides: &Overridables, -) -> Result<(), Vec> -where - String: From, -{ +) -> Result> { let opctx = opctx.child(BTreeMap::from([( "comment".to_string(), blueprint.comment.clone(), @@ -136,7 +141,7 @@ where }) .collect(); - omicron_physical_disks::deploy_disks( + let deploy_disks_done = omicron_physical_disks::deploy_disks( &opctx, &sleds_by_id, &blueprint.blueprint_disks, @@ -182,7 +187,7 @@ where dns::deploy_dns( &opctx, datastore, - String::from(nexus_label), + nexus_id.to_string(), blueprint, &sleds_by_id, overrides, @@ -209,20 +214,50 @@ where ) .await?; - // This depends on the "deploy_disks" call earlier -- disk expungement is a - // statement of policy, but we need to be assured that the Sled Agent has - // stopped using that disk before we can mark its state as decommissioned. - omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) - .await?; + omicron_physical_disks::decommission_expunged_disks( + &opctx, + datastore, + deploy_disks_done, + ) + .await?; + + // From this point on, we'll assume that any errors that we encounter do + // *not* require stopping execution. We'll just accumulate them and return + // them all at the end. 
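The comment above spells out the new error-handling shape for the tail of blueprint execution: later steps accumulate failures rather than returning early, and everything is reported together at the end. A minimal sketch of that accumulate-then-report pattern, using invented placeholder steps (`step_a` and `step_b` are illustrations, not real execution steps):

```rust
// Sketch only: each fallible step pushes its error instead of returning
// early, and the overall Result is decided once at the end, mirroring the
// `errors: Vec<_>` / `if errors.is_empty()` structure in the patch.
fn step_a() -> Result<(), String> {
    Ok(())
}

fn step_b() -> Result<(), String> {
    Err("transient failure".to_string())
}

fn run_tail_steps() -> Result<(), Vec<String>> {
    let mut errors = Vec::new();

    if let Err(e) = step_a() {
        errors.push(format!("step_a failed: {e}"));
    }
    if let Err(e) = step_b() {
        errors.push(format!("step_b failed: {e}"));
    }

    if errors.is_empty() {
        Ok(())
    } else {
        Err(errors)
    }
}

fn main() {
    // Both steps run even though step_b fails; the caller sees every error.
    assert_eq!(run_tail_steps().unwrap_err().len(), 1);
}
```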
+ // + // TODO We should probably do this with more of the errors above, too. + let mut errors = Vec::new(); + + // For any expunged Nexus zones, re-assign in-progress sagas to some other + // Nexus. If this fails for some reason, it doesn't affect anything else. + let sec_id = nexus_db_model::SecId(nexus_id); + let reassigned = sagas::reassign_sagas_from_expunged( + &opctx, datastore, blueprint, sec_id, + ) + .await + .context("failed to re-assign sagas"); + let needs_saga_recovery = match reassigned { + Ok(needs_recovery) => needs_recovery, + Err(error) => { + errors.push(error); + false + } + }; // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. - cockroachdb::ensure_settings(&opctx, datastore, blueprint) - .await - .map_err(|err| vec![err])?; + if let Err(error) = + cockroachdb::ensure_settings(&opctx, datastore, blueprint).await + { + errors.push(error); + } - Ok(()) + if errors.is_empty() { + Ok(RealizeBlueprintOutput { needs_saga_recovery }) + } else { + Err(errors) + } } #[cfg(test)] diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index 7adc41213e..af95eb8e77 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -25,7 +25,7 @@ pub(crate) async fn deploy_disks( opctx: &OpContext, sleds_by_id: &BTreeMap, sled_configs: &BTreeMap, -) -> Result<(), Vec> { +) -> Result> { let errors: Vec<_> = stream::iter(sled_configs) .filter_map(|(sled_id, config)| async move { let log = opctx.log.new(o!( @@ -92,16 +92,26 @@ pub(crate) async fn deploy_disks( .await; if errors.is_empty() { - Ok(()) + Ok(DeployDisksDone {}) } else { Err(errors) } } -/// Decommissions all disks which are currently expunged +/// Typestate indicating that the deploy disks step was performed. +#[derive(Debug)] +#[must_use = "this should be passed into decommission_expunged_disks"] +pub(crate) struct DeployDisksDone {} + +/// Decommissions all disks which are currently expunged. pub(crate) async fn decommission_expunged_disks( opctx: &OpContext, datastore: &DataStore, + // This is taken as a parameter to ensure that this depends on a + // "deploy_disks" call made earlier. Disk expungement is a statement of + // policy, but we need to be assured that the Sled Agent has stopped using + // that disk before we can mark its state as decommissioned. + _deploy_disks_done: DeployDisksDone, ) -> Result<(), Vec> { datastore .physical_disk_decommission_all_expunged(&opctx) @@ -113,6 +123,7 @@ pub(crate) async fn decommission_expunged_disks( #[cfg(test)] mod test { use super::deploy_disks; + use super::DeployDisksDone; use crate::DataStore; use crate::Sled; @@ -217,9 +228,13 @@ mod test { // Get a success result back when the blueprint has an empty set of // disks. let (_, blueprint) = create_blueprint(BTreeMap::new()); - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy no disks"); + // Use an explicit type here because not doing so can cause errors to + // be ignored (this behavior is genuinely terrible). Instead, ensure + // that the type has the right result. 
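`DeployDisksDone` is a small typestate: outside of tests, the only way to obtain one is from a successful `deploy_disks` call, and `decommission_expunged_disks` now requires one, so "disks were deployed before anything was decommissioned" is checked by the compiler rather than by convention. A minimal sketch of the same idea with invented names (`StepOneDone`, `step_one`, and `step_two` are illustrations, not part of this patch):

```rust
// Sketch only: a unit struct returned by the first step is the sole way to
// call the second step, so the ordering is enforced at compile time. The
// #[must_use] attribute also makes it harder to call step_one and silently
// discard the evidence (or an error).
#[must_use = "pass this into step_two to prove step_one ran"]
struct StepOneDone {}

fn step_one() -> Result<StepOneDone, String> {
    // ... the first step's real work would happen here ...
    Ok(StepOneDone {})
}

fn step_two(_done: StepOneDone) -> Result<(), String> {
    // Reachable only with evidence that step_one already succeeded.
    Ok(())
}

fn main() -> Result<(), String> {
    // Binding with an explicit type (as the test above does with
    // `let _: DeployDisksDone = ...`) also keeps a Result from being
    // discarded by accident.
    let done: StepOneDone = step_one()?;
    step_two(done)
}
```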
+ let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy no disks"); // Disks are updated in a particular order, but each request contains // the full set of disks that must be running. @@ -272,9 +287,10 @@ mod test { } // Execute it. - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy initial disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy initial disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -293,9 +309,10 @@ mod test { )), ); } - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy same disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy same disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -567,7 +584,15 @@ mod test { assert_eq!(d.disk_state, PhysicalDiskState::Active); assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); - super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + super::decommission_expunged_disks( + &opctx, + &datastore, + // This is an internal test, and we're testing decommissioning in + // isolation, so it's okay to create the typestate here. + DeployDisksDone {}, + ) + .await + .unwrap(); // After decommissioning, we see the expunged disk become // decommissioned. The other disk remains in-service. diff --git a/nexus/reconfigurator/execution/src/sagas.rs b/nexus/reconfigurator/execution/src/sagas.rs new file mode 100644 index 0000000000..458328ef00 --- /dev/null +++ b/nexus/reconfigurator/execution/src/sagas.rs @@ -0,0 +1,71 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Re-assign sagas from expunged Nexus zones + +use nexus_db_model::SecId; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::GenericUuid; +use slog::{debug, info, warn}; + +/// For each expunged Nexus zone, re-assign sagas owned by that Nexus to the +/// specified nexus (`nexus_id`). +pub(crate) async fn reassign_sagas_from_expunged( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + nexus_id: SecId, +) -> Result { + let log = &opctx.log; + + // Identify any Nexus zones that have been expunged and need to have sagas + // re-assigned. + // + // TODO: Currently, we take any expunged Nexus instances and attempt to + // assign all their sagas to ourselves. Per RFD 289, we can only re-assign + // sagas between two instances of Nexus that are at the same version. Right + // now this can't happen so there's nothing to do here to ensure that + // constraint. However, once we support allowing the control plane to be + // online _during_ an upgrade, there may be multiple different Nexus + // instances running at the same time. At that point, we will need to make + // sure that we only ever try to assign ourselves sagas from other Nexus + // instances that we know are running the same version as ourselves. 
+ let nexus_zone_ids: Vec<_> = blueprint + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .filter_map(|(_, z)| { + z.zone_type + .is_nexus() + .then(|| nexus_db_model::SecId(z.id.into_untyped_uuid())) + }) + .collect(); + + debug!(log, "re-assign sagas: found Nexus instances"; + "nexus_zone_ids" => ?nexus_zone_ids); + + let result = + datastore.sagas_reassign_sec(opctx, &nexus_zone_ids, nexus_id).await; + + match result { + Ok(count) => { + info!(log, "re-assigned sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + "count" => count, + ); + + Ok(count != 0) + } + Err(error) => { + warn!(log, "failed to re-assign sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + &error, + ); + + Err(error) + } + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 1fda8fd20b..ae4309d8f9 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -452,7 +452,8 @@ impl BackgroundTasksInitializer { datastore.clone(), resolver.clone(), rx_blueprint.clone(), - nexus_id.to_string(), + nexus_id, + task_saga_recovery.clone(), ); let rx_blueprint_exec = blueprint_executor.watcher(); driver.register(TaskDefinition { diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index ee780812ae..dbbfcb3b14 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,16 +4,18 @@ //! Background task for realizing a plan blueprint -use crate::app::background::BackgroundTask; +use crate::app::background::{Activator, BackgroundTask}; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_reconfigurator_execution::RealizeBlueprintOutput; use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; use std::sync::Arc; use tokio::sync::watch; +use uuid::Uuid; /// Background task that takes a [`Blueprint`] and realizes the change to /// the state of the system based on the `Blueprint`. @@ -21,8 +23,9 @@ pub struct BlueprintExecutor { datastore: Arc, resolver: Resolver, rx_blueprint: watch::Receiver>>, - nexus_label: String, + nexus_id: Uuid, tx: watch::Sender, + saga_recovery: Activator, } impl BlueprintExecutor { @@ -32,10 +35,18 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, - nexus_label: String, + nexus_id: Uuid, + saga_recovery: Activator, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); - BlueprintExecutor { datastore, resolver, rx_blueprint, nexus_label, tx } + BlueprintExecutor { + datastore, + resolver, + rx_blueprint, + nexus_id, + tx, + saga_recovery, + } } pub fn watcher(&self) -> watch::Receiver { @@ -81,7 +92,7 @@ impl BlueprintExecutor { &self.datastore, &self.resolver, blueprint, - &self.nexus_label, + self.nexus_id, ) .await; @@ -90,7 +101,19 @@ impl BlueprintExecutor { // Return the result as a `serde_json::Value` match result { - Ok(()) => json!({}), + Ok(RealizeBlueprintOutput { needs_saga_recovery }) => { + // If executing the blueprint requires activating the saga + // recovery background task, do that now. 
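When `needs_saga_recovery` comes back true, the executor pokes the saga recovery task through its `Activator` handle, as shown just below. As a rough stand-in for what such an activation handle can look like (this sketch is built on `tokio::sync::Notify` and is not Nexus's real `Activator` type):

```rust
// Sketch only: a cloneable handle a caller can poke to make a background
// loop run another pass. Notify stores a permit when nobody is waiting,
// so an early activate() is not lost.
use std::sync::Arc;
use tokio::sync::Notify;

#[derive(Clone)]
struct Activator {
    notify: Arc<Notify>,
}

impl Activator {
    fn new() -> Activator {
        Activator { notify: Arc::new(Notify::new()) }
    }

    // Request that the background task run another pass.
    fn activate(&self) {
        self.notify.notify_one();
    }

    // Wait until someone asks for a pass.
    async fn activated(&self) {
        self.notify.notified().await;
    }
}

#[tokio::main]
async fn main() {
    let activator = Activator::new();
    let handle = activator.clone();
    let task = tokio::spawn(async move {
        handle.activated().await;
        println!("saga recovery pass would run here");
    });

    // The blueprint executor decides recovery is needed and activates it.
    activator.activate();
    task.await.unwrap();
}
```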
+ if needs_saga_recovery { + info!(&opctx.log, "activating saga recovery task"); + self.saga_recovery.activate(); + } + + json!({ + "target_id": blueprint.id.to_string(), + "needs_saga_recovery": needs_saga_recovery, + }) + } Err(errors) => { let errors: Vec<_> = errors.into_iter().map(|e| format!("{:#}", e)).collect(); @@ -115,7 +138,7 @@ impl BackgroundTask for BlueprintExecutor { #[cfg(test)] mod test { use super::BlueprintExecutor; - use crate::app::background::BackgroundTask; + use crate::app::background::{Activator, BackgroundTask}; use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; @@ -261,7 +284,8 @@ mod test { datastore.clone(), resolver.clone(), blueprint_rx, - String::from("test-suite"), + Uuid::new_v4(), + Activator::new(), ); // Now we're ready. @@ -284,10 +308,17 @@ mod test { ) .await, ); + let blueprint_id = blueprint.1.id; blueprint_tx.send(Some(blueprint)).unwrap(); let value = task.activate(&opctx).await; println!("activating with no zones: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint_id, + "needs_saga_recovery": false, + }) + ); // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and @@ -375,7 +406,13 @@ mod test { // Activate the task to trigger zone configuration on the sled-agents let value = task.activate(&opctx).await; println!("activating two sled agents: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint.1.id.to_string(), + "needs_saga_recovery": false, + }) + ); s1.verify_and_clear(); s2.verify_and_clear(); diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs index fbfc5c5af2..df501fe6b1 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -53,7 +53,6 @@ impl BackgroundTask for LookupRegionPort { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - info!(&log, "lookup region port task started"); let mut status = LookupRegionPortStatus::default(); @@ -147,8 +146,6 @@ impl BackgroundTask for LookupRegionPort { } } - info!(&log, "lookup region port task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs index 4b0d8bec38..7f3fceab1c 100644 --- a/nexus/src/app/background/tasks/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -43,7 +43,6 @@ impl BackgroundTask for PhantomDiskDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "phantom disk task started"); let phantom_disks = match self.datastore.find_phantom_disks().await { @@ -83,14 +82,13 @@ impl BackgroundTask for PhantomDiskDetector { } else { info!( &log, - "phandom disk {} un-deleted andset to faulted ok", + "phandom disk {} un-deleted and set to faulted ok", disk.id(), ); phantom_disk_deleted_ok += 1; } } - warn!(&log, "phantom disk task done"); json!({ "phantom_disk_deleted_ok": phantom_disk_deleted_ok, "phantom_disk_deleted_err": phantom_disk_deleted_err, diff --git a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index f3b9e8ac62..b1eceed0b6 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs 
@@ -96,8 +96,6 @@ impl BackgroundTask for PhysicalDiskAdoption { } let mut disks_added = 0; - let log = &opctx.log; - warn!(&log, "physical disk adoption task started"); let collection_id = *self.rx_inventory_collection.borrow(); let Some(collection_id) = collection_id else { @@ -171,7 +169,6 @@ impl BackgroundTask for PhysicalDiskAdoption { ); } - warn!(&log, "physical disk adoption task done"); json!({ "physical_disks_added": disks_added, }) diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f852f21734..ba0e7f86fb 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -61,7 +61,6 @@ impl BackgroundTask for RegionReplacementDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "region replacement task started"); let mut ok = 0; let mut err = 0; @@ -182,8 +181,6 @@ impl BackgroundTask for RegionReplacementDetector { } } - warn!(&log, "region replacement task done"); - json!({ "region_replacement_started_ok": ok, "region_replacement_started_err": err, diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 284ed2c368..02db86eab3 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -227,16 +227,11 @@ impl BackgroundTask for RegionReplacementDriver { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region replacement driver task started"); - let mut status = RegionReplacementDriverStatus::default(); self.drive_running_replacements_forward(opctx, &mut status).await; self.complete_done_replacements(opctx, &mut status).await; - info!(&log, "region replacement driver task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs index 4c66c166ff..77dc87c060 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -129,12 +129,6 @@ impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async move { - let log = &opctx.log; - info!( - &log, - "region snapshot replacement garbage collect task started", - ); - let mut status = RegionSnapshotReplacementGarbageCollectStatus::default(); @@ -144,11 +138,6 @@ impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { ) .await; - info!( - &log, - "region snapshot replacement garbage collect task done" - ); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs index 9bc66d48c8..1fdc17690d 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -232,9 +232,6 @@ impl BackgroundTask for RegionSnapshotReplacementDetector { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region snapshot replacement start task started"); - let mut status = RegionSnapshotReplacementStartStatus::default(); 
self.create_requests_for_region_snapshots_on_expunged_disks( @@ -249,8 +246,6 @@ impl BackgroundTask for RegionSnapshotReplacementDetector { ) .await; - info!(&log, "region snapshot replacement start task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs index 20a12d1127..f86bb1a782 100644 --- a/nexus/src/app/background/tasks/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -51,8 +51,9 @@ use omicron_common::{ use serde_json::json; use sled_agent_client::types::{ BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig, - EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV2, - RackNetworkConfigV2, RouteConfig as SledRouteConfig, UplinkAddressConfig, + EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, + LldpAdminStatus, LldpPortConfig, PortConfigV2, RackNetworkConfigV2, + RouteConfig as SledRouteConfig, UplinkAddressConfig, }; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -564,7 +565,7 @@ impl BackgroundTask for SwitchPortSettingsManager { if !bgp_announce_prefixes.contains_key(&bgp_config.bgp_announce_set_id) { let announcements = match self .datastore - .bgp_announce_list( + .bgp_announcement_list( opctx, ¶ms::BgpAnnounceSetSelector { name_or_id: bgp_config @@ -977,6 +978,7 @@ impl BackgroundTask for SwitchPortSettingsManager { destination: r.dst.into(), nexthop: r.gw.ip(), vlan_id: r.vid.map(|x| x.0), + local_pref: r.local_pref.map(|x| x.0), }) .collect(), switch: *location, @@ -992,7 +994,23 @@ impl BackgroundTask for SwitchPortSettingsManager { .map(|l| l.speed) .unwrap_or(SwitchLinkSpeed::Speed100G) .into(), - }; + lldp: info + .link_lldp + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 + .map(|c| LldpPortConfig { + status: match c.enabled { + true => LldpAdminStatus::Enabled, + false=> LldpAdminStatus::Disabled, + }, + port_id: c.link_name.clone(), + port_description: c.link_description.clone(), + chassis_id: c.chassis_id.clone(), + system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + management_addrs:c.management_ip.map(|a| vec![a.ip()]), + }) + } + ; for peer in port_config.bgp_peers.iter_mut() { peer.communities = match self @@ -1411,6 +1429,29 @@ fn uplinks( let PortSettingsChange::Apply(config) = change else { continue; }; + + let lldp = if config.link_lldp.is_empty() { + None + } else { + let x = &config.link_lldp[0]; + Some(LldpPortConfig { + status: if x.enabled { + LldpAdminStatus::Enabled + } else { + LldpAdminStatus::Disabled + }, + port_id: x.link_name.clone(), + port_description: x.link_description.clone(), + chassis_id: x.chassis_id.clone(), + system_name: x.system_name.clone(), + system_description: x.system_description.clone(), + management_addrs: x.management_ip.map(|a| { + let ip: oxnet::IpNet = a.into(); + vec![ip.addr()] + }), + }) + }; + let config = HostPortConfig { port: port.port_name.clone(), addrs: config @@ -1421,6 +1462,7 @@ fn uplinks( vlan_id: a.vlan_id.map(|v| v.into()), }) .collect(), + lldp, }; match uplinks.entry(*location) { @@ -1455,7 +1497,8 @@ fn build_sled_agent_clients( sled_agent_clients } -type SwitchStaticRoutes = HashSet<(Ipv4Addr, Prefix4, Option)>; +type SwitchStaticRoutes = + HashSet<(Ipv4Addr, Prefix4, Option, Option)>; fn static_routes_to_del( current_static_routes: HashMap, @@ -1471,10 +1514,11 @@ fn static_routes_to_del( // if it's on the switch but not 
desired (in our db), it should be removed let stale_routes = routes_on_switch .difference(routes_wanted) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1488,10 +1532,11 @@ fn static_routes_to_del( // if no desired routes are present, all routes on this switch should be deleted let stale_routes = routes_on_switch .iter() - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1538,10 +1583,11 @@ fn static_routes_to_add( }; let missing_routes = routes_wanted .difference(routes_on_switch) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1590,7 +1636,12 @@ fn static_routes_in_db( } IpAddr::V6(_) => continue, }; - routes.insert((nexthop, prefix, route.vid.map(|x| x.0))); + routes.insert(( + nexthop, + prefix, + route.vid.map(|x| x.0), + route.local_pref.map(|x| x.0), + )); } match routes_from_db.entry(*location) { @@ -1768,44 +1819,46 @@ async fn static_routes_on_switch<'a>( let mut routes_on_switch = HashMap::new(); for (location, client) in mgd_clients { - let static_routes: SwitchStaticRoutes = - match client.static_list_v4_routes().await { - Ok(routes) => { - let mut flattened = HashSet::new(); - for (destination, paths) in routes.iter() { - let Ok(dst) = destination.parse() else { - error!( - log, - "failed to parse static route destination: \ + let static_routes: SwitchStaticRoutes = match client + .static_list_v4_routes() + .await + { + Ok(routes) => { + let mut flattened = HashSet::new(); + for (destination, paths) in routes.iter() { + let Ok(dst) = destination.parse() else { + error!( + log, + "failed to parse static route destination: \ {destination}" - ); - continue; + ); + continue; + }; + for p in paths.iter() { + let nh = match p.nexthop { + IpAddr::V4(addr) => addr, + IpAddr::V6(addr) => { + error!( + log, + "ipv6 nexthops not supported: {addr}" + ); + continue; + } }; - for p in paths.iter() { - let nh = match p.nexthop { - IpAddr::V4(addr) => addr, - IpAddr::V6(addr) => { - error!( - log, - "ipv6 nexthops not supported: {addr}" - ); - continue; - } - }; - flattened.insert((nh, dst, p.vlan_id)); - } + flattened.insert((nh, dst, p.vlan_id, p.local_pref)); } - flattened } - Err(_) => { - error!( - &log, - "unable to retrieve routes from switch"; - "switch_location" => ?location, - ); - continue; - } - }; + flattened + } + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; routes_on_switch.insert(*location, static_routes); } routes_on_switch diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index 118011500a..31a0faa663 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -9,19 +9,20 @@ use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use nexus_db_queries::context::OpContext; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - self, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, CreateResult, - DeleteResult, ListResultVec, LookupResult, NameOrId, SwitchBgpHistory, + self, BgpExported, BgpImportedRouteIpv4, 
BgpMessageHistory, BgpPeerStatus, + CreateResult, DeleteResult, ListResultVec, LookupResult, NameOrId, + SwitchBgpHistory, }; use std::net::IpAddr; impl super::Nexus { - pub async fn bgp_config_set( + pub async fn bgp_config_create( &self, opctx: &OpContext, config: ¶ms::BgpConfigCreate, ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - let result = self.db_datastore.bgp_config_set(opctx, config).await?; + let result = self.db_datastore.bgp_config_create(opctx, config).await?; Ok(result) } @@ -68,13 +69,13 @@ impl super::Nexus { Ok(result) } - pub async fn bgp_announce_list( + pub async fn bgp_announce_set_list( &self, opctx: &OpContext, - sel: ¶ms::BgpAnnounceSetSelector, - ) -> ListResultVec { + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - self.db_datastore.bgp_announce_list(opctx, sel).await + self.db_datastore.bgp_announce_set_list(opctx, pagparams).await } pub async fn bgp_delete_announce_set( @@ -88,6 +89,15 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_announcement_list( + &self, + opctx: &OpContext, + sel: ¶ms::BgpAnnounceSetSelector, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + self.db_datastore.bgp_announcement_list(opctx, sel).await + } + pub async fn bgp_peer_status( &self, opctx: &OpContext, @@ -145,6 +155,74 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_exported( + &self, + opctx: &OpContext, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let mut result = BgpExported::default(); + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? { + let router_info = match client.read_routers().await { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get routers from {switch}: {e}" + ); + continue; + } + }; + for r in &router_info { + let asn = r.asn; + + let exported = match client + .get_exported(&mg_admin_client::types::AsnSelector { asn }) + .await + { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get exports for asn {asn} from {switch}: {e}" + ); + continue; + } + }; + for (addr, exports) in exported { + let mut xps = Vec::new(); + for ex in exports.iter() { + let net = match ex { + mg_admin_client::types::Prefix::V4(v4) => { + oxnet::Ipv4Net::new_unchecked( + v4.value, v4.length, + ) + } + mg_admin_client::types::Prefix::V6(v6) => { + let v6 = oxnet::IpNet::V6( + oxnet::Ipv6Net::new_unchecked( + v6.value, v6.length, + ), + ); + warn!( + self.log, + "{v6}: ipv6 exports not supported yet" + ); + continue; + } + }; + xps.push(net); + } + result.exports.insert(addr.to_string(), xps); + } + } + } + Ok(result) + } + pub async fn bgp_message_history( &self, opctx: &OpContext, diff --git a/nexus/src/app/external_dns.rs b/nexus/src/app/external_dns.rs index c6a8d833c2..4732146ce2 100644 --- a/nexus/src/app/external_dns.rs +++ b/nexus/src/app/external_dns.rs @@ -5,15 +5,15 @@ use std::net::IpAddr; use std::net::SocketAddr; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::DNS_PORT; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use 
trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; -/// Wrapper around trust-dns-resolver to provide name resolution +/// Wrapper around hickory-resolver to provide name resolution /// using a given set of DNS servers for use with reqwest. pub struct Resolver(TokioAsyncResolver); @@ -26,18 +26,17 @@ impl Resolver { socket_addr: SocketAddr::new(*addr, DNS_PORT), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; // Do as many requests in parallel as we have configured servers opts.num_concurrent_reqs = dns_servers.len(); - Resolver( - TokioAsyncResolver::tokio(rc, opts) - .expect("creating resovler shouldn't fail"), - ) + Resolver(TokioAsyncResolver::tokio(rc, opts)) } } @@ -48,7 +47,7 @@ impl reqwest::dns::Resolve for Resolver { let ips = resolver.lookup_ip(name.as_str()).await?; let addrs = ips .into_iter() - // trust-dns-resolver returns `IpAddr`s but reqwest wants + // hickory-resolver returns `IpAddr`s but reqwest wants // `SocketAddr`s (useful if you have a custom resolver that // returns a scoped IPv6 address). The port provided here // is ignored in favour of the scheme default (http/80, diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 3728a3bdc1..3a6e7e27be 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -14,7 +14,7 @@ use nexus_db_queries::{ }; use omicron_common::api::external::{Error, InternalContext}; use oximeter_db::{ - oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, + Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, }; use std::num::NonZeroU32; @@ -138,7 +138,7 @@ impl super::Nexus { &self, opctx: &OpContext, query: impl AsRef, - ) -> Result, Error> { + ) -> Result, Error> { // Must be a fleet user to list timeseries schema. 
// // TODO-security: We need to figure out how to implement proper security diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 13b30fd47a..f3c0031327 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::BgpAnnounceSetCreate; use nexus_types::external_api::params::BgpAnnouncementCreate; use nexus_types::external_api::params::BgpConfigCreate; use nexus_types::external_api::params::LinkConfigCreate; -use nexus_types::external_api::params::LldpServiceConfigCreate; +use nexus_types::external_api::params::LldpLinkConfigCreate; use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; @@ -61,6 +61,7 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use oxnet::IpNet; @@ -241,22 +242,44 @@ impl super::Nexus { .internal_context( "fetching cockroachdb settings for rack initialization", )?; - self.datastore() - .cockroachdb_setting_set_string( - opctx, - cockroachdb_settings.state_fingerprint.clone(), - "cluster.preserve_downgrade_option", - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(), - ) - .await - .internal_context( - "setting `cluster.preserve_downgrade_option` \ - for rack initialization", - )?; + blueprint.cockroachdb_setting_preserve_downgrade = + if cockroachdb_settings.preserve_downgrade.is_empty() { + // Set the option to the current policy in both the database and + // the blueprint. + self.datastore() + .cockroachdb_setting_set_string( + opctx, + cockroachdb_settings.state_fingerprint.clone(), + "cluster.preserve_downgrade_option", + CockroachDbClusterVersion::NEWLY_INITIALIZED + .to_string(), + ) + .await + .internal_context( + "setting `cluster.preserve_downgrade_option` \ + for rack initialization", + )?; + CockroachDbClusterVersion::NEWLY_INITIALIZED + } else { + // `cluster.preserve_downgrade_option` is set, so fill in the + // blueprint with the current value. This branch should never + // be hit during normal rack initialization; it's here for + // eventual test cases where `cluster.preserve_downgrade_option` + // is set by a test harness prior to rack initialization. + CockroachDbClusterVersion::from_str( + &cockroachdb_settings.preserve_downgrade, + ) + .map_err(|_| { + Error::internal_error(&format!( + "database has `cluster.preserve_downgrade_option` \ + set to invalid version {}", + cockroachdb_settings.preserve_downgrade + )) + })? + } + .into(); blueprint.cockroachdb_fingerprint = cockroachdb_settings.state_fingerprint; - blueprint.cockroachdb_setting_preserve_downgrade = - CockroachDbClusterVersion::NEWLY_INITIALIZED.into(); // Administrators of the Recovery Silo are automatically made // administrators of the Fleet. 
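Rack initialization above now derives `cockroachdb_setting_preserve_downgrade` from whatever is already stored in CockroachDB instead of unconditionally writing `NEWLY_INITIALIZED`. A condensed sketch of that branch, with invented stand-in types (`ClusterVersion`, the version strings, and the return shape are illustrative only, not the real Nexus types):

```rust
// Sketch only: if the setting is unset we pick the newly-initialized
// version (and the real code also writes it back to CockroachDB); if a
// test harness already set it, we parse and mirror that value into the
// blueprint instead of overwriting it.
use std::str::FromStr;

#[derive(Debug, Clone, Copy, PartialEq)]
enum ClusterVersion {
    V221,
    V222,
}

impl FromStr for ClusterVersion {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "22.1" => Ok(ClusterVersion::V221),
            "22.2" => Ok(ClusterVersion::V222),
            other => Err(format!("unrecognized cluster version {other:?}")),
        }
    }
}

const NEWLY_INITIALIZED: ClusterVersion = ClusterVersion::V222;

/// Returns the version to record in the blueprint and whether the database
/// setting still needs to be written.
fn preserve_downgrade_for_blueprint(
    current_setting: &str,
) -> Result<(ClusterVersion, bool), String> {
    if current_setting.is_empty() {
        // Normal rack-init path: nothing is set yet.
        Ok((NEWLY_INITIALIZED, true))
    } else {
        // Already set (e.g. by a test harness); mirror it, don't clobber it.
        Ok((ClusterVersion::from_str(current_setting)?, false))
    }
}

fn main() {
    assert_eq!(
        preserve_downgrade_for_blueprint("").unwrap(),
        (NEWLY_INITIALIZED, true)
    );
    assert_eq!(
        preserve_downgrade_for_blueprint("22.1").unwrap(),
        (ClusterVersion::V221, false)
    );
}
```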
@@ -487,7 +510,7 @@ impl super::Nexus { match self .db_datastore - .bgp_config_set( + .bgp_config_create( &opctx, &BgpConfigCreate { identity: IdentityMetadataCreateParams { @@ -570,6 +593,7 @@ impl super::Nexus { dst: r.destination, gw: r.nexthop, vid: r.vlan_id, + local_pref: r.local_pref, }) .collect(); @@ -608,15 +632,30 @@ impl super::Nexus { .bgp_peers .insert("phy0".to_string(), BgpPeerConfig { peers }); - let link = LinkConfigCreate { - mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 - lldp: LldpServiceConfigCreate { + let lldp = match &uplink_config.lldp { + None => LldpLinkConfigCreate { enabled: false, - lldp_config: None, + ..Default::default() }, + Some(l) => LldpLinkConfigCreate { + enabled: l.status == LldpAdminStatus::Enabled, + link_name: l.port_id.clone(), + link_description: l.port_description.clone(), + chassis_id: l.chassis_id.clone(), + system_name: l.system_name.clone(), + system_description: l.system_description.clone(), + management_ip: match &l.management_addrs { + Some(a) if !a.is_empty() => Some(a[0]), + _ => None, + }, + }, + }; + let link = LinkConfigCreate { + mtu: 1500, //TODO https://github.com/oxidecomputer/omicron/issues/2274 fec: uplink_config.uplink_port_fec.into(), speed: uplink_config.uplink_port_speed.into(), autoneg: uplink_config.autoneg, + lldp, }; port_settings_params.links.insert("phy".to_string(), link); diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 5bc69946ad..975df7fc3b 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -469,6 +469,10 @@ impl super::Nexus { // We don't need the handle that runnable_saga.start() returns because // we're not going to wait for the saga to finish here. let _ = runnable_saga.start().await?; + + let mut demo_sagas = self.demo_sagas()?; + demo_sagas.preregister(demo_saga_id); + Ok(DemoSaga { saga_id, demo_saga_id }) } diff --git a/nexus/src/app/sagas/demo.rs b/nexus/src/app/sagas/demo.rs index 4a8eda8b80..d76a48688d 100644 --- a/nexus/src/app/sagas/demo.rs +++ b/nexus/src/app/sagas/demo.rs @@ -21,56 +21,66 @@ use super::NexusActionContext; use super::{ActionRegistry, NexusSaga, SagaInitError}; use crate::app::sagas::declare_saga_actions; -use anyhow::ensure; +use anyhow::Context; use omicron_common::api::external::Error; use omicron_uuid_kinds::DemoSagaUuid; use serde::Deserialize; use serde::Serialize; use slog::info; use std::collections::BTreeMap; +use std::future::Future; +use std::sync::Arc; use steno::ActionError; -use tokio::sync::oneshot; +use tokio::sync::Semaphore; -/// Set of demo sagas that have been marked completed +/// Rendezvous point for demo sagas /// -/// Nexus maintains one of these at the top level. Individual demo sagas wait -/// until their id shows up here, then remove it and proceed. +/// This is where: +/// +/// - demo sagas wait for a completion message +/// - completion messages are recorded for demo sagas that haven't started +/// waiting yet +/// +/// Nexus maintains one of these structures at the top level. pub struct CompletingDemoSagas { - ids: BTreeMap>, + sagas: BTreeMap>, } impl CompletingDemoSagas { pub fn new() -> CompletingDemoSagas { - CompletingDemoSagas { ids: BTreeMap::new() } + CompletingDemoSagas { sagas: BTreeMap::new() } } - pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { - self.ids - .remove(&id) - .ok_or_else(|| { - Error::non_resourcetype_not_found(format!( - "demo saga with id {:?}", - id - )) - })? 
- .send(()) - .map_err(|_| { - Error::internal_error( - "saga stopped listening (Nexus shutting down?)", - ) - }) + pub fn preregister(&mut self, id: DemoSagaUuid) { + assert!(self.sagas.insert(id, Arc::new(Semaphore::new(0))).is_none()); } pub fn subscribe( &mut self, id: DemoSagaUuid, - ) -> Result, anyhow::Error> { - let (tx, rx) = oneshot::channel(); - ensure!( - self.ids.insert(id, tx).is_none(), - "multiple subscriptions for the same demo saga" - ); - Ok(rx) + ) -> impl Future> { + let sem = + self.sagas.entry(id).or_insert_with(|| Arc::new(Semaphore::new(0))); + let sem_clone = sem.clone(); + async move { + sem_clone + .acquire() + .await + // We don't need the Semaphore permit once we've acquired it. + .map(|_| ()) + .context("acquiring demo saga semaphore") + } + } + + pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { + let sem = self.sagas.get_mut(&id).ok_or_else(|| { + Error::non_resourcetype_not_found(format!( + "demo saga with demo saga id {:?}", + id + )) + })?; + sem.add_permits(1); + Ok(()) } } @@ -115,21 +125,87 @@ async fn demo_wait(sagactx: NexusActionContext) -> Result<(), ActionError> { .nexus() .demo_sagas() .map_err(ActionError::action_failed)?; - demo_sagas.subscribe(demo_id).map_err(|e| { - ActionError::action_failed(Error::internal_error(&format!( - "demo saga subscribe failed: {:#}", - e - ))) - })? + demo_sagas.subscribe(demo_id) }; match rx.await { Ok(_) => { info!(log, "demo saga: completing"; "id" => %demo_id); + Ok(()) } - Err(_) => { - info!(log, "demo saga: waiting failed (Nexus shutting down?)"; - "id" => %demo_id); + Err(error) => { + warn!(log, "demo saga: waiting failed (Nexus shutting down?)"; + "id" => %demo_id, + "error" => #?error, + ); + Err(ActionError::action_failed(Error::internal_error(&format!( + "demo saga wait failed: {:#}", + error + )))) } } - Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use assert_matches::assert_matches; + + #[tokio::test] + async fn test_demo_saga_rendezvous() { + let mut hub = CompletingDemoSagas::new(); + + // The most straightforward sequence is: + // - create (preregister) demo saga + // - demo saga starts and waits for completion (subscribe) + // - complete demo saga + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible that the completion request arrives before the + // saga started waiting. In that case, the sequence is: + // + // - create (preregister) demo saga + // - complete demo saga + // - demo saga starts and waits for completion (subscribe) + // + // This should work, too, with no errors. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible to have no preregistration at all. This happens + // if the demo saga was recovered. 
That's fine, too, but then it will + // only work if the completion arrives after the saga starts waiting. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // If there's no preregistration and we get a completion request, then + // that request should fail. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let error = hub.complete(demo_saga_id).unwrap_err(); + assert_matches!(error, Error::NotFound { .. }); + println!("demo saga: {demo_saga_id} complete error: {:#}", error); + } } diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index 9726a59d33..b616531f53 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -30,6 +30,7 @@ impl super::Nexus { params: params::SwitchPortSettingsCreate, ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + Self::switch_port_settings_validate(¶ms)?; //TODO race conditions on exists check versus update/create. // Normally I would use a DB lock here, but not sure what @@ -54,6 +55,36 @@ impl super::Nexus { } } + // TODO: more validation wanted + fn switch_port_settings_validate( + params: ¶ms::SwitchPortSettingsCreate, + ) -> CreateResult<()> { + for x in params.bgp_peers.values() { + for p in x.peers.iter() { + if let Some(ref key) = p.md5_auth_key { + if key.len() > 80 { + return Err(Error::invalid_value( + "md5_auth_key", + format!("md5 auth key for {} is longer than 80 characters", p.addr) + )); + } + for c in key.chars() { + if !c.is_ascii() || c.is_ascii_control() { + return Err(Error::invalid_value( + "md5_auth_key", + format!( + "md5 auth key for {} must be printable ascii", + p.addr + ), + )); + } + } + } + } + } + Ok(()) + } + pub async fn switch_port_settings_create( self: &Arc, opctx: &OpContext, diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index fb0a47bbea..2169b631a7 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -35,15 +35,13 @@ use nexus_db_model::AuthenticationMode; use nexus_db_queries::authn::silos::IdentityProviderType; use nexus_db_queries::context::OpContext; use nexus_db_queries::{ - authn::external::{ - cookies::Cookies, - session_cookie::{ - clear_session_cookie_header_value, session_cookie_header_value, - SessionStore, SESSION_COOKIE_COOKIE_NAME, - }, + authn::external::session_cookie::{ + clear_session_cookie_header_value, session_cookie_header_value, + SessionStore, SESSION_COOKIE_COOKIE_NAME, }, db::identity::Asset, }; +use nexus_types::authn::cookies::Cookies; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 8e8b63229b..e11256f06e 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -41,6 +41,7 @@ use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; use nexus_types::external_api::shared::{BfdStatus, ProbeInfo}; +use 
omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; @@ -55,9 +56,11 @@ use omicron_common::api::external::http_pagination::ScanParams; use omicron_common::api::external::AddressLot; use omicron_common::api::external::AddressLotBlock; use omicron_common::api::external::AddressLotCreateResponse; +use omicron_common::api::external::AggregateBgpMessageHistory; use omicron_common::api::external::BgpAnnounceSet; use omicron_common::api::external::BgpAnnouncement; use omicron_common::api::external::BgpConfig; +use omicron_common::api::external::BgpExported; use omicron_common::api::external::BgpImportedRouteIpv4; use omicron_common::api::external::BgpPeerStatus; use omicron_common::api::external::DataPageParams; @@ -78,9 +81,6 @@ use omicron_common::api::external::TufRepoGetResponse; use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; -use omicron_common::api::external::{ - http_pagination::data_page_params_for, AggregateBgpMessageHistory, -}; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; use parse_display::Display; @@ -277,6 +277,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_config_create)?; api.register(networking_bgp_config_list)?; api.register(networking_bgp_status)?; + api.register(networking_bgp_exported)?; api.register(networking_bgp_imported_routes_ipv4)?; api.register(networking_bgp_config_delete)?; api.register(networking_bgp_announce_set_update)?; @@ -284,6 +285,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_announce_set_delete)?; api.register(networking_bgp_message_history)?; + api.register(networking_bgp_announcement_list)?; + api.register(networking_bfd_enable)?; api.register(networking_bfd_disable)?; api.register(networking_bfd_status)?; @@ -3865,7 +3868,7 @@ async fn networking_bgp_config_create( let nexus = &apictx.context.nexus; let config = config.into_inner(); let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let result = nexus.bgp_config_set(&opctx, &config).await?; + let result = nexus.bgp_config_create(&opctx, &config).await?; Ok(HttpResponseCreated::(result.into())) }; apictx @@ -3937,6 +3940,30 @@ async fn networking_bgp_status( .await } +//TODO pagination? the normal by-name/by-id stuff does not work here +/// Get BGP exported routes +#[endpoint { + method = GET, + path = "/v1/system/networking/bgp-exported", + tags = ["system/networking"], +}] +async fn networking_bgp_exported( + rqctx: RequestContext, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let handler = async { + let nexus = &apictx.context.nexus; + let result = nexus.bgp_exported(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + /// Get BGP router message history #[endpoint { method = GET, @@ -4019,7 +4046,7 @@ async fn networking_bgp_config_delete( /// set with the one specified. 
#[endpoint { method = PUT, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set", tags = ["system/networking"], }] async fn networking_bgp_announce_set_update( @@ -4041,24 +4068,28 @@ async fn networking_bgp_announce_set_update( .await } -//TODO pagination? the normal by-name/by-id stuff does not work here -/// Get originated routes for a BGP configuration +/// List BGP announce sets #[endpoint { method = GET, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set", tags = ["system/networking"], }] async fn networking_bgp_announce_set_list( rqctx: RequestContext, - query_params: Query, -) -> Result>, HttpError> { + query_params: Query< + PaginatedByNameOrId, + >, +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; - let sel = query_params.into_inner(); + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let result = nexus - .bgp_announce_list(&opctx, &sel) + .bgp_announce_set_list(&opctx, &paginated_by) .await? .into_iter() .map(|p| p.into()) @@ -4075,17 +4106,17 @@ async fn networking_bgp_announce_set_list( /// Delete BGP announce set #[endpoint { method = DELETE, - path = "/v1/system/networking/bgp-announce", + path = "/v1/system/networking/bgp-announce-set/{name_or_id}", tags = ["system/networking"], }] async fn networking_bgp_announce_set_delete( rqctx: RequestContext, - selector: Query, + path_params: Path, ) -> Result { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; - let sel = selector.into_inner(); + let sel = path_params.into_inner(); let opctx = crate::context::op_context_for_external_api(&rqctx).await?; nexus.bgp_delete_announce_set(&opctx, &sel).await?; Ok(HttpResponseUpdatedNoContent {}) @@ -4097,6 +4128,40 @@ async fn networking_bgp_announce_set_delete( .await } +// TODO: is pagination necessary here? How large do we expect the list of +// announcements to become in real usage? +/// Get originated routes for a specified BGP announce set +#[endpoint { + method = GET, + path = "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement", + tags = ["system/networking"], +}] +async fn networking_bgp_announcement_list( + rqctx: RequestContext, + path_params: Path, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let sel = path_params.into_inner(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + + let result = nexus + .bgp_announcement_list(&opctx, &sel) + .await? 
+ .into_iter() + .map(|p| p.into()) + .collect(); + + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + /// Enable a BFD session #[endpoint { method = POST, @@ -6386,7 +6451,7 @@ async fn timeseries_schema_list( async fn timeseries_query( rqctx: RequestContext, body: TypedBody, -) -> Result>, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; @@ -6395,7 +6460,7 @@ async fn timeseries_query( nexus .timeseries_query(&opctx, &query) .await - .map(HttpResponseOk) + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) .map_err(HttpError::from) }; apictx diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index a883bc83c5..50110ecaca 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -46,7 +46,7 @@ sled-agent-client.workspace = true slog.workspace = true tokio.workspace = true tokio-util.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 7c190974a1..acee46ce10 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -4,6 +4,7 @@ //! Integration testing facilities for Nexus +#[cfg(feature = "omicron-dev")] use anyhow::Context; use anyhow::Result; use camino::Utf8Path; @@ -17,6 +18,11 @@ use dropshot::HandlerTaskMode; use futures::future::BoxFuture; use futures::FutureExt; use gateway_test_utils::setup::GatewayTestContext; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -73,11 +79,6 @@ use std::collections::HashMap; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; use uuid::Uuid; pub use sim::TEST_HARDWARE_THREADS; @@ -1427,6 +1428,7 @@ pub async fn start_oximeter( address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, + replicated: false, }; let config = oximeter_collector::Config { nexus_address: Some(nexus_address), @@ -1586,12 +1588,12 @@ pub async fn start_dns_server( socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); - let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + let mut resolver_opts = ResolverOpts::default(); + resolver_opts.edns0 = true; + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok((dns_server, http_server, resolver)) } diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 9097082a20..9703004c73 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -573,7 +573,7 @@ pub static DEMO_BGP_CONFIG: Lazy = shaper: None, }); pub const 
DEMO_BGP_ANNOUNCE_SET_URL: &'static str = - "/v1/system/networking/bgp-announce?name_or_id=a-bag-of-addrs"; + "/v1/system/networking/bgp-announce-set"; pub static DEMO_BGP_ANNOUNCE: Lazy = Lazy::new(|| params::BgpAnnounceSetCreate { identity: IdentityMetadataCreateParams { @@ -585,8 +585,14 @@ pub static DEMO_BGP_ANNOUNCE: Lazy = network: "10.0.0.0/16".parse().unwrap(), }], }); +pub const DEMO_BGP_ANNOUNCE_SET_DELETE_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs"; +pub const DEMO_BGP_ANNOUNCEMENT_URL: &'static str = + "/v1/system/networking/bgp-announce-set/a-bag-of-addrs/announcement"; pub const DEMO_BGP_STATUS_URL: &'static str = "/v1/system/networking/bgp-status"; +pub const DEMO_BGP_EXPORTED_URL: &'static str = + "/v1/system/networking/bgp-exported"; pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = "/v1/system/networking/bgp-routes-ipv4?asn=47"; pub const DEMO_BGP_MESSAGE_HISTORY_URL: &'static str = @@ -2272,6 +2278,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::GetNonexistent ], }, + VerifyEndpoint { url: &DEMO_BGP_CONFIG_CREATE_URL, visibility: Visibility::Public, @@ -2293,11 +2300,28 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Put( serde_json::to_value(&*DEMO_BGP_ANNOUNCE).unwrap(), ), - AllowedMethod::GetNonexistent, + AllowedMethod::Get, + ], + }, + + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCE_SET_DELETE_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ AllowedMethod::Delete ], }, + VerifyEndpoint { + url: &DEMO_BGP_ANNOUNCEMENT_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_STATUS_URL, visibility: Visibility::Public, @@ -2307,6 +2331,15 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_BGP_EXPORTED_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_ROUTES_IPV4_URL, visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 9cfa0350e8..9f4652c2da 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -19,11 +19,15 @@ use nexus_test_utils::resource_helpers::{ }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views::OxqlQueryResult; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use oximeter::types::Datum; +use oximeter::types::FieldValue; use oximeter::types::Measurement; use oximeter::TimeseriesSchema; +use std::borrow::Borrow; +use std::collections::HashMap; use uuid::Uuid; pub async fn query_for_metrics( @@ -284,7 +288,7 @@ async fn test_timeseries_schema_list( pub async fn timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, -) -> Vec { +) -> Vec { // first, make sure the latest timeseries have been collected. 
cptestctx.oximeter.force_collect().await; @@ -307,12 +311,14 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body().unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ query: {query}\nresponse: {rsp:#?}" - ); - }) + ); + }) + .tables } #[nexus_test] @@ -341,7 +347,6 @@ async fn test_instance_watcher_metrics( ); }}; } - use oximeter::types::FieldValue; const INSTANCE_ID_FIELD: &str = "instance_id"; const STATE_FIELD: &str = "state"; const STATE_STARTING: &str = "starting"; @@ -429,11 +434,11 @@ async fn test_instance_watcher_metrics( #[track_caller] fn count_state( - table: &oximeter_db::oxql::Table, + table: &oxql_types::Table, instance_id: InstanceUuid, state: &'static str, ) -> i64 { - use oximeter_db::oxql::point::ValueArray; + use oxql_types::point::ValueArray; let uuid = FieldValue::Uuid(instance_id.into_untyped_uuid()); let state = FieldValue::String(state.into()); let mut timeserieses = table.timeseries().filter(|ts| { @@ -586,6 +591,183 @@ async fn test_instance_watcher_metrics( assert_gte!(ts2_running, 2); } +#[nexus_test] +async fn test_mgs_metrics( + cptestctx: &ControlPlaneTestContext, +) { + // Make a MGS + let (mut mgs_config, sp_sim_config) = + gateway_test_utils::setup::load_test_config(); + let mgs = { + // munge the already-parsed MGS config file to point it at the test + // Nexus' address. + mgs_config.metrics = Some(gateway_test_utils::setup::MetricsConfig { + disabled: false, + dev_bind_loopback: true, + dev_nexus_address: Some(cptestctx.internal_client.bind_address), + }); + gateway_test_utils::setup::test_setup_with_config( + "test_mgs_metrics", + gateway_messages::SpPort::One, + mgs_config, + &sp_sim_config, + None, + ) + .await + }; + + // Let's look at all the simulated SP components in the config file which + // have sensor readings, so we can assert that there are timeseries for all + // of them. + let all_sp_configs = { + let gimlet_configs = + sp_sim_config.simulated_sps.gimlet.iter().map(|g| &g.common); + let sidecar_configs = + sp_sim_config.simulated_sps.sidecar.iter().map(|s| &s.common); + gimlet_configs.chain(sidecar_configs) + }; + // XXX(eliza): yes, this code is repetitive. We could probably make it a + // little elss ugly with nested hash maps, but like...I already wrote it, so + // you don't have to. :) + // + // TODO(eliza): presently, we just expect that the number of timeseries for + // each serial number and sensor type lines up. If we wanted to be *really* + // fancy, we could also assert that all the component IDs, component kinds, + // and measurement values line up with the config. But, honestly, it's + // pretty unlikely that a bug in MGS' sensor metrics subsystem would mess + // that up --- the most important thing is just to make sure that the sensor + // data is *present*, as that should catch most regressions. 
+ let mut temp_sensors = HashMap::new(); + let mut current_sensors = HashMap::new(); + let mut voltage_sensors = HashMap::new(); + let mut power_sensors = HashMap::new(); + let mut input_voltage_sensors = HashMap::new(); + let mut input_current_sensors = HashMap::new(); + let mut fan_speed_sensors = HashMap::new(); + for sp in all_sp_configs { + let mut temp = 0; + let mut current = 0; + let mut voltage = 0; + let mut input_voltage = 0; + let mut input_current = 0; + let mut power = 0; + let mut speed = 0; + for component in &sp.components { + for sensor in &component.sensors { + use gateway_messages::measurement::MeasurementKind as Kind; + match sensor.def.kind { + Kind::Temperature => temp += 1, + Kind::Current => current += 1, + Kind::Voltage => voltage += 1, + Kind::InputVoltage => input_voltage += 1, + Kind::InputCurrent => input_current += 1, + Kind::Speed => speed += 1, + Kind::Power => power += 1, + } + } + } + temp_sensors.insert(sp.serial_number.clone(), temp); + current_sensors.insert(sp.serial_number.clone(), current); + voltage_sensors.insert(sp.serial_number.clone(), voltage); + input_voltage_sensors.insert(sp.serial_number.clone(), input_voltage); + input_current_sensors.insert(sp.serial_number.clone(), input_current); + fan_speed_sensors.insert(sp.serial_number.clone(), speed); + power_sensors.insert(sp.serial_number.clone(), power); + } + + async fn check_all_timeseries_present( + cptestctx: &ControlPlaneTestContext, + name: &str, + expected: HashMap, + ) { + let metric_name = format!("hardware_component:{name}"); + eprintln!("\n=== checking timeseries for {metric_name} ===\n"); + + if expected.values().all(|&v| v == 0) { + eprintln!( + "-> SP sim config contains no {name} sensors, skipping it" + ); + return; + } + + let table = timeseries_query(&cptestctx, &format!("get {metric_name}")) + .await + .into_iter() + .find(|t| t.name() == metric_name); + let table = match table { + Some(table) => table, + None => panic!("missing table for {metric_name}"), + }; + + let mut found = expected + .keys() + .map(|serial| (serial.clone(), 0)) + .collect::>(); + for timeseries in table.timeseries() { + let fields = &timeseries.fields; + let n_points = timeseries.points.len(); + assert!( + n_points > 0, + "{metric_name} timeseries {fields:?} should have points" + ); + let serial_str: &str = match timeseries.fields.get("chassis_serial") + { + Some(FieldValue::String(s)) => s.borrow(), + Some(x) => panic!( + "{metric_name} `chassis_serial` field should be a string, but got: {x:?}" + ), + None => { + panic!("{metric_name} timeseries should have a `chassis_serial` field") + } + }; + if let Some(count) = found.get_mut(serial_str) { + *count += 1; + } else { + panic!( + "{metric_name} timeseries had an unexpected chassis serial \ + number {serial_str:?} (not in the config file)", + ); + } + } + + eprintln!("-> {metric_name}: found timeseries: {found:#?}"); + assert_eq!( + found, expected, + "number of {metric_name} timeseries didn't match expected in {table:#?}", + ); + eprintln!("-> okay, looks good!"); + } + + // Wait until the MGS registers as a producer with Oximeter. + wait_for_producer(&cptestctx.oximeter, &mgs.gateway_id).await; + + // ...and collect its samples. 
+ cptestctx.oximeter.force_collect().await; + + check_all_timeseries_present(&cptestctx, "temperature", temp_sensors).await; + check_all_timeseries_present(&cptestctx, "voltage", voltage_sensors).await; + check_all_timeseries_present(&cptestctx, "current", current_sensors).await; + check_all_timeseries_present(&cptestctx, "power", power_sensors).await; + check_all_timeseries_present( + &cptestctx, + "input_voltage", + input_voltage_sensors, + ) + .await; + check_all_timeseries_present( + &cptestctx, + "input_current", + input_current_sensors, + ) + .await; + check_all_timeseries_present(&cptestctx, "fan_speed", fan_speed_sensors) + .await; + + // Because the `ControlPlaneTestContext` isn't managing the MGS we made for + // this test, we are responsible for removing its logs. + mgs.logctx.cleanup_successful(); +} + /// Wait until a producer is registered with Oximeter. /// /// This blocks until the producer is registered, for up to 60s. It panics if diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 2c861ff159..0de4d31395 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -37,6 +37,7 @@ use std::fmt::Write; use std::str::FromStr; use base64::Engine; +use hickory_resolver::error::ResolveErrorKind; use http::method::Method; use http::StatusCode; use httptest::{matchers::*, responders::*, Expectation, Server}; @@ -44,7 +45,6 @@ use nexus_types::external_api::shared::{FleetRole, SiloRole}; use std::convert::Infallible; use std::net::Ipv4Addr; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; type ControlPlaneTestContext = @@ -2164,7 +2164,7 @@ pub async fn verify_silo_dns_name( .await { Ok(result) => { - let addrs: Vec<_> = result.iter().collect(); + let addrs: Vec<_> = result.iter().map(|a| &a.0).collect(); if addrs.is_empty() { false } else { diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs index 8314d22173..6e482bc1ad 100644 --- a/nexus/tests/integration_tests/sp_updater.rs +++ b/nexus/tests/integration_tests/sp_updater.rs @@ -434,9 +434,23 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() { #[tokio::test] async fn test_sp_updater_delivers_progress() { // Start MGS + Sim SP. - let mgstestctx = - mgs_setup::test_setup("test_sp_updater_delivers_progress", SpPort::One) - .await; + let mgstestctx = { + let (mut mgs_config, sp_sim_config) = mgs_setup::load_test_config(); + // Enabling SP metrics collection makes this alread-flaky test even + // flakier, so let's just turn it off. + // TODO(eliza): it would be nice if we didn't have to disable metrics in + // this test, so that we can better catch regressions that could be + // introduced by the metrics subsystem... + mgs_config.metrics.get_or_insert_with(Default::default).disabled = true; + mgs_setup::test_setup_with_config( + "test_sp_updater_delivers_progress", + SpPort::One, + mgs_config, + &sp_sim_config, + None, + ) + .await + }; // Configure an MGS client. 
let mut mgs_clients = diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index 0b71ddb2cf..92c44eddad 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -11,9 +11,9 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params::{ Address, AddressConfig, AddressLotBlockCreate, AddressLotCreate, BgpAnnounceSetCreate, BgpAnnouncementCreate, BgpConfigCreate, - BgpPeerConfig, LinkConfigCreate, LldpServiceConfigCreate, Route, - RouteConfig, SwitchInterfaceConfigCreate, SwitchInterfaceKind, - SwitchPortApplySettings, SwitchPortSettingsCreate, + BgpPeerConfig, LinkConfigCreate, LldpLinkConfigCreate, Route, RouteConfig, + SwitchInterfaceConfigCreate, SwitchInterfaceKind, SwitchPortApplySettings, + SwitchPortSettingsCreate, }; use nexus_types::external_api::views::Rack; use omicron_common::api::external::ImportExportPolicy; @@ -76,7 +76,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { NexusRequest::objects_post( client, - "/v1/system/networking/bgp-announce", + "/v1/system/networking/bgp-announce-set", &announce_set, ) .authn_as(AuthnMode::PrivilegedUser) @@ -118,7 +118,15 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { "phy0".into(), LinkConfigCreate { mtu: 4700, - lldp: LldpServiceConfigCreate { enabled: false, lldp_config: None }, + lldp: LldpLinkConfigCreate { + enabled: true, + link_name: Some("Link Name".into()), + link_description: Some("Link Description".into()), + chassis_id: Some("Chassis ID".into()), + system_name: Some("System Name".into()), + system_description: Some("System Description".into()), + management_ip: None, + }, fec: LinkFec::None, speed: LinkSpeed::Speed100G, autoneg: false, @@ -140,6 +148,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { dst: "1.2.3.0/24".parse().unwrap(), gw: "1.2.3.4".parse().unwrap(), vid: None, + local_pref: None, }], }, ); @@ -176,8 +185,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { assert_eq!(link0.mtu, 4700); let lldp0 = &created.link_lldp[0]; - assert_eq!(lldp0.enabled, false); - assert_eq!(lldp0.lldp_config_id, None); + assert_eq!(lldp0.enabled, true); + assert_eq!(lldp0.link_name, Some("Link Name".to_string())); + assert_eq!(lldp0.link_description, Some("Link Description".to_string())); + assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string())); + assert_eq!(lldp0.system_name, Some("System Name".to_string())); + assert_eq!( + lldp0.system_description, + Some("System Description".to_string()) + ); + assert_eq!(lldp0.management_ip, None); let ifx0 = &created.interfaces[0]; assert_eq!(&ifx0.interface_name, "phy0"); @@ -212,8 +229,16 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { assert_eq!(link0.mtu, 4700); let lldp0 = &roundtrip.link_lldp[0]; - assert_eq!(lldp0.enabled, false); - assert_eq!(lldp0.lldp_config_id, None); + assert_eq!(lldp0.enabled, true); + assert_eq!(lldp0.link_name, Some("Link Name".to_string())); + assert_eq!(lldp0.link_description, Some("Link Description".to_string())); + assert_eq!(lldp0.chassis_id, Some("Chassis ID".to_string())); + assert_eq!(lldp0.system_name, Some("System Name".to_string())); + assert_eq!( + lldp0.system_description, + Some("System Description".to_string()) + ); + assert_eq!(lldp0.management_ip, None); let ifx0 = &roundtrip.interfaces[0]; assert_eq!(&ifx0.interface_name, "phy0"); diff --git a/nexus/tests/output/nexus_tags.txt 
b/nexus/tests/output/nexus_tags.txt index 340d72569b..bde11e2de3 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -178,12 +178,14 @@ networking_allow_list_view GET /v1/system/networking/allow-li networking_bfd_disable POST /v1/system/networking/bfd-disable networking_bfd_enable POST /v1/system/networking/bfd-enable networking_bfd_status GET /v1/system/networking/bfd-status -networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce -networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce -networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce +networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce-set/{name_or_id} +networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce-set +networking_bgp_announce_set_update PUT /v1/system/networking/bgp-announce-set +networking_bgp_announcement_list GET /v1/system/networking/bgp-announce-set/{name_or_id}/announcement networking_bgp_config_create POST /v1/system/networking/bgp networking_bgp_config_delete DELETE /v1/system/networking/bgp networking_bgp_config_list GET /v1/system/networking/bgp +networking_bgp_exported GET /v1/system/networking/bgp-exported networking_bgp_imported_routes_ipv4 GET /v1/system/networking/bgp-routes-ipv4 networking_bgp_message_history GET /v1/system/networking/bgp-message-history networking_bgp_status GET /v1/system/networking/bgp-status diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index a4418d2a74..124f0d42c9 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,16 +9,22 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true chrono.workspace = true clap.workspace = true +cookie.workspace = true base64.workspace = true derive-where.workspace = true derive_more.workspace = true +dropshot.workspace = true futures.workspace = true +http.workspace = true humantime.workspace = true ipnetwork.workspace = true +newtype_derive.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true +oxql-types.workspace = true oxnet.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } diff --git a/nexus/auth/src/authn/external/cookies.rs b/nexus/types/src/authn/cookies.rs similarity index 100% rename from nexus/auth/src/authn/external/cookies.rs rename to nexus/types/src/authn/cookies.rs diff --git a/nexus/types/src/authn/mod.rs b/nexus/types/src/authn/mod.rs new file mode 100644 index 0000000000..f87935428e --- /dev/null +++ b/nexus/types/src/authn/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Authentication types for the Nexus API. 
+ +pub mod cookies; diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index cc48f2646a..96de893fa3 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -27,20 +27,17 @@ use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::CollectionUuid; -use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; -use slog_error_chain::SlogInlineError; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt; use std::net::Ipv6Addr; use strum::EnumIter; use strum::IntoEnumIterator; -use thiserror::Error; use uuid::Uuid; mod blueprint_diff; @@ -595,13 +592,6 @@ fn zone_sort_key(z: &T) -> impl Ord { (z.kind(), z.id()) } -/// Errors from converting an [`OmicronZoneType`] into a [`BlueprintZoneType`]. -#[derive(Debug, Clone, Error, SlogInlineError)] -pub enum InvalidOmicronZoneType { - #[error("Omicron zone {} requires an external IP ID", kind.report_str())] - ExternalIpIdRequired { kind: ZoneKind }, -} - /// Describes one Omicron-managed zone in a blueprint. /// /// Part of [`BlueprintZonesConfig`]. @@ -616,168 +606,6 @@ pub struct BlueprintZoneConfig { pub zone_type: BlueprintZoneType, } -impl BlueprintZoneConfig { - /// Convert from an [`OmicronZoneConfig`]. - /// - /// This method is annoying to call correctly and will become more so over - /// time. Ideally we'd remove all callers and then remove this method, but - /// for now we keep it. - /// - /// # Errors - /// - /// If `config.zone_type` is a zone that has an external IP address (Nexus, - /// boundary NTP, external DNS), `external_ip_id` must be `Some(_)` or this - /// method will return an error. 
- pub fn from_omicron_zone_config( - config: OmicronZoneConfig, - disposition: BlueprintZoneDisposition, - external_ip_id: Option, - ) -> Result { - let kind = config.zone_type.kind(); - let zone_type = match config.zone_type { - OmicronZoneType::BoundaryNtp { - address, - dns_servers, - domain, - nic, - ntp_servers, - snat_cfg, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::BoundaryNtp( - blueprint_zone_type::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - external_ip: OmicronZoneExternalSnatIp { - id: external_ip_id, - snat_cfg, - }, - }, - ) - } - OmicronZoneType::Clickhouse { address, dataset } => { - BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { - address, - dataset, - }) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { address, dataset }, - ) - } - OmicronZoneType::ClickhouseServer { address, dataset } => { - BlueprintZoneType::ClickhouseServer( - blueprint_zone_type::ClickhouseServer { address, dataset }, - ) - } - OmicronZoneType::CockroachDb { address, dataset } => { - BlueprintZoneType::CockroachDb( - blueprint_zone_type::CockroachDb { address, dataset }, - ) - } - OmicronZoneType::Crucible { address, dataset } => { - BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { - address, - dataset, - }) - } - OmicronZoneType::CruciblePantry { address } => { - BlueprintZoneType::CruciblePantry( - blueprint_zone_type::CruciblePantry { address }, - ) - } - OmicronZoneType::ExternalDns { - dataset, - dns_address, - http_address, - nic, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::ExternalDns( - blueprint_zone_type::ExternalDns { - dataset, - http_address, - dns_address: OmicronZoneExternalFloatingAddr { - id: external_ip_id, - addr: dns_address, - }, - nic, - }, - ) - } - OmicronZoneType::InternalDns { - dataset, - dns_address, - gz_address, - gz_address_index, - http_address, - } => BlueprintZoneType::InternalDns( - blueprint_zone_type::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - }, - ), - OmicronZoneType::InternalNtp { - address, - dns_servers, - domain, - ntp_servers, - } => BlueprintZoneType::InternalNtp( - blueprint_zone_type::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - }, - ), - OmicronZoneType::Nexus { - external_dns_servers, - external_ip, - external_tls, - internal_address, - nic, - } => { - let external_ip_id = external_ip_id.ok_or( - InvalidOmicronZoneType::ExternalIpIdRequired { kind }, - )?; - BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { - internal_address, - external_ip: OmicronZoneExternalFloatingIp { - id: external_ip_id, - ip: external_ip, - }, - nic, - external_tls, - external_dns_servers, - }) - } - OmicronZoneType::Oximeter { address } => { - BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { - address, - }) - } - }; - Ok(Self { - disposition, - id: OmicronZoneUuid::from_untyped_uuid(config.id), - underlay_address: config.underlay_address, - filesystem_pool: config.filesystem_pool, - zone_type, - }) - } -} - impl From for OmicronZoneConfig { fn from(z: BlueprintZoneConfig) -> Self { Self { diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index c6a61aac78..dabb47066e 100644 --- 
a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -280,6 +280,13 @@ pub enum CockroachDbPreserveDowngrade { } impl CockroachDbPreserveDowngrade { + pub fn is_set(self) -> bool { + match self { + CockroachDbPreserveDowngrade::Set(_) => true, + _ => false, + } + } + pub fn from_optional_string( value: &Option, ) -> Result { diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a7dd0a72cc..83897cbd1d 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1500,7 +1500,7 @@ pub struct LinkConfigCreate { pub mtu: u16, /// The link-layer discovery protocol (LLDP) configuration for the link. - pub lldp: LldpServiceConfigCreate, + pub lldp: LldpLinkConfigCreate, /// The forward error correction mode of the link. pub fec: LinkFec, @@ -1512,16 +1512,29 @@ pub struct LinkConfigCreate { pub autoneg: bool, } -/// The LLDP configuration associated with a port. LLDP may be either enabled or -/// disabled, if enabled, an LLDP configuration must be provided by name or id. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct LldpServiceConfigCreate { +/// The LLDP configuration associated with a port. +#[derive(Clone, Debug, Default, Deserialize, Serialize, JsonSchema)] +pub struct LldpLinkConfigCreate { /// Whether or not LLDP is enabled. pub enabled: bool, - /// A reference to the LLDP configuration used. Must not be `None` when - /// `enabled` is `true`. - pub lldp_config: Option, + /// The LLDP link name TLV. + pub link_name: Option, + + /// The LLDP link description TLV. + pub link_description: Option, + + /// The LLDP chassis identifier TLV. + pub chassis_id: Option, + + /// The LLDP system name TLV. + pub system_name: Option, + + /// The LLDP system description TLV. + pub system_description: Option, + + /// The LLDP management IP TLV. + pub management_ip: Option, } /// A layer-3 switch interface configuration. When IPv6 is enabled, a link local @@ -1581,6 +1594,10 @@ pub struct Route { /// VLAN id the gateway is reachable over. pub vid: Option, + + /// Local preference for route. Higher preference indictes precedence + /// within and across protocols. + pub local_pref: Option, } /// Select a BGP config by a name or id. @@ -1612,6 +1629,13 @@ pub struct BgpAnnounceSetCreate { pub announcement: Vec, } +/// Optionally select a BGP announce set by a name or id. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct OptionalBgpAnnounceSetSelector { + /// A name or id to use when s electing BGP port settings + pub name_or_id: Option, +} + /// Select a BGP announce set by a name or id. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct BgpAnnounceSetSelector { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e241f849ee..58c2e560ab 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -971,3 +971,12 @@ pub struct AllowList { /// The allowlist of IPs or subnets. pub allowed_ips: ExternalAllowedSourceIps, } + +// OxQL QUERIES + +/// The result of a successful OxQL query. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct OxqlQueryResult { + /// Tables resulting from the query, each containing timeseries. 
+ pub tables: Vec, +} diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 494573e834..8a0a3ec80e 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -29,6 +29,7 @@ //! rules, so our model layer knows about our views. That seems to be a //! relatively minor offense, so it's the way we leave things for now. +pub mod authn; pub mod deployment; pub mod external_api; pub mod identity; diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 7b4f257670..bd928001bb 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -732,6 +732,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", @@ -767,6 +828,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -1183,6 +1253,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index d054591f3a..111bd552d0 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3749,6 +3749,67 @@ "start_time" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. 
If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -4154,6 +4215,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4373,6 +4443,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] }, @@ -4665,6 +4742,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/nexus.json b/openapi/nexus.json index 27e2870b6e..f6d140ed05 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -6533,22 +6533,48 @@ } } }, - "/v1/system/networking/bgp-announce": { + "/v1/system/networking/bgp-announce-set": { "get": { "tags": [ "system/networking" ], - "summary": "Get originated routes for a BGP configuration", + "summary": "List BGP announce sets", "operationId": "networking_bgp_announce_set_list", "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, { "in": "query", "name": "name_or_id", - "description": "A name or id to use when selecting BGP port settings", - "required": true, + "description": "A name or id to use when s electing BGP port settings", "schema": { "$ref": "#/components/schemas/NameOrId" } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } } ], "responses": { @@ -6557,10 +6583,10 @@ "content": { "application/json": { "schema": { - "title": "Array_of_BgpAnnouncement", + "title": "Array_of_BgpAnnounceSet", "type": "array", "items": { - "$ref": "#/components/schemas/BgpAnnouncement" + "$ref": "#/components/schemas/BgpAnnounceSet" } } } @@ -6572,6 +6598,9 @@ "5XX": { "$ref": "#/components/responses/Error" } + }, + "x-dropshot-pagination": { + "required": [] } }, "put": { @@ -6609,7 +6638,9 @@ "$ref": "#/components/responses/Error" } } - }, + } + }, + "/v1/system/networking/bgp-announce-set/{name_or_id}": { "delete": { "tags": [ "system/networking" @@ -6618,7 +6649,7 @@ "operationId": "networking_bgp_announce_set_delete", "parameters": [ { - "in": "query", + "in": "path", "name": "name_or_id", "description": "A name or id to use when selecting BGP port settings", "required": true, @@ -6640,6 +6671,75 @@ } } }, + "/v1/system/networking/bgp-announce-set/{name_or_id}/announcement": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get originated routes for a specified BGP announce set", + "operationId": "networking_bgp_announcement_list", + "parameters": [ + { + "in": "path", + "name": "name_or_id", + "description": "A name or id to use when selecting BGP port settings", + "required": true, + 
"schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_BgpAnnouncement", + "type": "array", + "items": { + "$ref": "#/components/schemas/BgpAnnouncement" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/networking/bgp-exported": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get BGP exported routes", + "operationId": "networking_bgp_exported", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BgpExported" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/networking/bgp-message-history": { "get": { "tags": [ @@ -8026,11 +8126,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_Table", - "type": "array", - "items": { - "$ref": "#/components/schemas/Table" - } + "$ref": "#/components/schemas/OxqlQueryResult" } } } @@ -10355,6 +10451,25 @@ "items" ] }, + "BgpExported": { + "description": "The current status of a BGP peer.", + "type": "object", + "properties": { + "exports": { + "description": "Exported routes indexed by peer address.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4Net" + } + } + } + }, + "required": [ + "exports" + ] + }, "BgpImportedRouteIpv4": { "description": "A route imported from a BGP peer.", "type": "object", @@ -15984,7 +16099,7 @@ "description": "The link-layer discovery protocol (LLDP) configuration for the link.", "allOf": [ { - "$ref": "#/components/schemas/LldpServiceConfigCreate" + "$ref": "#/components/schemas/LldpLinkConfigCreate" } ] }, @@ -16105,10 +16220,15 @@ } ] }, - "LldpServiceConfig": { + "LldpLinkConfig": { "description": "A link layer discovery protocol (LLDP) service configuration.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not the LLDP service is enabled.", "type": "boolean" @@ -16118,11 +16238,34 @@ "type": "string", "format": "uuid" }, - "lldp_config_id": { + "link_description": { "nullable": true, - "description": "The link-layer discovery protocol configuration for this service.", - "type": "string", - "format": "uuid" + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { + "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16130,22 +16273,44 @@ "id" ] }, - "LldpServiceConfigCreate": { - "description": "The LLDP configuration associated with a port. 
LLDP may be either enabled or disabled, if enabled, an LLDP configuration must be provided by name or id.", + "LldpLinkConfigCreate": { + "description": "The LLDP configuration associated with a port.", "type": "object", "properties": { + "chassis_id": { + "nullable": true, + "description": "The LLDP chassis identifier TLV.", + "type": "string" + }, "enabled": { "description": "Whether or not LLDP is enabled.", "type": "boolean" }, - "lldp_config": { + "link_description": { "nullable": true, - "description": "A reference to the LLDP configuration used. Must not be `None` when `enabled` is `true`.", - "allOf": [ - { - "$ref": "#/components/schemas/NameOrId" - } - ] + "description": "The LLDP link description TLV.", + "type": "string" + }, + "link_name": { + "nullable": true, + "description": "The LLDP link name TLV.", + "type": "string" + }, + "management_ip": { + "nullable": true, + "description": "The LLDP management IP TLV.", + "type": "string", + "format": "ip" + }, + "system_description": { + "nullable": true, + "description": "The LLDP system description TLV.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "The LLDP system name TLV.", + "type": "string" } }, "required": [ @@ -16501,6 +16666,22 @@ } ] }, + "OxqlQueryResult": { + "description": "The result of a successful OxQL query.", + "type": "object", + "properties": { + "tables": { + "description": "Tables resulting from the query, each containing timeseries.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + }, + "required": [ + "tables" + ] + }, "Password": { "title": "A password used to authenticate a user", "description": "Passwords may be subject to additional constraints.", @@ -17168,6 +17349,13 @@ "type": "string", "format": "ip" }, + "local_pref": { + "nullable": true, + "description": "Local preference for route. 
Higher preference indictes precedence within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "vid": { "nullable": true, "description": "VLAN id the gateway is reachable over.", @@ -19146,7 +19334,7 @@ "description": "The name of this link.", "type": "string" }, - "lldp_service_config_id": { + "lldp_link_config_id": { "description": "The link-layer discovery protocol service configuration id for this link.", "type": "string", "format": "uuid" @@ -19175,7 +19363,7 @@ "autoneg", "fec", "link_name", - "lldp_service_config_id", + "lldp_link_config_id", "mtu", "port_settings_id", "speed" @@ -19226,6 +19414,13 @@ "description": "The interface name this route configuration is assigned to.", "type": "string" }, + "local_pref": { + "nullable": true, + "description": "Local preference indicating priority within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "port_settings_id": { "description": "The port settings object this route configuration belongs to.", "type": "string", @@ -19430,7 +19625,7 @@ "description": "Link-layer discovery protocol (LLDP) settings.", "type": "array", "items": { - "$ref": "#/components/schemas/LldpServiceConfig" + "$ref": "#/components/schemas/LldpLinkConfig" } }, "links": { @@ -19739,7 +19934,8 @@ "nanoseconds", "volts", "amps", - "degrees_celcius" + "watts", + "degrees_celsius" ] }, { @@ -20131,10 +20327,20 @@ "type": "object", "properties": { "metric_type": { - "$ref": "#/components/schemas/MetricType" + "description": "The type of this metric.", + "allOf": [ + { + "$ref": "#/components/schemas/MetricType" + } + ] }, "values": { - "$ref": "#/components/schemas/ValueArray" + "description": "The data values.", + "allOf": [ + { + "$ref": "#/components/schemas/ValueArray" + } + ] } }, "required": [ diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f596ac6ee6..327351d961 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -277,6 +277,13 @@ "enum": [ "instance" ] + }, + { + "description": "The producer is a management gateway service.", + "type": "string", + "enum": [ + "management_gateway" + ] } ] } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 1241248a5e..4c40fb5da0 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2752,6 +2752,14 @@ "$ref": "#/components/schemas/UplinkAddressConfig" } }, + "lldp": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Switchport to use for external connectivity", "type": "string" @@ -3404,6 +3412,67 @@ "minLength": 1, "maxLength": 11 }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. 
If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "MacAddr": { "example": "ff:ff:ff:ff:ff:ff", "title": "A MAC address", @@ -4104,6 +4173,15 @@ "$ref": "#/components/schemas/BgpPeerConfig" } }, + "lldp": { + "nullable": true, + "description": "LLDP configuration for this port", + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "port": { "description": "Nmae of the port this config applies to.", "type": "string" @@ -4379,6 +4457,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 757383897b..87cfe045d3 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1773,6 +1773,67 @@ "last" ] }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "properties": { + "chassis_id": { + "nullable": true, + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "management_addrs": { + "nullable": true, + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "nullable": true, + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": "string" + }, + "port_id": { + "nullable": true, + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. 
e.g., qsfp0/0.", + "type": "string" + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/components/schemas/LldpAdminStatus" + } + ] + }, + "system_description": { + "nullable": true, + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + }, + "system_name": { + "nullable": true, + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": "string" + } + }, + "required": [ + "status" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", @@ -3062,6 +3123,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", @@ -6296,6 +6365,15 @@ "$ref": "#/components/schemas/UserSpecifiedBgpPeerConfig" } }, + "lldp": { + "nullable": true, + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/LldpPortConfig" + } + ] + }, "routes": { "type": "array", "items": { diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 5da9a1dfa8..b13fbd3938 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -17,8 +17,6 @@ use futures::TryStreamExt; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::IdSortMode; -use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::backoff; use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; @@ -381,6 +379,7 @@ impl OximeterAgent { db_config: DbConfig, resolver: &Resolver, log: &Logger, + replicated: bool, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); let log = log.new(o!( @@ -394,10 +393,15 @@ impl OximeterAgent { // database. let db_address = if let Some(address) = db_config.address { address + } else if replicated { + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::ClickhouseServer) + .await?, + ) } else { - SocketAddr::new( - resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + SocketAddr::V6( + resolver.lookup_socket_v6(ServiceName::Clickhouse).await?, ) }; @@ -423,7 +427,6 @@ impl OximeterAgent { .. 
}) => { debug!(log, "oximeter database does not exist, creating"); - let replicated = client.is_oximeter_cluster().await?; client .initialize_db_with_version( replicated, @@ -816,7 +819,7 @@ async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { async fn resolve_nexus_with_backoff( log: &Logger, resolver: &Resolver, -) -> SocketAddr { +) -> SocketAddrV6 { let log_failure = |error, delay| { warn!( log, @@ -827,12 +830,9 @@ async fn resolve_nexus_with_backoff( }; let do_lookup = || async { resolver - .lookup_ipv6(ServiceName::Nexus) + .lookup_socket_v6(ServiceName::Nexus) .await .map_err(|e| BackoffError::transient(e.to_string())) - .map(|ip| { - SocketAddr::V6(SocketAddrV6::new(ip, NEXUS_INTERNAL_PORT, 0, 0)) - }) }; backoff::retry_notify( backoff::retry_policy_internal_service(), diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 02bf9152f4..0576c7d532 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -14,7 +14,6 @@ use dropshot::HttpServerStarter; use internal_dns::resolver::ResolveError; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use omicron_common::FileKv; @@ -79,12 +78,18 @@ pub struct DbConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub address: Option, - /// Batch size of samples at which to insert + /// Batch size of samples at which to insert. pub batch_size: usize, /// Interval on which to insert data into the database, regardless of the number of collected /// samples. Value is in seconds. pub batch_interval: u64, + + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): This field + // should be removed if single node functionality is removed. + /// Whether ClickHouse is running as a replicated cluster or + /// single-node server. + pub replicated: bool, } impl DbConfig { @@ -96,12 +101,16 @@ impl DbConfig { /// ClickHouse. pub const DEFAULT_BATCH_INTERVAL: u64 = 5; + /// Default ClickHouse topology. 
+ pub const DEFAULT_REPLICATED: bool = false; + // Construct config with an address, using the defaults for other fields fn with_address(address: SocketAddr) -> Self { Self { address: Some(address), batch_size: Self::DEFAULT_BATCH_SIZE, batch_interval: Self::DEFAULT_BATCH_INTERVAL, + replicated: Self::DEFAULT_REPLICATED, } } } @@ -208,6 +217,7 @@ impl Oximeter { config.db, &resolver, &log, + config.db.replicated, ) .await?, )) @@ -251,14 +261,14 @@ impl Oximeter { let nexus_address = if let Some(address) = config.nexus_address { address } else { - SocketAddr::V6(SocketAddrV6::new( - resolver.lookup_ipv6(ServiceName::Nexus).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?, - NEXUS_INTERNAL_PORT, - 0, - 0, - )) + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::Nexus) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + ) }; let client = nexus_client::Client::new( &format!("http://{nexus_address}"), diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json new file mode 100644 index 0000000000..5d325281ab --- /dev/null +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -0,0 +1,91 @@ +{ + "oximeter_collector:collections": { + "timeseries_name": "oximeter_collector:collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.069658599Z" + }, + "oximeter_collector:failed_collections": { + "timeseries_name": "oximeter_collector:failed_collections", + "field_schema": [ + { + "name": "base_route", + "field_type": "string", + "source": "metric" + }, + { + "name": "collector_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "collector_ip", + "field_type": "ip_addr", + "source": "target" + }, + { + "name": "collector_port", + "field_type": "u16", + "source": "target" + }, + { + "name": "producer_id", + "field_type": "uuid", + "source": "metric" + }, + { + "name": "producer_ip", + "field_type": "ip_addr", + "source": "metric" + }, + { + "name": "producer_port", + "field_type": "u16", + "source": "metric" + }, + { + "name": "reason", + "field_type": "string", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-06-24T17:15:06.070765692Z" + } +} \ No newline at end of file diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index e3cf089cb5..2a9c615da2 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -24,6 +24,7 @@ num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true +oxql-types.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true @@ -89,6 +90,7 @@ expectorate.workspace = true indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = true +oximeter-test-utils.workspace = true slog-dtrace.workspace = true sqlformat.workspace = true sqlparser.workspace = true 
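(Not part of the change itself: a minimal, stand-alone sketch of how the new `replicated` field on `DbConfig` is meant to be driven from the collector's configuration. The field names mirror the struct shown above in `oximeter/collector/src/lib.rs`, but the types, TOML values, and `main` here are simplified illustrative assumptions, not the real config file or code.)

```
// Stand-alone illustration only; mirrors the DbConfig fields shown above.
use serde::Deserialize;
use std::net::SocketAddr;

#[derive(Debug, Deserialize)]
struct DbConfig {
    /// Optional explicit ClickHouse address; DNS resolution is used when absent.
    #[serde(default)]
    address: Option<SocketAddr>,
    /// Batch size of samples at which to insert.
    batch_size: usize,
    /// Interval (in seconds) on which to insert data, regardless of batch size.
    batch_interval: u64,
    /// Whether ClickHouse runs as a replicated cluster or a single-node server.
    replicated: bool,
}

fn main() {
    // Hypothetical config snippet. Setting `replicated = true` is what makes
    // the agent look up the clustered ClickHouse service instead of the
    // single-node one, per the resolver change shown earlier in this diff.
    let cfg: DbConfig = toml::from_str(
        "batch_size = 1000\nbatch_interval = 5\nreplicated = false",
    )
    .expect("valid oximeter DB config");
    println!("{cfg:?}");
}
```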
diff --git a/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql new file mode 100644 index 0000000000..04158b36ce --- /dev/null +++ b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql new file mode 100644 index 0000000000..2e35dd2793 --- /dev/null +++ b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql new file mode 100644 index 0000000000..25e5303e5a --- /dev/null +++ b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql new file mode 100644 index 0000000000..f26fdedbb6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql new file mode 100644 index 0000000000..1bc623f418 --- /dev/null +++ b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql new file mode 100644 index 0000000000..b98bba1e88 --- /dev/null +++ b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql new file mode 100644 index 0000000000..bf3c16dde5 --- /dev/null +++ b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql new file mode 100644 index 0000000000..3ddb0eec84 --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql new file mode 100644 index 0000000000..58d599cf49 --- /dev/null +++ b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..94696b7b06 --- /dev/null +++ b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql new file mode 100644 index 0000000000..f621033d56 --- /dev/null +++ b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..4a01da9e74 --- /dev/null +++ b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql new file mode 100644 index 0000000000..173d803437 --- /dev/null +++ b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql new file mode 100644 index 0000000000..d9fcc84eba --- /dev/null +++ b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql new file mode 100644 index 0000000000..8c9aecca9d --- /dev/null +++ b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql new file mode 100644 index 0000000000..8d071424f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql new file mode 100644 index 0000000000..ac5fa948ae --- /dev/null +++ b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql new file mode 100644 index 0000000000..3caa1b93f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql new file mode 100644 index 0000000000..ed6978c7e6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql new file mode 100644 index 0000000000..81ce8626a7 --- /dev/null +++ b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql new file mode 100644 index 0000000000..2a7c757dc8 --- /dev/null +++ b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql new file mode 100644 index 0000000000..cbe0b08fe4 --- /dev/null +++ b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql new file mode 100644 index 0000000000..d4854807b7 --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql new file mode 100644 index 0000000000..c84b634a00 --- /dev/null +++ b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql new file mode 100644 index 0000000000..60c28c0047 --- /dev/null +++ b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql new file mode 100644 index 0000000000..b38cdda831 --- /dev/null +++ b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql new file mode 100644 index 0000000000..cd533ffd8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql new file mode 100644 index 0000000000..1ea7093d8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql new file mode 100644 index 0000000000..f9f6464729 --- /dev/null +++ b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql new file mode 100644 index 0000000000..7c37ee9b21 --- /dev/null +++ b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql 
b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql new file mode 100644 index 0000000000..b15eab9387 --- /dev/null +++ b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql new file mode 100644 index 0000000000..caa96ab5eb --- /dev/null +++ b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql new file mode 100644 index 0000000000..25af5ee660 --- /dev/null +++ b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql new file mode 100644 index 0000000000..e85bd845d4 --- /dev/null +++ b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql new file mode 100644 index 0000000000..d287a02c6f --- /dev/null +++ b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql new file mode 100644 index 0000000000..02eb09c300 --- /dev/null +++ b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/timeseries-to-delete.txt b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/replicated/11/timeseries-to-delete.txt b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt new file mode 100644 index 0000000000..4f0301a6b5 --- /dev/null +++ b/oximeter/db/schema/replicated/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git a/oximeter/db/schema/replicated/db-init-1.sql 
b/oximeter/db/schema/replicated/db-init-1.sql index 176e5b64f7..4eac2b4e37 100644 --- a/oximeter/db/schema/replicated/db-init-1.sql +++ b/oximeter/db/schema/replicated/db-init-1.sql @@ -78,10 +78,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster AS oximeter.fields_i64_local @@ -93,10 +95,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_uuid_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster AS oximeter.fields_uuid_local diff --git a/oximeter/db/schema/replicated/db-init-2.sql b/oximeter/db/schema/replicated/db-init-2.sql index ae0431ec84..51e64e20e0 100644 --- a/oximeter/db/schema/replicated/db-init-2.sql +++ b/oximeter/db/schema/replicated/db-init-2.sql @@ -595,10 +595,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster AS oximeter.fields_bool_local @@ -609,10 +611,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_ipaddr_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster AS oximeter.fields_ipaddr_local @@ -623,10 +627,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_string_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS 
oximeter.fields_string ON CLUSTER oximeter_cluster AS oximeter.fields_string_local @@ -637,10 +643,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster AS oximeter.fields_i8_local @@ -651,10 +659,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster AS oximeter.fields_u8_local @@ -665,10 +675,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster AS oximeter.fields_i16_local @@ -679,10 +691,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster AS oximeter.fields_u16_local @@ -693,10 +707,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster AS oximeter.fields_i32_local @@ -707,10 +723,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = 
ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster AS oximeter.fields_u32_local @@ -721,10 +739,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster AS oximeter.fields_u64_local diff --git a/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql new file mode 100644 index 0000000000..86f46a43bf --- /dev/null +++ b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql new file mode 100644 index 0000000000..6ebec2d506 --- /dev/null +++ b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql new file mode 100644 index 0000000000..cc07b8cd1d --- /dev/null +++ b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql new file mode 100644 index 0000000000..884b5ffed6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql new file mode 100644 index 0000000000..ef569d80c3 --- /dev/null +++ b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql new file mode 100644 index 0000000000..adfc3dd1a4 --- /dev/null +++ b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql new file mode 100644 index 0000000000..0f4e43ce2c --- /dev/null +++ b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql new file mode 100644 index 0000000000..8dcbb32bb2 --- /dev/null +++ b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql new file mode 100644 index 0000000000..11a83bde7a --- /dev/null +++ b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql new file mode 100644 index 0000000000..d27f38f94f --- /dev/null +++ b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql new file mode 100644 index 0000000000..cd60a2a1e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql new file mode 100644 index 0000000000..5b1b2fcfb6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql new file mode 100644 index 0000000000..a71753f95d --- /dev/null +++ b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql new file mode 100644 index 0000000000..c8dbfb494e --- /dev/null +++ b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql new file mode 100644 index 0000000000..30da688c8c --- /dev/null +++ 
b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql new file mode 100644 index 0000000000..eb0f377e2d --- /dev/null +++ b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql new file mode 100644 index 0000000000..9cd4fa05c8 --- /dev/null +++ b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql new file mode 100644 index 0000000000..5230634097 --- /dev/null +++ b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql new file mode 100644 index 0000000000..9d967784e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql new file mode 100644 index 0000000000..f625138b59 --- /dev/null +++ b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql new file mode 100644 index 0000000000..fc80ce7102 --- /dev/null +++ b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql new file mode 100644 index 0000000000..26256d3924 --- /dev/null +++ b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql new file mode 100644 index 0000000000..a81294e535 --- /dev/null +++ b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MATERIALIZE COLUMN last_updated_at; diff --git 
a/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql new file mode 100644 index 0000000000..43ca166755 --- /dev/null +++ b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql new file mode 100644 index 0000000000..46074c79ce --- /dev/null +++ b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql new file mode 100644 index 0000000000..a68d449de7 --- /dev/null +++ b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql new file mode 100644 index 0000000000..48afb51bf1 --- /dev/null +++ b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql new file mode 100644 index 0000000000..d3c6be9072 --- /dev/null +++ b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql new file mode 100644 index 0000000000..5bdffd4b2e --- /dev/null +++ b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql new file mode 100644 index 0000000000..4551db90cd --- /dev/null +++ b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql new file mode 100644 index 0000000000..024c5f8f94 --- /dev/null +++ b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql new file mode 100644 index 
0000000000..67d3b7a596 --- /dev/null +++ b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql new file mode 100644 index 0000000000..c5272df459 --- /dev/null +++ b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql new file mode 100644 index 0000000000..8d01b382fe --- /dev/null +++ b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql new file mode 100644 index 0000000000..06fbd94d02 --- /dev/null +++ b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql new file mode 100644 index 0000000000..481055d4f5 --- /dev/null +++ b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/timeseries-to-delete.txt b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/single-node/11/timeseries-to-delete.txt b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt new file mode 100644 index 0000000000..4f0301a6b5 --- /dev/null +++ b/oximeter/db/schema/single-node/11/timeseries-to-delete.txt @@ -0,0 +1,9 @@ +switch_table:capacity +switch_table:collisions +switch_table:delete_misses +switch_table:deletes +switch_table:exhaustion +switch_table:inserts +switch_table:occupancy +switch_table:update_misses +switch_table:updates diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 38e9d0b70c..184951feeb 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -504,126 +504,158 @@ TTL toDateTime(timestamp) + INTERVAL 30 DAY; * timeseries name and then key, since it would improve lookups where one * already has the key. Realistically though, these tables are quite small and * so performance benefits will be low in absolute terms. + * + * TTL: We use a materialized column to expire old field table records. This + * column is generated automatically by the database whenever a new row is + * inserted. It cannot be inserted directly, nor is it returned in a `SELECT *` + * query. 
Since these tables are `ReplacingMergeTree`s, that means the last + * record will remain during a deduplication, which will have the last + * timestamp. ClickHouse will then expire old data for us, similar to the + * measurement tables. */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE 
TABLE IF NOT EXISTS oximeter.fields_u64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; /* The timeseries schema table stores the extracted schema for the samples * oximeter collects. diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 30ae4b68d2..c2b07ebaa6 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -22,8 +22,6 @@ use crate::Error; use crate::Metric; use crate::Target; use crate::Timeseries; -use crate::TimeseriesKey; -use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; @@ -31,7 +29,9 @@ use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; +use oximeter::schema::TimeseriesKey; use oximeter::types::Sample; +use oximeter::TimeseriesName; use regex::Regex; use regex::RegexBuilder; use slog::debug; @@ -1191,7 +1191,6 @@ mod tests { }; use omicron_test_utils::dev::test_setup_log; use oximeter::histogram::Histogram; - use oximeter::test_util; use oximeter::types::MissingDatum; use oximeter::Datum; use oximeter::FieldValue; @@ -1723,7 +1722,7 @@ mod tests { let samples = { let mut s = Vec::with_capacity(8); for _ in 0..s.capacity() { - s.push(test_util::make_hist_sample()) + s.push(oximeter_test_utils::make_hist_sample()) } s }; @@ -1762,7 +1761,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample]).await.unwrap(); let bad_name = name_mismatch::TestTarget { @@ -1770,7 +1769,7 @@ mod tests { name2: "second_name".into(), num: 2, }; - let metric = test_util::TestMetric { + let metric = oximeter_test_utils::TestMetric { id: uuid::Uuid::new_v4(), good: true, datum: 1, @@ -1792,7 +1791,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = 
test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); // Verify that this sample is considered new, i.e., we return rows to update the timeseries // schema table. @@ -1867,7 +1866,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let sample = samples.first().unwrap(); @@ -1956,7 +1955,7 @@ mod tests { // we'd like to exercise the logic of ClickHouse's replacing merge tree engine. let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; async fn assert_table_count( @@ -2631,7 +2630,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let original_schema = client.schema.lock().await.clone(); @@ -2656,7 +2655,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 100u32.try_into().unwrap(); @@ -2691,7 +2690,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 7u32.try_into().unwrap(); @@ -3364,7 +3363,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data hasn't been dropped. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3438,7 +3437,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data gets dropped later. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3464,7 +3463,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; client.insert_samples(&samples).await.unwrap(); // Get the count of schema directly from the DB, which should have just @@ -3549,7 +3548,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; // We're using the components of the `insert_samples()` method here, // which has been refactored explicitly for this test. 
We need to insert diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 29586b8189..4005fa873e 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -18,7 +18,7 @@ use crate::query::field_table_name; use crate::Error; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; +use oximeter::schema::TimeseriesKey; use oximeter::TimeseriesSchema; use slog::debug; use slog::trace; @@ -68,7 +68,7 @@ pub struct OxqlResult { pub query_summaries: Vec, /// The list of OxQL tables returned from the query. - pub tables: Vec, + pub tables: Vec, } /// The maximum number of data values fetched from the database for an OxQL @@ -479,7 +479,9 @@ impl Client { query_id, total_duration: query_start.elapsed(), query_summaries, - tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + tables: vec![oxql_types::Table::new( + schema.timeseries_name.as_str(), + )], }; return Ok(result); } @@ -503,7 +505,7 @@ impl Client { // At this point, let's construct a set of tables and run the results // through the transformation pipeline. - let mut tables = vec![oxql::Table::from_timeseries( + let mut tables = vec![oxql_types::Table::from_timeseries( schema.timeseries_name.as_str(), timeseries_by_key.into_values(), )?]; @@ -553,7 +555,7 @@ impl Client { limit: Option, total_rows_fetched: &mut u64, ) -> Result< - (Vec, BTreeMap), + (Vec, BTreeMap), Error, > { // We'll create timeseries for each key on the fly. To enable computing @@ -624,25 +626,25 @@ impl Client { for (key, measurements) in measurements_by_key.into_iter() { // Construct a new timeseries, from the target/metric info. let (target, metric) = info.get(&key).unwrap(); - let mut timeseries = oxql::Timeseries::new( + let mut timeseries = oxql_types::Timeseries::new( target .fields .iter() .chain(metric.fields.iter()) .map(|field| (field.name.clone(), field.value.clone())), - oxql::point::DataType::try_from(schema.datum_type)?, + oxql_types::point::DataType::try_from(schema.datum_type)?, if schema.datum_type.is_cumulative() { - oxql::point::MetricType::Delta + oxql_types::point::MetricType::Delta } else { - oxql::point::MetricType::Gauge + oxql_types::point::MetricType::Gauge }, )?; // Convert its oximeter measurements into OxQL data types. let points = if schema.datum_type.is_cumulative() { - oxql::point::Points::delta_from_cumulative(&measurements)? + oxql_types::point::Points::delta_from_cumulative(&measurements)? } else { - oxql::point::Points::gauge_from_gauge(&measurements)? 
}; timeseries.points = points; debug!( @@ -1108,10 +1110,7 @@ fn update_total_rows_and_check( mod tests { use super::ConsistentKeyGroup; use crate::client::oxql::chunk_consistent_key_groups_impl; - use crate::{ - oxql::{point::Points, Table, Timeseries}, - Client, DbWrite, - }; + use crate::{Client, DbWrite}; use crate::{Metric, Target}; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; @@ -1119,6 +1118,7 @@ mod tests { use omicron_test_utils::dev::test_setup_log; use oximeter::{types::Cumulative, FieldValue}; use oximeter::{DatumType, Sample}; + use oxql_types::{point::Points, Table, Timeseries}; use std::collections::BTreeMap; use std::time::Duration; diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 9ad382c97d..5d56d802c9 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -14,6 +14,7 @@ use dropshot::EmptyScanParams; use dropshot::PaginationParams; pub use oximeter::schema::FieldSchema; pub use oximeter::schema::FieldSource; +use oximeter::schema::TimeseriesKey; pub use oximeter::schema::TimeseriesName; pub use oximeter::schema::TimeseriesSchema; pub use oximeter::DatumType; @@ -267,8 +268,6 @@ pub async fn make_client( Ok(client) } -pub(crate) type TimeseriesKey = u64; - // TODO-cleanup: Add the timeseries version in to the computation of the key. // This will require a full drop of the database, since we're changing the // sorting key and the timeseries key on each past sample. See diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index f27df4ed49..a3e9d109ff 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -11,13 +11,13 @@ use crate::FieldSchema; use crate::FieldSource; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; use crate::TimeseriesSchema; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; use num::traits::Zero; use oximeter::histogram::Histogram; +use oximeter::schema::TimeseriesKey; use oximeter::traits; use oximeter::types::Cumulative; use oximeter::types::Datum; @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 9; +pub const OXIMETER_VERSION: u64 = 11; // Wrapper type to represent a boolean in the database. // @@ -1880,7 +1880,6 @@ mod tests { use super::*; use chrono::Timelike; use oximeter::histogram::Record; - use oximeter::test_util; use oximeter::Datum; #[test] @@ -1983,7 +1982,7 @@ mod tests { #[test] fn test_unroll_from_source() { - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); let out = unroll_from_source(&sample); assert_eq!(out["oximeter.fields_string"].len(), 2); assert_eq!(out["oximeter.fields_i64"].len(), 1); @@ -2003,8 +2002,8 @@ mod tests { // datum. 
#[test] fn test_unroll_missing_measurement_row() { - let sample = test_util::make_sample(); - let missing_sample = test_util::make_missing_sample(); + let sample = oximeter_test_utils::make_sample(); + let missing_sample = oximeter_test_utils::make_missing_sample(); let (table_name, row) = unroll_measurement_row(&sample); let (missing_table_name, missing_row) = unroll_measurement_row(&missing_sample); @@ -2022,7 +2021,7 @@ mod tests { #[test] fn test_unroll_measurement_row() { - let sample = test_util::make_hist_sample(); + let sample = oximeter_test_utils::make_hist_sample(); let (table_name, row) = unroll_measurement_row(&sample); assert_eq!(table_name, "oximeter.measurements_histogramf64"); let unpacked: HistogramF64MeasurementRow = diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index a7585402b6..62182ec553 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -189,11 +189,11 @@ peg::parser! { rule dashed_uuid_literal() -> Uuid = s:$( "\"" - ['a'..='f' | '0'..='9']*<8> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<12> + ['a'..='f' | 'A'..='F' | '0'..='9']*<8> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<12> "\"" ) {? let Some(middle) = s.get(1..37) else { @@ -202,7 +202,7 @@ peg::parser! { middle.parse().or(Err("invalid UUID literal")) } rule undashed_uuid_literal() -> Uuid - = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + = s:$("\"" ['a'..='f' | 'A'..='F' | '0'..='9']*<32> "\"") {? let Some(middle) = s.get(1..33) else { return Err("invalid UUID literal"); }; @@ -279,11 +279,27 @@ peg::parser! { pub rule string_literal() -> Literal = s:string_literal_impl() { Literal::String(s) } + pub(super) rule hex_integer_literal_impl() -> i128 + = n:$("0x" ['0'..='9' | 'a'..='f' | 'A'..='F']+ !['.']) + {? + let Some((maybe_sign, digits)) = n.split_once("0x") else { + return Err("hex literals should start with '0x'"); + }; + i128::from_str_radix(digits, 16).map_err(|_| "invalid hex literal") + } + + pub(super) rule dec_integer_literal_impl() -> i128 + = n:$(['0'..='9']+ !['e' | 'E' | '.']) + {? + n.parse().map_err(|_| "integer literal") + } + pub(super) rule integer_literal_impl() -> i128 - = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + = maybe_sign:$("-"?) n:(hex_integer_literal_impl() / dec_integer_literal_impl()) {? 
- let Ok(x) = n.parse() else { - return Err("integer literal"); + let sign = if maybe_sign == "-" { -1 } else { 1 }; + let Some(x) = n.checked_mul(sign) else { + return Err("negative overflow"); }; if x < i128::from(i64::MIN) { Err("negative overflow") @@ -734,17 +750,49 @@ mod tests { .is_err()); } + #[test] + fn test_uuid_literal_is_case_insensitive() { + const ID: Uuid = uuid::uuid!("880D82A1-102F-4699-BE1A-7E2A6A469E8E"); + let as_str = format!("\"{ID}\""); + let as_lower = as_str.to_lowercase(); + assert_eq!(query_parser::uuid_literal_impl(&as_str).unwrap(), ID,); + assert_eq!(query_parser::uuid_literal_impl(&as_lower).unwrap(), ID,); + } + #[test] fn test_integer_literal() { assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); - assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); assert!(query_parser::integer_literal_impl("-1.0").is_err()); assert!(query_parser::integer_literal_impl("-1.").is_err()); assert!(query_parser::integer_literal_impl("1e3").is_err()); } + #[test] + fn test_hex_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("0x1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-0x1").unwrap(), -1); + assert_eq!(query_parser::integer_literal_impl("-0xa").unwrap(), -0xa); + assert_eq!( + query_parser::integer_literal_impl("0xfeed").unwrap(), + 0xfeed + ); + assert_eq!( + query_parser::integer_literal_impl("0xFEED").unwrap(), + 0xfeed + ); + + // Out of range in either direction + assert!(query_parser::integer_literal_impl("0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + assert!(query_parser::integer_literal_impl("-0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + + assert!(query_parser::integer_literal_impl("-0x1.0").is_err()); + assert!(query_parser::integer_literal_impl("-0x1.").is_err()); + } + #[test] fn test_double_literal() { assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs index cf54ebc312..b0cd7d80f1 100644 --- a/oximeter/db/src/oxql/ast/table_ops/align.rs +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -6,19 +6,19 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::query::Alignment; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::TimeDelta; use chrono::Utc; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::point::Values; +use oxql_types::Alignment; +use oxql_types::Table; +use oxql_types::Timeseries; use std::time::Duration; // The maximum factor by which an alignment operation may upsample data. @@ -144,7 +144,7 @@ fn align_mean_within( "Alignment by mean requires a gauge or delta metric, not {}", metric_type, ); - verify_max_upsampling_ratio(&points.timestamps, &period)?; + verify_max_upsampling_ratio(points.timestamps(), &period)?; // Always convert the output to doubles, when computing the mean. The // output is always a gauge, so we do not need the start times of the @@ -179,7 +179,7 @@ fn align_mean_within( // - Compute the mean of those. 
let period_ = TimeDelta::from_std(*period).context("time delta out of range")?; - let first_timestamp = points.timestamps[0]; + let first_timestamp = points.timestamps()[0]; let mut ix: u32 = 0; loop { // Compute the next output timestamp, by shifting the query end time @@ -220,15 +220,15 @@ fn align_mean_within( // entries. let output_value = if matches!(metric_type, MetricType::Gauge) { mean_gauge_value_in_window( - &points.timestamps, + points.timestamps(), &input_points, window_start, output_time, ) } else { mean_delta_value_in_window( - points.start_times.as_ref().unwrap(), - &points.timestamps, + points.start_times().unwrap(), + points.timestamps(), &input_points, window_start, output_time, @@ -255,10 +255,9 @@ fn align_mean_within( ValueArray::Double(output_values.into_iter().rev().collect()); let timestamps = output_timestamps.into_iter().rev().collect(); let values = Values { values, metric_type: MetricType::Gauge }; - new_timeseries.points = - Points { start_times: None, timestamps, values: vec![values] }; - new_timeseries.alignment = - Some(Alignment { end_time: *query_end, period: *period }); + new_timeseries.points = Points::new(None, timestamps, vec![values]); + new_timeseries + .set_alignment(Alignment { end_time: *query_end, period: *period }); output_table.insert(new_timeseries).unwrap(); } Ok(output_table) diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index b6fc533e4d..ad398da983 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -12,18 +12,18 @@ use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use crate::shells::special_idents; use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; use oximeter::FieldValue; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use regex::Regex; use std::collections::BTreeSet; use std::fmt; @@ -340,16 +340,13 @@ impl Filter { // Apply the filter to the data points as well. let points = self.filter_points(&input.points)?; - // Similar to above, if the filter removes all data points in - // the timeseries, let's remove the timeseries altogether. - if points.is_empty() { - continue; + if let Some(new_timeseries) = input.copy_with_points(points) { + timeseries.push(new_timeseries); + } else { + // None means that the filter removed all data points in + // the timeseries. In that case, we remove the timeseries + // altogether. 
} - timeseries.push(Timeseries { - fields: input.fields.clone(), - points, - alignment: input.alignment, - }) } output_tables.push(Table::from_timeseries( table.name(), @@ -823,7 +820,7 @@ impl SimpleFilter { ) -> Result, Error> { let ident = self.ident.as_str(); if ident == "timestamp" { - self.filter_points_by_timestamp(negated, &points.timestamps) + self.filter_points_by_timestamp(negated, points.timestamps()) } else if ident == "datum" { anyhow::ensure!( points.dimensionality() == 1, @@ -1151,15 +1148,15 @@ impl SimpleFilter { mod tests { use crate::oxql::ast::grammar::query_parser; use crate::oxql::ast::logical_op::LogicalOp; - use crate::oxql::point::DataType; - use crate::oxql::point::MetricType; - use crate::oxql::point::Points; - use crate::oxql::point::ValueArray; - use crate::oxql::point::Values; - use crate::oxql::Table; - use crate::oxql::Timeseries; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::point::DataType; + use oxql_types::point::MetricType; + use oxql_types::point::Points; + use oxql_types::point::ValueArray; + use oxql_types::point::Values; + use oxql_types::Table; + use oxql_types::Timeseries; use std::time::Duration; use uuid::Uuid; @@ -1172,7 +1169,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); // This filter should remove the first point based on its timestamp. let t = Utc::now() + Duration::from_secs(10); @@ -1205,7 +1202,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); let filter = query_parser::filter("filter datum < \"something\"").unwrap(); diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index f40572d762..c48804a788 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -10,13 +10,13 @@ use chrono::DateTime; use chrono::Utc; use crate::oxql::ast::ident::Ident; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::ValueArray; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; -use crate::TimeseriesKey; +use anyhow::Error; +use oximeter::schema::TimeseriesKey; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -98,7 +98,7 @@ impl GroupBy { ValueArray::Double(new_values), ValueArray::Double(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -106,7 +106,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -152,10 +152,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. 
- std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -166,7 +163,7 @@ impl GroupBy { ValueArray::Integer(new_values), ValueArray::Integer(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -174,7 +171,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -220,10 +217,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -286,14 +280,15 @@ impl GroupBy { else { unreachable!(); }; - let new_timestamps = &new_points.timestamps; + let new_timestamps = new_points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree // to which we can easily do this on the `existing` // entry in the output table. Instead, aggregate // everything into a copy of the expected data. - let mut timestamps = existing.points.timestamps.clone(); + let mut timestamps = + existing.points.timestamps().to_owned(); let mut values = existing .points .values(0) @@ -360,10 +355,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -388,7 +380,7 @@ impl GroupBy { // _zero_ for any where the values are none. let counts = new_timeseries .points - .timestamps + .timestamps() .iter() .zip(values) .map(|(timestamp, maybe_value)| { @@ -434,16 +426,16 @@ pub enum Reducer { #[cfg(test)] mod tests { use super::{GroupBy, Reducer}; - use crate::oxql::{ - ast::{ - ident::Ident, - table_ops::align::{Align, AlignmentMethod}, - }, - point::{DataType, MetricType, ValueArray}, - Table, Timeseries, + use crate::oxql::ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, }; use chrono::{DateTime, Utc}; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; // Which timeseries the second data point is missing from. @@ -495,8 +487,8 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts0.points.start_times = None; - ts0.points.timestamps.clone_from(×tamps); + ts0.points.clear_start_times(); + ts0.points.set_timestamps(timestamps.clone()); *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(1.0), if matches!( @@ -527,7 +519,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts1.points.start_times = None; + ts1.points.clear_start_times(); // Non-overlapping in this test setup means that we just shift one // value from this array backward in time by one additional second. @@ -538,7 +530,7 @@ mod tests { // // When reducing, t0 is never changed, and t1-t2 are always reduced // together, if the values are present. 
- ts1.points.timestamps = if cfg.overlapping_times { + let new_timestamps = if cfg.overlapping_times { timestamps.clone() } else { let mut new_timestamps = timestamps.clone(); @@ -546,6 +538,7 @@ mod tests { timestamps.insert(0, new_timestamps[0]); new_timestamps }; + ts1.points.set_timestamps(new_timestamps); *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(2.0), if matches!(cfg.missing_value, MissingValue::Both) { @@ -604,11 +597,13 @@ mod tests { let points = &grouped_timeseries.points; assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); assert_eq!( - points.start_times, None, + points.start_times(), + None, "Points should not have start times" ); assert_eq!( - points.timestamps, test.timestamps, + points.timestamps(), + test.timestamps, "Points do not have correct timestamps" ); diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs index 3c150a4acf..2893f6cf3e 100644 --- a/oximeter/db/src/oxql/ast/table_ops/join.rs +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -6,12 +6,10 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; use anyhow::Context; +use anyhow::Error; +use oxql_types::point::MetricType; +use oxql_types::Table; /// An AST node for a natural inner join. #[derive(Clone, Copy, Debug, PartialEq)] @@ -80,10 +78,8 @@ impl Join { // 1. They have the same alignment, and // 2. We merge the timepoints rather than simply creating a // ragged array of points. - timeseries.points = inner_join_point_arrays( - ×eries.points, - &next_timeseries.points, - )?; + timeseries.points = + timeseries.points.inner_join(&next_timeseries.points)?; } // We'll also update the name, to indicate the joined data. out.name.push(','); @@ -93,101 +89,6 @@ impl Join { } } -// Given two arrays of points, stack them together at matching timepoints. -// -// For time points in either which do not have a corresponding point in the -// other, the entire time point is elided. -fn inner_join_point_arrays( - left: &Points, - right: &Points, -) -> Result { - // Create an output array with roughly the right capacity, and double the - // number of dimensions. We're trying to stack output value arrays together - // along the dimension axis. - let data_types = - left.data_types().chain(right.data_types()).collect::>(); - let metric_types = - left.metric_types().chain(right.metric_types()).collect::>(); - let mut out = Points::with_capacity( - left.len().max(right.len()), - data_types.iter().copied(), - metric_types.iter().copied(), - )?; - - // Iterate through each array until one is exhausted. We're only inserting - // values from both arrays where the timestamps actually match, since this - // is an inner join. We may want to insert missing values where timestamps - // do not match on either side, when we support an outer join of some kind. 
- let n_left_dim = left.values.len(); - let mut left_ix = 0; - let mut right_ix = 0; - while left_ix < left.len() && right_ix < right.len() { - let left_timestamp = left.timestamps[left_ix]; - let right_timestamp = right.timestamps[right_ix]; - if left_timestamp == right_timestamp { - out.timestamps.push(left_timestamp); - push_concrete_values( - &mut out.values[..n_left_dim], - &left.values, - left_ix, - ); - push_concrete_values( - &mut out.values[n_left_dim..], - &right.values, - right_ix, - ); - left_ix += 1; - right_ix += 1; - } else if left_timestamp < right_timestamp { - left_ix += 1; - } else { - right_ix += 1; - } - } - Ok(out) -} - -// Push the `i`th value from each dimension of `from` onto `to`. -fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { - assert_eq!(to.len(), from.len()); - for (output, input) in to.iter_mut().zip(from.iter()) { - let input_array = &input.values; - let output_array = &mut output.values; - assert_eq!(input_array.data_type(), output_array.data_type()); - if let Ok(ints) = input_array.as_integer() { - output_array.as_integer_mut().unwrap().push(ints[i]); - continue; - } - if let Ok(doubles) = input_array.as_double() { - output_array.as_double_mut().unwrap().push(doubles[i]); - continue; - } - if let Ok(bools) = input_array.as_boolean() { - output_array.as_boolean_mut().unwrap().push(bools[i]); - continue; - } - if let Ok(strings) = input_array.as_string() { - output_array.as_string_mut().unwrap().push(strings[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_integer_distribution() { - output_array - .as_integer_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_double_distribution() { - output_array - .as_double_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - unreachable!(); - } -} - // Return an error if any metric types are not suitable for joining. fn ensure_all_metric_types( mut metric_types: impl ExactSizeIterator, @@ -200,186 +101,3 @@ fn ensure_all_metric_types( ); Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::oxql::point::DataType; - use crate::oxql::point::Datum; - use crate::oxql::point::ValueArray; - use chrono::Utc; - use std::time::Duration; - - #[test] - fn test_push_concrete_values() { - let mut points = Points::with_capacity( - 2, - [DataType::Integer, DataType::Double].into_iter(), - [MetricType::Gauge, MetricType::Gauge].into_iter(), - ) - .unwrap(); - - // Push a concrete value for the integer dimension - let from_ints = vec![Values { - values: ValueArray::Integer(vec![Some(1)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[..1], &from_ints, 0); - - // And another for the double dimension. 
- let from_doubles = vec![Values { - values: ValueArray::Double(vec![Some(2.0)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[1..], &from_doubles, 0); - - assert_eq!( - points.dimensionality(), - 2, - "Points should have 2 dimensions", - ); - let ints = points.values[0].values.as_integer().unwrap(); - assert_eq!( - ints.len(), - 1, - "Should have pushed one point in the first dimension" - ); - assert_eq!( - ints[0], - Some(1), - "Should have pushed 1 onto the first dimension" - ); - let doubles = points.values[1].values.as_double().unwrap(); - assert_eq!( - doubles.len(), - 1, - "Should have pushed one point in the second dimension" - ); - assert_eq!( - doubles[0], - Some(2.0), - "Should have pushed 2.0 onto the second dimension" - ); - } - - #[test] - fn test_join_point_arrays() { - let now = Utc::now(); - - // Create a set of integer points to join with. - // - // This will have two timestamps, one of which will match the points - // below that are merged in. - let int_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(2), - now, - ], - values: vec![Values { - values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), - metric_type: MetricType::Gauge, - }], - }; - - // Create an additional set of double points. - // - // This also has two timepoints, one of which matches with the above, - // and one of which does not. - let double_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(1), - now, - ], - values: vec![Values { - values: ValueArray::Double(vec![ - Some(4.0), - Some(5.0), - Some(6.0), - ]), - metric_type: MetricType::Gauge, - }], - }; - - // Merge the arrays. - let merged = - inner_join_point_arrays(&int_points, &double_points).unwrap(); - - // Basic checks that we merged in the right values and have the right - // types and dimensions. - assert_eq!( - merged.dimensionality(), - 2, - "Should have appended the dimensions from each input array" - ); - assert_eq!(merged.len(), 2, "Should have merged two common points",); - assert_eq!( - merged.data_types().collect::>(), - &[DataType::Integer, DataType::Double], - "Should have combined the data types of the input arrays" - ); - assert_eq!( - merged.metric_types().collect::>(), - &[MetricType::Gauge, MetricType::Gauge], - "Should have combined the metric types of the input arrays" - ); - - // Check the actual values of the array. - let mut points = merged.iter_points(); - - // The first and last timepoint overlapped between the two arrays, so we - // should have both of them as concrete samples. - let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&1)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&4.0)), MetricType::Gauge), - "Should have pulled value from second double array." 
- ); - - // And the next point - let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&3)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&6.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And there should be no other values. - assert!(points.next().is_none(), "There should be no more points"); - } -} diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 0205868f5c..89afb31a7c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -6,12 +6,8 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; +use anyhow::Error; +use oxql_types::Table; use std::num::NonZeroUsize; /// The kind of limiting operation @@ -65,58 +61,7 @@ impl Limit { } }; - // Slice the various data arrays. - let start_times = input_points - .start_times - .as_ref() - .map(|s| s[start..end].to_vec()); - let timestamps = - input_points.timestamps[start..end].to_vec(); - let values = input_points - .values - .iter() - .map(|vals| { - let values = match &vals.values { - ValueArray::Integer(inner) => { - ValueArray::Integer( - inner[start..end].to_vec(), - ) - } - ValueArray::Double(inner) => { - ValueArray::Double( - inner[start..end].to_vec(), - ) - } - ValueArray::Boolean(inner) => { - ValueArray::Boolean( - inner[start..end].to_vec(), - ) - } - ValueArray::String(inner) => { - ValueArray::String( - inner[start..end].to_vec(), - ) - } - ValueArray::IntegerDistribution(inner) => { - ValueArray::IntegerDistribution( - inner[start..end].to_vec(), - ) - } - ValueArray::DoubleDistribution(inner) => { - ValueArray::DoubleDistribution( - inner[start..end].to_vec(), - ) - } - }; - Values { values, metric_type: vals.metric_type } - }) - .collect(); - let points = Points { start_times, timestamps, values }; - Timeseries { - fields: timeseries.fields.clone(), - points, - alignment: timeseries.alignment, - } + timeseries.limit(start, end) }); Table::from_timeseries(table.name(), timeseries) }) @@ -127,9 +72,12 @@ impl Limit { #[cfg(test)] mod tests { use super::*; - use crate::oxql::point::{DataType, MetricType}; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType}, + Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; fn test_tables() -> Vec { @@ -150,12 +98,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(1), - Some(2), - Some(3), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(1), Some(2), Some(3)]); let table1 = Table::from_timeseries("first", std::iter::once(timeseries)) .unwrap(); @@ -166,12 +116,14 @@ mod 
tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(4), - Some(5), - Some(6), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(4), Some(5), Some(6)]); let table2 = Table::from_timeseries("second", std::iter::once(timeseries)) .unwrap(); @@ -223,7 +175,8 @@ mod tests { "Limited table should have the same fields" ); assert_eq!( - timeseries.alignment, limited_timeseries.alignment, + timeseries.alignment(), + limited_timeseries.alignment(), "Limited timeseries should have the same alignment" ); assert_eq!( @@ -237,14 +190,15 @@ mod tests { // These depend on the limit operation. let points = ×eries.points; let limited_points = &limited_timeseries.points; - assert_eq!(points.start_times, limited_points.start_times); + assert_eq!(points.start_times(), limited_points.start_times()); assert_eq!( - points.timestamps[start..end], - limited_points.timestamps + &points.timestamps()[start..end], + limited_points.timestamps() ); assert_eq!( - limited_points.values[0].values.as_integer().unwrap(), - &points.values[0].values.as_integer().unwrap()[start..end], + limited_points.values(0).unwrap().as_integer().unwrap(), + &points.values(0).unwrap().as_integer().unwrap() + [start..end], "Points should be limited to [{start}..{end}]", ); } diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index 46f5106a08..8b8d4cbe1b 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -20,10 +20,10 @@ use self::join::Join; use self::limit::Limit; use crate::oxql::ast::Query; use crate::oxql::Error; -use crate::oxql::Table; use chrono::DateTime; use chrono::Utc; use oximeter::TimeseriesName; +use oxql_types::Table; /// A basic table operation, the atoms of an OxQL query. #[derive(Clone, Debug, PartialEq)] diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs index 3961fae1cc..fcdfb783c5 100644 --- a/oximeter/db/src/oxql/mod.rs +++ b/oximeter/db/src/oxql/mod.rs @@ -10,13 +10,9 @@ use peg::error::ParseError as PegError; use peg::str::LineCol; pub mod ast; -pub mod point; pub mod query; -pub mod table; pub use self::query::Query; -pub use self::table::Table; -pub use self::table::Timeseries; pub use anyhow::Error; /// Format a PEG parsing error into a nice anyhow error. diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index e1fada9f2a..46c9bbc92c 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -23,7 +23,6 @@ use crate::oxql::Error; use crate::TimeseriesName; use chrono::DateTime; use chrono::Utc; -use std::time::Duration; /// A parsed OxQL query. #[derive(Clone, Debug, PartialEq)] @@ -391,15 +390,6 @@ fn restrict_filter_idents( } } -/// Describes the time alignment for an OxQL query. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Alignment { - /// The end time of the query, which the temporal reference point. - pub end_time: DateTime, - /// The alignment period, the interval on which values are produced. 
- pub period: Duration, -} - #[cfg(test)] mod tests { use super::Filter; diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index ceabf00888..556ced0437 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -6,11 +6,12 @@ // Copyright 2021 Oxide Computer Company use crate::{ - Error, FieldSchema, FieldSource, TimeseriesKey, TimeseriesSchema, - DATABASE_NAME, DATABASE_SELECT_FORMAT, + Error, FieldSchema, FieldSource, TimeseriesSchema, DATABASE_NAME, + DATABASE_SELECT_FORMAT, }; use chrono::{DateTime, Utc}; use dropshot::PaginationOrder; +use oximeter::schema::TimeseriesKey; use oximeter::types::{DatumType, FieldType, FieldValue}; use oximeter::{Metric, Target}; use regex::Regex; diff --git a/oximeter/db/src/shells/oxql.rs b/oximeter/db/src/shells/oxql.rs index 0f23ea7d64..f46d08c0cf 100644 --- a/oximeter/db/src/shells/oxql.rs +++ b/oximeter/db/src/shells/oxql.rs @@ -7,9 +7,10 @@ // Copyright 2024 Oxide Computer use super::{list_timeseries, prepare_columns}; -use crate::{make_client, oxql::Table, Client, OxqlResult}; +use crate::{make_client, Client, OxqlResult}; use clap::Args; use crossterm::style::Stylize; +use oxql_types::Table; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index 732683c414..f5d81d51d1 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -10,7 +10,6 @@ use clickward::{ use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::poll; use omicron_test_utils::dev::test_setup_log; -use oximeter::test_util; use oximeter_db::{Client, DbWrite, OxqlResult, Sample, TestDbWrite}; use slog::{debug, info, Logger}; use std::collections::BTreeSet; @@ -199,7 +198,7 @@ async fn test_cluster() -> anyhow::Result<()> { // Let's write some samples to our first replica and wait for them to show // up on replica 2. 
let start = tokio::time::Instant::now(); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -261,7 +260,7 @@ async fn test_cluster() -> anyhow::Result<()> { info!(log, "successfully stopped server 1"); // Generate some new samples and insert them at replica3 - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -298,7 +297,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); // We still have a quorum (2 of 3 keepers), so we should be able to insert - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -321,7 +320,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); info!(log, "Attempting to insert samples without keeper quorum"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -350,7 +349,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -370,7 +369,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, diff --git a/oximeter/impl/src/test_util.rs b/oximeter/impl/src/test_util.rs deleted file mode 100644 index c2ac7b34bd..0000000000 --- a/oximeter/impl/src/test_util.rs +++ /dev/null @@ -1,130 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Utilities for testing the oximeter crate. 
-// Copyright 2024 Oxide Computer Company - -use crate::histogram; -use crate::histogram::{Histogram, Record}; -use crate::types::{Cumulative, Sample}; -use uuid::Uuid; - -#[derive(oximeter::Target)] -pub struct TestTarget { - pub name1: String, - pub name2: String, - pub num: i64, -} - -impl Default for TestTarget { - fn default() -> Self { - TestTarget { - name1: "first_name".into(), - name2: "second_name".into(), - num: 0, - } - } -} - -#[derive(oximeter::Metric)] -pub struct TestMetric { - pub id: Uuid, - pub good: bool, - pub datum: i64, -} - -#[derive(oximeter::Metric)] -pub struct TestCumulativeMetric { - pub id: Uuid, - pub good: bool, - pub datum: Cumulative, -} - -#[derive(oximeter::Metric)] -pub struct TestHistogram { - pub id: Uuid, - pub good: bool, - pub datum: Histogram, -} - -const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); - -pub fn make_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new(&target, &metric).unwrap() -} - -pub fn make_missing_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new_missing(&target, &metric).unwrap() -} - -pub fn make_hist_sample() -> Sample { - let target = TestTarget::default(); - let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); - hist.sample(1.0).unwrap(); - hist.sample(2.0).unwrap(); - hist.sample(6.0).unwrap(); - let metric = TestHistogram { id: ID, good: true, datum: hist }; - Sample::new(&target, &metric).unwrap() -} - -/// A target identifying a single virtual machine instance -#[derive(Debug, Clone, Copy, oximeter::Target)] -pub struct VirtualMachine { - pub project_id: Uuid, - pub instance_id: Uuid, -} - -/// A metric recording the total time a vCPU is busy, by its ID -#[derive(Debug, Clone, Copy, oximeter::Metric)] -pub struct CpuBusy { - cpu_id: i64, - datum: Cumulative, -} - -pub fn generate_test_samples( - n_projects: usize, - n_instances: usize, - n_cpus: usize, - n_samples: usize, -) -> Vec { - let n_timeseries = n_projects * n_instances * n_cpus; - let mut samples = Vec::with_capacity(n_samples * n_timeseries); - for _ in 0..n_projects { - let project_id = Uuid::new_v4(); - for _ in 0..n_instances { - let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; - for cpu in 0..n_cpus { - for sample in 0..n_samples { - let cpu_busy = CpuBusy { - cpu_id: cpu as _, - datum: Cumulative::new(sample as f64), - }; - let sample = Sample::new(&vm, &cpu_busy).unwrap(); - samples.push(sample); - } - } - } - } - samples -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gen_test_samples() { - let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); - let samples = - generate_test_samples(n_projects, n_instances, n_cpus, n_samples); - assert_eq!( - samples.len(), - n_projects * n_instances * n_cpus * n_samples - ); - } -} diff --git a/oximeter/instruments/src/http.rs b/oximeter/instruments/src/http.rs index 6a0a35ce63..2eef327d02 100644 --- a/oximeter/instruments/src/http.rs +++ b/oximeter/instruments/src/http.rs @@ -6,17 +6,14 @@ // Copyright 2024 Oxide Computer Company -use dropshot::{ - HttpError, HttpResponse, RequestContext, RequestInfo, ServerContext, -}; +use dropshot::{HttpError, HttpResponse, RequestContext, ServerContext}; use futures::Future; use http::StatusCode; -use http::Uri; use oximeter::{ histogram::Histogram, histogram::Record, MetricsError, Producer, Sample, }; -use std::borrow::Cow; -use 
std::collections::BTreeMap; +use std::collections::HashMap; +use std::hash::{DefaultHasher, Hash as _, Hasher}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -24,28 +21,18 @@ oximeter::use_timeseries!("http-service.toml"); pub use http_service::HttpService; pub use http_service::RequestLatencyHistogram; -// Return the route portion of the request, normalized to include a single -// leading slash and no trailing slashes. -fn normalized_uri_path(uri: &Uri) -> Cow<'static, str> { - Cow::Owned(format!( - "/{}", - uri.path().trim_end_matches('/').trim_start_matches('/') - )) -} - impl RequestLatencyHistogram { /// Build a new `RequestLatencyHistogram` with a specified histogram. /// /// Latencies are expressed in seconds. pub fn new( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, histogram: Histogram, ) -> Self { Self { - route: normalized_uri_path(request.uri()), - method: request.method().to_string().into(), - status_code: status_code.as_u16().into(), + operation_id: operation_id.to_string().into(), + status_code: status_code.as_u16(), datum: histogram, } } @@ -59,25 +46,27 @@ impl RequestLatencyHistogram { /// /// Latencies are expressed as seconds. pub fn with_latency_decades( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, start_decade: i16, end_decade: i16, ) -> Result { Ok(Self::new( - request, + operation_id, status_code, Histogram::span_decades(start_decade, end_decade)?, )) } - fn key_for(request: &RequestInfo, status_code: StatusCode) -> String { - format!( - "{}:{}:{}", - normalized_uri_path(request.uri()), - request.method(), - status_code.as_u16() - ) + /// Return a key used to ID this histogram. + /// + /// This is a quick way to look up the histogram tracking any particular + /// request and response. + fn key_for(operation_id: &str, status_code: StatusCode) -> u64 { + let mut hasher = DefaultHasher::new(); + operation_id.hash(&mut hasher); + status_code.hash(&mut hasher); + hasher.finish() } } @@ -92,8 +81,19 @@ impl RequestLatencyHistogram { /// The `LatencyTracker` can be used to produce metric data collected by `oximeter`. #[derive(Debug, Clone)] pub struct LatencyTracker { + /// The HTTP service target for which we're tracking request histograms. pub service: HttpService, - latencies: Arc>>, + /// The latency histogram for each request. + /// + /// The map here uses a hash of the request fields (operation and status + /// code) as the key to each histogram. It's a bit redundant to then store + /// that in a hashmap, but this lets us avoid creating a new + /// `RequestLatencyHistogram` when handling a request that we already have + /// one for. Instead, we use this key to get the existing entry. + latencies: Arc>>, + /// The histogram used to track each request. + /// + /// We store it here to clone as we see new requests. histogram: Histogram, } @@ -104,7 +104,7 @@ impl LatencyTracker { pub fn new(service: HttpService, histogram: Histogram) -> Self { Self { service, - latencies: Arc::new(Mutex::new(BTreeMap::new())), + latencies: Arc::new(Mutex::new(HashMap::new())), histogram, } } @@ -129,15 +129,15 @@ impl LatencyTracker { /// to which the other arguments belong. (One is created if it does not exist.) 
pub fn update( &self, - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, latency: Duration, ) -> Result<(), MetricsError> { - let key = RequestLatencyHistogram::key_for(request, status_code); + let key = RequestLatencyHistogram::key_for(operation_id, status_code); let mut latencies = self.latencies.lock().unwrap(); let entry = latencies.entry(key).or_insert_with(|| { RequestLatencyHistogram::new( - request, + operation_id, status_code, self.histogram.clone(), ) @@ -170,14 +170,14 @@ impl LatencyTracker { Ok(response) => response.status_code(), Err(ref e) => e.status_code, }; - if let Err(e) = self.update(&context.request, status_code, latency) { + if let Err(e) = self.update(&context.operation_id, status_code, latency) + { slog::error!( &context.log, "error instrumenting dropshot handler"; "error" => ?e, "status_code" => status_code.as_u16(), - "method" => %context.request.method(), - "uri" => %context.request.uri(), + "operation_id" => &context.operation_id, "remote_addr" => context.request.remote_addr(), "latency" => ?latency, ); @@ -220,41 +220,24 @@ mod tests { HttpService { name: "my-service".into(), id: ID.parse().unwrap() }; let hist = Histogram::new(&[0.0, 1.0]).unwrap(); let tracker = LatencyTracker::new(service, hist); - let request = http::request::Builder::new() - .method(http::Method::GET) - .uri("/some/uri") - .body(()) + let status_code0 = StatusCode::OK; + let status_code1 = StatusCode::NOT_FOUND; + let operation_id = "some_operation_id"; + tracker + .update(operation_id, status_code0, Duration::from_secs_f64(0.5)) .unwrap(); - let status_code = StatusCode::OK; tracker - .update( - &RequestInfo::new(&request, "0.0.0.0:0".parse().unwrap()), - status_code, - Duration::from_secs_f64(0.5), - ) + .update(operation_id, status_code1, Duration::from_secs_f64(0.5)) .unwrap(); - - let key = "/some/uri:GET:200"; - let actual_hist = tracker.latencies.lock().unwrap()[key].datum.clone(); - assert_eq!(actual_hist.n_samples(), 1); - let bins = actual_hist.iter().collect::>(); - assert_eq!(bins[1].count, 1); - } - - #[test] - fn test_normalize_uri_path() { - const EXPECTED: &str = "/foo/bar"; - const TESTS: &[&str] = &[ - "/foo/bar", - "/foo/bar/", - "//foo/bar", - "//foo/bar/", - "/foo/bar//", - "////foo/bar/////", - ]; - for test in TESTS.iter() { - println!("{test}"); - assert_eq!(normalized_uri_path(&test.parse().unwrap()), EXPECTED); + let key0 = RequestLatencyHistogram::key_for(operation_id, status_code0); + let key1 = RequestLatencyHistogram::key_for(operation_id, status_code1); + let latencies = tracker.latencies.lock().unwrap(); + assert_eq!(latencies.len(), 2); + for key in [key0, key1] { + let actual_hist = &latencies[&key].datum; + assert_eq!(actual_hist.n_samples(), 1); + let bins = actual_hist.iter().collect::>(); + assert_eq!(bins[1].count, 1); } } } diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index c04d1bd3ae..63b370bee6 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -13,9 +13,10 @@ anyhow.workspace = true clap.workspace = true chrono.workspace = true omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true oximeter-macro-impl.workspace = true +oximeter-schema.workspace = true oximeter-timeseries-macro.workspace = true +oximeter-types.workspace = true prettyplease.workspace = true syn.workspace = true toml.workspace = true diff --git a/oximeter/oximeter/schema/hardware-component.toml b/oximeter/oximeter/schema/hardware-component.toml new file mode 100644 index 
0000000000..30a1d6510f --- /dev/null +++ b/oximeter/oximeter/schema/hardware-component.toml @@ -0,0 +1,183 @@ +format_version = 1 + +[target] +name = "hardware_component" +description = "A hardware component on a compute sled, switch, or power shelf" +authz_scope = "fleet" +versions = [ + { version = 1, fields = [ + "rack_id", + "slot", + "chassis_kind", + "chassis_serial", + "chassis_model", + "chassis_revision", + "hubris_archive_id", + "gateway_id", + "component_kind", + "component_id", + "description", + ]} +] + +[fields.rack_id] +type = "uuid" +description = "ID of the rack on which this measurement was recorded." + +[fields.slot] +type = "u32" +description = """ +The cubby number or switch slot of the service processor reporting the \ +measurement""" + +[fields.chassis_model] +type = "string" +description = "Model number of the sled, switch, or power shelf" + +[fields.chassis_revision] +type = "u32" +description = "Revision number of the sled, switch, or power shelf" + +[fields.chassis_serial] +type = "string" +description = "Serial number of the sled, switch, or power shelf" + +[fields.hubris_archive_id] +type = "string" +description = """ +Hubris firmware archive ID of the service processor when the measurement \ +was recorded.""" + +[fields.gateway_id] +type = "uuid" +description = """ +ID of the Management Gateway Service process which recorded the measurement.""" + +[fields.chassis_kind] +type = "string" +description = """ +What kind of thing the component resides on. + +This will be one of 'sled', for components on compute sleds; 'switch', for \ +components on rack switches; or 'power', for components on power shelves.""" + +[fields.component_id] +type = "string" +description = """ +The service processor component ID uniquely identifying the hardware \ +component on the sled, switch, or power shelf.""" + +[fields.component_kind] +type = "string" +description = "What type of hardware component this thing is." + +[fields.description] +type = "string" +description = """ +A human-readable description of the hardware component. This may include \ +its location or role in the system (e.g. a DIMM's number, or a temperature \ +sensor's location).""" + +[fields.sensor] +type = "string" +description = """The name of a sensor that recorded a sensor reading.""" + +[fields.error] +type = "string" +description = "The kind of sensor error that occurred" + +[fields.sensor_kind] +type = "string" +description = """ +Which kind of sensor could not be read due to a sensor error. + +This will be one of 'temperature', 'current', 'power', 'voltage', \ +'input_current', 'input_voltage', or 'fan_speed' (the same names as \ +the metrics emitted by these sensors when they are read successfully).""" + +[[metrics]] +name = "temperature" +description = "A temperature reading from a hardware component." 
+units = "degrees_celsius" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "current" +description = "Output current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "power" +description = "Power reading, in watts" +units = "watts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "voltage" +description = "Output voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_current" +description = "Input electric current reading in amperes" +units = "amps" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "input_voltage" +description = "Input electric voltage reading, in volts" +units = "volts" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + + +[[metrics]] +name = "fan_speed" +description = "A fan speed measurement, in rotations per minute" +units = "rpm" +datum_type = "f32" +versions = [ + { added_in = 1, fields = ["sensor"]} +] + +[[metrics]] +name = "sensor_error_count" +description = "Cumulative count of errors reported by a sensor" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["sensor", "error", "sensor_kind"]} +] + +[[metrics]] +name = "poll_error_count" +description = """ +Cumulative count of errors encountered whilst polling a component's sensors. + +Unlike the `sensor_error_count` metric, this counts errors encountered by \ +the management gateway while polling the component, rather than errors \ +reported by the component itself.""" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = ["error"] } +] diff --git a/oximeter/oximeter/schema/http-service.toml b/oximeter/oximeter/schema/http-service.toml index 9098110656..5270f6942c 100644 --- a/oximeter/oximeter/schema/http-service.toml +++ b/oximeter/oximeter/schema/http-service.toml @@ -14,7 +14,7 @@ description = "Duration for the server to handle a request" units = "seconds" datum_type = "histogram_f64" versions = [ - { added_in = 1, fields = [ "route", "method", "status_code" ] } + { added_in = 1, fields = [ "operation_id", "status_code" ] } ] [fields.name] @@ -25,14 +25,15 @@ description = "The name of the HTTP server, or program running it" type = "uuid" description = "UUID of the HTTP server" -[fields.route] +[fields.operation_id] type = "string" -description = "HTTP route in the request" +description = """\ +The identifier for the HTTP operation.\ -[fields.method] -type = "string" -description = "HTTP method in the request" +In most cases, this the OpenAPI `operationId` field that uniquely identifies the +endpoint the request is targeted to and the HTTP method used. +""" [fields.status_code] -type = "i64" +type = "u16" description = "HTTP status code in the server's response" diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 5ec6a49e5c..913318b8a8 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -185,14 +185,15 @@ //! `Producer`s may be registered with the same `ProducerServer`, each with potentially different //! sampling intervals. 
-pub use oximeter_impl::*; +pub use oximeter_macro_impl::{Metric, Target}; pub use oximeter_timeseries_macro::use_timeseries; +pub use oximeter_types::*; #[cfg(test)] mod test { - use oximeter_impl::schema::ir::load_schema; - use oximeter_impl::schema::{FieldSource, SCHEMA_DIRECTORY}; - use oximeter_impl::TimeseriesSchema; + use oximeter_schema::ir::load_schema; + use oximeter_types::schema::{FieldSource, SCHEMA_DIRECTORY}; + use oximeter_types::TimeseriesSchema; use std::collections::BTreeMap; use std::fs; diff --git a/oximeter/oxql-types/Cargo.toml b/oximeter/oxql-types/Cargo.toml new file mode 100644 index 0000000000..da7c7bcd1c --- /dev/null +++ b/oximeter/oxql-types/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "oxql-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +highway.workspace = true +num.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/oximeter/oxql-types/src/lib.rs b/oximeter/oxql-types/src/lib.rs new file mode 100644 index 0000000000..00468705a9 --- /dev/null +++ b/oximeter/oxql-types/src/lib.rs @@ -0,0 +1,23 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Core types for OxQL. + +use chrono::{DateTime, Utc}; +use std::time::Duration; + +pub mod point; +pub mod table; + +pub use self::table::Table; +pub use self::table::Timeseries; + +/// Describes the time alignment for an OxQL query. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Alignment { + /// The end time of the query, which the temporal reference point. + pub end_time: DateTime, + /// The alignment period, the interval on which values are produced. + pub period: Duration, +} diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/oxql-types/src/point.rs similarity index 82% rename from oximeter/db/src/oxql/point.rs rename to oximeter/oxql-types/src/point.rs index e04193e8b8..6e3c7143dc 100644 --- a/oximeter/db/src/oxql/point.rs +++ b/oximeter/oxql-types/src/point.rs @@ -6,15 +6,15 @@ // Copyright 2024 Oxide Computer Company -use super::Error; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::Utc; use num::ToPrimitive; -use oximeter::traits::HistogramSupport; -use oximeter::DatumType; -use oximeter::Measurement; -use oximeter::Quantile; +use oximeter_types::traits::HistogramSupport; +use oximeter_types::DatumType; +use oximeter_types::Measurement; +use oximeter_types::Quantile; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -131,32 +131,32 @@ impl CumulativeDatum { // not cumulative. 
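The new `oxql-types` crate above hoists the core OxQL data types (points, tables, and the `Alignment` marker) out of `oximeter-db` so they can be shared. A minimal sketch of constructing an `Alignment`, assuming the crate is imported as `oxql_types` and that `end_time` is a `DateTime<Utc>` as implied by the crate's imports:

// Sketch only: the crate path is assumed from the package name.
use chrono::Utc;
use oxql_types::Alignment;
use std::time::Duration;

fn alignment_sketch() -> Alignment {
    // Values produced on one-minute windows, ending now.
    Alignment { end_time: Utc::now(), period: Duration::from_secs(60) }
}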
fn from_cumulative(meas: &Measurement) -> Result { let datum = match meas.datum() { - oximeter::Datum::CumulativeI64(val) => { + oximeter_types::Datum::CumulativeI64(val) => { CumulativeDatum::Integer(val.value()) } - oximeter::Datum::CumulativeU64(val) => { + oximeter_types::Datum::CumulativeU64(val) => { let int = val .value() .try_into() .context("Overflow converting u64 to i64")?; CumulativeDatum::Integer(int) } - oximeter::Datum::CumulativeF32(val) => { + oximeter_types::Datum::CumulativeF32(val) => { CumulativeDatum::Double(val.value().into()) } - oximeter::Datum::CumulativeF64(val) => { + oximeter_types::Datum::CumulativeF64(val) => { CumulativeDatum::Double(val.value()) } - oximeter::Datum::HistogramI8(hist) => hist.into(), - oximeter::Datum::HistogramU8(hist) => hist.into(), - oximeter::Datum::HistogramI16(hist) => hist.into(), - oximeter::Datum::HistogramU16(hist) => hist.into(), - oximeter::Datum::HistogramI32(hist) => hist.into(), - oximeter::Datum::HistogramU32(hist) => hist.into(), - oximeter::Datum::HistogramI64(hist) => hist.into(), - oximeter::Datum::HistogramU64(hist) => hist.try_into()?, - oximeter::Datum::HistogramF32(hist) => hist.into(), - oximeter::Datum::HistogramF64(hist) => hist.into(), + oximeter_types::Datum::HistogramI8(hist) => hist.into(), + oximeter_types::Datum::HistogramU8(hist) => hist.into(), + oximeter_types::Datum::HistogramI16(hist) => hist.into(), + oximeter_types::Datum::HistogramU16(hist) => hist.into(), + oximeter_types::Datum::HistogramI32(hist) => hist.into(), + oximeter_types::Datum::HistogramU32(hist) => hist.into(), + oximeter_types::Datum::HistogramI64(hist) => hist.into(), + oximeter_types::Datum::HistogramU64(hist) => hist.try_into()?, + oximeter_types::Datum::HistogramF32(hist) => hist.into(), + oximeter_types::Datum::HistogramF64(hist) => hist.into(), other => anyhow::bail!( "Input datum of type {} is not cumulative", other.datum_type(), @@ -169,10 +169,10 @@ impl CumulativeDatum { /// A single list of values, for one dimension of a timeseries. #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Values { - // The data values. - pub(super) values: ValueArray, - // The type of this metric. - pub(super) metric_type: MetricType, + /// The data values. + pub values: ValueArray, + /// The type of this metric. + pub metric_type: MetricType, } impl Values { @@ -285,14 +285,23 @@ impl<'a> fmt::Display for Datum<'a> { #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Points { // The start time points for cumulative or delta metrics. - pub(super) start_times: Option>>, + pub(crate) start_times: Option>>, // The timestamp of each value. - pub(super) timestamps: Vec>, + pub(crate) timestamps: Vec>, // The array of data values, one for each dimension. - pub(super) values: Vec, + pub(crate) values: Vec, } impl Points { + /// Construct a new `Points` with the provided data. + pub fn new( + start_times: Option>>, + timestamps: Vec>, + values: Vec, + ) -> Self { + Self { start_times, timestamps, values } + } + /// Construct an empty array of points to hold data of the provided type. pub fn empty(data_type: DataType, metric_type: MetricType) -> Self { Self::with_capacity( @@ -303,8 +312,28 @@ impl Points { .unwrap() } - // Return a mutable reference to the value array of the specified dimension, if any. - pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { + /// Return the start times of the points, if any. 
+ pub fn start_times(&self) -> Option<&[DateTime]> { + self.start_times.as_deref() + } + + /// Clear the start times of the points. + pub fn clear_start_times(&mut self) { + self.start_times = None; + } + + /// Return the timestamps of the points. + pub fn timestamps(&self) -> &[DateTime] { + &self.timestamps + } + + pub fn set_timestamps(&mut self, timestamps: Vec>) { + self.timestamps = timestamps; + } + + /// Return a mutable reference to the value array of the specified + /// dimension, if any. + pub fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { self.values.get_mut(dim).map(|val| &mut val.values) } @@ -563,8 +592,8 @@ impl Points { }) } - // Filter points in self to those where `to_keep` is true. - pub(crate) fn filter(&self, to_keep: Vec) -> Result { + /// Filter points in self to those where `to_keep` is true. + pub fn filter(&self, to_keep: Vec) -> Result { anyhow::ensure!( to_keep.len() == self.len(), "Filter array must be the same length as self", @@ -646,8 +675,8 @@ impl Points { Ok(out) } - // Return a new set of points, with the values casted to the provided types. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + /// Return a new set of points, with the values casted to the provided types. + pub fn cast(&self, types: &[DataType]) -> Result { anyhow::ensure!( types.len() == self.dimensionality(), "Cannot cast to {} types, the data has dimensionality {}", @@ -863,12 +892,104 @@ impl Points { Ok(Self { start_times, timestamps, values: new_values }) } + /// Given two arrays of points, stack them together at matching timepoints. + /// + /// For time points in either which do not have a corresponding point in + /// the other, the entire time point is elided. + pub fn inner_join(&self, right: &Points) -> Result { + // Create an output array with roughly the right capacity, and double the + // number of dimensions. We're trying to stack output value arrays together + // along the dimension axis. + let data_types = + self.data_types().chain(right.data_types()).collect::>(); + let metric_types = + self.metric_types().chain(right.metric_types()).collect::>(); + let mut out = Points::with_capacity( + self.len().max(right.len()), + data_types.iter().copied(), + metric_types.iter().copied(), + )?; + + // Iterate through each array until one is exhausted. We're only inserting + // values from both arrays where the timestamps actually match, since this + // is an inner join. We may want to insert missing values where timestamps + // do not match on either side, when we support an outer join of some kind. + let n_left_dim = self.dimensionality(); + let mut left_ix = 0; + let mut right_ix = 0; + while left_ix < self.len() && right_ix < right.len() { + let left_timestamp = self.timestamps()[left_ix]; + let right_timestamp = right.timestamps()[right_ix]; + if left_timestamp == right_timestamp { + out.timestamps.push(left_timestamp); + push_concrete_values( + &mut out.values[..n_left_dim], + &self.values, + left_ix, + ); + push_concrete_values( + &mut out.values[n_left_dim..], + &right.values, + right_ix, + ); + left_ix += 1; + right_ix += 1; + } else if left_timestamp < right_timestamp { + left_ix += 1; + } else { + right_ix += 1; + } + } + Ok(out) + } + /// Return true if self contains no data points. pub fn is_empty(&self) -> bool { self.len() == 0 } } +// Push the `i`th value from each dimension of `from` onto `to`. 
+fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array + .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + /// List of data values for one timeseries. /// /// Each element is an option, where `None` represents a missing sample. @@ -900,8 +1021,8 @@ impl ValueArray { } } - // Return the data type in self. - pub(super) fn data_type(&self) -> DataType { + /// Return the data type in self. + pub fn data_type(&self) -> DataType { match self { ValueArray::Integer(_) => DataType::Integer, ValueArray::Double(_) => DataType::Double, @@ -947,10 +1068,8 @@ impl ValueArray { Ok(inner) } - // Access the inner array of integers, if possible. - pub(super) fn as_integer_mut( - &mut self, - ) -> Result<&mut Vec>, Error> { + /// Access the inner array of integers, if possible. + pub fn as_integer_mut(&mut self) -> Result<&mut Vec>, Error> { let ValueArray::Integer(inner) = self else { anyhow::bail!( "Cannot access value array as integer type, it has type {}", @@ -1107,91 +1226,97 @@ impl ValueArray { // Push a value directly from a datum, without modification. 
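Several `Points` accessors and transforms above are promoted from `pub(super)`/`pub(crate)` to `pub`, and a public `Points::new` constructor is added, so `oximeter-db` can keep building and filtering point arrays from outside the crate. A small sketch of that public surface, reusing the `Values` struct literal shape from the tests later in this file; the `oxql_types::point` path is an assumption:

// Sketch only: paths are assumed and the data are arbitrary.
use chrono::Utc;
use oxql_types::point::{MetricType, Points, ValueArray, Values};

fn filter_sketch() {
    let timestamps = vec![Utc::now(), Utc::now()];
    let values = vec![Values {
        values: ValueArray::Integer(vec![Some(1), Some(2)]),
        metric_type: MetricType::Gauge,
    }];
    // Gauge data has no start times, so the first argument is `None`.
    let points = Points::new(None, timestamps, values);
    // Keep only the first sample; the mask must be as long as the points.
    let first_only = points.filter(vec![true, false]).unwrap();
    assert_eq!(first_only.timestamps().len(), 1);
}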
fn push_value_from_datum( &mut self, - datum: &oximeter::Datum, + datum: &oximeter_types::Datum, ) -> Result<(), Error> { match datum { - oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), - oximeter::Datum::I8(i) => { + oximeter_types::Datum::Bool(b) => { + self.as_boolean_mut()?.push(Some(*b)) + } + oximeter_types::Datum::I8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U8(i) => { + oximeter_types::Datum::U8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I16(i) => { + oximeter_types::Datum::I16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U16(i) => { + oximeter_types::Datum::U16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I32(i) => { + oximeter_types::Datum::I32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U32(i) => { + oximeter_types::Datum::U32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), - oximeter::Datum::U64(i) => { + oximeter_types::Datum::I64(i) => { + self.as_integer_mut()?.push(Some(*i)) + } + oximeter_types::Datum::U64(i) => { let i = i.to_i64().context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(i)); } - oximeter::Datum::F32(f) => { + oximeter_types::Datum::F32(f) => { self.as_double_mut()?.push(Some(f64::from(*f))) } - oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), - oximeter::Datum::String(s) => { + oximeter_types::Datum::F64(f) => { + self.as_double_mut()?.push(Some(*f)) + } + oximeter_types::Datum::String(s) => { self.as_string_mut()?.push(Some(s.clone())) } - oximeter::Datum::Bytes(_) => { + oximeter_types::Datum::Bytes(_) => { anyhow::bail!("Bytes data types are not yet supported") } - oximeter::Datum::CumulativeI64(c) => { + oximeter_types::Datum::CumulativeI64(c) => { self.as_integer_mut()?.push(Some(c.value())) } - oximeter::Datum::CumulativeU64(c) => { + oximeter_types::Datum::CumulativeU64(c) => { let c = c .value() .to_i64() .context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(c)); } - oximeter::Datum::CumulativeF32(c) => { + oximeter_types::Datum::CumulativeF32(c) => { self.as_double_mut()?.push(Some(f64::from(c.value()))) } - oximeter::Datum::CumulativeF64(c) => { + oximeter_types::Datum::CumulativeF64(c) => { self.as_double_mut()?.push(Some(c.value())) } - oximeter::Datum::HistogramI8(h) => self + oximeter_types::Datum::HistogramI8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU8(h) => self + oximeter_types::Datum::HistogramU8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI16(h) => self + oximeter_types::Datum::HistogramI16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU16(h) => self + oximeter_types::Datum::HistogramU16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI32(h) => self + oximeter_types::Datum::HistogramI32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU32(h) => self + oximeter_types::Datum::HistogramU32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI64(h) => self + oximeter_types::Datum::HistogramI64(h) => self .as_integer_distribution_mut()? 
.push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU64(h) => self + oximeter_types::Datum::HistogramU64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::try_from(h)?)), - oximeter::Datum::HistogramF32(h) => self + oximeter_types::Datum::HistogramF32(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramF64(h) => self + oximeter_types::Datum::HistogramF64(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::Missing(missing) => { + oximeter_types::Datum::Missing(missing) => { self.push_missing(missing.datum_type())? } } @@ -1216,7 +1341,7 @@ impl ValueArray { fn push_diff_from_last_to_datum( &mut self, last_datum: &Option, - new_datum: &oximeter::Datum, + new_datum: &oximeter_types::Datum, data_type: DataType, ) -> Result<(), Error> { match (last_datum.as_ref(), new_datum.is_missing()) { @@ -1253,49 +1378,49 @@ impl ValueArray { match (last_datum, new_datum) { ( CumulativeDatum::Integer(last), - oximeter::Datum::I8(new), + oximeter_types::Datum::I8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U8(new), + oximeter_types::Datum::U8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I16(new), + oximeter_types::Datum::I16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U16(new), + oximeter_types::Datum::U16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I32(new), + oximeter_types::Datum::I32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U32(new), + oximeter_types::Datum::U32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I64(new), + oximeter_types::Datum::I64(new), ) => { let diff = new .checked_sub(*last) @@ -1304,7 +1429,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::U64(new), + oximeter_types::Datum::U64(new), ) => { let new = new .to_i64() @@ -1316,20 +1441,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::F32(new), + oximeter_types::Datum::F32(new), ) => { self.as_double_mut()? .push(Some(f64::from(*new) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::F64(new), + oximeter_types::Datum::F64(new), ) => { self.as_double_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeI64(new), + oximeter_types::Datum::CumulativeI64(new), ) => { let new = new.value(); let diff = new @@ -1339,7 +1464,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeU64(new), + oximeter_types::Datum::CumulativeU64(new), ) => { let new = new .value() @@ -1352,20 +1477,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF32(new), + oximeter_types::Datum::CumulativeF32(new), ) => { self.as_double_mut()? 
.push(Some(f64::from(new.value()) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF64(new), + oximeter_types::Datum::CumulativeF64(new), ) => { self.as_double_mut()?.push(Some(new.value() - last)); } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI8(new), + oximeter_types::Datum::HistogramI8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1373,7 +1498,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU8(new), + oximeter_types::Datum::HistogramU8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1381,7 +1506,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI16(new), + oximeter_types::Datum::HistogramI16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1389,7 +1514,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU16(new), + oximeter_types::Datum::HistogramU16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1397,7 +1522,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI32(new), + oximeter_types::Datum::HistogramI32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1405,7 +1530,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU32(new), + oximeter_types::Datum::HistogramU32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1413,7 +1538,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI64(new), + oximeter_types::Datum::HistogramI64(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1421,7 +1546,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU64(new), + oximeter_types::Datum::HistogramU64(new), ) => { let new = Distribution::try_from(new)?; self.as_integer_distribution_mut()? @@ -1429,7 +1554,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF32(new), + oximeter_types::Datum::HistogramF32(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1437,7 +1562,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF64(new), + oximeter_types::Datum::HistogramF64(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1486,8 +1611,8 @@ impl ValueArray { } } - // Swap the value in self with other, asserting they're the same type. - pub(crate) fn swap(&mut self, mut values: ValueArray) { + /// Swap the value in self with other, asserting they're the same type. + pub fn swap(&mut self, mut values: ValueArray) { use std::mem::swap; match (self, &mut values) { (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), @@ -1733,8 +1858,10 @@ where macro_rules! 
i64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(i64::from).collect(), @@ -1750,8 +1877,10 @@ macro_rules! i64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::IntegerDistribution(hist.into()) } } @@ -1766,10 +1895,10 @@ i64_dist_from!(i32); i64_dist_from!(u32); i64_dist_from!(i64); -impl TryFrom<&oximeter::histogram::Histogram> for Distribution { +impl TryFrom<&oximeter_types::histogram::Histogram> for Distribution { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { let (bins, counts) = hist.bins_and_counts(); let bins = bins @@ -1791,10 +1920,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for Distribution { } } -impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { +impl TryFrom<&oximeter_types::histogram::Histogram> for CumulativeDatum { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { hist.try_into().map(CumulativeDatum::IntegerDistribution) } @@ -1802,8 +1931,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { macro_rules! f64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(f64::from).collect(), @@ -1819,8 +1950,10 @@ macro_rules! 
f64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::DoubleDistribution(hist.into()) } } @@ -1833,9 +1966,9 @@ f64_dist_from!(f64); #[cfg(test)] mod tests { use super::{Distribution, MetricType, Points, Values}; - use crate::oxql::point::{DataType, ValueArray}; + use crate::point::{push_concrete_values, DataType, Datum, ValueArray}; use chrono::{DateTime, Utc}; - use oximeter::{ + use oximeter_types::{ histogram::Record, types::Cumulative, Measurement, Quantile, }; use std::time::Duration; @@ -1939,12 +2072,12 @@ mod tests { let now = Utc::now(); let current1 = now + Duration::from_secs(1); let mut hist1 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist1.sample(1).unwrap(); hist1.set_start_time(current1); let current2 = now + Duration::from_secs(2); let mut hist2 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist2.sample(5).unwrap(); hist2.sample(10).unwrap(); hist2.sample(15).unwrap(); @@ -2273,4 +2406,176 @@ mod tests { .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution]) .is_err()); } + + #[test] + fn test_push_concrete_values() { + let mut points = Points::with_capacity( + 2, + [DataType::Integer, DataType::Double].into_iter(), + [MetricType::Gauge, MetricType::Gauge].into_iter(), + ) + .unwrap(); + + // Push a concrete value for the integer dimension + let from_ints = vec![Values { + values: ValueArray::Integer(vec![Some(1)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[..1], &from_ints, 0); + + // And another for the double dimension. + let from_doubles = vec![Values { + values: ValueArray::Double(vec![Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[1..], &from_doubles, 0); + + assert_eq!( + points.dimensionality(), + 2, + "Points should have 2 dimensions", + ); + let ints = points.values[0].values.as_integer().unwrap(); + assert_eq!( + ints.len(), + 1, + "Should have pushed one point in the first dimension" + ); + assert_eq!( + ints[0], + Some(1), + "Should have pushed 1 onto the first dimension" + ); + let doubles = points.values[1].values.as_double().unwrap(); + assert_eq!( + doubles.len(), + 1, + "Should have pushed one point in the second dimension" + ); + assert_eq!( + doubles[0], + Some(2.0), + "Should have pushed 2.0 onto the second dimension" + ); + } + + #[test] + fn test_join_point_arrays() { + let now = Utc::now(); + + // Create a set of integer points to join with. + // + // This will have two timestamps, one of which will match the points + // below that are merged in. + let int_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now, + ], + values: vec![Values { + values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), + metric_type: MetricType::Gauge, + }], + }; + + // Create an additional set of double points. + // + // This also has two timepoints, one of which matches with the above, + // and one of which does not. 
+ let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = int_points.inner_join(&double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. 
+ assert!(points.next().is_none(), "There should be no more points"); + } } diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/oxql-types/src/table.rs similarity index 75% rename from oximeter/db/src/oxql/table.rs rename to oximeter/oxql-types/src/table.rs index 2cd141d2fa..f37992942f 100644 --- a/oximeter/db/src/oxql/table.rs +++ b/oximeter/oxql-types/src/table.rs @@ -6,14 +6,16 @@ // Copyright 2024 Oxide Computer Company -use super::point::DataType; -use super::point::MetricType; -use super::point::Points; -use super::query::Alignment; -use super::Error; -use crate::TimeseriesKey; +use crate::point::DataType; +use crate::point::MetricType; +use crate::point::Points; +use crate::point::ValueArray; +use crate::point::Values; +use crate::Alignment; +use anyhow::Error; use highway::HighwayHasher; -use oximeter::FieldValue; +use oximeter_types::schema::TimeseriesKey; +use oximeter_types::FieldValue; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -67,10 +69,20 @@ impl Timeseries { hasher.finish() } + /// Return the alignment of this timeseries, if any. + pub fn alignment(&self) -> Option { + self.alignment + } + + /// Set the alignment of this timeseries. + pub fn set_alignment(&mut self, alignment: Alignment) { + self.alignment = Some(alignment); + } + /// Return a copy of the timeseries, keeping only the provided fields. /// /// An error is returned if the timeseries does not contain those fields. - pub(crate) fn copy_with_fields( + pub fn copy_with_fields( &self, kept_fields: &[&str], ) -> Result { @@ -88,6 +100,20 @@ impl Timeseries { }) } + /// Return a copy of the timeseries, keeping only the provided points. + /// + /// Returns `None` if `kept_points` is empty. + pub fn copy_with_points(&self, kept_points: Points) -> Option { + if kept_points.is_empty() { + return None; + } + Some(Self { + fields: self.fields.clone(), + points: kept_points, + alignment: self.alignment, + }) + } + // Return `true` if the schema in `other` matches that of `self`. fn matches_schema(&self, other: &Timeseries) -> bool { if self.fields.len() != other.fields.len() { @@ -125,7 +151,7 @@ impl Timeseries { /// This returns an error if the points cannot be so cast, or the /// dimensionality of the types requested differs from the dimensionality of /// the points themselves. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + pub fn cast(&self, types: &[DataType]) -> Result { let fields = self.fields.clone(); Ok(Self { fields, @@ -133,6 +159,49 @@ impl Timeseries { alignment: self.alignment, }) } + + /// Return a new timeseries, with the points limited to the provided range. + pub fn limit(&self, start: usize, end: usize) -> Self { + let input_points = &self.points; + + // Slice the various data arrays. 
+ let start_times = + input_points.start_times().map(|s| s[start..end].to_vec()); + let timestamps = input_points.timestamps()[start..end].to_vec(); + let values = input_points + .values + .iter() + .map(|vals| { + let values = match &vals.values { + ValueArray::Integer(inner) => { + ValueArray::Integer(inner[start..end].to_vec()) + } + ValueArray::Double(inner) => { + ValueArray::Double(inner[start..end].to_vec()) + } + ValueArray::Boolean(inner) => { + ValueArray::Boolean(inner[start..end].to_vec()) + } + ValueArray::String(inner) => { + ValueArray::String(inner[start..end].to_vec()) + } + ValueArray::IntegerDistribution(inner) => { + ValueArray::IntegerDistribution( + inner[start..end].to_vec(), + ) + } + ValueArray::DoubleDistribution(inner) => { + ValueArray::DoubleDistribution( + inner[start..end].to_vec(), + ) + } + }; + Values { values, metric_type: vals.metric_type } + }) + .collect(); + let points = Points::new(start_times, timestamps, values); + Self { fields: self.fields.clone(), points, alignment: self.alignment } + } } /// A table represents one or more timeseries with the same schema. @@ -146,7 +215,7 @@ pub struct Table { // // This starts as the name of the timeseries schema the data is derived // from, but can be modified as operations are done. - pub(super) name: String, + pub name: String, // The set of timeseries in the table, ordered by key. timeseries: BTreeMap, } diff --git a/oximeter/schema/Cargo.toml b/oximeter/schema/Cargo.toml new file mode 100644 index 0000000000..fe2e28705a --- /dev/null +++ b/oximeter/schema/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "oximeter-schema" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +clap.workspace = true +heck.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +prettyplease.workspace = true +proc-macro2.workspace = true +quote.workspace = true +schemars.workspace = true +serde.workspace = true +slog-error-chain.workspace = true +syn.workspace = true +toml.workspace = true diff --git a/oximeter/oximeter/src/bin/oximeter-schema.rs b/oximeter/schema/src/bin/oximeter-schema.rs similarity index 93% rename from oximeter/oximeter/src/bin/oximeter-schema.rs rename to oximeter/schema/src/bin/oximeter-schema.rs index 14fb31b1e8..5595a28639 100644 --- a/oximeter/oximeter/src/bin/oximeter-schema.rs +++ b/oximeter/schema/src/bin/oximeter-schema.rs @@ -9,7 +9,7 @@ use anyhow::Context as _; use clap::Parser; use clap::Subcommand; -use oximeter::schema::ir::TimeseriesDefinition; +use oximeter_schema::ir::TimeseriesDefinition; use std::num::NonZeroU8; use std::path::PathBuf; @@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> { println!("{def:#?}"); } Cmd::Schema { timeseries, version } => { - let schema = oximeter_impl::schema::ir::load_schema(&contents)?; + let schema = oximeter_schema::ir::load_schema(&contents)?; match (timeseries, version) { (None, None) => { for each in schema.into_iter() { @@ -87,7 +87,7 @@ fn main() -> anyhow::Result<()> { } } Cmd::Emit => { - let code = oximeter::schema::codegen::use_timeseries(&contents)?; + let code = oximeter_schema::codegen::use_timeseries(&contents)?; let formatted = prettyplease::unparse(&syn::parse_file(&format!("{code}"))?); println!("{formatted}"); diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/schema/src/codegen.rs similarity index 73% rename from oximeter/impl/src/schema/codegen.rs rename to oximeter/schema/src/codegen.rs index 
4778cf4970..1e6e352c15 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/schema/src/codegen.rs @@ -6,18 +6,18 @@ //! Generate Rust types and code from oximeter schema definitions. -use crate::schema::ir::find_schema_version; -use crate::schema::ir::load_schema; -use crate::schema::AuthzScope; -use crate::schema::FieldSource; -use crate::schema::Units; -use crate::DatumType; -use crate::FieldSchema; -use crate::FieldType; -use crate::MetricsError; -use crate::TimeseriesSchema; +use crate::ir::find_schema_version; +use crate::ir::load_schema; use chrono::prelude::DateTime; use chrono::prelude::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use proc_macro2::TokenStream; use quote::quote; @@ -34,7 +34,7 @@ pub fn use_timeseries(contents: &str) -> Result { let latest = find_schema_version(schema.iter().cloned(), None); let mod_name = quote::format_ident!("{}", latest[0].target_name()); let types = emit_schema_types(latest); - let func = emit_schema_function(schema.into_iter()); + let func = emit_schema_function(schema.iter()); Ok(quote! { pub mod #mod_name { #types @@ -43,9 +43,10 @@ pub fn use_timeseries(contents: &str) -> Result { }) } -fn emit_schema_function( - list: impl Iterator, +fn emit_schema_function<'a>( + list: impl Iterator, ) -> TokenStream { + let list = list.map(quote_timeseries_schema); quote! { pub fn timeseries_schema() -> Vec<::oximeter::schema::TimeseriesSchema> { vec![ @@ -310,66 +311,63 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { // This is used so that we can emit a function that will return the same data as // we parse from the TOML file with the timeseries definition, as a way to // export the definitions without needing that actual file at runtime. -impl quote::ToTokens for DatumType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, - DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, - DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, - DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, - DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, - DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, - DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, - DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, - DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, - DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, - DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, - DatumType::String => quote! { ::oximeter::DatumType::String }, - DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, - DatumType::CumulativeI64 => { - quote! { ::oximeter::DatumType::CumulativeI64 } - } - DatumType::CumulativeU64 => { - quote! { ::oximeter::DatumType::CumulativeU64 } - } - DatumType::CumulativeF32 => { - quote! { ::oximeter::DatumType::CumulativeF32 } - } - DatumType::CumulativeF64 => { - quote! { ::oximeter::DatumType::CumulativeF64 } - } - DatumType::HistogramI8 => { - quote! { ::oximeter::DatumType::HistogramI8 } - } - DatumType::HistogramU8 => { - quote! { ::oximeter::DatumType::HistogramU8 } - } - DatumType::HistogramI16 => { - quote! { ::oximeter::DatumType::HistogramI16 } - } - DatumType::HistogramU16 => { - quote! 
{ ::oximeter::DatumType::HistogramU16 } - } - DatumType::HistogramI32 => { - quote! { ::oximeter::DatumType::HistogramI32 } - } - DatumType::HistogramU32 => { - quote! { ::oximeter::DatumType::HistogramU32 } - } - DatumType::HistogramI64 => { - quote! { ::oximeter::DatumType::HistogramI64 } - } - DatumType::HistogramU64 => { - quote! { ::oximeter::DatumType::HistogramU64 } - } - DatumType::HistogramF32 => { - quote! { ::oximeter::DatumType::HistogramF32 } - } - DatumType::HistogramF64 => { - quote! { ::oximeter::DatumType::HistogramF64 } - } - }; - toks.to_tokens(tokens); +fn quote_datum_type(datum_type: DatumType) -> TokenStream { + match datum_type { + DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, + DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, + DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, + DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, + DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, + DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, + DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, + DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, + DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, + DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, + DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, + DatumType::String => quote! { ::oximeter::DatumType::String }, + DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, + DatumType::CumulativeI64 => { + quote! { ::oximeter::DatumType::CumulativeI64 } + } + DatumType::CumulativeU64 => { + quote! { ::oximeter::DatumType::CumulativeU64 } + } + DatumType::CumulativeF32 => { + quote! { ::oximeter::DatumType::CumulativeF32 } + } + DatumType::CumulativeF64 => { + quote! { ::oximeter::DatumType::CumulativeF64 } + } + DatumType::HistogramI8 => { + quote! { ::oximeter::DatumType::HistogramI8 } + } + DatumType::HistogramU8 => { + quote! { ::oximeter::DatumType::HistogramU8 } + } + DatumType::HistogramI16 => { + quote! { ::oximeter::DatumType::HistogramI16 } + } + DatumType::HistogramU16 => { + quote! { ::oximeter::DatumType::HistogramU16 } + } + DatumType::HistogramI32 => { + quote! { ::oximeter::DatumType::HistogramI32 } + } + DatumType::HistogramU32 => { + quote! { ::oximeter::DatumType::HistogramU32 } + } + DatumType::HistogramI64 => { + quote! { ::oximeter::DatumType::HistogramI64 } + } + DatumType::HistogramU64 => { + quote! { ::oximeter::DatumType::HistogramU64 } + } + DatumType::HistogramF32 => { + quote! { ::oximeter::DatumType::HistogramF32 } + } + DatumType::HistogramF64 => { + quote! { ::oximeter::DatumType::HistogramF64 } + } } } @@ -452,55 +450,46 @@ fn emit_rust_type_for_field(field_type: FieldType) -> TokenStream { } } -impl quote::ToTokens for FieldSource { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldSource::Target => { - quote! { ::oximeter::schema::FieldSource::Target } - } - FieldSource::Metric => { - quote! { ::oximeter::schema::FieldSource::Metric } - } - }; - toks.to_tokens(tokens); +fn quote_field_source(source: FieldSource) -> TokenStream { + match source { + FieldSource::Target => { + quote! { ::oximeter::schema::FieldSource::Target } + } + FieldSource::Metric => { + quote! { ::oximeter::schema::FieldSource::Metric } + } } } -impl quote::ToTokens for FieldType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldType::String => quote! { ::oximeter::FieldType::String }, - FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, - FieldType::U8 => quote! 
{ ::oximeter::FieldType::U8 }, - FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, - FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, - FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, - FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, - FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, - FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, - FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, - FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, - FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, - }; - toks.to_tokens(tokens); +fn quote_field_type(field_type: FieldType) -> TokenStream { + match field_type { + FieldType::String => quote! { ::oximeter::FieldType::String }, + FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, + FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, + FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, + FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, + FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, + FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, + FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, + FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, + FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, + FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, + FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, } } -impl quote::ToTokens for AuthzScope { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - AuthzScope::Fleet => { - quote! { ::oximeter::schema::AuthzScope::Fleet } - } - AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, - AuthzScope::Project => { - quote! { ::oximeter::schema::AuthzScope::Project } - } - AuthzScope::ViewableToAll => { - quote! { ::oximeter::schema::AuthzScope::ViewableToAll } - } - }; - toks.to_tokens(tokens); +fn quote_authz_scope(authz_scope: AuthzScope) -> TokenStream { + match authz_scope { + AuthzScope::Fleet => { + quote! { ::oximeter::schema::AuthzScope::Fleet } + } + AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, + AuthzScope::Project => { + quote! { ::oximeter::schema::AuthzScope::Project } + } + AuthzScope::ViewableToAll => { + quote! { ::oximeter::schema::AuthzScope::ViewableToAll } + } } } @@ -512,85 +501,80 @@ fn quote_creation_time(created: DateTime) -> TokenStream { } } -impl quote::ToTokens for Units { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - Units::None => quote! { ::oximeter::schema::Units::None }, - Units::Count => quote! { ::oximeter::schema::Units::Count }, - Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, - Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, - Units::Nanoseconds => { - quote! { ::oximeter::schema::Units::Nanoseconds } - } - Units::Amps => quote! { ::oximeter::schema::Units::Amps }, - Units::Volts => quote! { ::oximeter::schema::Units::Volts }, - Units::DegreesCelcius => { - quote! { ::oximeter::schema::Units::DegreesCelcius } - } - Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, - }; - toks.to_tokens(tokens); +fn quote_units(units: Units) -> TokenStream { + match units { + Units::None => quote! { ::oximeter::schema::Units::None }, + Units::Count => quote! { ::oximeter::schema::Units::Count }, + Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, + Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, + Units::Nanoseconds => { + quote! 
{ ::oximeter::schema::Units::Nanoseconds } + } + Units::Amps => quote! { ::oximeter::schema::Units::Amps }, + Units::Volts => quote! { ::oximeter::schema::Units::Volts }, + Units::Watts => quote! { ::oximeter::schema::Units::Watts }, + Units::DegreesCelsius => { + quote! { ::oximeter::schema::Units::DegreesCelsius } + } + Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, } } -impl quote::ToTokens for FieldSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let name = self.name.as_str(); - let field_type = self.field_type; - let source = self.source; - let description = self.description.as_str(); - let toks = quote! { - ::oximeter::FieldSchema { - name: String::from(#name), - field_type: #field_type, - source: #source, - description: String::from(#description), - } - }; - toks.to_tokens(tokens); +fn quote_field_schema(field_schema: &FieldSchema) -> TokenStream { + let name = field_schema.name.as_str(); + let field_type = quote_field_type(field_schema.field_type); + let source = quote_field_source(field_schema.source); + let description = field_schema.description.as_str(); + quote! { + ::oximeter::FieldSchema { + name: String::from(#name), + field_type: #field_type, + source: #source, + description: String::from(#description), + } } } -impl quote::ToTokens for TimeseriesSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let field_schema = &self.field_schema; - let timeseries_name = self.timeseries_name.to_string(); - let target_description = self.description.target.as_str(); - let metric_description = self.description.metric.as_str(); - let authz_scope = self.authz_scope; - let units = self.units; - let datum_type = self.datum_type; - let ver = self.version.get(); - let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; - let created = quote_creation_time(self.created); - let toks = quote! { - ::oximeter::schema::TimeseriesSchema { - timeseries_name: - <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( - #timeseries_name - ).unwrap(), - description: ::oximeter::schema::TimeseriesDescription { - target: String::from(#target_description), - metric: String::from(#metric_description), - }, - authz_scope: #authz_scope, - units: #units, - field_schema: ::std::collections::BTreeSet::from([ - #(#field_schema),* - ]), - datum_type: #datum_type, - version: #version, - created: #created, - } - }; - toks.to_tokens(tokens); +fn quote_timeseries_schema( + timeseries_schema: &TimeseriesSchema, +) -> TokenStream { + let field_schema = + timeseries_schema.field_schema.iter().map(quote_field_schema); + let timeseries_name = timeseries_schema.timeseries_name.to_string(); + let target_description = timeseries_schema.description.target.as_str(); + let metric_description = timeseries_schema.description.metric.as_str(); + let authz_scope = quote_authz_scope(timeseries_schema.authz_scope); + let units = quote_units(timeseries_schema.units); + let datum_type = quote_datum_type(timeseries_schema.datum_type); + let ver = timeseries_schema.version.get(); + let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; + let created = quote_creation_time(timeseries_schema.created); + quote! 
{ + ::oximeter::schema::TimeseriesSchema { + timeseries_name: + <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( + #timeseries_name + ).unwrap(), + description: ::oximeter::schema::TimeseriesDescription { + target: String::from(#target_description), + metric: String::from(#metric_description), + }, + authz_scope: #authz_scope, + units: #units, + field_schema: ::std::collections::BTreeSet::from([ + #(#field_schema),* + ]), + datum_type: #datum_type, + version: #version, + created: #created, + } } } #[cfg(test)] mod tests { use super::*; - use crate::schema::TimeseriesDescription; + use oximeter_types::TimeseriesDescription; use std::{collections::BTreeSet, num::NonZeroU8}; #[test] diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/schema/src/ir.rs similarity index 99% rename from oximeter/impl/src/schema/ir.rs rename to oximeter/schema/src/ir.rs index f7a209294f..370236000a 100644 --- a/oximeter/impl/src/schema/ir.rs +++ b/oximeter/schema/src/ir.rs @@ -11,17 +11,17 @@ //! inspected or used to generate code that contains the equivalent Rust types //! and trait implementations. -use crate::schema::AuthzScope; -use crate::schema::DatumType; -use crate::schema::FieldSource; -use crate::schema::FieldType; -use crate::schema::TimeseriesDescription; -use crate::schema::Units; -use crate::FieldSchema; -use crate::MetricsError; -use crate::TimeseriesName; -use crate::TimeseriesSchema; use chrono::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesDescription; +use oximeter_types::TimeseriesName; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use serde::Deserialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; diff --git a/oximeter/schema/src/lib.rs b/oximeter/schema/src/lib.rs new file mode 100644 index 0000000000..b1ce73a940 --- /dev/null +++ b/oximeter/schema/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Tools for working with schemas for fields and timeseries. +//! +//! The actual schema type definitions are in [`oximeter_types::schema`]. + +pub mod codegen; +pub mod ir; diff --git a/oximeter/test-utils/Cargo.toml b/oximeter/test-utils/Cargo.toml new file mode 100644 index 0000000000..f463e74aca --- /dev/null +++ b/oximeter/test-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "oximeter-test-utils" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +omicron-workspace-hack.workspace = true +oximeter-macro-impl.workspace = true +oximeter-types.workspace = true +uuid.workspace = true diff --git a/oximeter/test-utils/src/lib.rs b/oximeter/test-utils/src/lib.rs new file mode 100644 index 0000000000..04c49add65 --- /dev/null +++ b/oximeter/test-utils/src/lib.rs @@ -0,0 +1,295 @@ +// Copyright 2024 Oxide Computer Company + +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for testing the oximeter crate. + +// Export the current crate as `oximeter`. 
The macros defined in `oximeter-macro-impl` generate +// code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's +// fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. +// +// See https://github.com/rust-lang/rust/pull/55275 for the PR introducing this fix, which links to +// lots of related issues and discussion. +extern crate self as oximeter; + +use oximeter_macro_impl::{Metric, Target}; +use oximeter_types::histogram; +use oximeter_types::histogram::{Histogram, Record}; +use oximeter_types::traits; +use oximeter_types::types::{ + Cumulative, Datum, DatumType, FieldType, FieldValue, Measurement, Sample, +}; +use oximeter_types::{Metric, Target}; +use uuid::Uuid; + +#[derive(Target)] +pub struct TestTarget { + pub name1: String, + pub name2: String, + pub num: i64, +} + +impl Default for TestTarget { + fn default() -> Self { + TestTarget { + name1: "first_name".into(), + name2: "second_name".into(), + num: 0, + } + } +} + +#[derive(Metric)] +pub struct TestMetric { + pub id: Uuid, + pub good: bool, + pub datum: i64, +} + +#[derive(Metric)] +pub struct TestCumulativeMetric { + pub id: Uuid, + pub good: bool, + pub datum: Cumulative, +} + +#[derive(Metric)] +pub struct TestHistogram { + pub id: Uuid, + pub good: bool, + pub datum: Histogram, +} + +const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); + +pub fn make_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new(&target, &metric).unwrap() +} + +pub fn make_missing_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new_missing(&target, &metric).unwrap() +} + +pub fn make_hist_sample() -> Sample { + let target = TestTarget::default(); + let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); + hist.sample(1.0).unwrap(); + hist.sample(2.0).unwrap(); + hist.sample(6.0).unwrap(); + let metric = TestHistogram { id: ID, good: true, datum: hist }; + Sample::new(&target, &metric).unwrap() +} + +/// A target identifying a single virtual machine instance +#[derive(Debug, Clone, Copy, oximeter::Target)] +pub struct VirtualMachine { + pub project_id: Uuid, + pub instance_id: Uuid, +} + +/// A metric recording the total time a vCPU is busy, by its ID +#[derive(Debug, Clone, Copy, oximeter::Metric)] +pub struct CpuBusy { + cpu_id: i64, + datum: Cumulative, +} + +pub fn generate_test_samples( + n_projects: usize, + n_instances: usize, + n_cpus: usize, + n_samples: usize, +) -> Vec { + let n_timeseries = n_projects * n_instances * n_cpus; + let mut samples = Vec::with_capacity(n_samples * n_timeseries); + for _ in 0..n_projects { + let project_id = Uuid::new_v4(); + for _ in 0..n_instances { + let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; + for cpu in 0..n_cpus { + for sample in 0..n_samples { + let cpu_busy = CpuBusy { + cpu_id: cpu as _, + datum: Cumulative::new(sample as f64), + }; + let sample = Sample::new(&vm, &cpu_busy).unwrap(); + samples.push(sample); + } + } + } + } + samples +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use oximeter_types::{ + schema::{ + default_schema_version, AuthzScope, FieldSchema, FieldSource, + TimeseriesSchema, Units, + }, + TimeseriesName, + }; + + use super::*; + + #[test] + fn test_gen_test_samples() { + let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); + let samples = + generate_test_samples(n_projects, 
n_instances, n_cpus, n_samples); + assert_eq!( + samples.len(), + n_projects * n_instances * n_cpus * n_samples + ); + } + + #[test] + fn test_sample_struct() { + let t = TestTarget::default(); + let m = TestMetric { id: Uuid::new_v4(), good: true, datum: 1i64 }; + let sample = Sample::new(&t, &m).unwrap(); + assert_eq!( + sample.timeseries_name, + format!("{}:{}", t.name(), m.name()) + ); + assert!(sample.measurement.start_time().is_none()); + assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); + + let m = TestCumulativeMetric { + id: Uuid::new_v4(), + good: true, + datum: 1i64.into(), + }; + let sample = Sample::new(&t, &m).unwrap(); + assert!(sample.measurement.start_time().is_some()); + } + + #[derive(Target)] + struct MyTarget { + id: Uuid, + name: String, + } + + const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); + + impl Default for MyTarget { + fn default() -> Self { + Self { id: ID, name: String::from("name") } + } + } + + #[derive(Metric)] + struct MyMetric { + happy: bool, + datum: u64, + } + + impl Default for MyMetric { + fn default() -> Self { + Self { happy: true, datum: 0 } + } + } + + #[test] + fn test_timeseries_schema_from_parts() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + + assert_eq!(schema.timeseries_name, "my_target:my_metric"); + let f = schema.schema_for_field("id").unwrap(); + assert_eq!(f.name, "id"); + assert_eq!(f.field_type, FieldType::Uuid); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("name").unwrap(); + assert_eq!(f.name, "name"); + assert_eq!(f.field_type, FieldType::String); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("happy").unwrap(); + assert_eq!(f.name, "happy"); + assert_eq!(f.field_type, FieldType::Bool); + assert_eq!(f.source, FieldSource::Metric); + assert_eq!(schema.datum_type, DatumType::U64); + } + + #[test] + fn test_timeseries_schema_from_sample() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let sample = Sample::new(&target, &metric).unwrap(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + let schema_from_sample = TimeseriesSchema::from(&sample); + assert_eq!(schema, schema_from_sample); + } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. + // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. 
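The comment above explains why `TimeseriesSchema` keeps its fields in a `BTreeSet`: collecting the target's and metric's fields into one ordered set sorts them by name across both sources, rather than target-first then metric. A small self-contained sketch of that property, using a simplified stand-in for `FieldSchema`, mirroring the "later"/"earlier" names in the test that follows:

```rust
use std::collections::BTreeSet;

// A simplified stand-in for `FieldSchema`, ordered by name first.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct Field {
    name: String,
    from_target: bool,
}

fn main() {
    // "later" comes from the target, "earlier" from the metric.
    let target_fields = vec![Field { name: "later".into(), from_target: true }];
    let metric_fields = vec![Field { name: "earlier".into(), from_target: false }];

    // Chaining both sources into one BTreeSet imposes a single ordering by
    // name across target and metric, not target-then-metric.
    let all: BTreeSet<Field> =
        target_fields.into_iter().chain(metric_fields).collect();
    let names: Vec<&str> = all.iter().map(|f| f.name.as_str()).collect();
    assert_eq!(names, ["earlier", "later"]);
}
```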
+ #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + field_type: FieldType::U64, + source: FieldSource::Target, + description: String::new(), + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + field_type: FieldType::U64, + source: FieldSource::Metric, + description: String::new(), + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + description: Default::default(), + field_schema, + datum_type, + version: default_schema_version(), + authz_scope: AuthzScope::Fleet, + units: Units::Count, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = TimeseriesSchema::from(&sample); + assert_eq!(derived_schema, expected_schema); + } +} diff --git a/oximeter/timeseries-macro/Cargo.toml b/oximeter/timeseries-macro/Cargo.toml index db591aed06..2fb8b8f312 100644 --- a/oximeter/timeseries-macro/Cargo.toml +++ b/oximeter/timeseries-macro/Cargo.toml @@ -8,7 +8,8 @@ proc-macro = true [dependencies] omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true +oximeter-schema.workspace = true +oximeter-types.workspace = true proc-macro2.workspace = true quote.workspace = true syn.workspace = true diff --git a/oximeter/timeseries-macro/src/lib.rs b/oximeter/timeseries-macro/src/lib.rs index 317a8533a4..12ec2cc417 100644 --- a/oximeter/timeseries-macro/src/lib.rs +++ b/oximeter/timeseries-macro/src/lib.rs @@ -8,7 +8,7 @@ extern crate proc_macro; -use oximeter_impl::schema::SCHEMA_DIRECTORY; +use oximeter_types::schema::SCHEMA_DIRECTORY; /// Generate code to use the timeseries from one target. /// @@ -45,7 +45,7 @@ pub fn use_timeseries( .into(); } }; - match oximeter_impl::schema::codegen::use_timeseries(&contents) { + match oximeter_schema::codegen::use_timeseries(&contents) { Ok(toks) => { let path_ = path.display().to_string(); return quote::quote! 
{ @@ -59,9 +59,8 @@ pub fn use_timeseries( Err(e) => { let msg = format!( "Failed to generate timeseries types \ - from '{}': {:?}", + from '{}': {e}", path.display(), - e, ); return syn::Error::new(token.span(), msg) .into_compile_error() diff --git a/oximeter/impl/Cargo.toml b/oximeter/types/Cargo.toml similarity index 78% rename from oximeter/impl/Cargo.toml rename to oximeter/types/Cargo.toml index 91277d9d47..6d6bbc07e6 100644 --- a/oximeter/impl/Cargo.toml +++ b/oximeter/types/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "oximeter-impl" +name = "oximeter-types" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -11,22 +11,13 @@ workspace = true bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true float-ord.workspace = true -heck.workspace = true num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true -oximeter-macro-impl.workspace = true -prettyplease.workspace = true -proc-macro2.workspace = true -quote.workspace = true regex.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true -serde_json.workspace = true -slog-error-chain.workspace = true strum.workspace = true -syn.workspace = true -toml.workspace = true thiserror.workspace = true uuid.workspace = true @@ -34,6 +25,7 @@ uuid.workspace = true approx.workspace = true # For benchmark criterion.workspace = true +oximeter-macro-impl.workspace = true rand = { workspace = true, features = ["std_rng"] } rand_distr.workspace = true rstest.workspace = true diff --git a/oximeter/impl/benches/quantile.rs b/oximeter/types/benches/quantile.rs similarity index 97% rename from oximeter/impl/benches/quantile.rs rename to oximeter/types/benches/quantile.rs index 4540ba8f6a..b88cb211e6 100644 --- a/oximeter/impl/benches/quantile.rs +++ b/oximeter/types/benches/quantile.rs @@ -8,7 +8,7 @@ // Copyright 2024 Oxide Computer Company use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use oximeter_impl::Quantile; +use oximeter_types::Quantile; use rand_distr::{Distribution, Normal}; /// Emulates baseline code in a Python implementation of the P² diff --git a/oximeter/impl/src/histogram.rs b/oximeter/types/src/histogram.rs similarity index 97% rename from oximeter/impl/src/histogram.rs rename to oximeter/types/src/histogram.rs index 40df0a1b41..2a4feab382 100644 --- a/oximeter/impl/src/histogram.rs +++ b/oximeter/types/src/histogram.rs @@ -523,9 +523,9 @@ where /// Example /// ------- /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::Histogram; /// /// let hist = Histogram::with_bins(&[(0..10).into(), (10..100).into()]).unwrap(); @@ -922,9 +922,9 @@ where /// ------- /// /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::{Histogram, BinRange}; /// use std::ops::{RangeBounds, Bound}; /// @@ -1029,8 +1029,13 @@ where return Err(QuantizationError::InvalidSteps); } - // The highest power must be representable in the target type. 
- if self.checked_pow(hi.into()).is_none() { + // The highest power must be representable in the target type. Note that + // we have to convert to that target type _before_ doing this check. + let base = >::from(*self); + let Some(highest) = base.checked_pow(hi.into()) else { + return Err(QuantizationError::Overflow); + }; + if ::from(highest).is_none() { return Err(QuantizationError::Overflow); } @@ -1039,7 +1044,6 @@ where // // Note that we unwrap in a few places below, where we're sure the // narrowing conversion cannot fail, such as to a u32. - let base = >::from(*self); let lo = >::from(lo); let hi = >::from(hi); let count = ::from(count.get()) @@ -1057,7 +1061,6 @@ where let lo = base.pow(lo as _); let hi = base.pow(hi as _); let distance = hi - lo; - dbg!(distance, count); distance.is_multiple_of(&count) }) } @@ -1767,4 +1770,31 @@ mod tests { HistogramError::EmptyBins )); } + + #[test] + fn test_log_linear_bins_does_not_overflow_wide_bin_type() { + let start: u16 = 3; + // 10u16 ** 10u16 overflows, but what we should be computing is 10u64 ** + // 10u16, which would not overflow. We need to compute whether it + // overflows in the _support_ type. + let stop = 10; + Histogram::::span_decades(start, stop).expect( + "expected not to overflow, since support type is wide enough", + ); + } + + #[test] + fn test_log_linear_bins_does_overflow_narrow_bin_type() { + // In this case, the start / stop powers _and_ their resulting bins are + // both representable as u16s and also u64s. But we're generating bins + // that are u8s, which _the powers do_ overflow. + let start: u16 = 1; + let stop: u16 = 4; + Histogram::::span_decades(start, stop).expect( + "expected not to overflow a u32, since support type is wide enough", + ); + Histogram::::span_decades(start, stop).expect_err( + "expected to overflow a u8, since support type is not wide enough", + ); + } } diff --git a/oximeter/impl/src/lib.rs b/oximeter/types/src/lib.rs similarity index 92% rename from oximeter/impl/src/lib.rs rename to oximeter/types/src/lib.rs index 5acbeb9422..7a1a480f8d 100644 --- a/oximeter/impl/src/lib.rs +++ b/oximeter/types/src/lib.rs @@ -4,8 +4,6 @@ // Copyright 2024 Oxide Computer Company -pub use oximeter_macro_impl::*; - // Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate // code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's // fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. 
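The `span_decades` fix above changes the overflow check to exponentiate in the histogram's wider support type first and only then ask whether the result still fits the bin type; the two new tests pin down both directions. A standalone sketch of that idea with concrete integer types (u64 standing in for the wide support type and u8 for the narrow bin type; the real code is generic over `HistogramSupport`):

```rust
// Sketch only: concrete types instead of the generic HistogramSupport machinery.
fn power_fits_in_bin_type(base: u8, exponent: u32) -> bool {
    // Widen first, so the exponentiation is done in the support type and
    // cannot overflow merely because the bin type is narrow.
    let wide = u64::from(base);
    match wide.checked_pow(exponent) {
        // Overflowed even the wide support type.
        None => false,
        // Fits the support type; now check it narrows back to the bin type.
        Some(value) => u8::try_from(value).is_ok(),
    }
}

fn main() {
    assert!(power_fits_in_bin_type(2, 7)); // 128 fits in a u8
    assert!(!power_fits_in_bin_type(10, 4)); // 10_000 does not
}
```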
@@ -17,15 +15,18 @@ extern crate self as oximeter; pub mod histogram; pub mod quantile; pub mod schema; -pub mod test_util; pub mod traits; pub mod types; pub use quantile::Quantile; pub use quantile::QuantileError; +pub use schema::AuthzScope; pub use schema::FieldSchema; +pub use schema::FieldSource; +pub use schema::TimeseriesDescription; pub use schema::TimeseriesName; pub use schema::TimeseriesSchema; +pub use schema::Units; pub use traits::Metric; pub use traits::Producer; pub use traits::Target; diff --git a/oximeter/impl/src/quantile.rs b/oximeter/types/src/quantile.rs similarity index 97% rename from oximeter/impl/src/quantile.rs rename to oximeter/types/src/quantile.rs index fafe9c9ece..40777217e5 100644 --- a/oximeter/impl/src/quantile.rs +++ b/oximeter/types/src/quantile.rs @@ -78,9 +78,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::new(0.5).unwrap(); /// @@ -116,9 +116,9 @@ impl Quantile { /// /// # Examples /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::from_parts( /// 0.5, @@ -200,9 +200,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.5).unwrap(); /// for o in 1..=100 { @@ -243,9 +243,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.9).unwrap(); /// q.append(10).unwrap(); diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/types/src/schema.rs similarity index 75% rename from oximeter/impl/src/schema/mod.rs rename to oximeter/types/src/schema.rs index 250604d7be..135c77462a 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/types/src/schema.rs @@ -6,9 +6,6 @@ //! Tools for working with schema for fields and timeseries. -pub mod codegen; -pub mod ir; - use crate::types::DatumType; use crate::types::FieldType; use crate::types::MetricsError; @@ -31,6 +28,8 @@ use std::num::NonZeroU8; pub const SCHEMA_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../oximeter/schema"); +pub type TimeseriesKey = u64; + /// The name and type information for a field of a timeseries schema. #[derive( Clone, @@ -190,7 +189,8 @@ pub enum Units { Nanoseconds, Volts, Amps, - DegreesCelcius, + Watts, + DegreesCelsius, /// Rotations per minute. 
Rpm, } @@ -402,7 +402,6 @@ pub enum AuthzScope { mod tests { use super::*; use std::convert::TryFrom; - use uuid::Uuid; #[test] fn test_timeseries_name() { @@ -426,127 +425,6 @@ mod tests { assert!(TimeseriesName::try_from("x.a:b").is_err()); } - #[derive(Target)] - struct MyTarget { - id: Uuid, - name: String, - } - - const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); - - impl Default for MyTarget { - fn default() -> Self { - Self { id: ID, name: String::from("name") } - } - } - - #[derive(Metric)] - struct MyMetric { - happy: bool, - datum: u64, - } - - impl Default for MyMetric { - fn default() -> Self { - Self { happy: true, datum: 0 } - } - } - - #[test] - fn test_timeseries_schema_from_parts() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - - assert_eq!(schema.timeseries_name, "my_target:my_metric"); - let f = schema.schema_for_field("id").unwrap(); - assert_eq!(f.name, "id"); - assert_eq!(f.field_type, FieldType::Uuid); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("name").unwrap(); - assert_eq!(f.name, "name"); - assert_eq!(f.field_type, FieldType::String); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("happy").unwrap(); - assert_eq!(f.name, "happy"); - assert_eq!(f.field_type, FieldType::Bool); - assert_eq!(f.source, FieldSource::Metric); - assert_eq!(schema.datum_type, DatumType::U64); - } - - #[test] - fn test_timeseries_schema_from_sample() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let sample = Sample::new(&target, &metric).unwrap(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - let schema_from_sample = TimeseriesSchema::from(&sample); - assert_eq!(schema, schema_from_sample); - } - - // Test that we correctly order field across a target and metric. - // - // In an earlier commit, we switched from storing fields in an unordered Vec - // to using a BTree{Map,Set} to ensure ordering by name. However, the - // `TimeseriesSchema` type stored all its fields by chaining the sorted - // fields from the target and metric, without then sorting _across_ them. - // - // This was exacerbated by the error reporting, where we did in fact sort - // all fields across the target and metric, making it difficult to tell how - // the derived schema was different, if at all. - // - // This test generates a sample with a schema where the target and metric - // fields are sorted within them, but not across them. We check that the - // derived schema are actually equal, which means we've imposed that - // ordering when deriving the schema. 
- #[test] - fn test_schema_field_ordering_across_target_metric() { - let target_field = FieldSchema { - name: String::from("later"), - field_type: FieldType::U64, - source: FieldSource::Target, - description: String::new(), - }; - let metric_field = FieldSchema { - name: String::from("earlier"), - field_type: FieldType::U64, - source: FieldSource::Metric, - description: String::new(), - }; - let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); - let datum_type = DatumType::U64; - let field_schema = - [target_field.clone(), metric_field.clone()].into_iter().collect(); - let expected_schema = TimeseriesSchema { - timeseries_name, - description: Default::default(), - field_schema, - datum_type, - version: default_schema_version(), - authz_scope: AuthzScope::Fleet, - units: Units::Count, - created: Utc::now(), - }; - - #[derive(oximeter::Target)] - struct Foo { - later: u64, - } - #[derive(oximeter::Metric)] - struct Bar { - earlier: u64, - datum: u64, - } - - let target = Foo { later: 1 }; - let metric = Bar { earlier: 2, datum: 10 }; - let sample = Sample::new(&target, &metric).unwrap(); - let derived_schema = TimeseriesSchema::from(&sample); - assert_eq!(derived_schema, expected_schema); - } - #[test] fn test_field_schema_ordering() { let mut fields = BTreeSet::new(); diff --git a/oximeter/impl/src/traits.rs b/oximeter/types/src/traits.rs similarity index 96% rename from oximeter/impl/src/traits.rs rename to oximeter/types/src/traits.rs index 16baa4f619..91ecca817d 100644 --- a/oximeter/impl/src/traits.rs +++ b/oximeter/types/src/traits.rs @@ -45,9 +45,9 @@ use std::ops::AddAssign; /// -------- /// /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{traits::Target, types::FieldType}; /// use uuid::Uuid; @@ -75,9 +75,9 @@ use std::ops::AddAssign; /// supported types. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(oximeter::Target)] /// struct Bad { @@ -160,9 +160,9 @@ pub trait Target { /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use chrono::Utc; /// use oximeter::Metric; @@ -185,9 +185,9 @@ pub trait Target { /// an unsupported type. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
-/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(Metric)] /// pub struct BadType { @@ -364,9 +364,9 @@ pub use crate::histogram::HistogramSupport; /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{Datum, MetricsError, Metric, Producer, Target}; /// use oximeter::types::{Measurement, Sample, Cumulative}; @@ -464,6 +464,8 @@ pub trait Producer: Send + Sync + std::fmt::Debug + 'static { #[cfg(test)] mod tests { + use oximeter_macro_impl::{Metric, Target}; + use crate::types; use crate::{ Datum, DatumType, FieldType, FieldValue, Metric, MetricsError, diff --git a/oximeter/impl/src/types.rs b/oximeter/types/src/types.rs similarity index 97% rename from oximeter/impl/src/types.rs rename to oximeter/types/src/types.rs index 370557f7f7..60260e3649 100644 --- a/oximeter/impl/src/types.rs +++ b/oximeter/types/src/types.rs @@ -850,7 +850,7 @@ pub struct Sample { /// The version of the timeseries this sample belongs to // // TODO-cleanup: This should be removed once schema are tracked in CRDB. - #[serde(default = "::oximeter::schema::default_schema_version")] + #[serde(default = "crate::schema::default_schema_version")] pub timeseries_version: NonZeroU8, // Target name and fields @@ -1104,15 +1104,10 @@ mod tests { use super::Measurement; use super::MetricsError; use super::Sample; - use crate::test_util; - use crate::types; - use crate::Metric; - use crate::Target; use bytes::Bytes; use std::collections::BTreeMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; - use uuid::Uuid; #[test] fn test_cumulative_i64() { @@ -1176,31 +1171,6 @@ mod tests { assert!(measurement.timestamp() >= measurement.start_time().unwrap()); } - #[test] - fn test_sample_struct() { - let t = test_util::TestTarget::default(); - let m = test_util::TestMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64, - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert_eq!( - sample.timeseries_name, - format!("{}:{}", t.name(), m.name()) - ); - assert!(sample.measurement.start_time().is_none()); - assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); - - let m = test_util::TestCumulativeMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64.into(), - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert!(sample.measurement.start_time().is_some()); - } - #[rstest::rstest] #[case::as_string("some string", FieldValue::String("some string".into()))] #[case::as_i8("2", FieldValue::I8(2))] diff --git a/oximeter/impl/tests/fail/failures.rs b/oximeter/types/tests/fail/failures.rs similarity index 100% rename from oximeter/impl/tests/fail/failures.rs rename to oximeter/types/tests/fail/failures.rs diff --git a/oximeter/impl/tests/fail/failures.stderr b/oximeter/types/tests/fail/failures.stderr similarity index 100% rename from oximeter/impl/tests/fail/failures.stderr rename to oximeter/types/tests/fail/failures.stderr diff --git a/oximeter/impl/tests/test_compilation.rs b/oximeter/types/tests/test_compilation.rs similarity index 100% rename from oximeter/impl/tests/test_compilation.rs rename to oximeter/types/tests/test_compilation.rs diff --git a/package-manifest.toml b/package-manifest.toml index 
9189ed09a0..cab3c1877e 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -140,13 +140,15 @@ source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true source.paths = [ - { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, + { from = "smf/oximeter/{{clickhouse-topology}}/config.toml", to = "/var/svc/manifest/site/oximeter/config.toml" }, + { from = "smf/oximeter/manifest.xml", to = "/var/svc/manifest/site/oximeter/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true [package.clickhouse] +# This service runs a single-node ClickHouse server. service_name = "clickhouse" only_for_targets.image = "standard" source.type = "composite" @@ -169,13 +171,45 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse" }, { from = "smf/clickhouse/manifest.xml", to = "/var/svc/manifest/site/clickhouse/manifest.xml" }, { from = "smf/clickhouse/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse.sh" }, - { from = "smf/clickhouse/config_replica.xml", to = "/opt/oxide/clickhouse/config.d/config_replica.xml" }, +] +output.type = "zone" +output.intermediate_only = true +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" + +[package.clickhouse_server] +# This service runs a server for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_keeper service. +# One cannot be run without the other. +service_name = "clickhouse_server" +only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ + "clickhouse_server_svc.tar.gz", + "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" +] +output.type = "zone" + +[package.clickhouse_server_svc] +service_name = "clickhouse_server_svc" +only_for_targets.image = "standard" +source.type = "local" +source.paths = [ + { from = "out/clickhouse", to = "/opt/oxide/clickhouse_server" }, + { from = "smf/clickhouse_server/manifest.xml", to = "/var/svc/manifest/site/clickhouse_server/manifest.xml" }, + { from = "smf/clickhouse_server/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse_server.sh" }, + { from = "smf/clickhouse_server/config_replica.xml", to = "/opt/oxide/clickhouse_server/config.d/config_replica.xml" }, ] output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.clickhouse_keeper] +# This service runs a keeper for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_server service. +# One cannot be run without the other. service_name = "clickhouse_keeper" only_for_targets.image = "standard" source.type = "composite" @@ -594,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
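Earlier in this manifest hunk, the oximeter package's SMF config path gained a `{{clickhouse-topology}}` segment, so the packaged `config.toml` is chosen by the build target's `clickhouse-topology` value. A toy illustration of how such a placeholder could be resolved from the target's key/value map; this is an assumption-laden sketch, not omicron-package's actual template handling:

```rust
use std::collections::BTreeMap;

// Toy placeholder expansion: replace every `{{key}}` with the target's value.
fn expand(path: &str, target: &BTreeMap<String, String>) -> String {
    let mut out = path.to_string();
    for (key, value) in target {
        out = out.replace(&format!("{{{{{key}}}}}"), value);
    }
    out
}

fn main() {
    let mut target = BTreeMap::new();
    target.insert("clickhouse-topology".to_string(), "single-node".to_string());
    assert_eq!(
        expand("smf/oximeter/{{clickhouse-topology}}/config.toml", &target),
        "smf/oximeter/single-node/config.toml"
    );
}
```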
-source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "b0f08e754f7c834d7ca05093b13a574863f500cff56210591ef4cc7eaf20159b" +source.sha256 = "c33915998894dd36a2d1078f7e13717aa20760924c30640d7647d4791dd5f2ee" output.type = "tarball" [package.mg-ddm] @@ -610,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "499962b57404626aff1ecd62d5045ba2ee06070d45f7cb2a8fc284e53eed17d6" +source.sha256 = "be9d657ec22a69468b18f2b4d48e55621538eade8b8d3e367a1d8d5cc686cfbe" output.type = "zone" output.intermediate_only = true @@ -625,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" +source.sha256 = "e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" output.type = "zone" output.intermediate_only = true @@ -636,8 +670,8 @@ output.intermediate_only = true service_name = "lldp" source.type = "prebuilt" source.repo = "lldp" -source.commit = "30e5d89fae9190c69258ca77d5d5a1acec064742" -source.sha256 = "f58bfd1b77748544b5b1a99a07e52bab8dc5673b9bd3a745ebbfdd614d492328" +source.commit = "188f0f6d4c066f1515bd707050407cedd790fcf1" +source.sha256 = "132d0760be5208f60b58bcaed98fa6384b09f41dd5febf51970f5cbf46138ecf" output.type = "zone" output.intermediate_only = true @@ -676,8 +710,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" output.type = "zone" output.intermediate_only = true @@ -703,8 +737,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "ad02632713a57fe8c5371316320309e1fad52f0ce2f7e6f768859aa94dfbb1d9" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "0e68ea8fbb609bbe2c643fc8cadc0197bd641006a323149159893bfd0d816805" output.type = "zone" output.intermediate_only = true @@ -723,8 +757,8 @@ only_for_targets.image = "standard" # the other `source.*` keys. 
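Each prebuilt package above is pinned by a `source.commit` together with a `source.sha256` of the artifact buildomat publishes for that commit, so bumping one without the other should fail when the download is verified. The check that pinning implies is just a digest comparison; a hedged sketch using the `sha2` crate, not the actual omicron-package download path:

```rust
use sha2::{Digest, Sha256};

// Sketch only: verify a downloaded blob hashes to the manifest's
// `source.sha256` value.
fn digest_matches(bytes: &[u8], expected_hex: &str) -> bool {
    let digest = Sha256::digest(bytes);
    let actual: String = digest.iter().map(|b| format!("{b:02x}")).collect();
    actual.eq_ignore_ascii_case(expected_hex)
}

fn main() {
    let blob = b"example artifact contents";
    // In practice the expected value comes from package-manifest.toml.
    assert!(!digest_matches(blob, "<sha256 from the manifest>"));
}
```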
source.type = "prebuilt" source.repo = "dendrite" -source.commit = "21b16567f28e103f145cd18d53fac6958429c4ff" -source.sha256 = "23bca3873cdb0441cd18c0cf071b86d49755be06837479661876ac95d2f10f27" +source.commit = "76c735d472e3badaeca08982e22496fccb1ce210" +source.sha256 = "45484d6d8557a0656984d0e6db879589d841d43ab6a11116cb1da314b928a425" output.type = "zone" output.intermediate_only = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index b2b8703015..cd88345d0a 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -265,12 +265,19 @@ async fn do_target( format!("failed to create directory {}", target_dir) })?; match subcommand { - TargetCommand::Create { image, machine, switch, rack_topology } => { + TargetCommand::Create { + image, + machine, + switch, + rack_topology, + clickhouse_topology, + } => { let target = KnownTarget::new( image.clone(), machine.clone(), switch.clone(), rack_topology.clone(), + clickhouse_topology.clone(), )?; let path = get_single_target(&target_dir, name).await?; diff --git a/package/src/lib.rs b/package/src/lib.rs index 2009de9dfe..b37c1774fd 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -68,6 +68,21 @@ pub enum TargetCommand { /// fail in a single-sled environment. `single-sled` relaxes this /// requirement. rack_topology: crate::target::RackTopology, + + #[clap( + short, + long, + default_value = Some("single-node"), + required = false + )] + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): Remove + // once single-node functionality is removed. + /// Specify whether clickhouse will be deployed as a replicated cluster + /// or single-node configuration. + /// + /// Replicated cluster configuration is an experimental feature to be + /// used only for testing. + clickhouse_topology: crate::target::ClickhouseTopology, }, /// List all existing targets List, diff --git a/package/src/target.rs b/package/src/target.rs index 589dba7870..6a6cbd32d8 100644 --- a/package/src/target.rs +++ b/package/src/target.rs @@ -62,6 +62,18 @@ pub enum RackTopology { SingleSled, } +/// Topology of the ClickHouse installation within the rack. +#[derive(Clone, Debug, strum::EnumString, strum::Display, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +#[clap(rename_all = "kebab-case")] +pub enum ClickhouseTopology { + /// Use configurations suitable for a replicated ClickHouse cluster deployment. + ReplicatedCluster, + + /// Use configurations suitable for a single-node ClickHouse deployment. + SingleNode, +} + /// A strongly-typed variant of [Target]. 
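The new `ClickhouseTopology` enum above is parsed from the target string's kebab-case values ("replicated-cluster" / "single-node"), and an absent `clickhouse-topology` key falls back to `SingleNode`, as the target parsing below does with `unwrap_or`. A minimal sketch of that behaviour with a hand-written `FromStr` (the real type derives it via `strum` with kebab-case serialization):

```rust
use std::str::FromStr;

// Stand-in for the package target enum; illustrative only.
#[derive(Clone, Debug, PartialEq)]
enum ClickhouseTopology {
    ReplicatedCluster,
    SingleNode,
}

impl FromStr for ClickhouseTopology {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "replicated-cluster" => Ok(Self::ReplicatedCluster),
            "single-node" => Ok(Self::SingleNode),
            other => Err(format!("unknown clickhouse-topology: {other}")),
        }
    }
}

fn main() {
    // A missing `clickhouse-topology` key defaults to single-node.
    let value: Option<&str> = None;
    let topology = value
        .map(|s| s.parse::<ClickhouseTopology>())
        .transpose()
        .unwrap()
        .unwrap_or(ClickhouseTopology::SingleNode);
    assert_eq!(topology, ClickhouseTopology::SingleNode);
}
```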
#[derive(Clone, Debug)] pub struct KnownTarget { @@ -69,6 +81,7 @@ pub struct KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, } impl KnownTarget { @@ -77,6 +90,7 @@ impl KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, ) -> Result { if matches!(image, Image::Trampoline) { if machine.is_some() { @@ -93,7 +107,7 @@ impl KnownTarget { bail!("'switch=asic' is only valid with 'machine=gimlet'"); } - Ok(Self { image, machine, switch, rack_topology }) + Ok(Self { image, machine, switch, rack_topology, clickhouse_topology }) } } @@ -104,6 +118,7 @@ impl Default for KnownTarget { machine: Some(Machine::NonGimlet), switch: Some(Switch::Stub), rack_topology: RackTopology::MultiSled, + clickhouse_topology: ClickhouseTopology::SingleNode, } } } @@ -119,6 +134,10 @@ impl From for Target { map.insert("switch".to_string(), switch.to_string()); } map.insert("rack-topology".to_string(), kt.rack_topology.to_string()); + map.insert( + "clickhouse-topology".to_string(), + kt.clickhouse_topology.to_string(), + ); Target(map) } } @@ -140,6 +159,7 @@ impl std::str::FromStr for KnownTarget { let mut machine = None; let mut switch = None; let mut rack_topology = None; + let mut clickhouse_topology = None; for (k, v) in target.0.into_iter() { match k.as_str() { @@ -155,6 +175,9 @@ impl std::str::FromStr for KnownTarget { "rack-topology" => { rack_topology = Some(v.parse()?); } + "clickhouse-topology" => { + clickhouse_topology = Some(v.parse()?); + } _ => { bail!( "Unknown target key {k}\nValid keys include: [{}]", @@ -173,6 +196,7 @@ impl std::str::FromStr for KnownTarget { machine, switch, rack_topology.unwrap_or(RackTopology::MultiSled), + clickhouse_topology.unwrap_or(ClickhouseTopology::SingleNode), ) } } diff --git a/schema/crdb/add-management-gateway-producer-kind/up.sql b/schema/crdb/add-management-gateway-producer-kind/up.sql new file mode 100644 index 0000000000..e872278e2f --- /dev/null +++ b/schema/crdb/add-management-gateway-producer-kind/up.sql @@ -0,0 +1,2 @@ +ALTER TYPE omicron.public.producer_kind + ADD VALUE IF NOT EXISTS 'management_gateway' AFTER 'instance'; diff --git a/schema/crdb/collapse_lldp_settings/up1.sql b/schema/crdb/collapse_lldp_settings/up1.sql new file mode 100644 index 0000000000..f7fb05d726 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up1.sql @@ -0,0 +1,4 @@ +/* + * The old lldp_service_config_id is being replaced with lldp_link_config_id. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config DROP COLUMN IF EXISTS lldp_service_config_id; diff --git a/schema/crdb/collapse_lldp_settings/up2.sql b/schema/crdb/collapse_lldp_settings/up2.sql new file mode 100644 index 0000000000..b2d884d068 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up2.sql @@ -0,0 +1,4 @@ +/* + * Add a pointer to this link's LLDP config settings. + */ +ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS lldp_link_config_id UUID NOT NULL; diff --git a/schema/crdb/collapse_lldp_settings/up3.sql b/schema/crdb/collapse_lldp_settings/up3.sql new file mode 100644 index 0000000000..9c4ef8549b --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up3.sql @@ -0,0 +1,5 @@ +/* + * Drop the old lldp_service_config table, which has been incorporated into the + * new lldp_link_config. 
+ */ +DROP TABLE IF EXISTS omicron.public.lldp_service_config; diff --git a/schema/crdb/collapse_lldp_settings/up4.sql b/schema/crdb/collapse_lldp_settings/up4.sql new file mode 100644 index 0000000000..3c8d4e86cf --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up4.sql @@ -0,0 +1,4 @@ +/* + * Drop the old lldp_config table, which has been replaced by lldp_link_config. + */ +DROP TABLE IF EXISTS omicron.public.lldp_config; diff --git a/schema/crdb/collapse_lldp_settings/up5.sql b/schema/crdb/collapse_lldp_settings/up5.sql new file mode 100644 index 0000000000..50dcd618d8 --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up5.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config ( + id UUID PRIMARY KEY, + enabled BOOL NOT NULL, + link_name STRING(63), + link_description STRING(512), + chassis_id STRING(63), + system_name STRING(63), + system_description STRING(512), + management_ip TEXT, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/collapse_lldp_settings/up6.sql b/schema/crdb/collapse_lldp_settings/up6.sql new file mode 100644 index 0000000000..3b16af6f4b --- /dev/null +++ b/schema/crdb/collapse_lldp_settings/up6.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS lldp_config_by_name; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ddc399d282..1457532c49 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1334,7 +1334,9 @@ CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM ( -- removed). 'service', -- A Propolis VMM for an instance in the omicron.public.instance table - 'instance' + 'instance', + -- A management gateway service on a scrimlet. + 'management_gateway' ); /* @@ -2650,40 +2652,30 @@ CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM ( CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_link_config ( port_settings_id UUID, - lldp_service_config_id UUID NOT NULL, link_name TEXT, mtu INT4, fec omicron.public.switch_link_fec, speed omicron.public.switch_link_speed, autoneg BOOL NOT NULL DEFAULT false, + lldp_link_config_id UUID NOT NULL, PRIMARY KEY (port_settings_id, link_name) ); -CREATE TABLE IF NOT EXISTS omicron.public.lldp_service_config ( - id UUID PRIMARY KEY, - lldp_config_id UUID, - enabled BOOL NOT NULL -); - -CREATE TABLE IF NOT EXISTS omicron.public.lldp_config ( +CREATE TABLE IF NOT EXISTS omicron.public.lldp_link_config ( id UUID PRIMARY KEY, - name STRING(63) NOT NULL, - description STRING(512) NOT NULL, + enabled BOOL NOT NULL, + link_name STRING(63), + link_description STRING(512), + chassis_id STRING(63), + system_name STRING(63), + system_description STRING(612), + management_ip TEXT, time_created TIMESTAMPTZ NOT NULL, time_modified TIMESTAMPTZ NOT NULL, - time_deleted TIMESTAMPTZ, - chassis_id TEXT, - system_name TEXT, - system_description TEXT, - management_ip TEXT + time_deleted TIMESTAMPTZ ); -CREATE UNIQUE INDEX IF NOT EXISTS lldp_config_by_name ON omicron.public.lldp_config ( - name -) WHERE - time_deleted IS NULL; - CREATE TYPE IF NOT EXISTS omicron.public.switch_interface_kind AS ENUM ( 'primary', 'vlan', @@ -2715,6 +2707,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_route_config ( dst INET, gw INET, vid INT4, + local_pref INT8, /* TODO https://github.com/oxidecomputer/omicron/issues/3013 */ PRIMARY KEY (port_settings_id, interface_name, dst, gw) @@ -2790,6 +2783,10 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_bgp_config_by_name ON omicron.public.bg ) 
WHERE time_deleted IS NULL; +CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config ( + asn +) WHERE time_deleted IS NULL; + CREATE TABLE IF NOT EXISTS omicron.public.bgp_announce_set ( id UUID PRIMARY KEY, name STRING(63) NOT NULL, @@ -4217,7 +4214,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '87.0.0', NULL) + (TRUE, NOW(), NOW(), '91.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/lookup-bgp-config-by-asn/up01.sql b/schema/crdb/lookup-bgp-config-by-asn/up01.sql new file mode 100644 index 0000000000..e886015a29 --- /dev/null +++ b/schema/crdb/lookup-bgp-config-by-asn/up01.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS lookup_bgp_config_by_asn ON omicron.public.bgp_config ( + asn +) WHERE time_deleted IS NULL; diff --git a/schema/crdb/route-local-pref/up.sql b/schema/crdb/route-local-pref/up.sql new file mode 100644 index 0000000000..d1051ccd0c --- /dev/null +++ b/schema/crdb/route-local-pref/up.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.switch_port_settings_route_config ADD COLUMN IF NOT EXISTS local_pref INT8; diff --git a/schema/rss-service-plan-v4.json b/schema/rss-service-plan-v4.json new file mode 100644 index 0000000000..badfaf4589 --- /dev/null +++ b/schema/rss-service-plan-v4.json @@ -0,0 +1,999 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Plan", + "type": "object", + "required": [ + "dns_config", + "services" + ], + "properties": { + "dns_config": { + "$ref": "#/definitions/DnsConfigParams" + }, + "services": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/SledConfig" + } + } + }, + "definitions": { + "BlueprintZoneConfig": { + "description": "Describes one Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZonesConfig`].", + "type": "object", + "required": [ + "disposition", + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "disposition": { + "description": "The disposition (desired state) of this zone recorded in the blueprint.", + "allOf": [ + { + "$ref": "#/definitions/BlueprintZoneDisposition" + } + ] + }, + "filesystem_pool": { + "anyOf": [ + { + "$ref": "#/definitions/ZpoolName" + }, + { + "type": "null" + } + ] + }, + "id": { + "$ref": "#/definitions/TypedUuidForOmicronZoneKind" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/BlueprintZoneType" + } + } + }, + "BlueprintZoneDisposition": { + "description": "The desired state of an Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZoneConfig`].", + "oneOf": [ + { + "description": "The zone is in-service.", + "type": "string", + "enum": [ + "in_service" + ] + }, + { + "description": "The zone is not in service.", + "type": "string", + "enum": [ + "quiesced" + ] + }, + { + "description": "The zone is permanently gone.", + "type": "string", + "enum": [ + "expunged" + ] + } + ] + }, + "BlueprintZoneType": { + "oneOf": [ + { + "type": "object", + "required": [ + "address", + "dns_servers", + "external_ip", + "nic", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "external_ip": { + "$ref": "#/definitions/OmicronZoneExternalSnatIp" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": 
"#/definitions/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + } + }, + { + "description": "Used in single-node clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "description": "Used in replicated clickhouse setups", + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingAddr" + } + ] + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": 
"string", + "enum": [ + "internal_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ], + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "allOf": [ + { + "$ref": "#/definitions/OmicronZoneExternalFloatingIp" + } + ] + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + } + } + ] + }, + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "required": [ + "model", + "serial", + "vendor" + ], + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + } + }, + "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", + "type": "object", + "required": [ + "generation", + "time_created", + "zones" + ], + "properties": { + "generation": { + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "time_created": { + "type": "string", + "format": "date-time" + }, + "zones": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsConfigZone" + } + } + } + }, + "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", + "type": "object", + "required": [ + "records", + "zone_name" + ], + "properties": { + "records": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsRecord" + } + } + }, + "zone_name": { + "type": "string" + } + } + }, + "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", + "oneOf": [ + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv4" + }, + "type": { + "type": "string", + "enum": [ + "A" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv6" + }, + "type": { + "type": "string", + "enum": [ + "AAAA" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "$ref": "#/definitions/Srv" + }, + "type": { + "type": "string", + "enum": [ + "SRV" + ] + } + } + } + ] + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/definitions/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/definitions/Ipv6Net" + } + ] + } + ], + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::IpNet", + "version": "0.1.0" + } + }, + "Ipv4Net": { + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and prefix length", + "examples": [ + "192.168.1.0/24" + ], + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv4Net", + "version": "0.1.0" + } + }, + "Ipv6Net": { + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "examples": [ + "fd12:3456::/64" + ], + "type": "string", + "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$", + "x-rust-type": { + "crate": "oxnet", + "path": "oxnet::Ipv6Net", + "version": "0.1.0" + } + }, + "MacAddr": { + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "examples": [ + "ff:ff:ff:ff:ff:ff" + ], + "type": "string", + "maxLength": 17, + "minLength": 5, + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$" + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", + "type": "string", + "maxLength": 63, + "minLength": 1, + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/definitions/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/definitions/MacAddr" + }, + "name": { + "$ref": "#/definitions/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "subnet": { + "$ref": "#/definitions/IpNet" + }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/IpNet" + } + }, + "vni": { + "$ref": "#/definitions/Vni" + } + } + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + } + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } + } + ] + }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "required": [ + "id", + "identity", + "pool_id" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/definitions/DiskIdentity" + }, + "pool_id": { + "$ref": "#/definitions/TypedUuidForZpoolKind" + } + } + }, + "OmicronPhysicalDisksConfig": { + "type": "object", + "required": [ + "disks", + "generation" + ], + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneExternalFloatingAddr": { + "description": "Floating external address with port allocated to an Omicron-managed zone.", + "type": "object", + "required": [ + "addr", + "id" + ], + "properties": { + "addr": { + "type": "string" + }, + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + } + } + }, + "OmicronZoneExternalFloatingIp": { + "description": "Floating external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "ip" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "ip": { + "type": "string", + "format": "ip" + } + } + }, + "OmicronZoneExternalSnatIp": { + "description": "SNAT (outbound) external IP allocated to an Omicron-managed zone.\n\nThis is a slimmer `nexus_db_model::ExternalIp` that only stores the fields necessary for blueprint planning, and requires that the zone have a single IP.", + "type": "object", + "required": [ + "id", + "snat_cfg" + ], + "properties": { + "id": { + "$ref": "#/definitions/TypedUuidForExternalIpKind" + }, + "snat_cfg": { + "$ref": "#/definitions/SourceNatConfig" + } + } + }, + "SledConfig": { + "type": "object", + "required": [ + "disks", + "zones" + ], + "properties": { + "disks": { + "description": "Control plane disks configured for this sled", + "allOf": [ + { + "$ref": "#/definitions/OmicronPhysicalDisksConfig" + } + ] + }, + "zones": { + "description": "zones configured for this sled", + "type": "array", + "items": { + "$ref": "#/definitions/BlueprintZoneConfig" + } + } + } + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "required": [ + "first_port", + "ip", + "last_port" + ], + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", + "type": "object", + "required": [ + "port", + "prio", + "target", + "weight" + ], + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "TypedUuidForExternalIpKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForOmicronZoneKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index a3d3425870..b0abc8c67e 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -604,6 +604,79 @@ } } }, + "LldpAdminStatus": { + "description": "To what extent should this port participate in LLDP", + "type": "string", + "enum": [ + "enabled", + "disabled", + "rx_only", + "tx_only" + ] + }, + "LldpPortConfig": { + "description": "Per-port LLDP configuration settings. Only the \"status\" setting is mandatory. All other fields have natural defaults or may be inherited from the switch.", + "type": "object", + "required": [ + "status" + ], + "properties": { + "chassis_id": { + "description": "Chassis ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "management_addrs": { + "description": "Management IP addresses to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "array", + "null" + ], + "items": { + "type": "string", + "format": "ip" + } + }, + "port_description": { + "description": "Port description to advertise. If this is not set, no description will be advertised.", + "type": [ + "string", + "null" + ] + }, + "port_id": { + "description": "Port ID to advertise. If this is set, it will be advertised as a LocallyAssigned ID type. If this is not set, it will be set to the port name. e.g., qsfp0/0.", + "type": [ + "string", + "null" + ] + }, + "status": { + "description": "To what extent should this port participate in LLDP", + "allOf": [ + { + "$ref": "#/definitions/LldpAdminStatus" + } + ] + }, + "system_description": { + "description": "System description to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + }, + "system_name": { + "description": "System name to advertise. If this is not set, it will be inherited from the switch-level settings.", + "type": [ + "string", + "null" + ] + } + } + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. 
They can be at most 63 characters long.", @@ -648,6 +721,17 @@ "$ref": "#/definitions/BgpPeerConfig" } }, + "lldp": { + "description": "LLDP configuration for this port", + "anyOf": [ + { + "$ref": "#/definitions/LldpPortConfig" + }, + { + "type": "null" + } + ] + }, "port": { "description": "Name of the port this config applies to.", "type": "string" }, @@ -894,6 +978,16 @@ } ] }, + "local_pref": { + "description": "The local preference associated with this route.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 95a1f873f6..abc88d67c1 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -631,7 +631,8 @@ impl<'a> EarlyNetworkSetup<'a> { IpAddr::V6(_) => continue, }; let vlan_id = r.vlan_id; - let sr = StaticRoute4 { nexthop, prefix, vlan_id }; + let local_pref = r.local_pref; + let sr = StaticRoute4 { nexthop, prefix, vlan_id, local_pref }; rq.routes.list.push(sr); } } diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index 0ec14138fc..e1b12d6b2b 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -9,3 +9,8 @@ mod plan; pub mod service; pub use plan::service::SledConfig; +pub use plan::service::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, + from_source_nat_config_to_external_snat_ip, +}; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index c9ed0c2248..a376096a87 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -10,7 +10,13 @@ use illumos_utils::zpool::ZpoolName; use internal_dns::config::{Host, Zone}; use internal_dns::ServiceName; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, SledRole, + Inventory, OmicronZoneDataset, SledRole, +}; +use nexus_types::deployment::{ + blueprint_zone_type, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, + BlueprintZoneDisposition, BlueprintZoneType, + OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, + OmicronZoneExternalSnatIp, }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, @@ -33,7 +39,9 @@ use omicron_common::policy::{ BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, NEXUS_REDUNDANCY, }; -use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; +use omicron_uuid_kinds::{ + ExternalIpUuid, GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid, +}; use rand::prelude::SliceRandom; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -58,14 +66,23 @@ const OXIMETER_COUNT: usize = 1; // when Nexus provisions Clickhouse. // TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use // omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable -// replicated ClickHouse +// replicated ClickHouse. +// Set to 0 when testing replicated ClickHouse. const CLICKHOUSE_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse keeper. 
// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use // omicron_common::policy::CLICKHOUSE_KEEPER_REDUNDANCY once we enable // replicated ClickHouse +// Set to 3 when testing replicated ClickHouse. const CLICKHOUSE_KEEPER_COUNT: usize = 0; +// TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove +// when Nexus provisions Clickhouse server. +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 2 when testing replicated ClickHouse +const CLICKHOUSE_SERVER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. const MINIMUM_U2_COUNT: usize = 3; @@ -111,10 +128,10 @@ pub enum PlanError { #[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] pub struct SledConfig { /// Control plane disks configured for this sled - pub disks: OmicronPhysicalDisksConfig, + pub disks: BlueprintPhysicalDisksConfig, /// zones configured for this sled - pub zones: Vec, + pub zones: Vec, } #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] @@ -131,7 +148,53 @@ impl Ledgerable for Plan { } const RSS_SERVICE_PLAN_V1_FILENAME: &str = "rss-service-plan.json"; const RSS_SERVICE_PLAN_V2_FILENAME: &str = "rss-service-plan-v2.json"; -const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v3.json"; +const RSS_SERVICE_PLAN_V3_FILENAME: &str = "rss-service-plan-v3.json"; +const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v4.json"; + +pub fn from_sockaddr_to_external_floating_addr( + addr: SocketAddr, +) -> OmicronZoneExternalFloatingAddr { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingAddr { id: ExternalIpUuid::new_v4(), addr } +} + +pub fn from_ipaddr_to_external_floating_ip( + ip: IpAddr, +) -> OmicronZoneExternalFloatingIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. + // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalFloatingIp { id: ExternalIpUuid::new_v4(), ip } +} + +pub fn from_source_nat_config_to_external_snat_ip( + snat_cfg: SourceNatConfig, +) -> OmicronZoneExternalSnatIp { + // This is pretty weird: IP IDs don't exist yet, so it's fine for us + // to make them up (Nexus will record them as a part of the + // handoff). We could pass `None` here for some zone types, but it's + // a little simpler to just always pass a new ID, which will only be + // used if the zone type has an external IP. 
+ // + // This should all go away once RSS starts using blueprints more + // directly (instead of this conversion after the fact): + // https://github.com/oxidecomputer/omicron/issues/5272 + OmicronZoneExternalSnatIp { id: ExternalIpUuid::new_v4(), snat_cfg } +} impl Plan { pub async fn load( @@ -191,6 +254,14 @@ impl Plan { } })? { Err(PlanError::FoundV2) + } else if Self::has_v3(storage_manager).await.map_err(|err| { + // Same as the comment above, but for version 3. + PlanError::Io { + message: String::from("looking for v3 RSS plan"), + err, + } + })? { + Err(PlanError::FoundV2) } else { Ok(None) } @@ -234,6 +305,25 @@ impl Plan { Ok(false) } + async fn has_v3( + storage_manager: &StorageHandle, + ) -> Result { + let paths = storage_manager + .get_latest_disks() + .await + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(RSS_SERVICE_PLAN_V3_FILENAME)); + + for p in paths { + if p.try_exists()? { + return Ok(true); + } + } + + Ok(false) + } + async fn is_sled_scrimlet( log: &Logger, address: SocketAddrV6, @@ -410,20 +500,22 @@ impl Plan { sled.alloc_dataset_from_u2s(DatasetType::InternalDns)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), - }, - http_address, - dns_address, - gz_address: dns_subnet.gz_address(), - gz_address_index: i.try_into().expect("Giant indices?"), - }, filesystem_pool, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + gz_address: dns_subnet.gz_address(), + gz_address_index: i.try_into().expect("Giant indices?"), + }, + ), }); } @@ -449,16 +541,18 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::CockroachDb)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::CockroachDb { - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - address, - }, + ), filesystem_pool, }); } @@ -490,23 +584,27 @@ impl Plan { ) .unwrap(); let dns_port = omicron_common::address::DNS_PORT; - let dns_address = SocketAddr::new(external_ip, dns_port); + let dns_address = from_sockaddr_to_external_floating_addr( + SocketAddr::new(external_ip, dns_port), + ); let dataset_kind = DatasetType::ExternalDns; let dataset_name = sled.alloc_dataset_from_u2s(dataset_kind)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: *http_address.ip(), - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { - pool_name: 
dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + http_address, + dns_address, + nic, }, - http_address, - dns_address, - nic, - }, + ), filesystem_pool, }); } @@ -530,28 +628,32 @@ impl Plan { .unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Nexus { - internal_address: SocketAddrV6::new( - address, - omicron_common::address::NEXUS_INTERNAL_PORT, - 0, - 0, - ), - external_ip, - nic, - // Tell Nexus to use TLS if and only if the caller - // provided TLS certificates. This effectively - // determines the status of TLS for the lifetime of - // the rack. In production-like deployments, we'd - // always expect TLS to be enabled. It's only in - // development that it might not be. - external_tls: !config.external_certificates.is_empty(), - external_dns_servers: config.dns_servers.clone(), - }, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: SocketAddrV6::new( + address, + omicron_common::address::NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_ip: from_ipaddr_to_external_floating_ip( + external_ip, + ), + nic, + // Tell Nexus to use TLS if and only if the caller + // provided TLS certificates. This effectively + // determines the status of TLS for the lifetime of + // the rack. In production-like deployments, we'd + // always expect TLS to be enabled. It's only in + // development that it might not be. + external_tls: !config.external_certificates.is_empty(), + external_dns_servers: config.dns_servers.clone(), + }, + ), filesystem_pool, }); } @@ -575,18 +677,20 @@ impl Plan { ) .unwrap(); let filesystem_pool = Some(sled.alloc_zpool_from_u2s()?); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::Oximeter { - address: SocketAddrV6::new( - address, - omicron_common::address::OXIMETER_PORT, - 0, - 0, - ), - }, + zone_type: BlueprintZoneType::Oximeter( + blueprint_zone_type::Oximeter { + address: SocketAddrV6::new( + address, + omicron_common::address::OXIMETER_PORT, + 0, + 0, + ), + }, + ), filesystem_pool, }) } @@ -614,16 +718,61 @@ impl Plan { let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::Clickhouse)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Clickhouse { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, + ), + filesystem_pool, + }); + } + + // Provision Clickhouse server zones, continuing to stripe across sleds. 
+ // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove + // Temporary linter rule until replicated Clickhouse is enabled + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..CLICKHOUSE_SERVER_COUNT { + let sled = { + let which_sled = + sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; + &mut sled_info[which_sled] + }; + let id = OmicronZoneUuid::new_v4(); + let ip = sled.addr_alloc.next().expect("Not enough addrs"); + // TODO: This may need to be a different port if/when we have single node + // and replicated running side by side as per stage 1 of RFD 468. + let port = omicron_common::address::CLICKHOUSE_PORT; + let address = SocketAddrV6::new(ip, port, 0, 0); + dns_builder + .host_zone_with_one_backend( + id, + ip, + ServiceName::ClickhouseServer, + port, + ) + .unwrap(); + let dataset_name = + sled.alloc_dataset_from_u2s(DatasetType::ClickhouseServer)?; + let filesystem_pool = Some(dataset_name.pool().clone()); + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, + underlay_address: ip, + zone_type: BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, - }, + ), filesystem_pool, }); } @@ -653,16 +802,18 @@ let dataset_name = sled.alloc_dataset_from_u2s(DatasetType::ClickhouseKeeper)?; let filesystem_pool = Some(dataset_name.pool().clone()); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::ClickhouseKeeper { - address, - dataset: OmicronZoneDataset { - pool_name: dataset_name.pool().clone(), + zone_type: BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, }, - }, + ), filesystem_pool, }); } @@ -687,13 +838,15 @@ port, ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, - zone_type: OmicronZoneType::CruciblePantry { - address: SocketAddrV6::new(address, port, 0, 0), - }, + zone_type: BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { + address: SocketAddrV6::new(address, port, 0, 0), + }, + ), filesystem_pool, }); } @@ -715,14 +868,18 @@ ) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: ip, - zone_type: OmicronZoneType::Crucible { - address, - dataset: OmicronZoneDataset { pool_name: pool.clone() }, - }, + zone_type: BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { + address, + dataset: OmicronZoneDataset { + pool_name: pool.clone(), + }, + }, + ), filesystem_pool: Some(pool.clone()), }); } @@ -743,24 +900,31 @@ .push(Host::for_zone(Zone::Other(id)).fqdn()); let (nic, snat_cfg) = svc_port_builder.next_snat(id)?; ( - OmicronZoneType::BoundaryNtp { - address: ntp_address, - ntp_servers: config.ntp_servers.clone(), - dns_servers: config.dns_servers.clone(), - 
domain: None, - nic, - snat_cfg, - }, + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: ntp_address, + ntp_servers: config.ntp_servers.clone(), + dns_servers: config.dns_servers.clone(), + domain: None, + nic, + external_ip: + from_source_nat_config_to_external_snat_ip( + snat_cfg, + ), + }, + ), ServiceName::BoundaryNtp, ) } else { ( - OmicronZoneType::InternalNtp { - address: ntp_address, - ntp_servers: boundary_ntp_servers.clone(), - dns_servers: rack_dns_servers.clone(), - domain: None, - }, + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: ntp_address, + ntp_servers: boundary_ntp_servers.clone(), + dns_servers: rack_dns_servers.clone(), + domain: None, + }, + ), ServiceName::InternalNtp, ) }; @@ -769,9 +933,9 @@ impl Plan { .host_zone_with_one_backend(id, address, svcname, NTP_PORT) .unwrap(); - sled.request.zones.push(OmicronZoneConfig { - // TODO-cleanup use TypedUuid everywhere - id: id.into_untyped_uuid(), + sled.request.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id, underlay_address: address, zone_type, filesystem_pool, @@ -1329,10 +1493,10 @@ mod tests { } #[test] - fn test_rss_service_plan_v3_schema() { + fn test_rss_service_plan_v4_schema() { let schema = schemars::schema_for!(Plan); expectorate::assert_contents( - "../schema/rss-service-plan-v3.json", + "../schema/rss-service-plan-v4.json", &serde_json::to_string_pretty(&schema).unwrap(), ); } diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index c511cf1447..32906d0195 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -172,7 +172,7 @@ impl Plan { let mut ledger = Ledger::::new_with(log, paths, plan.clone()); ledger.commit().await?; - info!(log, "Sled plan written to storage"); + info!(log, "Sled plan written to storage: {plan:#?}"); Ok(plan) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index bead95be80..3f73e55d0f 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -71,7 +71,6 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::nexus::d2n_params; -use crate::params::OmicronZoneTypeExt; use crate::rack_setup::plan::service::{ Plan as ServicePlan, PlanError as ServicePlanError, }; @@ -91,14 +90,14 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::deployment::{ - Blueprint, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, - CockroachDbPreserveDowngrade, InvalidOmicronZoneType, + blueprint_zone_type, Blueprint, BlueprintZoneType, BlueprintZonesConfig, + CockroachDbPreserveDowngrade, }; use nexus_types::external_api::views::SledState; use omicron_common::address::get_sled_address; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::ExternalPortDiscovery; +use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; @@ -107,8 +106,8 @@ use omicron_common::disk::{ }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; +use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use omicron_uuid_kinds::{ExternalIpUuid, GenericUuid}; use 
serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -532,7 +531,7 @@ impl ServiceInner { .iter() .filter_map(|zone_config| { match &zone_config.zone_type { - OmicronZoneType::InternalDns { http_address, .. } + BlueprintZoneType::InternalDns(blueprint_zone_type::InternalDns{ http_address, .. }) => { Some(*http_address) }, @@ -718,15 +717,17 @@ impl ServiceInner { let mut datasets: Vec = vec![]; for sled_config in service_plan.services.values() { for zone in &sled_config.zones { - if let Some((dataset_name, dataset_address)) = - zone.dataset_name_and_address() - { + if let Some(dataset) = zone.zone_type.durable_dataset() { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset_name.pool().id().into_untyped_uuid(), - dataset_id: zone.id, + zpool_id: dataset + .dataset + .pool_name + .id() + .into_untyped_uuid(), + dataset_id: zone.id.into_untyped_uuid(), request: NexusTypes::DatasetPutRequest { - address: dataset_address.to_string(), - kind: dataset_name.dataset().kind(), + address: dataset.address.to_string(), + kind: dataset.kind, }, }) } @@ -750,23 +751,24 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::PortConfigV2 { port: config.port.clone(), - routes: config + routes: config .routes .iter() .map(|r| NexusTypes::RouteConfig { destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, + }) + .collect(), + addresses: config + .addresses + .iter() + .map(|a| NexusTypes::UplinkAddressConfig { + address: a.address, + vlan_id: a.vlan_id, }) .collect(), - addresses: config - .addresses - .iter() - .map(|a| NexusTypes::UplinkAddressConfig { - address: a.address, - vlan_id: a.vlan_id - }) - .collect(), switch: config.switch.into(), uplink_port_speed: config.uplink_port_speed.into(), uplink_port_fec: config.uplink_port_fec.into(), @@ -786,7 +788,8 @@ impl ServiceInner { remote_asn: b.remote_asn, min_ttl: b.min_ttl, md5_auth_key: b.md5_auth_key.clone(), - multi_exit_discriminator: b.multi_exit_discriminator, + multi_exit_discriminator: b + .multi_exit_discriminator, local_pref: b.local_pref, enforce_first_as: b.enforce_first_as, communities: b.communities.clone(), @@ -795,6 +798,32 @@ impl ServiceInner { vlan_id: b.vlan_id, }) .collect(), + lldp: config.lldp.as_ref().map(|lp| { + NexusTypes::LldpPortConfig { + status: match lp.status { + LldpAdminStatus::Enabled => { + NexusTypes::LldpAdminStatus::Enabled + } + LldpAdminStatus::Disabled => { + NexusTypes::LldpAdminStatus::Disabled + } + LldpAdminStatus::TxOnly => { + NexusTypes::LldpAdminStatus::TxOnly + } + LldpAdminStatus::RxOnly => { + NexusTypes::LldpAdminStatus::RxOnly + } + }, + chassis_id: lp.chassis_id.clone(), + port_id: lp.port_id.clone(), + system_name: lp.system_name.clone(), + system_description: lp + .system_description + .clone(), + port_description: lp.port_description.clone(), + management_addrs: lp.management_addrs.clone(), + } + }), }) .collect(), bgp: config @@ -802,7 +831,12 @@ impl ServiceInner { .iter() .map(|config| NexusTypes::BgpConfig { asn: config.asn, - originate: config.originate.iter().cloned().map(Into::into).collect(), + originate: config + .originate + .iter() + .cloned() + .map(Into::into) + .collect(), shaper: config.shaper.clone(), checker: config.checker.clone(), }) @@ -810,25 +844,26 @@ impl ServiceInner { bfd: config .bfd .iter() - .map(|spec| NexusTypes::BfdPeerConfig { - detection_threshold: spec.detection_threshold, - local: spec.local, - mode: match spec.mode { 
- omicron_common::api::external::BfdMode::SingleHop => { - nexus_client::types::BfdMode::SingleHop - } - omicron_common::api::external::BfdMode::MultiHop => { - nexus_client::types::BfdMode::MultiHop - } - }, - remote: spec.remote, - required_rx: spec.required_rx, - switch: spec.switch.into(), + .map(|spec| { + NexusTypes::BfdPeerConfig { + detection_threshold: spec.detection_threshold, + local: spec.local, + mode: match spec.mode { + omicron_common::api::external::BfdMode::SingleHop => { + nexus_client::types::BfdMode::SingleHop + } + omicron_common::api::external::BfdMode::MultiHop => { + nexus_client::types::BfdMode::MultiHop + } + }, + remote: spec.remote, + required_rx: spec.required_rx, + switch: spec.switch.into(), + } }) .collect(), } }; - info!(self.log, "rack_network_config: {:#?}", rack_network_config); let physical_disks: Vec<_> = sled_configs_by_id @@ -946,7 +981,7 @@ impl ServiceInner { if sled_config.zones.iter().any(|zone_config| { matches!( &zone_config.zone_type, - OmicronZoneType::CockroachDb { .. } + BlueprintZoneType::CockroachDb(_) ) }) { Some(sled_address) @@ -1363,7 +1398,7 @@ fn build_initial_blueprint_from_plan( let blueprint = build_initial_blueprint_from_sled_configs( sled_configs_by_id, internal_dns_version, - )?; + ); Ok(blueprint) } @@ -1371,47 +1406,11 @@ fn build_initial_blueprint_from_plan( pub(crate) fn build_initial_blueprint_from_sled_configs( sled_configs_by_id: &BTreeMap, internal_dns_version: Generation, -) -> Result { - // Helper to convert an `OmicronZoneConfig` into a `BlueprintZoneConfig`. - // This is separate primarily so rustfmt doesn't lose its mind. - let to_bp_zone_config = |z: &OmicronZoneConfig| { - // All initial zones are in-service. - let disposition = BlueprintZoneDisposition::InService; - BlueprintZoneConfig::from_omicron_zone_config( - z.clone(), - disposition, - // This is pretty weird: IP IDs don't exist yet, so it's fine for us - // to make them up (Nexus will record them as a part of the - // handoff). We could pass `None` here for some zone types, but it's - // a little simpler to just always pass a new ID, which will only be - // used if the zone type has an external IP. - // - // This should all go away once RSS starts using blueprints more - // directly (instead of this conversion after the fact): - // https://github.com/oxidecomputer/omicron/issues/5272 - Some(ExternalIpUuid::new_v4()), - ) - }; - - let mut blueprint_disks = BTreeMap::new(); - for (sled_id, sled_config) in sled_configs_by_id { - blueprint_disks.insert( - *sled_id, - BlueprintPhysicalDisksConfig { - generation: sled_config.disks.generation, - disks: sled_config - .disks - .disks - .iter() - .map(|d| OmicronPhysicalDiskConfig { - identity: d.identity.clone(), - id: d.id, - pool_id: d.pool_id, - }) - .collect(), - }, - ); - } +) -> Blueprint { + let blueprint_disks: BTreeMap<_, _> = sled_configs_by_id + .iter() + .map(|(sled_id, sled_config)| (*sled_id, sled_config.disks.clone())) + .collect(); let mut blueprint_zones = BTreeMap::new(); let mut sled_state = BTreeMap::new(); @@ -1428,18 +1427,14 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( // value, we will need to revisit storing this in the serialized // RSS plan. 
generation: DeployStepVersion::V5_EVERYTHING, - zones: sled_config - .zones - .iter() - .map(to_bp_zone_config) - .collect::>()?, + zones: sled_config.zones.clone(), }; blueprint_zones.insert(*sled_id, zones_config); sled_state.insert(*sled_id, SledState::Active); } - Ok(Blueprint { + Blueprint { id: Uuid::new_v4(), blueprint_zones, blueprint_disks, @@ -1457,7 +1452,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), - }) + } } /// Facilitates creating a sequence of OmicronZonesConfig objects for each sled @@ -1535,11 +1530,14 @@ impl<'a> OmicronZonesConfigGenerator<'a> { sled_config .zones .iter() + .cloned() + .map(|bp_zone_config| { + OmicronZoneConfig::from(bp_zone_config) + }) .filter(|z| { !zones_already.contains(&z.id) && zone_filter(&z.zone_type) - }) - .cloned(), + }), ); let config = OmicronZonesConfig { generation: version, zones }; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 32cf844e6d..22cbb62f70 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1618,18 +1618,82 @@ impl ServiceManager { zone: OmicronZoneConfig { zone_type: OmicronZoneType::ClickhouseServer { .. }, - underlay_address: _, + underlay_address, .. }, .. }) => { - // We aren't yet deploying this service - error!( - &self.inner.log, - "Deploying ClickhouseServer zones is not yet supported" - ); + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + let listen_addr = *underlay_address; + let listen_port = CLICKHOUSE_PORT.to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + Some(&info.underlay_address), + &installed_zone, + &[listen_addr], + )?; + + let dns_service = Self::dns_install(info, None, &None).await?; + + let config = PropertyGroupBuilder::new("config") + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + let clickhouse_server_service = + ServiceBuilder::new("oxide/clickhouse_server") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ); - todo!() + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(clickhouse_server_service) + .add_service(dns_service) + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io( + "Failed to setup clickhouse server profile", + err, + ) + })?; + RunningZone::boot(installed_zone).await? 
} ZoneArgs::Omicron(OmicronZoneConfigLocal { @@ -3931,6 +3995,19 @@ impl ServiceManager { &self, our_ports: Vec, ) -> Result<(), Error> { + // Helper function to add a property-value pair + // if the config actually has a value set. + fn apv( + smfh: &SmfHelper, + prop: &str, + val: &Option, + ) -> Result<(), Error> { + if let Some(v) = val { + smfh.addpropvalue_type(prop, v, "astring")? + } + Ok(()) + } + // We expect the switch zone to be running, as we're called immediately // after `ensure_zone()` above and we just successfully configured // uplinks via DPD running in our switch zone. If somehow we're in any @@ -3953,26 +4030,76 @@ impl ServiceManager { } }; - info!(self.inner.log, "Setting up uplinkd service"); - let smfh = SmfHelper::new(&zone, &SwitchService::Uplink); + info!(self.inner.log, "ensuring scrimlet uplinks"); + let usmfh = SmfHelper::new(&zone, &SwitchService::Uplink); + let lsmfh = SmfHelper::new( + &zone, + &SwitchService::Lldpd { baseboard: Baseboard::Unknown }, + ); // We want to delete all the properties in the `uplinks` group, but we // don't know their names, so instead we'll delete and recreate the // group, then add all our properties. - smfh.delpropgroup("uplinks")?; - smfh.addpropgroup("uplinks", "application")?; + let _ = usmfh.delpropgroup("uplinks"); + usmfh.addpropgroup("uplinks", "application")?; for port_config in &our_ports { for addr in &port_config.addrs { - info!(self.inner.log, "configuring port: {port_config:?}"); - smfh.addpropvalue_type( + usmfh.addpropvalue_type( &format!("uplinks/{}_0", port_config.port,), &addr.to_string(), "astring", )?; } + + if let Some(lldp_config) = &port_config.lldp { + let group_name = format!("port_{}", port_config.port); + info!(self.inner.log, "setting up {group_name}"); + let _ = lsmfh.delpropgroup(&group_name); + lsmfh.addpropgroup(&group_name, "application")?; + apv( + &lsmfh, + &format!("{group_name}/status"), + &Some(lldp_config.status.to_string()), + )?; + apv( + &lsmfh, + &format!("{group_name}/chassis_id"), + &lldp_config.chassis_id, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_name"), + &lldp_config.system_name, + )?; + apv( + &lsmfh, + &format!("{group_name}/system_description"), + &lldp_config.system_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_description"), + &lldp_config.port_description, + )?; + apv( + &lsmfh, + &format!("{group_name}/port_id"), + &lldp_config.port_id, + )?; + if let Some(a) = &lldp_config.management_addrs { + for address in a { + apv( + &lsmfh, + &format!("{group_name}/management_addrs"), + &Some(address.to_string()), + )?; + } + } + } } - smfh.refresh()?; + usmfh.refresh()?; + lsmfh.refresh()?; Ok(()) } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index c219a747ce..e93bebad98 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,18 +5,27 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; +use camino::Utf8PathBuf; +use dropshot::endpoint; use dropshot::ApiDescription; +use dropshot::FreeformBody; use dropshot::HttpError; +use dropshot::HttpResponseCreated; +use dropshot::HttpResponseDeleted; +use dropshot::HttpResponseHeaders; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; +use dropshot::Query; use dropshot::RequestContext; +use dropshot::StreamingBody; use dropshot::TypedBody; -use dropshot::{endpoint, ApiDescriptionRegisterError}; +use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::api::internal::shared::SledIdentifiers; use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, @@ -24,8 +33,12 @@ use omicron_common::api::internal::shared::{ use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; +use sled_agent_types::boot_disk::BootDiskPathParams; +use sled_agent_types::boot_disk::BootDiskUpdatePathParams; +use sled_agent_types::boot_disk::BootDiskWriteStartQueryParams; +use sled_agent_types::bootstore::BootstoreStatus; use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; @@ -35,8 +48,14 @@ use sled_agent_types::instance::InstancePutStateBody; use sled_agent_types::instance::InstancePutStateResponse; use sled_agent_types::instance::InstanceUnregisterResponse; use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::BundleUtilization; +use sled_agent_types::zone_bundle::CleanupContext; +use sled_agent_types::zone_bundle::CleanupCount; +use sled_agent_types::zone_bundle::ZoneBundleId; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; +use std::collections::BTreeMap; use std::sync::Arc; -use uuid::Uuid; use super::sled_agent::SledAgent; @@ -44,510 +63,507 @@ type SledApiDescription = ApiDescription>; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; + fn register_endpoints() -> Result { + let mut api = sled_agent_api::sled_agent_api_mod::api_description::< + SledAgentSimImpl, + >()?; api.register(instance_poke_post)?; api.register(instance_poke_single_step_post)?; api.register(instance_post_sim_migration_source)?; - api.register(disk_put)?; api.register(disk_poke_post)?; - api.register(update_artifact)?; - api.register(instance_issue_disk_snapshot_request)?; - api.register(vpc_firewall_rules_put)?; - api.register(set_v2p)?; - api.register(del_v2p)?; - 
api.register(list_v2p)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config)?; - api.register(write_network_bootstore_config)?; - api.register(inventory)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(sled_add)?; - api.register(list_vpc_routes)?; - api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api -} -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + Ok(api) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_register( - instance_id, - body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.metadata, + register_endpoints().expect("failed to register entrypoints") +} + +enum SledAgentSimImpl {} + +impl SledAgentApi for SledAgentSimImpl { + type Context = Arc; + + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_register( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.metadata, + ) + .await?, + )) + } + + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) + } + + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_state(instance_id, body_args.state).await?, + )) + } + + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + } + + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: 
TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await?, + )) + } + + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.updates() + .download_artifact( + artifact.into_inner(), + rqctx.context().nexus_client.as_ref(), + ) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + > { + let sa = rqctx.context(); + let path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.disk_id, + body.snapshot_id, ) - .await?, - )) -} + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) -} + Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let _sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let _body_args = body.into_inner(); -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - 
sa.instance_delete_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.set_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke", -}] -async fn instance_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::Drain).await; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke-single-step", -}] -async fn instance_poke_single_step_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::SingleStep).await; - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = POST, - path = "/instances/{instance_id}/sim-migration-source", -}] -async fn instance_post_sim_migration_source( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_simulate_migration_source(instance_id, body.into_inner()) - .await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), - ) - .await?, - )) -} + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); -#[endpoint { - method = POST, - path = "/disks/{disk_id}/poke", -}] -async fn disk_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - sa.disk_poke(disk_id).await; - Ok(HttpResponseUpdatedNoContent()) -} + let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext>, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.updates() - .download_artifact( - artifact.into_inner(), - rqctx.context().nexus_client.as_ref(), - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseOk(vnics)) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - disk_id: Uuid, -} + async fn uplink_ensure( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, 
-} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let config = + rqctx.context().bootstore_network_config.lock().await.clone(); + Ok(HttpResponseOk(config)) + } -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let mut config = rqctx.context().bootstore_network_config.lock().await; + *config = body.into_inner(); + Ok(HttpResponseUpdatedNoContent()) + } -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + /// Fetch basic information about this sled + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.inventory(rqctx.server.local_addr).await.map_err(|e| { + HttpError::for_internal_error(format!("{:#}", e)) + })?, + )) + } -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: Uuid, -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let _sa = rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let _body_args = body.into_inner(); + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await)) + } -/// Create a mapping from a virtual NIC to a physical host -#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await; + Ok(HttpResponseUpdatedNoContent()) + } - sa.set_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn sled_add( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> 
Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) + } -/// Delete a mapping from a virtual NIC to a physical host -#[endpoint { - method = DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn set_vpc_routes( + rqctx: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) + } - sa.unset_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + // --- Unimplemented endpoints --- - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_list_all( + _rqctx: RequestContext, + _query: Query, + ) -> Result>, HttpError> { + method_unimplemented() + } -/// List v2p mappings present on sled -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext>, -) -> Result>, HttpError> { - let sa = rqctx.context(); + async fn zone_bundle_list( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> { + method_unimplemented() + } - let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + async fn zone_bundle_create( + _rqctx: RequestContext, + _params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } - Ok(HttpResponseOk(vnics)) -} + async fn zone_bundle_get( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - _rqctx: RequestContext>, - _body: TypedBody, -) -> Result { - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_delete( + _rqctx: RequestContext, + _params: Path, + ) -> Result { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config( - rqctx: RequestContext>, -) -> Result, HttpError> { - let config = rqctx.context().bootstore_network_config.lock().await.clone(); - Ok(HttpResponseOk(config)) -} + async fn zone_bundle_utilization( + _rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let mut config = rqctx.context().bootstore_network_config.lock().await; - *config = body.into_inner(); - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_cleanup_context( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = "/inventory", -}] -async fn inventory( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.inventory(rqctx.server.local_addr) - .await - .map_err(|e| HttpError::for_internal_error(format!("{:#}", e)))?, - )) -} + async fn zone_bundle_cleanup_context_update( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - 
Ok(HttpResponseOk(result)) -} + async fn zone_bundle_cleanup( + _rqctx: RequestContext, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + async fn zones_list( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn zpools_get( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn sled_role_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn cockroachdb_init( + _rqctx: RequestContext, + ) -> Result { + method_unimplemented() + } + + async fn timesync_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_start( + _rqctx: RequestContext, + _path_params: Path, + _query_params: Query, + _body: StreamingBody, + ) -> Result { + method_unimplemented() + } + + async fn host_os_write_status_get( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_status_delete( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result { + method_unimplemented() + } + + async fn sled_identifiers( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn bootstore_status( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) +fn method_unimplemented() -> Result { + Err(HttpError { + // Use a client error here (405 Method Not Allowed vs 501 Not + // Implemented) even though it isn't strictly accurate here, so tests + // get to see the error message. 
+ status_code: http::StatusCode::METHOD_NOT_ALLOWED, + error_code: None, + external_message: "Method not implemented in sled-agent-sim" + .to_string(), + internal_message: "Method not implemented in sled-agent-sim" + .to_string(), + }) } +// --- Extra endpoints only available in the sim implementation --- + #[endpoint { - method = PUT, - path = "/omicron-zones", + method = POST, + path = "/instances/{instance_id}/poke", }] -async fn omicron_zones_put( +async fn instance_poke_post( rqctx: RequestContext>, - body: TypedBody, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await; + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::Drain).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/sleds" + method = POST, + path = "/instances/{instance_id}/poke-single-step", }] -async fn sled_add( - _rqctx: RequestContext>, - _body: TypedBody, +async fn instance_poke_single_step_post( + rqctx: RequestContext>, + path_params: Path, ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = GET, - path = "/vpc-routes", + method = POST, + path = "/instances/{instance_id}/sim-migration-source", }] -async fn list_vpc_routes( +async fn instance_post_sim_migration_source( rqctx: RequestContext>, -) -> Result>, HttpError> { + path_params: Path, + body: TypedBody, +) -> Result { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.list_vpc_routes().await)) + let instance_id = path_params.into_inner().instance_id; + sa.instance_simulate_migration_source(instance_id, body.into_inner()) + .await?; + Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/vpc-routes", + method = POST, + path = "/disks/{disk_id}/poke", }] -async fn set_vpc_routes( +async fn disk_poke_post( rqctx: RequestContext>, - body: TypedBody>, + path_params: Path, ) -> Result { let sa = rqctx.context(); - sa.set_vpc_routes(body.into_inner()).await; + let disk_id = path_params.into_inner().disk_id; + sa.disk_poke(disk_id).await; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 189f775adb..b546025654 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -12,6 +12,10 @@ use crate::nexus::d2n_params; use crate::nexus::NexusClient; use crate::rack_setup::service::build_initial_blueprint_from_sled_configs; use crate::rack_setup::SledConfig; +use crate::rack_setup::{ + from_ipaddr_to_external_floating_ip, + from_sockaddr_to_external_floating_addr, +}; use anyhow::anyhow; use crucible_agent_client::types::State as RegionState; use illumos_utils::zpool::ZpoolName; @@ -19,9 +23,11 @@ use internal_dns::ServiceName; use nexus_client::types as NexusTypes; use nexus_client::types::{IpRange, Ipv4Range, Ipv6Range}; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; -use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use nexus_sled_agent_shared::inventory::OmicronZoneType; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::{ + BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneType, +}; use nexus_types::inventory::NetworkInterfaceKind; use omicron_common::address::DNS_OPTE_IPV4_SUBNET; use 
omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; @@ -36,6 +42,7 @@ use omicron_common::backoff::{ use omicron_common::disk::DiskIdentity; use omicron_common::FileKv; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use oxnet::Ipv6Net; @@ -375,19 +382,22 @@ pub async fn run_standalone_server( SocketAddr::V6(a) => a, }; let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - let mut zones = vec![OmicronZoneConfig { - id: Uuid::new_v4(), + let mut zones = vec![BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: OmicronZoneUuid::new_v4(), underlay_address: *http_bound.ip(), - zone_type: OmicronZoneType::InternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: http_bound, - dns_address: match dns.dns_server.local_address() { - SocketAddr::V4(_) => panic!("did not expect v4 address"), - SocketAddr::V6(a) => a, + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, + http_address: http_bound, + dns_address: match dns.dns_server.local_address() { + SocketAddr::V4(_) => panic!("did not expect v4 address"), + SocketAddr::V6(a) => a, + }, + gz_address: Ipv6Addr::LOCALHOST, + gz_address_index: 0, }, - gz_address: Ipv6Addr::LOCALHOST, - gz_address_index: 0, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }]; @@ -396,23 +406,26 @@ pub async fn run_standalone_server( let mut macs = MacAddr::iter_system(); if let Some(nexus_external_addr) = rss_args.nexus_external_addr { let ip = nexus_external_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: match ip { IpAddr::V4(_) => panic!("did not expect v4 address"), IpAddr::V6(a) => a, }, - zone_type: OmicronZoneType::Nexus { + zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { internal_address: match config.nexus_address { SocketAddr::V4(_) => panic!("did not expect v4 address"), SocketAddr::V6(a) => a, }, - external_ip: ip, + external_ip: from_ipaddr_to_external_floating_ip(ip), nic: nexus_types::inventory::NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, + kind: NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, name: "nexus".parse().unwrap(), ip: NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) @@ -427,7 +440,7 @@ pub async fn run_standalone_server( }, external_tls: false, external_dns_servers: vec![], - }, + }), filesystem_pool: Some(get_random_zpool()), }); @@ -445,31 +458,40 @@ pub async fn run_standalone_server( rss_args.external_dns_internal_addr { let ip = *external_dns_internal_addr.ip(); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let pool_name = ZpoolName::new_external(ZpoolUuid::new_v4()); - zones.push(OmicronZoneConfig { + zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id, underlay_address: ip, - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { pool_name: pool_name.clone() }, - http_address: external_dns_internal_addr, - dns_address: SocketAddr::V6(external_dns_internal_addr), - nic: nexus_types::inventory::NetworkInterface { - id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id }, - name: 
"external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) - .unwrap() - .into(), - mac: macs.next().unwrap(), - subnet: (*DNS_OPTE_IPV4_SUBNET).into(), - vni: Vni::SERVICES_VNI, - primary: true, - slot: 0, - transit_ips: vec![], + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + http_address: external_dns_internal_addr, + dns_address: from_sockaddr_to_external_floating_addr( + SocketAddr::V6(external_dns_internal_addr), + ), + nic: nexus_types::inventory::NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: id.into_untyped_uuid(), + }, + name: "external-dns".parse().unwrap(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) + .unwrap() + .into(), + mac: macs.next().unwrap(), + subnet: (*DNS_OPTE_IPV4_SUBNET).into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + transit_ips: vec![], + }, }, - }, + ), // Co-locate the filesystem pool with the dataset filesystem_pool: Some(pool_name), }); @@ -530,8 +552,7 @@ pub async fn run_standalone_server( blueprint: build_initial_blueprint_from_sled_configs( &sled_configs, internal_dns_version, - ) - .expect("failed to construct initial blueprint"), + ), physical_disks, zpools, datasets, diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 6fa91e0e4a..9b69975054 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -126,6 +126,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { destination: "10.1.9.32/16".parse().unwrap(), nexthop: "10.1.9.32".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["2001:db8::/96".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -153,6 +154,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { vlan_id: None, }], autoneg: true, + lldp: None, }], bgp: vec![BgpConfig { asn: 20000, diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json index efd1a3c167..2da814042d 100644 --- a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -128,7 +128,8 @@ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -142,14 +143,16 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null }, { "routes": [ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -163,7 +166,8 @@ "uplink_port_speed": "speed40_g", "uplink_port_fec": "none", "bgp_peers": [], - "autoneg": false + "autoneg": false, + "lldp": null } ], "bgp": [], diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs index dc93aa1300..755033dc23 100644 --- a/sled-agent/types/src/early_networking.rs +++ b/sled-agent/types/src/early_networking.rs @@ -299,6 +299,7 @@ pub mod back_compat { uplink_port_fec: v1.uplink_port_fec, bgp_peers: v1.bgp_peers.clone(), autoneg: v1.autoneg, + lldp: None, } } } @@ -322,6 +323,8 @@ pub mod back_compat { pub uplink_cidr: Ipv4Net, /// VLAN id to use for uplink pub 
uplink_vid: Option, + /// Local preference + pub local_pref: Option, } impl From for PortConfigV2 { @@ -331,6 +334,7 @@ pub mod back_compat { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), vlan_id: value.uplink_vid, + local_pref: value.local_pref, }], addresses: vec![UplinkAddressConfig { address: value.uplink_cidr.into(), @@ -342,6 +346,7 @@ pub mod back_compat { uplink_port_fec: value.uplink_port_fec, bgp_peers: vec![], autoneg: false, + lldp: None, } } } @@ -472,6 +477,7 @@ mod tests { uplink_port_fec: PortFec::None, uplink_cidr: "192.168.0.1/16".parse().unwrap(), uplink_vid: None, + local_pref: None, }], }), }; @@ -501,6 +507,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), vlan_id: None, + local_pref: None, }], addresses: vec![UplinkAddressConfig { address: uplink.uplink_cidr.into(), @@ -512,6 +519,7 @@ mod tests { uplink_port_fec: uplink.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], @@ -545,6 +553,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "192.168.0.2".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["192.168.0.1/16".parse().unwrap()], switch: SwitchLocation::Switch0, @@ -592,6 +601,7 @@ mod tests { uplink_port_fec: port.uplink_port_fec, autoneg: false, bgp_peers: vec![], + lldp: None, }], bgp: vec![], bfd: vec![], diff --git a/smf/clickhouse/method_script.sh b/smf/clickhouse/method_script.sh index 224d759cf3..bb5dd960a1 100755 --- a/smf/clickhouse/method_script.sh +++ b/smf/clickhouse/method_script.sh @@ -10,136 +10,13 @@ LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" -# TEMPORARY: Racks will be set up with single node ClickHouse until -# Nexus provisions services so there is no divergence between racks -# https://github.com/oxidecomputer/omicron/issues/732 -single_node=true +args=( +"--log-file" "/var/tmp/clickhouse-server.log" +"--errorlog-file" "/var/tmp/clickhouse-server.errlog" +"--" +"--path" "${DATASTORE}" +"--listen_host" "$LISTEN_ADDR" +"--http_port" "$LISTEN_PORT" +) -command=() -# TODO((https://github.com/oxidecomputer/omicron/issues/4000)): Remove single node mode once all racks are running in replicated mode -if $single_node -then - command+=( - "/opt/oxide/clickhouse/clickhouse" "server" - "--log-file" "/var/tmp/clickhouse-server.log" - "--errorlog-file" "/var/tmp/clickhouse-server.errlog" - "--" - "--path" "${DATASTORE}" - "--listen_host" "$LISTEN_ADDR" - "--http_port" "$LISTEN_PORT" - ) -else - # Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. - CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse -H)" - - if [[ -z "$CH_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a nodes=($CH_ADDRS) - - for i in "${nodes[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - # Assign hostnames to replicas - REPLICA_HOST_01="${nodes[0]}" - REPLICA_HOST_02="${nodes[1]}" - - # Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. 
- K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" - - if [[ -z "$K_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a keepers=($K_ADDRS) - - for i in "${keepers[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - if [[ "${#keepers[@]}" != 3 ]] - then - printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Identify the node type this is as this will influence how the config is constructed - # TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much - # better ways to do this service discovery, but this works for now. - # The services contain the same IDs as the hostnames. - CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" - REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" - REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" - if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 1" - REPLICA_NUMBER="01" - elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 2" - REPLICA_NUMBER="02" - else - printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Setting environment variables this way is best practice, but has the downside of - # obscuring the field values to anyone ssh-ing into the zone. To mitigate this, - # we will be saving them to ${DATASTORE}/config_env_vars - export CH_LOG="${DATASTORE}/clickhouse-server.log" - export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" - export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} - export CH_LISTEN_ADDR=${LISTEN_ADDR} - export CH_LISTEN_PORT=${LISTEN_PORT} - export CH_DATASTORE=${DATASTORE} - export CH_TMP_PATH="${DATASTORE}/tmp/" - export CH_USER_FILES_PATH="${DATASTORE}/user_files/" - export CH_USER_LOCAL_DIR="${DATASTORE}/access/" - export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" - export CH_REPLICA_NUMBER=${REPLICA_NUMBER} - export CH_REPLICA_HOST_01=${REPLICA_HOST_01} - export CH_REPLICA_HOST_02=${REPLICA_HOST_02} - export CH_KEEPER_HOST_01="${keepers[0]}" - export CH_KEEPER_HOST_02="${keepers[1]}" - export CH_KEEPER_HOST_03="${keepers[2]}" - - content="CH_LOG="${CH_LOG}"\n\ - CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ - CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ - CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ - CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ - CH_DATASTORE="${CH_DATASTORE}"\n\ - CH_TMP_PATH="${CH_TMP_PATH}"\n\ - CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ - CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ - CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ - CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ - CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ - CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ - CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ - CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ - CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" - - echo $content >> "${DATASTORE}/config_env_vars" - - - # The clickhouse binary must be run from within the directory that contains it. 
- # Otherwise, it does not automatically detect the configuration files, nor does - # it append them when necessary - cd /opt/oxide/clickhouse/ - command+=("./clickhouse" "server") -fi - -exec "${command[@]}" & \ No newline at end of file +exec /opt/oxide/clickhouse/clickhouse server "${args[@]}" & \ No newline at end of file diff --git a/smf/clickhouse/config_replica.xml b/smf/clickhouse_server/config_replica.xml similarity index 100% rename from smf/clickhouse/config_replica.xml rename to smf/clickhouse_server/config_replica.xml diff --git a/smf/clickhouse_server/manifest.xml b/smf/clickhouse_server/manifest.xml new file mode 100644 index 0000000000..8ab4f78bcb --- /dev/null +++ b/smf/clickhouse_server/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse_server/method_script.sh b/smf/clickhouse_server/method_script.sh new file mode 100755 index 0000000000..a0d61072ac --- /dev/null +++ b/smf/clickhouse_server/method_script.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -x +set -o errexit +set -o pipefail + +. /lib/svc/share/smf_include.sh + +LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" +LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" +DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" + +# Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. +CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-server -H)" + +if [[ -z "$CH_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse server nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a nodes=($CH_ADDRS) + +for i in "${nodes[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +# Assign hostnames to replicas +REPLICA_HOST_01="${nodes[0]}" +REPLICA_HOST_02="${nodes[1]}" + +# Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. +K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" + +if [[ -z "$K_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a keepers=($K_ADDRS) + +for i in "${keepers[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +if [[ "${#keepers[@]}" != 3 ]] +then + printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Identify the node type this is as this will influence how the config is constructed +# TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much +# better ways to do this service discovery, but this works for now. +# The services contain the same IDs as the hostnames. 
+CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" +REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" +REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" +if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 1" + REPLICA_NUMBER="01" +elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 2" + REPLICA_NUMBER="02" +else + printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Setting environment variables this way is best practice, but has the downside of +# obscuring the field values to anyone ssh-ing into the zone. To mitigate this, +# we will be saving them to ${DATASTORE}/config_env_vars +export CH_LOG="${DATASTORE}/clickhouse-server.log" +export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" +export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} +export CH_LISTEN_ADDR=${LISTEN_ADDR} +export CH_LISTEN_PORT=${LISTEN_PORT} +export CH_DATASTORE=${DATASTORE} +export CH_TMP_PATH="${DATASTORE}/tmp/" +export CH_USER_FILES_PATH="${DATASTORE}/user_files/" +export CH_USER_LOCAL_DIR="${DATASTORE}/access/" +export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" +export CH_REPLICA_NUMBER=${REPLICA_NUMBER} +export CH_REPLICA_HOST_01=${REPLICA_HOST_01} +export CH_REPLICA_HOST_02=${REPLICA_HOST_02} +export CH_KEEPER_HOST_01="${keepers[0]}" +export CH_KEEPER_HOST_02="${keepers[1]}" +export CH_KEEPER_HOST_03="${keepers[2]}" + +content="CH_LOG="${CH_LOG}"\n\ +CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ +CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ +CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ +CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ +CH_DATASTORE="${CH_DATASTORE}"\n\ +CH_TMP_PATH="${CH_TMP_PATH}"\n\ +CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ +CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ +CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ +CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ +CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ +CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ +CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ +CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ +CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" + +echo $content >> "${DATASTORE}/config_env_vars" + + +# The clickhouse binary must be run from within the directory that contains it. 
+# Otherwise, it does not automatically detect the configuration files, nor does +# it append them when necessary +cd /opt/oxide/clickhouse_server/ + +exec ./clickhouse server & \ No newline at end of file diff --git a/smf/oximeter/config.toml b/smf/oximeter/replicated-cluster/config.toml similarity index 91% rename from smf/oximeter/config.toml rename to smf/oximeter/replicated-cluster/config.toml index ca14fe6ec8..f7958e5eb1 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/replicated-cluster/config.toml @@ -3,6 +3,7 @@ [db] batch_size = 1000 batch_interval = 5 # In seconds +replicated = true [log] level = "debug" diff --git a/smf/oximeter/single-node/config.toml b/smf/oximeter/single-node/config.toml new file mode 100644 index 0000000000..bc0418159c --- /dev/null +++ b/smf/oximeter/single-node/config.toml @@ -0,0 +1,12 @@ +# Example configuration file for running an oximeter collector server + +[db] +batch_size = 1000 +batch_interval = 5 # In seconds +replicated = false + +[log] +level = "debug" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index 90f5339e84..a61ac81d91 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -118,6 +118,22 @@ switch = "switch0" # Neighbors we expect to peer with over BGP on this port. bgp_peers = [] +# LLDP settings for this port +#[rack_network_config.switch0.qsfp0.lldp] +#status = "Enabled" +# Optional Port ID, overriding default of qsfpX/0 +#port_id = "" +## Optional port description +#port_description = "uplink 0" +# Optional chassid ID, overriding the switch-level setting +#chassis_id = "" +# Optional system name, overriding the switch-level setting +#system_name = "" +# Optional system description, overriding the switch-level setting +#system_description = "" +# Optional management addresses to advertise, overriding switch-level setting +#management_addrs = [] + # An allowlist of source IPs that can make requests to user-facing services can # be specified here. It can be written as the string "any" ... [allowed_source_ips] diff --git a/sp-sim/examples/config.toml b/sp-sim/examples/config.toml index cf338ecf2e..f53ea7cfd8 100644 --- a/sp-sim/examples/config.toml +++ b/sp-sim/examples/config.toml @@ -24,6 +24,16 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33312" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "ff15:0:1de::2" bind_addrs = ["[::]:33320", "[::]:33321"] @@ -39,6 +49,17 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33322" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + + [log] # Show log messages of this level and more severe level = "debug" diff --git a/sp-sim/src/config.rs b/sp-sim/src/config.rs index b64953e5ed..d45e956dee 100644 --- a/sp-sim/src/config.rs +++ b/sp-sim/src/config.rs @@ -5,6 +5,7 @@ //! Interfaces for parsing configuration files and working with a simulated SP //! 
configuration +use crate::sensors; use dropshot::ConfigLogging; use gateway_messages::DeviceCapabilities; use gateway_messages::DevicePresence; @@ -59,6 +60,9 @@ pub struct SpComponentConfig { /// /// Only supported for components inside a [`GimletConfig`]. pub serial_console: Option, + + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub sensors: Vec, } /// Configuration of a simulated sidecar SP @@ -93,6 +97,16 @@ pub struct Config { pub log: ConfigLogging, } +/// Configuration for a component's sensor readings. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct SensorConfig { + #[serde(flatten)] + pub def: sensors::SensorDef, + + #[serde(flatten)] + pub state: sensors::SensorState, +} + impl Config { /// Load a `Config` from the given TOML file /// diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index e980a4b67d..70c2e72fcb 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -6,6 +6,7 @@ use crate::config::GimletConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -630,6 +631,7 @@ struct Handler { startup_options: StartupOptions, update_state: SimSpUpdate, reset_pending: Option, + sensors: Sensors, last_request_handled: Option, @@ -665,9 +667,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -1206,13 +1211,16 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { + let num_details = + self.sensors.num_component_details(&component).unwrap_or(0); debug!( - &self.log, "asked for component details (returning 0 details)"; + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_details ); - Ok(0) + Ok(num_details) } fn component_details( @@ -1220,9 +1228,20 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). 
- unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + unreachable!( + "this is a gimlet, so it should have no port status details" + ); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1445,9 +1464,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 0f340ed642..15f2034aa8 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; mod gimlet; mod helpers; +mod sensors; mod server; mod sidecar; mod update; diff --git a/sp-sim/src/sensors.rs b/sp-sim/src/sensors.rs new file mode 100644 index 0000000000..fc684af01b --- /dev/null +++ b/sp-sim/src/sensors.rs @@ -0,0 +1,218 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::config::SpComponentConfig; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::sp_impl::BoundsChecked; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_messages::Measurement; +use gateway_messages::SensorDataMissing; +use gateway_messages::SensorError; +use gateway_messages::SensorReading; +use gateway_messages::SensorRequest; +use gateway_messages::SensorRequestKind; +use gateway_messages::SensorResponse; +use gateway_messages::SpComponent; + +use std::collections::HashMap; + +pub(crate) struct Sensors { + by_component: HashMap>, + sensors: Vec, +} + +#[derive(Debug)] +struct Sensor { + def: SensorDef, + state: SensorState, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, PartialEq)] +pub struct SensorDef { + pub name: String, + pub kind: MeasurementKind, +} + +// TODO(eliza): note that currently, we just hardcode these in +// `MeasurementConfig`. Eventually, it would be neat to allow the sensor to be +// changed dynamically as part of a simulation. 
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] +pub struct SensorState { + #[serde(default)] + pub last_error: Option, + + #[serde(default)] + pub last_data: Option, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastError { + pub timestamp: u64, + pub value: SensorDataMissing, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastData { + pub timestamp: u64, + pub value: f32, +} + +impl SensorState { + fn last_reading(&self) -> SensorReading { + match self { + Self { last_data: Some(data), last_error: Some(error) } => { + if data.timestamp >= error.timestamp { + SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + } + } else { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + } + Self { last_data: Some(data), last_error: None } => SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + }, + Self { last_data: None, last_error: Some(error) } => { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + Self { last_data: None, last_error: None } => SensorReading { + value: Err(SensorDataMissing::DeviceNotPresent), + timestamp: 0, // TODO(eliza): what do? + }, + } + } +} + +impl Sensors { + pub(crate) fn from_component_configs<'a>( + cfgs: impl IntoIterator, + ) -> Self { + let mut sensors = Vec::new(); + let mut by_component = HashMap::new(); + for cfg in cfgs { + if cfg.sensors.is_empty() { + continue; + } + if !cfg + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + panic!( + "invalid component config: a device with sensors should \ + have the `HAS_MEASUREMENT_CHANNELS` capability:{cfg:#?}" + ); + } + + let mut ids = Vec::with_capacity(cfg.sensors.len()); + for sensor in &cfg.sensors { + let sensor_id = sensors.len() as u32; + sensors.push(Sensor { + def: sensor.def.clone(), + state: sensor.state.clone(), + }); + ids.push(sensor_id) + } + + let component = SpComponent::try_from(cfg.id.as_str()).unwrap(); + let prev = by_component.insert(component, ids); + assert!(prev.is_none(), "component ID {component} already exists!"); + } + Self { sensors, by_component } + } + + fn sensor_for_component<'sensors>( + &'sensors self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option<&'sensors Sensor> { + let &id = self.by_component.get(component)?.get(index.0 as usize)?; + self.sensors.get(id as usize) + } + + pub(crate) fn num_component_details( + &self, + component: &SpComponent, + ) -> Option { + let len = self + .by_component + .get(component)? + .len() + .try_into() + .expect("why would you have more than `u32::MAX` sensors?"); + Some(len) + } + + /// This method returns an `Option` because the component's details might + /// be a port status rather than a measurement, if we eventually decide to + /// implement port statuses in the simulated sidecar... 
+ pub(crate) fn component_details( + &self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option { + let sensor = self.sensor_for_component(component, index)?; + let value = + sensor.state.last_reading().value.map_err(|err| match err { + SensorDataMissing::DeviceError => MeasurementError::DeviceError, + SensorDataMissing::DeviceNotPresent => { + MeasurementError::NotPresent + } + SensorDataMissing::DeviceOff => MeasurementError::DeviceOff, + SensorDataMissing::DeviceTimeout => { + MeasurementError::DeviceTimeout + } + SensorDataMissing::DeviceUnavailable => { + MeasurementError::DeviceUnavailable + } + }); + Some(ComponentDetails::Measurement(Measurement { + name: sensor.def.name.clone(), + kind: sensor.def.kind, + value, + })) + } + + pub(crate) fn read_sensor( + &self, + SensorRequest { id, kind }: SensorRequest, + ) -> Result { + let sensor = + self.sensors.get(id as usize).ok_or(SensorError::InvalidSensor)?; + match kind { + SensorRequestKind::LastReading => { + Ok(SensorResponse::LastReading(sensor.state.last_reading())) + } + SensorRequestKind::ErrorCount => { + let count = + // TODO(eliza): simulate more than one error... + if sensor.state.last_error.is_some() { 1 } else { 0 }; + Ok(SensorResponse::ErrorCount(count)) + } + SensorRequestKind::LastData => { + let LastData { timestamp, value } = + sensor.state.last_data.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastData { value, timestamp }) + } + SensorRequestKind::LastError => { + let LastError { timestamp, value } = + sensor.state.last_error.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastError { value, timestamp }) + } + } + } +} diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index c2fb2467d8..bef1d26c78 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -8,6 +8,7 @@ use crate::config::SimulatedSpsConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -377,6 +378,7 @@ struct Handler { // our life as a simulator. leaked_component_device_strings: Vec<&'static str>, leaked_component_description_strings: Vec<&'static str>, + sensors: Sensors, serial_number: String, ignition: FakeIgnition, @@ -417,9 +419,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -929,13 +934,18 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { - warn!( - &self.log, "asked for component details (returning 0 details)"; + let num_sensor_details = + self.sensors.num_component_details(&component).unwrap_or(0); + // TODO: here is where we might also handle port statuses, if we decide + // to simulate that later... + debug!( + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_sensor_details ); - Ok(0) + Ok(num_sensor_details) } fn component_details( @@ -943,9 +953,18 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). 
- unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + todo!("simulate port status details..."); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1163,9 +1182,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { diff --git a/tools/console_version b/tools/console_version index 4f67064733..b2fc99daf3 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="17ae890c68a5277fbefe773694e790a8f1b178b4" -SHA2="273a31ba14546305bfafeb9aedb2d9a7530328a0359cda363380c9ca3240b948" +COMMIT="771276573549dd255c6749980636aa7140e8bab8" +SHA2="4d441de0784bb0d775e0a7f4067758fd6c37fbf050ed76b744cd37d6e81af3d3" diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 2d0f4d4887..a9e13c083a 100755 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="21b16567f28e103f145cd18d53fac6958429c4ff" +COMMIT="76c735d472e3badaeca08982e22496fccb1ce210" SHA2="3a54305ab4b1270c9a5fb0603f481fce199f3767c174a03559ff642f7f44687e" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index e3d16d779c..075ead4752 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="3771671f0069b33143774e560eb258db99253dba9b78fa3ca974f02a8e1145b4" -CIDL_SHA256_LINUX_DPD="6aa070ab0590aca7458f2555012acc5571e61b3b1523de862d4bbb04b9d34135" +CIDL_SHA256_ILLUMOS="3ee6cfe770da2855b4eb44c048637d56f8d72de45c8c396186dfe7232d8548fa" +CIDL_SHA256_LINUX_DPD="5c70318c6feb7595bdbf41d8b33827100d28fcdf34ad738a5af10e0411463f64" CIDL_SHA256_LINUX_SWADM="e1e35784538a4fdd76dc257cc636ac3f43f7ef2842dabfe981f17f8ce6b8e1a2" diff --git a/tools/generate-nexus-api.sh b/tools/generate-nexus-api.sh index a0c7d13165..9e3f8d63f6 100755 --- a/tools/generate-nexus-api.sh +++ b/tools/generate-nexus-api.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash ./target/debug/nexus nexus/examples/config.toml -O > openapi/nexus.json -./target/debug/nexus nexus/examples/config.toml -I > openapi/nexus-internal.json diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index c1e011e38d..0c223c85a8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 1184f6e4fd..0db6a3b63d 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" -SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" +SHA2="5b327f213f8f341cf9072d428980f53757b2c6383f684ac80bbccfb1984ffe5f" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 7ca642fa70..2e180a83db 100644 --- 
a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" -MGD_LINUX_SHA256="915e7b5cac8ff1deb6549b86e4ba49fd5c6adbdcc56ae5dc3c7b3e69555a7c2c" \ No newline at end of file +CIDL_SHA256="e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" +MGD_LINUX_SHA256="1c3d93bbfbe4ce97af7cb81c13e42a2eea464e18de6827794a55d5bfd971b66c" \ No newline at end of file diff --git a/tools/opte_version b/tools/opte_version index dfbb589f24..0e2023666f 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.33.277 +0.33.293 diff --git a/tools/update_lldp.sh b/tools/update_lldp.sh index bf7f19eb02..2a9d1d6bae 100755 --- a/tools/update_lldp.sh +++ b/tools/update_lldp.sh @@ -47,7 +47,9 @@ function main { esac done - TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_COMMIT") + if [[ -z "$TARGET_COMMIT" ]]; then + TARGET_COMMIT=$(get_latest_commit_from_gh "$REPO" "$TARGET_BRANCH") + fi install_toml2json do_update_packages "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${PACKAGES[@]}" do_update_crates "$TARGET_COMMIT" "$DRY_RUN" "$REPO" "${CRATES[@]}" diff --git a/wicket-common/src/example.rs b/wicket-common/src/example.rs index bb70273b45..34af11e906 100644 --- a/wicket-common/src/example.rs +++ b/wicket-common/src/example.rs @@ -12,7 +12,8 @@ use omicron_common::{ api::{ external::AllowedSourceIps, internal::shared::{ - BgpConfig, BgpPeerConfig, PortFec, PortSpeed, RouteConfig, + BgpConfig, BgpPeerConfig, LldpAdminStatus, LldpPortConfig, PortFec, + PortSpeed, RouteConfig, }, }, }; @@ -166,23 +167,45 @@ impl ExampleRackSetupData { vlan_id: None, }]; + let switch0_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: None, + }); + + let switch1_port0_lldp = Some(LldpPortConfig { + status: LldpAdminStatus::Enabled, + chassis_id: Some("chassid id override".to_string()), + port_id: Some("port id override".to_string()), + system_name: Some("system name override".to_string()), + system_description: Some("system description override".to_string()), + port_description: Some("port description override".to_string()), + management_addrs: Some(vec!["172.32.0.4".parse().unwrap()]), + }); + let rack_network_config = UserSpecifiedRackNetworkConfig { infra_ip_first: "172.30.0.1".parse().unwrap(), infra_ip_last: "172.30.0.10".parse().unwrap(), switch0: btreemap! { "port0".to_owned() => UserSpecifiedPortConfig { - addresses: vec!["172.30.0.1/24".parse().unwrap()], - routes: vec![RouteConfig { + addresses: vec!["172.30.0.1/24".parse().unwrap()], + routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.30.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch0_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch0_port0_lldp, autoneg: true, }, - }, + }, switch1: btreemap! { // Use the same port name as in switch0 to test that it doesn't // collide. 
@@ -192,10 +215,12 @@ impl ExampleRackSetupData { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.33.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch1_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, + lldp: switch1_port0_lldp, autoneg: true, }, }, diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs index 7fd83e522a..cb6b13422b 100644 --- a/wicket-common/src/rack_setup.rs +++ b/wicket-common/src/rack_setup.rs @@ -11,6 +11,7 @@ use omicron_common::api::external::SwitchLocation; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; use omicron_common::api::internal::shared::BgpPeerConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; use omicron_common::api::internal::shared::RouteConfig; @@ -185,6 +186,8 @@ pub struct UserSpecifiedPortConfig { pub autoneg: bool, #[serde(default)] pub bgp_peers: Vec, + #[serde(default)] + pub lldp: Option, } /// User-specified version of [`BgpPeerConfig`]. diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 68485815a8..17b31e7730 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -8,6 +8,7 @@ use omicron_common::address::IpRange; use omicron_common::api::external::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use omicron_common::api::internal::shared::UplinkAddressConfig; use serde::Serialize; @@ -320,6 +321,7 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink_port_fec, autoneg, bgp_peers, + lldp, } = cfg; let mut uplink = Table::new(); @@ -327,13 +329,16 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { // routes = [] let mut routes_out = Array::new(); for r in routes { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = r; let mut route = InlineTable::new(); route.insert("nexthop", string_value(nexthop)); route.insert("destination", string_value(destination)); if let Some(vlan_id) = vlan_id { route.insert("vlan_id", i64_value(i64::from(*vlan_id))); } + if let Some(local_pref) = local_pref { + route.insert("local_pref", i64_value(i64::from(*local_pref))); + } routes_out.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes_out))); @@ -488,6 +493,46 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { uplink.insert("bgp_peers", Item::ArrayOfTables(peers)); + if let Some(l) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = l; + let mut lldp = Table::new(); + lldp.insert("status", string_item(status)); + if let Some(x) = chassis_id { + lldp.insert("chassis_id", string_item(x)); + } + if let Some(x) = port_id { + lldp.insert("port_id", string_item(x)); + } + if let Some(x) = system_name { + lldp.insert("system_name", string_item(x)); + } + if let Some(x) = system_description { + lldp.insert("system_description", string_item(x)); + } + if let Some(x) = port_description { + lldp.insert("port_description", string_item(x)); + } + if let Some(addrs) = management_addrs 
{ + let mut addresses_out = Array::new(); + for a in addrs { + addresses_out.push(string_value(a)); + } + lldp.insert( + "management_addrs", + Item::Value(Value::Array(addresses_out)), + ); + } + uplink.insert("lldp", Item::Table(lldp)); + } + uplink } diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 7bb63b6b1b..f23bc3c816 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -21,6 +21,7 @@ use itertools::Itertools; use omicron_common::address::IpRange; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::BgpConfig; +use omicron_common::api::internal::shared::LldpPortConfig; use omicron_common::api::internal::shared::RouteConfig; use ratatui::layout::Constraint; use ratatui::layout::Direction; @@ -740,6 +741,7 @@ fn rss_config_text<'a>( uplink_port_fec, autoneg, bgp_peers, + lldp, } = uplink; let mut items = vec![ @@ -771,7 +773,8 @@ fn rss_config_text<'a>( ]; let routes = routes.iter().map(|r| { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = + r; let mut items = vec![ Span::styled(" • Route : ", label_style), @@ -787,6 +790,13 @@ fn rss_config_text<'a>( Span::styled(")", label_style), ]); } + if let Some(local_pref) = local_pref { + items.extend([ + Span::styled(" (local_pref=", label_style), + Span::styled(local_pref.to_string(), ok_style), + Span::styled(")", label_style), + ]); + } items }); @@ -1027,6 +1037,68 @@ fn rss_config_text<'a>( items.extend(addresses); items.extend(peers); + if let Some(lp) = lldp { + let LldpPortConfig { + status, + chassis_id, + port_id, + system_name, + system_description, + port_description, + management_addrs, + } = lp; + + let mut lldp = vec![ + vec![Span::styled(" • LLDP port settings: ", label_style)], + vec![ + Span::styled(" • Admin status : ", label_style), + Span::styled(status.to_string(), ok_style), + ], + ]; + + if let Some(c) = chassis_id { + lldp.push(vec![ + Span::styled(" • Chassis ID : ", label_style), + Span::styled(c.to_string(), ok_style), + ]) + } + if let Some(s) = system_name { + lldp.push(vec![ + Span::styled(" • System name : ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(s) = system_description { + lldp.push(vec![ + Span::styled(" • System description: ", label_style), + Span::styled(s.to_string(), ok_style), + ]) + } + if let Some(p) = port_id { + lldp.push(vec![ + Span::styled(" • Port ID : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(p) = port_description { + lldp.push(vec![ + Span::styled(" • Port description : ", label_style), + Span::styled(p.to_string(), ok_style), + ]) + } + if let Some(addrs) = management_addrs { + let mut label = " • Management addrs : "; + for a in addrs { + lldp.push(vec![ + Span::styled(label, label_style), + Span::styled(a.to_string(), ok_style), + ]); + label = " : "; + } + } + items.extend(lldp); + } + append_list( &mut spans, Cow::from(format!("Uplink {}: ", i + 1)), diff --git a/wicket/tests/output/example_non_empty.toml b/wicket/tests/output/example_non_empty.toml index 717e940ca5..fafb31048d 100644 --- a/wicket/tests/output/example_non_empty.toml +++ b/wicket/tests/output/example_non_empty.toml @@ -111,6 +111,14 @@ allowed_export = [] local_pref = 80 enforce_first_as = true +[rack_network_config.switch0.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name 
override" +system_description = "system description override" +port_description = "port description override" + [rack_network_config.switch1.port0] routes = [{ nexthop = "172.33.0.10", destination = "0.0.0.0/0", vlan_id = 1 }] addresses = [{ address = "172.32.0.1/24" }] @@ -131,6 +139,15 @@ auth_key_id = "bgp-key-1" allowed_import = ["224.0.0.0/4"] enforce_first_as = false +[rack_network_config.switch1.port0.lldp] +status = "enabled" +chassis_id = "chassid id override" +port_id = "port id override" +system_name = "system name override" +system_description = "system description override" +port_description = "port description override" +management_addrs = ["172.32.0.4"] + [[rack_network_config.bgp]] asn = 47 originate = ["10.0.0.0/16"] diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 324ae01b42..6e2c27a97e 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -25,6 +25,7 @@ flume.workspace = true futures.workspace = true gateway-messages.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hubtools.workspace = true hyper.workspace = true @@ -46,7 +47,6 @@ tokio-stream.workspace = true tokio-util.workspace = true toml.workspace = true tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true bootstrap-agent-client.workspace = true diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 36a4f61779..fb0914e836 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -14,6 +14,11 @@ use dpd_client::types::PortSpeed as DpdPortSpeed; use dpd_client::Client as DpdClient; use dpd_client::ClientState as DpdClientState; use either::Either; +use hickory_resolver::config::NameServerConfigGroup; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; use illumos_utils::zone::SVCCFG; use illumos_utils::PFEXEC; use omicron_common::address::DENDRITE_PORT; @@ -35,12 +40,6 @@ use std::time::Duration; use std::time::Instant; use tokio::process::Command; use tokio::sync::mpsc; -use trust_dns_resolver::config::NameServerConfigGroup; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::error::ResolveError; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; use wicket_common::preflight_check::EventBuffer; use wicket_common::preflight_check::StepContext; use wicket_common::preflight_check::StepProgress; @@ -930,16 +929,7 @@ impl DnsLookupStep { }; 'dns_servers: for &dns_ip in dns_servers { - let resolver = match self.build_resolver(dns_ip) { - Ok(resolver) => resolver, - Err(err) => { - self.warnings.push(format!( - "failed to create resolver for {dns_ip}: {}", - DisplayErrorChain::new(&err) - )); - continue; - } - }; + let resolver = self.build_resolver(dns_ip); // Attempt to resolve any NTP servers that aren't IP addresses. 
for &ntp_name in &ntp_names_to_resolve { @@ -1052,14 +1042,18 @@ impl DnsLookupStep { ( "A", resolver.ipv4_lookup(name).await.map(|records| { - Either::Left(records.into_iter().map(IpAddr::V4)) + Either::Left( + records.into_iter().map(|x| IpAddr::V4(x.into())), + ) }), ) } else { ( "AAAA", resolver.ipv6_lookup(name).await.map(|records| { - Either::Right(records.into_iter().map(IpAddr::V6)) + Either::Right( + records.into_iter().map(|x| IpAddr::V6(x.into())), + ) }), ) }; @@ -1175,12 +1169,12 @@ impl DnsLookupStep { /// /// If building it fails, we'll append to our internal `warnings` and return /// `None`. - fn build_resolver( - &mut self, - dns_ip: IpAddr, - ) -> Result { + fn build_resolver(&mut self, dns_ip: IpAddr) -> TokioAsyncResolver { let mut options = ResolverOpts::default(); + // Enable edns for potentially larger records + options.edns0 = true; + // We will retry ourselves; we don't want the resolver // retrying internally too. options.attempts = 1; diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index c6f2dd5892..56e83fcd41 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -686,11 +686,14 @@ fn build_port_config( bgp_auth_keys: &BTreeMap>, ) -> BaPortConfigV2 { use bootstrap_agent_client::types::BgpPeerConfig as BaBgpPeerConfig; + use bootstrap_agent_client::types::LldpAdminStatus as BaLldpAdminStatus; + use bootstrap_agent_client::types::LldpPortConfig as BaLldpPortConfig; use bootstrap_agent_client::types::PortFec as BaPortFec; use bootstrap_agent_client::types::PortSpeed as BaPortSpeed; use bootstrap_agent_client::types::RouteConfig as BaRouteConfig; use bootstrap_agent_client::types::SwitchLocation as BaSwitchLocation; use bootstrap_agent_client::types::UplinkAddressConfig as BaUplinkAddressConfig; + use omicron_common::api::internal::shared::LldpAdminStatus; use omicron_common::api::internal::shared::PortFec; use omicron_common::api::internal::shared::PortSpeed; @@ -703,6 +706,7 @@ fn build_port_config( destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, }) .collect(), addresses: config @@ -779,6 +783,20 @@ fn build_port_config( PortFec::Rs => BaPortFec::Rs, }, autoneg: config.autoneg, + lldp: config.lldp.as_ref().map(|c| BaLldpPortConfig { + status: match c.status { + LldpAdminStatus::Enabled => BaLldpAdminStatus::Enabled, + LldpAdminStatus::Disabled => BaLldpAdminStatus::Disabled, + LldpAdminStatus::TxOnly => BaLldpAdminStatus::TxOnly, + LldpAdminStatus::RxOnly => BaLldpAdminStatus::RxOnly, + }, + chassis_id: c.chassis_id.clone(), + port_id: c.port_id.clone(), + system_name: c.system_name.clone(), + system_description: c.system_description.clone(), + port_description: c.port_description.clone(), + management_addrs: c.management_addrs.clone(), + }), } } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 35c266cdf3..a39daa5735 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -27,13 +27,12 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } chrono = { version = "0.4.38", features = 
["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } @@ -60,6 +59,7 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } indexmap = { version = "2.4.0", features = ["serde"] } @@ -68,7 +68,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } @@ -85,6 +85,7 @@ pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption" postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } @@ -94,13 +95,14 @@ rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -111,9 +113,9 @@ tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", 
"io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } usdt = { version = "0.5.0" } @@ -133,14 +135,13 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } cc = { version = "1.0.97", default-features = false, features = ["parallel"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } @@ -167,6 +168,7 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } indexmap = { version = "2.4.0", features = ["serde"] } @@ -175,7 +177,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } @@ -192,6 +194,7 @@ pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption" postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } @@ -201,13 +204,14 @@ rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = 
["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -220,9 +224,9 @@ tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } unicode-xid = { version = "0.2.4" } @@ -285,7 +289,6 @@ nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signa once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] @@ -295,7 +298,6 @@ nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signa once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } ### END HAKARI SECTION