From 0e047a68abbe0477c71f802b5dbaad271ac435a0 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 29 Nov 2023 16:16:14 -0800 Subject: [PATCH] add communication probes --- .github/buildomat/jobs/a4x2-deploy.sh | 161 +++++++ .github/buildomat/jobs/a4x2-prepare.sh | 88 ++++ .github/buildomat/jobs/ci-tools.sh | 5 + .github/buildomat/jobs/package.sh | 1 + .github/buildomat/jobs/tuf-repo.sh | 0 .gitignore | 1 + Cargo.lock | 216 +++++++-- Cargo.toml | 10 + clients/nexus-client/src/lib.rs | 2 + clients/sled-agent-client/src/lib.rs | 36 +- common/src/api/external/mod.rs | 11 + common/src/api/internal/shared.rs | 2 + dev-tools/omdb/tests/successes.out | 33 +- end-to-end-tests/Cargo.toml | 12 +- end-to-end-tests/src/bin/commtest.rs | 399 +++++++++++++++ end-to-end-tests/src/bin/dhcp-server.rs | 122 +++++ end-to-end-tests/src/helpers/cli.rs | 21 + end-to-end-tests/src/helpers/icmp.rs | 271 +++++++++++ end-to-end-tests/src/helpers/mod.rs | 2 + illumos-utils/src/running_zone.rs | 4 + installinator/src/dispatch.rs | 4 +- nexus/Cargo.toml | 2 +- nexus/db-model/src/external_ip.rs | 48 +- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/network_interface.rs | 54 +++ nexus/db-model/src/probe.rs | 50 ++ nexus/db-model/src/schema.rs | 17 +- nexus/db-model/src/service_kind.rs | 4 + nexus/db-model/src/unsigned.rs | 2 + nexus/db-queries/Cargo.toml | 1 + .../src/db/datastore/external_ip.rs | 69 +++ nexus/db-queries/src/db/datastore/mod.rs | 2 + .../src/db/datastore/network_interface.rs | 110 ++++- nexus/db-queries/src/db/datastore/probe.rs | 375 +++++++++++++++ nexus/db-queries/src/db/datastore/vpc.rs | 24 + .../db-queries/src/db/queries/external_ip.rs | 6 + .../src/db/queries/network_interface.rs | 9 +- nexus/src/app/background/init.rs | 5 +- nexus/src/app/instance.rs | 4 +- nexus/src/app/instance_network.rs | 101 +++- nexus/src/app/mod.rs | 1 + nexus/src/app/probe.rs | 96 ++++ nexus/src/app/rack.rs | 5 +- nexus/src/app/sagas/project_create.rs | 1 - nexus/src/app/switch_port.rs | 
4 +- nexus/src/app/vpc.rs | 11 +- nexus/src/external_api/http_entrypoints.rs | 128 ++++- nexus/src/external_api/tag-config.json | 6 + nexus/src/internal_api/http_entrypoints.rs | 35 +- nexus/tests/integration_tests/instances.rs | 31 +- nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/probe.rs | 119 +++++ nexus/tests/output/nexus_tags.txt | 7 + .../output/uncovered-authz-endpoints.txt | 4 + nexus/types/src/external_api/params.rs | 18 + nexus/types/src/internal_api/params.rs | 2 + openapi/nexus-internal.json | 309 ++++++++++++ openapi/nexus.json | 453 ++++++++++++++++++ openapi/sled-agent.json | 20 + package-manifest.toml | 17 + schema/all-zone-requests.json | 20 + schema/all-zones-requests.json | 20 + schema/crdb/22.0.0/up1.sql | 10 + schema/crdb/22.0.0/up2.sql | 4 + schema/crdb/22.0.0/up3.sql | 1 + schema/crdb/22.0.0/up4.sql | 1 + schema/crdb/dbinit.sql | 22 +- schema/rss-service-plan-v2.json | 20 + sled-agent/src/lib.rs | 1 + sled-agent/src/params.rs | 6 +- sled-agent/src/probe_manager.rs | 342 +++++++++++++ sled-agent/src/sled_agent.rs | 16 + tools/ci_download_maghemite_mgd | 3 + tools/ci_download_maghemite_openapi | 4 +- tools/ci_download_thundermuffin | 153 ++++++ tools/install_builder_prerequisites.sh | 3 + tools/install_runner_prerequisites.sh | 2 - tools/thundermuffin_checksums | 1 + tools/thundermuffin_version | 1 + workspace-hack/Cargo.toml | 62 ++- 80 files changed, 4052 insertions(+), 194 deletions(-) create mode 100755 .github/buildomat/jobs/a4x2-deploy.sh create mode 100755 .github/buildomat/jobs/a4x2-prepare.sh mode change 100644 => 100755 .github/buildomat/jobs/tuf-repo.sh create mode 100644 end-to-end-tests/src/bin/commtest.rs create mode 100644 end-to-end-tests/src/bin/dhcp-server.rs create mode 100644 end-to-end-tests/src/helpers/cli.rs create mode 100644 end-to-end-tests/src/helpers/icmp.rs create mode 100644 nexus/db-model/src/probe.rs create mode 100644 nexus/db-queries/src/db/datastore/probe.rs create mode 100644 
nexus/src/app/probe.rs create mode 100644 nexus/tests/integration_tests/probe.rs create mode 100644 schema/crdb/22.0.0/up1.sql create mode 100644 schema/crdb/22.0.0/up2.sql create mode 100644 schema/crdb/22.0.0/up3.sql create mode 100644 schema/crdb/22.0.0/up4.sql create mode 100644 sled-agent/src/probe_manager.rs create mode 100755 tools/ci_download_thundermuffin create mode 100644 tools/thundermuffin_checksums create mode 100644 tools/thundermuffin_version diff --git a/.github/buildomat/jobs/a4x2-deploy.sh b/.github/buildomat/jobs/a4x2-deploy.sh new file mode 100755 index 00000000000..3ae628a4ca8 --- /dev/null +++ b/.github/buildomat/jobs/a4x2-deploy.sh @@ -0,0 +1,161 @@ +#!/bin/bash +#: +#: name = "a4x2-deploy" +#: variety = "basic" +#: target = "lab-2.0-opte-0.27" +#: rust_toolchain = "stable" +#: output_rules = [ +#: "/out/falcon/*.log", +#: "/out/falcon/*.err", +#: "/out/connectivity-report.json", +#: "/out/*-sled-agent.log", +#: "%/out/dhcp-server.log", +#: ] +#: skip_clone = true +#: +#: [dependencies.a4x2] +#: job = "a4x2-prepare" + +set -o errexit +set -o pipefail +set -o xtrace + +pfexec mkdir -p /out +pfexec chown "$UID" /out + +# +# If we fail, try to collect some debugging information +# +_exit_trap() { + local status=$? 
+ [[ $status -eq 0 ]] && exit 0 + + set +o errexit + + mkdir -p /out/falcon + cp .falcon/* /out/falcon/ + for x in ce cr1 cr2 g0 g1 g2 g3; do + mv /out/falcon/$x.out /out/falcon/$x.log + done + cp connectivity-report.json /out/ + + for gimlet in g0 g1 g2 g3; do + ./a4x2 exec \ + $gimlet \ + "cat /var/svc/log/oxide-sled-agent:default.log" > \ + /out/$gimlet-sled-agent.log + done +} +trap _exit_trap EXIT + +# +# Install propolis +# +curl -fOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HJ4BJJY2Q9EKXHYV6HQZ8XPN/qQS2fnkS9LebcL4cDLeHRWdleSiXaGKEXGLDucRoab8pwBSi/01HJ4BJY5F995ET252YSD4NJWV/01HJ4CGFH946THBF0ZRH6SRM8X/propolis-server +chmod +x propolis-server +pfexec mv propolis-server /usr/bin/ + +# +# Make space for CI work +# +export DISK=${DISK:-c1t1d0} +pfexec diskinfo +pfexec zpool create -f cpool $DISK +pfexec zfs create -o mountpoint=/ci cpool/ci +pfexec chown "$UID" /ci +cd /ci + +# +# Fetch and decompress the cargo bay from the a4x2-prepare job +# +for x in ce cr1 cr2 omicron-common g0 g1 g2 g3 tools; do + tar -xvzf /input/a4x2/out/cargo-bay-$x.tgz +done + +for sled in g0 g1 g2 g3; do + cp -r cargo-bay/omicron-common/omicron/out/* cargo-bay/$sled/omicron/out/ +done +ls -R + +# +# Fetch the a4x2 topology manager program +# +buildomat_url=https://buildomat.eng.oxide.computer +testbed_artifact_path=public/file/oxidecomputer/testbed/topo/ +testbed_rev=667a7474968b15dafc8369e6cdc523b8d25630b2 +curl -fOL $buildomat_url/$testbed_artifact_path/$testbed_rev/a4x2 +chmod +x a4x2 + +# +# Set the dataset (on cpool) used for falcon images and disks +# +export FALCON_DATASET=cpool/falcon + +# +# Install falcon base images +# +github_raw=https://raw.githubusercontent.com +falcon_path=oxidecomputer/falcon +falcon_branch=main +curl -sSf $github_raw/$falcon_path/$falcon_branch/setup-base-images.sh | bash +curl -sSf $github_raw/$falcon_path/$falcon_branch/get-ovmf.sh | bash + +# +# Fetch the arista image +# +curl -OL https://oxide-falcon-assets.s3.us-west-2.amazonaws.com/arista.gz.xz +unxz 
arista.gz.xz +pfexec zfs receive cpool/falcon/img/arista@base < arista.gz + +# +# Run the VM DHCP server +# +export EXT_INTERFACE=${EXT_INTERFACE:-igb0} + +cp /input/a4x2/out/dhcp-server . +chmod +x dhcp-server +first=`bmat address ls -f extra -Ho first` +last=`bmat address ls -f extra -Ho last` +gw=`bmat address ls -f extra -Ho gateway` +server=`ipadm show-addr $EXT_INTERFACE/dhcp -po ADDR | sed 's#/.*##g'` +pfexec ./dhcp-server $first $last $gw $server &> /out/dhcp-server.log & + +# +# Run the topology +# +pfexec ./a4x2 launch + +# +# Add a route to the rack IP pool +# + +# XXX I think this will be on the CI machine config by default, but leaving a +# breadcrumb here just in case. +# +# Get a DHCP address for the external interface on the LAB network. +# ipadm create-addr -t -T dhcp $EXT_INTERFACE/testbed + +# Get the DHCP address for the external interface of the customer edge VM. This +# VM interface is attached to the host machine's external interface via viona. +customer_edge_addr=$(./a4x2 exec ce \ + "ip -4 -j addr show enp0s10 | jq -r '.[0].addr_info[] | select(.dynamic == true) | .local'") + +# Add the route to the rack via the customer edge VM +pfexec dladm +pfexec ipadm +pfexec netstat -nr +pfexec route add 198.51.100.0/24 $customer_edge_addr + +# +# Run the communications test program +# +cp /input/a4x2/out/commtest . 
+chmod +x commtest +pfexec ./commtest http://198.51.100.23 run \ + --ip-pool-begin 198.51.100.40 \ + --ip-pool-end 198.51.100.70 \ + --icmp-loss-tolerance 10 \ + --test-duration 200s \ + --packet-rate 10 + +cp connectivity-report.json /out/ diff --git a/.github/buildomat/jobs/a4x2-prepare.sh b/.github/buildomat/jobs/a4x2-prepare.sh new file mode 100755 index 00000000000..3d973f6873d --- /dev/null +++ b/.github/buildomat/jobs/a4x2-prepare.sh @@ -0,0 +1,88 @@ +#!/bin/bash +#: +#: name = "a4x2-prepare" +#: variety = "basic" +#: target = "helios-2.0" +#: rust_toolchain = "stable" +#: output_rules = [ +#: "=/out/cargo-bay-ce.tgz", +#: "=/out/cargo-bay-cr1.tgz", +#: "=/out/cargo-bay-cr2.tgz", +#: "=/out/cargo-bay-g0.tgz", +#: "=/out/cargo-bay-g1.tgz", +#: "=/out/cargo-bay-g2.tgz", +#: "=/out/cargo-bay-g3.tgz", +#: "=/out/cargo-bay-tools.tgz", +#: "=/out/cargo-bay-omicron-common.tgz", +#: "=/out/commtest", +#: "=/out/dhcp-server", +#: ] +#: access_repos = [ +#: "oxidecomputer/testbed", +#: ] + +source ./env.sh + +set -o errexit +set -o pipefail +set -o xtrace + +pfexec mkdir -p /out +pfexec chown "$UID" /out + +# +# Prep to build omicron +# +banner "prerequisites" +set -o xtrace +./tools/install_builder_prerequisites.sh -y + +# +# Build the commtest and dhcp-server programs and place them in the output +# +banner "commtest" +cargo build -p end-to-end-tests --bin commtest --bin dhcp-server --release +cp target/release/commtest /out/ +cp target/release/dhcp-server /out/ + +# +# Clone the testbed repo +# +banner "testbed" +cd /work/oxidecomputer +rm -rf testbed +git clone https://github.com/oxidecomputer/testbed +cd testbed/a4x2 + +# +# Build the a4x2 cargo bay using the omicron sources in this branch, fetch the +# softnpu artifacts into the cargo bay, zip up the cargo bay and place it in the +# output. 
+# +OMICRON=/work/oxidecomputer/omicron ./config/build-packages.sh + +# Create an omicron archive that captures common assets + +pushd cargo-bay +mkdir -p omicron-common/omicron/ +cp -r g0/omicron/out omicron-common/omicron/ +# sled agent archive is sled-specific +rm omicron-common/omicron/out/omicron-sled-agent.tar +popd + +# Remove everything in $sled/omicron/out except sled agent tar, these common +# elements are in the omicron-common archive +for sled in g0 g1 g2 g3; do + find cargo-bay/$sled/omicron/out/ -maxdepth 1 -mindepth 1 \ + | grep -v sled-agent | xargs -l rm -rf +done + +# Put the softnpu artifacts in place. +./config/fetch-softnpu-artifacts.sh + +# Archive everything up and place it in the output +for x in ce cr1 cr2 g0 g1 g2 g3 tools omicron-common; do + tar -czf cargo-bay-$x.tgz cargo-bay/$x + mv cargo-bay-$x.tgz /out/ +done + diff --git a/.github/buildomat/jobs/ci-tools.sh b/.github/buildomat/jobs/ci-tools.sh index 07a63af30c3..a4d38af182d 100755 --- a/.github/buildomat/jobs/ci-tools.sh +++ b/.github/buildomat/jobs/ci-tools.sh @@ -8,6 +8,7 @@ #: "=/work/end-to-end-tests/*.gz", #: "=/work/caboose-util.gz", #: "=/work/tufaceous.gz", +#: "=/work/commtest", #: ] set -o errexit @@ -33,6 +34,10 @@ export CARGO_INCREMENTAL=0 ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \ --message-format json-render-diagnostics >/tmp/output.end-to-end.json +mkdir -p /work +ptime -m cargo build --locked -p end-to-end-tests --tests --bin commtest +cp target/debug/commtest /work/commtest + mkdir -p /work/end-to-end-tests for p in target/debug/bootstrap $(/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json); do # shellcheck disable=SC2094 diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 350ab372336..9340241efe5 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -124,6 +124,7 @@ zones=( out/omicron-gateway-softnpu.tar.gz 
out/omicron-gateway-asic.tar.gz out/overlay.tar.gz + out/probe.tar.gz ) cp "${zones[@]}" /work/zones/ diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore index 1d7177320fd..69c25f8890a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ debug.out rusty-tags.vi *.sw* tags +connectivity-report.json diff --git a/Cargo.lock b/Cargo.lock index 3cdf3dd6789..98a891b0713 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1035,11 +1035,10 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "colored" -version = "2.0.4" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" dependencies = [ - "is-terminal", "lazy_static", "windows-sys 0.48.0", ] @@ -1081,6 +1080,17 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "cookie" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + [[package]] name = "cookie" version = "0.18.0" @@ -1091,6 +1101,23 @@ dependencies = [ "version_check", ] +[[package]] +name = "cookie_store" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d606d0fba62e13cf04db20536c05cb7f13673c161cb47a47a82b9b9e7d3f1daa" +dependencies = [ + "cookie 0.16.2", + "idna 0.2.3", + "log", + "publicsuffix", + "serde", + "serde_derive", + "serde_json", + "time", + "url", +] + [[package]] name = "core-foundation" version = "0.9.3" @@ -1558,7 +1585,7 @@ dependencies = [ 
"omicron-workspace-hack", "omicron-zone-package", "progenitor", - "progenitor-client", + "progenitor-client 0.4.0", "quote", "reqwest", "rustfmt-wrapper", @@ -1697,6 +1724,27 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "dhcproto" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "000717b4f6913807b6195419e0bacb008d449ba6023ca26abf349c4ff2f1866b" +dependencies = [ + "dhcproto-macros", + "hex", + "ipnet", + "rand 0.8.5", + "thiserror", + "trust-dns-proto", + "url", +] + +[[package]] +name = "dhcproto-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7993efb860416547839c115490d4951c6d0f8ec04a3594d9dd99d50ed7ec170" + [[package]] name = "diesel" version = "2.1.4" @@ -1920,7 +1968,7 @@ dependencies = [ "omicron-workspace-hack", "omicron-zone-package", "progenitor", - "progenitor-client", + "progenitor-client 0.4.0", "quote", "rand 0.8.5", "regress", @@ -2117,15 +2165,24 @@ dependencies = [ name = "end-to-end-tests" version = "0.1.0" dependencies = [ + "anstyle", "anyhow", "async-trait", "base64", "chrono", + "clap 4.4.3", + "colored", + "dhcproto", "http", + "humantime", "hyper", + "internet-checksum", + "ispf", + "macaddr", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", + "oxide-api", "oxide-client", "rand 0.8.5", "reqwest", @@ -2133,6 +2190,7 @@ dependencies = [ "russh-keys", "serde", "serde_json", + "socket2 0.5.5", "tokio", "toml 0.8.8", "trust-dns-resolver", @@ -2418,9 +2476,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -2433,9 +2491,9 @@ dependencies 
= [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -2443,15 +2501,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -2460,15 +2518,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", @@ -2477,15 +2535,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = 
"9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-timer" @@ -2495,9 +2553,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -3167,6 +3225,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -3456,6 +3524,12 @@ dependencies = [ "trust-dns-resolver", ] +[[package]] +name = "internet-checksum" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6d6206008e25125b1f97fbe5d309eb7b85141cf9199d52dbd3729a1584dd16" + [[package]] name = "io-lifetimes" version = "1.0.11" @@ -3527,6 +3601,14 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" +[[package]] +name = "ispf" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/ispf#f78443a98397f7818b1e7a487dbb7d5cad625496" +dependencies = [ + "serde", +] + [[package]] name = "itertools" version = "0.10.5" @@ -3923,7 +4005,7 @@ dependencies = [ 
"omicron-workspace-hack", "omicron-zone-package", "progenitor", - "progenitor-client", + "progenitor-client 0.4.0", "quote", "reqwest", "rustfmt-wrapper", @@ -4150,7 +4232,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "cookie", + "cookie 0.18.0", "db-macros", "diesel", "diesel-dtrace", @@ -4192,6 +4274,7 @@ dependencies = [ "regex", "rustls", "samael", + "schemars", "serde", "serde_json", "serde_urlencoded", @@ -4808,7 +4891,7 @@ dependencies = [ "petgraph", "pq-sys", "pretty_assertions", - "progenitor-client", + "progenitor-client 0.4.0", "propolis-client", "rand 0.8.5", "rcgen", @@ -5129,6 +5212,7 @@ dependencies = [ "managed", "memchr", "mio", + "native-tls", "nom", "num-bigint", "num-integer", @@ -5136,6 +5220,8 @@ dependencies = [ "num-traits", "once_cell", "openapiv3", + "openssl", + "openssl-sys", "pem-rfc7468", "petgraph", "postgres-types", @@ -5158,6 +5244,7 @@ dependencies = [ "similar", "slog", "snafu", + "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", @@ -5284,6 +5371,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "openssl-src" +version = "300.2.1+3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fe476c29791a5ca0d1273c697e96085bbabbbea2ef7afd5617e78a4b40332d3" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.96" @@ -5292,6 +5388,7 @@ checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -5375,6 +5472,25 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "oxide-api" +version = "0.2.0" +source = "git+https://github.com/oxidecomputer/oxide.rs?branch=probes#8a68ab11c9b028bcb3611e80ff7579d5c20477b7" 
+dependencies = [ + "base64", + "chrono", + "clap 4.4.3", + "futures", + "progenitor-client 0.5.0", + "rand 0.8.5", + "regress", + "reqwest", + "schemars", + "serde", + "serde_json", + "uuid", +] + [[package]] name = "oxide-client" version = "0.1.0" @@ -6201,9 +6317,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" dependencies = [ "unicode-ident", ] @@ -6213,7 +6329,7 @@ name = "progenitor" version = "0.4.0" source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" dependencies = [ - "progenitor-client", + "progenitor-client 0.4.0", "progenitor-impl", "progenitor-macro", "serde_json", @@ -6233,6 +6349,20 @@ dependencies = [ "serde_urlencoded", ] +[[package]] +name = "progenitor-client" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#3685587202ad260a9efeaa9314c80a25b01c9617" +dependencies = [ + "bytes", + "futures-core", + "percent-encoding", + "reqwest", + "serde", + "serde_json", + "serde_urlencoded", +] + [[package]] name = "progenitor-impl" version = "0.4.0" @@ -6352,6 +6482,22 @@ dependencies = [ "unarray", ] +[[package]] +name = "psl-types" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + +[[package]] +name = "publicsuffix" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a8c1bda5ae1af7f99a2962e49df150414a43d62404644d98dd5c3a93d07457" +dependencies = [ + "idna 0.3.0", + "psl-types", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -6728,12 +6874,14 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.22" +version = "0.11.23" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" dependencies = [ "base64", "bytes", + "cookie 0.16.2", + "cookie_store", "encoding_rs", "futures-core", "futures-util", @@ -10289,8 +10437,7 @@ dependencies = [ [[package]] name = "zone" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62a428a79ea2224ce8ab05d6d8a21bdd7b4b68a8dbc1230511677a56e72ef22" +source = "git+https://github.com/oxidecomputer/zone?branch=state-derive-eq-hash#f1920d5636c69ea8179f8ec659702dcdef43268c" dependencies = [ "itertools 0.10.5", "thiserror", @@ -10301,8 +10448,7 @@ dependencies = [ [[package]] name = "zone_cfg_derive" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5c4f01d3785e222d5aca11c9813e9c46b69abfe258756c99c9b628683626cc8" +source = "git+https://github.com/oxidecomputer/zone?branch=state-derive-eq-hash#f1920d5636c69ea8179f8ec659702dcdef43268c" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index d4f81b0310a..147b6494548 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,6 +140,7 @@ resolver = "2" [workspace.dependencies] anyhow = "1.0" +anstyle = "1.0" api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" @@ -167,6 +168,7 @@ ciborium = "0.2.1" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.4", features = ["derive", "env", "wrap_help"] } +colored = "2.1" cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" @@ -224,8 +226,10 @@ installinator-artifactd = { path = "installinator-artifactd" } installinator-artifact-client = { path = "clients/installinator-artifact-client" } installinator-common = { path = "installinator-common" } internal-dns = { path = "internal-dns" 
} +internet-checksum = "0.2" ipcc-key-value = { path = "ipcc-key-value" } ipnetwork = { version = "0.20", features = ["schemars"] } +ispf = { git = "https://github.com/oxidecomputer/ispf" } itertools = "0.12.0" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" @@ -259,6 +263,7 @@ omicron-rpaths = { path = "rpaths" } omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.9.1" +oxide-api = { git = "https://github.com/oxidecomputer/oxide.rs", branch = "probes" } oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4", features = [ "api", "std" ] } once_cell = "1.19.0" @@ -343,6 +348,7 @@ slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", slog-term = "2.9" smf = "0.2" snafu = "0.7" +socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } @@ -594,3 +600,7 @@ branch = "oxide/omicron" # to it. 
[patch.crates-io.omicron-workspace-hack] path = "workspace-hack" + +[patch.crates-io.zone] +git = 'https://github.com/oxidecomputer/zone' +branch = 'state-derive-eq-hash' diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 3ecba7e7100..e14dc6e25c7 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -28,6 +28,8 @@ progenitor::generate_api!( MacAddr = omicron_common::api::external::MacAddr, Name = omicron_common::api::external::Name, NewPasswordHash = omicron_passwords::NewPasswordHash, + NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, + NetworkInterfaceKind = omicron_common::api::internal::shared::NetworkInterfaceKind, } ); diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0bbd27cf3ee..4a88f2ce831 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -31,6 +31,8 @@ progenitor::generate_api!( IpNetwork = ipnetwork::IpNetwork, PortFec = omicron_common::api::internal::shared::PortFec, PortSpeed = omicron_common::api::internal::shared::PortSpeed, + NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, + NetworkInterfaceKind = omicron_common::api::internal::shared::NetworkInterfaceKind, } ); @@ -455,40 +457,6 @@ impl From } } -impl From - for types::NetworkInterfaceKind -{ - fn from( - s: omicron_common::api::internal::shared::NetworkInterfaceKind, - ) -> Self { - use omicron_common::api::internal::shared::NetworkInterfaceKind::*; - match s { - Instance { id } => Self::Instance(id), - Service { id } => Self::Service(id), - } - } -} - -impl From - for types::NetworkInterface -{ - fn from( - s: omicron_common::api::internal::shared::NetworkInterface, - ) -> Self { - Self { - id: s.id, - kind: s.kind.into(), - name: (&s.name).into(), - ip: s.ip, - mac: s.mac.into(), - subnet: s.subnet.into(), - vni: s.vni.into(), - primary: s.primary, - slot: s.slot, - } - } -} - impl From 
for types::SourceNatConfig { diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 446152137a8..176816e86ca 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -768,6 +768,8 @@ pub enum ResourceType { Vmm, Ipv4NatEntry, FloatingIp, + Probe, + ProbeNetworkInterface, } // IDENTITY METADATA @@ -2621,6 +2623,15 @@ pub struct BgpImportedRouteIpv4 { pub switch: SwitchLocation, } +#[derive( + Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq, ObjectIdentity, +)] +pub struct Probe { + #[serde(flatten)] + pub identity: IdentityMetadata, + pub sled: Uuid, +} + #[cfg(test)] mod test { use serde::Deserialize; diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index c8d8b1c7861..bf825fd2e7b 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -36,6 +36,8 @@ pub enum NetworkInterfaceKind { Instance { id: Uuid }, /// A vNIC associated with an internal service Service { id: Uuid }, + /// A vNIC associated with a probe + Probe { id: Uuid }, } /// Information required to construct a virtual network interface diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 65520ab59c1..b97d16ee2b0 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -59,43 +59,30 @@ note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=d note: database schema version matches expected () ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] -termination: Exited(0) +termination: Exited(1) --------------------------------------------- stdout: -SERVICE INSTANCE_ID ADDR SLED_SERIAL -CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 
-ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 -Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () +Error: listing instances of kind Probe + +Caused by: + Internal Error: unexpected database error: error in argument for $1: invalid input value for enum service_kind: "probe" ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] -termination: Exited(0) +termination: Exited(1) --------------------------------------------- stdout: -sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) - - SERVICE INSTANCE_ID ADDR - CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Dendrite REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT - Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () +Error: listing instances of kind Probe + +Caused by: + Internal Error: unexpected database error: error in argument for $1: invalid input value for enum service_kind: "probe" 
============================================= EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 9e38112c36f..c54486f56ba 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -15,7 +15,7 @@ omicron-sled-agent.workspace = true omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true -reqwest.workspace = true +reqwest = { workspace = true, features = ["cookies"] } russh = "0.40.1" russh-keys = "0.40.1" serde.workspace = true @@ -25,3 +25,13 @@ toml.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +ispf.workspace = true +internet-checksum.workspace = true +humantime.workspace = true +socket2.workspace = true +colored.workspace = true +anstyle.workspace = true +clap.workspace = true +oxide-api.workspace = true +dhcproto = "0.11" +macaddr.workspace = true diff --git a/end-to-end-tests/src/bin/commtest.rs b/end-to-end-tests/src/bin/commtest.rs new file mode 100644 index 00000000000..2efa9829d16 --- /dev/null +++ b/end-to-end-tests/src/bin/commtest.rs @@ -0,0 +1,399 @@ +use anyhow::{anyhow, Result}; +use clap::{Parser, Subcommand}; +use end_to_end_tests::helpers::cli::oxide_cli_style; +use end_to_end_tests::helpers::icmp::ping4_test_run; +use oxide_api::{ + types::{ + IpRange, Ipv4Range, Name, PingStatus, ProbeCreate, ProbeInfo, + ProjectCreate, UsernamePasswordCredentials, + }, + ClientLoginExt, ClientProbesExt, ClientProjectsExt, + ClientSystemHardwareExt, ClientSystemNetworkingExt, ClientSystemStatusExt, + ClientVpcsExt, +}; +use std::{ + net::{IpAddr, Ipv4Addr}, + time::{Duration, Instant}, +}; +use tokio::time::sleep; +use uuid::Uuid; + +#[derive(Parser, Debug)] +#[clap(version, about, long_about = None, styles = oxide_cli_style())] +struct Cli { + /// Oxide API address i.e., http://198.51.100.20 + oxide_api: String, + + #[clap(subcommand)] + command: 
Commands, +} + +#[derive(Debug, Subcommand)] +enum Commands { + Run(RunArgs), + Cleanup, +} + +#[derive(Parser, Debug)] +struct RunArgs { + /// Test Duration + #[arg(long, default_value = "100s")] + test_duration: humantime::Duration, + + /// Test packet rate in packets per second + #[arg(long, default_value_t = 10)] + packet_rate: usize, + + /// How many lost ICMP packets may be tolerated + #[arg(long, default_value_t = 0)] + icmp_loss_tolerance: usize, + + #[arg(long)] + ip_pool_begin: Ipv4Addr, + + #[arg(long)] + ip_pool_end: Ipv4Addr, +} + +#[tokio::main] +pub async fn main() -> Result<()> { + let cli = Cli::parse(); + match cli.command { + Commands::Run(ref args) => run(&cli, args).await, + Commands::Cleanup => cleanup(&cli).await, + } +} + +async fn run(cli: &Cli, args: &RunArgs) -> Result<()> { + wait_until_oxide_api_is_available(cli).await?; + let (sleds, oxide) = rack_prepare(cli, args).await?; + let addrs = launch_probes(sleds, &oxide).await?; + test_connectivity(args, addrs)?; + Ok(()) +} + +async fn cleanup(cli: &Cli) -> Result<()> { + wait_until_oxide_api_is_available(cli).await?; + let oxide = cleanup_probes(cli).await?; + rack_cleanup(&oxide).await?; + Ok(()) +} + +async fn wait_until_oxide_api_is_available(cli: &Cli) -> Result<()> { + let oxide = oxide_api::Client::new(&cli.oxide_api); + let start = Instant::now(); + loop { + if let Ok(result) = oxide.ping().send().await.map(|x| x.into_inner()) { + if result.status == PingStatus::Ok { + println!("the api is up"); + break; + } + } + if Instant::now().duration_since(start) > Duration::from_secs(60 * 60) { + return Err(anyhow!( + "One hour deadline for system startup exceeded" + )); + } + println!("no api response yet, wating 3s ..."); + sleep(Duration::from_secs(3)).await; + } + Ok(()) +} + +macro_rules! 
api_retry { + ($call:expr) => {{ + let mut limit = 15; + loop { + match $call { + res @ Ok(_) => break res, + Err(e) => { + limit -= 1; + if limit == 0 { + break Err(e); + } + println!("API call error: {e}, retrying in 3 s"); + sleep(Duration::from_secs(3)).await; + } + } + } + }}; +} + +async fn cleanup_probes(cli: &Cli) -> Result { + let rqb = reqwest::ClientBuilder::new() + .cookie_store(true) + .timeout(Duration::from_secs(15)) + .connect_timeout(Duration::from_secs(15)) + .build() + .unwrap(); + let oxide = oxide_api::Client::new_with_client(&cli.oxide_api, rqb); + + print!("logging in ... "); + api_retry!( + oxide + .login_local() + .silo_name(Name::try_from("recovery").unwrap()) + .body(UsernamePasswordCredentials { + password: "oxide".parse().unwrap(), + username: "recovery".parse().unwrap(), + }) + .send() + .await + )?; + println!("done"); + + let probes: Vec = api_retry!( + oxide + .probe_list() + .project(Name::try_from("classone").unwrap()) + .limit(u32::MAX) + .send() + .await + )? + .into_inner() + .items; + + for probe in &probes { + print!("deleting probe {} ... ", *probe.name); + api_retry!( + oxide + .probe_delete() + .project(Name::try_from("classone").unwrap()) + .probe(probe.id) + .send() + .await + )?; + println!("done"); + } + + Ok(oxide) +} + +async fn rack_cleanup(oxide: &oxide_api::Client) -> Result<()> { + if let Err(e) = oxide + .project_view() + .project(Name::try_from("classone").unwrap()) + .send() + .await + { + if let Some(reqwest::StatusCode::NOT_FOUND) = e.status() { + print!("project does not exist"); + } else { + Err(e)?; + } + } else { + print!("deleting classone subnet ... "); + api_retry!( + oxide + .vpc_subnet_delete() + .project(Name::try_from("classone").unwrap()) + .vpc(Name::try_from("default").unwrap()) + .subnet(Name::try_from("default").unwrap()) + .send() + .await + )?; + println!("done"); + + print!("deleting classone vpc ... 
"); + api_retry!( + oxide + .vpc_delete() + .project(Name::try_from("classone").unwrap()) + .vpc(Name::try_from("default").unwrap()) + .send() + .await + )?; + println!("done"); + + print!("deleting classone project ... "); + api_retry!( + oxide + .project_delete() + .project(Name::try_from("classone").unwrap()) + .send() + .await + )?; + println!("done"); + } + Ok(()) +} + +async fn rack_prepare( + cli: &Cli, + args: &RunArgs, +) -> Result<(Vec, oxide_api::Client)> { + let rqb = reqwest::ClientBuilder::new().cookie_store(true).build().unwrap(); + + let oxide = oxide_api::Client::new_with_client(&cli.oxide_api, rqb); + + print!("logging in ... "); + api_retry!( + oxide + .login_local() + .silo_name(Name::try_from("recovery").unwrap()) + .body(UsernamePasswordCredentials { + password: "oxide".parse().unwrap(), + username: "recovery".parse().unwrap(), + }) + .send() + .await + )?; + println!("done"); + + api_retry!(if let Err(e) = oxide + .project_view() + .project(Name::try_from("classone").unwrap()) + .send() + .await + { + if let Some(reqwest::StatusCode::NOT_FOUND) = e.status() { + print!("project does not exist, creating ... "); + oxide + .project_create() + .body(ProjectCreate { + description: "A project for probes".into(), + name: "classone".parse().unwrap(), + }) + .send() + .await?; + println!("done"); + Ok(()) + } else { + Err(e) + } + } else { + println!("classone project already exists"); + Ok(()) + })?; + + let pool = api_retry!( + oxide + .ip_pool_range_list() + .limit(u32::MAX) + .pool(Name::try_from("default").unwrap()) + .send() + .await + )? + .into_inner() + .items; + + let range = Ipv4Range { first: args.ip_pool_begin, last: args.ip_pool_end }; + + let range_exists = pool + .iter() + .filter_map(|x| match &x.range { + IpRange::V4(r) => Some(r), + IpRange::V6(_) => None, + }) + .any(|x| x.first == range.first && x.last == range.last); + + if !range_exists { + print!("ip range does not exist, creating ... 
"); + api_retry!( + oxide + .ip_pool_range_add() + .pool(Name::try_from("default").unwrap()) + .body(IpRange::V4(range.clone())) + .send() + .await + )?; + println!("done"); + } else { + println!("ip range already exists"); + } + + print!("getting sled ids ... "); + let sleds = api_retry!(oxide.sled_list().limit(u32::MAX).send().await)? + .into_inner() + .items + .iter() + .map(|x| x.id) + .collect(); + println!("done"); + + Ok((sleds, oxide)) +} + +async fn launch_probes( + sleds: Vec, + oxide: &oxide_api::Client, +) -> Result> { + for (i, sled) in sleds.into_iter().enumerate() { + println!("checking if probe{i} exists"); + api_retry!(if let Err(e) = oxide + .probe_view() + .project(Name::try_from("classone").unwrap()) + .probe(Name::try_from(format!("probe{i}")).unwrap()) + .send() + .await + { + if let Some(reqwest::StatusCode::NOT_FOUND) = e.status() { + print!("probe{i} does not exist, creating ... "); + oxide + .probe_create() + .project(Name::try_from("classone").unwrap()) + .body(ProbeCreate { + description: format!("probe {i}"), + ip_pool: Some("default".parse().unwrap()), + name: format!("probe{i}").parse().unwrap(), + sled, + }) + .send() + .await?; + println!("done"); + Ok(()) + } else { + Err(e) + } + } else { + println!("probe{i} already exists"); + Ok(()) + })?; + } + + Ok(api_retry!( + oxide + .probe_list() + .project(Name::try_from("classone").unwrap()) + .limit(u32::MAX) + .send() + .await + )? 
+ .into_inner() + .items + .iter() + .map(|x| x.external_ips.get(0).unwrap().ip) + .filter_map(|x| match x { + IpAddr::V4(ip) => Some(ip), + IpAddr::V6(_) => None, + }) + .collect()) +} + +fn test_connectivity(args: &RunArgs, addrs: Vec) -> Result<()> { + let ttl = 255; + println!("testing connectivity to probes"); + let report = ping4_test_run( + &addrs, + ttl, + args.packet_rate, + args.test_duration.into(), + ); + + let out = serde_json::to_string_pretty(&report).unwrap(); + std::fs::write("connectivity-report.json", out.as_str()).unwrap(); + + for state in report.v4.iter() { + if state.lost > args.icmp_loss_tolerance { + panic!( + "{} has loss = {} packets which is greater than tolerance {}", + state.dest, state.lost, args.icmp_loss_tolerance, + ); + } + if state.rx_count == 0 { + panic!("received no responses from {}", state.dest); + } + } + println!("all connectivity tests within loss tolerance"); + Ok(()) +} diff --git a/end-to-end-tests/src/bin/dhcp-server.rs b/end-to-end-tests/src/bin/dhcp-server.rs new file mode 100644 index 00000000000..65fe82fe00a --- /dev/null +++ b/end-to-end-tests/src/bin/dhcp-server.rs @@ -0,0 +1,122 @@ +//! This is a dirt simple DHCP server for handing out addresses in a given +//! range. Leases do not expire. If the server runs out of addresses, it +//! panics. This is a stopgap program to hand out addresses to VMs in CI. It's +//! in no way meant to be a generic DHCP server solution. + +use anyhow::Result; +use clap::Parser; +use dhcproto::{ + v4::{ + self, Decodable, Decoder, DhcpOptions, Encodable, Message, Opcode, + OptionCode, + }, + Encoder, +}; +use end_to_end_tests::helpers::cli::oxide_cli_style; +use macaddr::MacAddr6; +use std::{ + collections::HashMap, + net::{Ipv4Addr, SocketAddrV4, UdpSocket}, +}; + +#[derive(Parser, Debug)] +#[clap(version, about, long_about = None, styles = oxide_cli_style())] +struct Cli { + /// First address in DHCP range. + begin: Ipv4Addr, + /// Last address in DHCP range. 
+ end: Ipv4Addr, + /// Default router to advertise. + router: Ipv4Addr, + /// Server address to advertise. + server: Ipv4Addr, +} + +pub fn main() -> Result<()> { + let cli = Cli::parse(); + let mut current = cli.begin; + let mut assignments = HashMap::::new(); + + let sock = UdpSocket::bind("0.0.0.0:67")?; + loop { + let mut buf = [0; 8192]; + let (n, src) = sock.recv_from(&mut buf)?; + + let mut msg = match Message::decode(&mut Decoder::new(&buf[..n])) { + Ok(msg) => msg, + Err(e) => { + eprintln!("message decode error {e}"); + continue; + } + }; + + println!("request: {msg:#?}"); + + if msg.opcode() != Opcode::BootRequest { + continue; + } + + let mac: [u8; 6] = msg.chaddr()[0..6].try_into().unwrap(); + let mac = MacAddr6::from(mac); + + let ip = match assignments.get(&mac) { + Some(ip) => *ip, + None => { + assignments.insert(mac, current); + let ip = current; + current = Ipv4Addr::from(u32::from(current) + 1); + if u32::from(current) > u32::from(cli.end) { + panic!("address exhaustion"); + } + ip + } + }; + + let mut opts = DhcpOptions::new(); + match msg.opts().get(OptionCode::MessageType) { + Some(v4::DhcpOption::MessageType(v4::MessageType::Discover)) => { + opts.insert(v4::DhcpOption::MessageType( + v4::MessageType::Offer, + )); + } + Some(v4::DhcpOption::MessageType(v4::MessageType::Request)) => { + opts.insert(v4::DhcpOption::MessageType(v4::MessageType::Ack)); + } + Some(mtype) => eprintln!("unexpected message type {mtype:?}"), + None => { + eprintln!("no message type"); + } + }; + // hardcode to /24 + opts.insert(v4::DhcpOption::SubnetMask(Ipv4Addr::new( + 255, 255, 255, 0, + ))); + opts.insert(v4::DhcpOption::DomainNameServer(vec![Ipv4Addr::new( + 1, 1, 1, 1, + )])); + opts.insert(v4::DhcpOption::ServerIdentifier(cli.server)); + opts.insert(v4::DhcpOption::AddressLeaseTime(60 * 60 * 24 * 30)); + opts.insert(v4::DhcpOption::Router(vec![cli.router])); + if let Some(opt) = msg.opts().get(OptionCode::ClientIdentifier) { + opts.insert(opt.clone()); + } + + 
msg.set_opcode(Opcode::BootReply); + msg.set_siaddr(cli.server); + msg.set_yiaddr(ip); + msg.set_opts(opts); + + let mut buf = Vec::new(); + let mut e = Encoder::new(&mut buf); + if let Err(e) = msg.encode(&mut e) { + eprintln!("encode reply error: {e}"); + continue; + } + + let dst = + SocketAddrV4::new(Ipv4Addr::new(255, 255, 255, 255), src.port()); + if let Err(e) = sock.send_to(&buf, dst) { + eprintln!("send reply error: {e}"); + } + } +} diff --git a/end-to-end-tests/src/helpers/cli.rs b/end-to-end-tests/src/helpers/cli.rs new file mode 100644 index 00000000000..0d92bc08d6e --- /dev/null +++ b/end-to-end-tests/src/helpers/cli.rs @@ -0,0 +1,21 @@ +pub fn oxide_cli_style() -> clap::builder::Styles { + clap::builder::Styles::styled() + .header(anstyle::Style::new().bold().underline().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(245, 207, 101)), + ))) + .literal(anstyle::Style::new().bold().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(72, 213, 151)), + ))) + .invalid(anstyle::Style::new().bold().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(72, 213, 151)), + ))) + .valid(anstyle::Style::new().bold().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(72, 213, 151)), + ))) + .usage(anstyle::Style::new().bold().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(245, 207, 101)), + ))) + .error(anstyle::Style::new().bold().fg_color(Some( + anstyle::Color::Rgb(anstyle::RgbColor(232, 104, 134)), + ))) +} diff --git a/end-to-end-tests/src/helpers/icmp.rs b/end-to-end-tests/src/helpers/icmp.rs new file mode 100644 index 00000000000..26373121a33 --- /dev/null +++ b/end-to-end-tests/src/helpers/icmp.rs @@ -0,0 +1,271 @@ +use colored::*; +use internet_checksum::Checksum; +use serde::{Deserialize, Serialize}; +use socket2::{Domain, Protocol, SockAddr, Socket, Type}; +use std::collections::BTreeMap; +use std::mem::MaybeUninit; +use std::net::{Ipv4Addr, SocketAddrV4}; +use std::sync::{Arc, Mutex}; +use std::thread::{sleep, spawn}; +use 
std::time::{Duration, Instant}; + +const HIDE_CURSOR: &str = "\x1b[?25l"; +const SHOW_CURSOR: &str = "\x1b[?25h"; + +const ICMP_ECHO_TYPE: u8 = 8; +const ICMP_ECHO_CODE: u8 = 0; + +#[derive(Debug, Serialize, Deserialize)] +struct EchoRequest { + typ: u8, + code: u8, + checksum: u16, + identifier: u16, + sequence_number: u16, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Report { + pub v4: Vec, +} + +pub fn ping4_test_run( + dst: &[Ipv4Addr], + ttl: u32, + pps: usize, + duration: Duration, +) -> Report { + let p = Pinger4::new(ttl); + for dst in dst { + p.add_target(rand::random(), *dst, pps, duration); + } + print!("{HIDE_CURSOR}"); + p.clone().show(); + sleep(duration + Duration::from_millis(250)); + for _ in 0..p.targets.lock().unwrap().len() { + println!(); + } + print!("{SHOW_CURSOR}"); + let v4 = p.targets.lock().unwrap().values().copied().collect(); + Report { v4 } +} + +struct Pinger4 { + sock: Socket, + targets: Mutex>, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct Ping4State { + pub dest: Ipv4Addr, + pub low: Duration, + pub high: Duration, + pub sum: Duration, + pub current: Option, + pub lost: usize, + pub tx_count: u16, + pub rx_count: u16, + #[serde(skip)] + pub sent: Option, + #[serde(skip)] + pub first: u16, +} + +impl Ping4State { + fn new(addr: Ipv4Addr) -> Self { + Self { + dest: addr, + low: Duration::default(), + high: Duration::default(), + sum: Duration::default(), + current: None, + lost: 0, + tx_count: 0, + rx_count: 0, + sent: None, + first: 0, + } + } +} + +impl Pinger4 { + fn new(ttl: u32) -> Arc { + let sock = Socket::new(Domain::IPV4, Type::RAW, Some(Protocol::ICMPV4)) + .unwrap(); + sock.set_ttl(ttl).unwrap(); + let s = Arc::new(Self { sock, targets: Mutex::new(BTreeMap::new()) }); + s.clone().rx(); + s.clone().expire(); + s + } + + fn show(self: Arc) { + println!( + "{:15} {:7} {:7} {:7} {:7} {:7} {:7} {}", + "addr".dimmed(), + "low".dimmed(), + "avg".dimmed(), + "high".dimmed(), + 
"last".dimmed(), + "sent".dimmed(), + "received".dimmed(), + "lost".dimmed() + ); + spawn(move || loop { + for (_id, t) in self.targets.lock().unwrap().iter() { + println!( + "{:15} {:7} {:7} {:7} {:7} {:7} {:7} {:<7}", + t.dest.to_string().cyan(), + format!("{:.3}", (t.low.as_micros() as f32 / 1000.0)), + if t.rx_count == 0 { + format!("{:.3}", 0.0) + } else { + format!( + "{:.3}", + (t.sum.as_micros() as f32 + / 1000.0 + / t.rx_count as f32) + ) + }, + format!("{:.3}", (t.high.as_micros() as f32 / 1000.0)), + match t.current { + Some(dt) => + format!("{:.3}", (dt.as_micros() as f32 / 1000.0)), + None => format!("{:.3}", 0.0), + }, + t.tx_count.to_string(), + t.rx_count.to_string(), + if t.lost == 0 { + t.lost.to_string().green() + } else { + t.lost.to_string().red() + }, + ); + } + for _ in 0..self.targets.lock().unwrap().len() { + print!("\x1b[A"); + } + print!("\r"); + sleep(Duration::from_millis(100)); + }); + } + + fn add_target( + self: &Arc, + id: u16, + addr: Ipv4Addr, + pps: usize, + duration: Duration, + ) { + self.targets.lock().unwrap().insert(id, Ping4State::new(addr)); + let interval = Duration::from_secs_f64(1.0 / pps as f64); + self.clone().tx(id, addr, interval, duration); + } + + fn tx( + self: Arc, + id: u16, + dst: Ipv4Addr, + interval: Duration, + duration: Duration, + ) { + let mut seq = 0u16; + let stop = Instant::now() + duration; + spawn(move || loop { + if Instant::now() >= stop { + break; + } + let mut c = Checksum::new(); + c.add_bytes(&[ICMP_ECHO_TYPE, ICMP_ECHO_CODE]); + c.add_bytes(&id.to_be_bytes()); + c.add_bytes(&seq.to_be_bytes()); + let pkt = EchoRequest { + typ: ICMP_ECHO_TYPE, + code: ICMP_ECHO_CODE, + checksum: u16::from_be_bytes(c.checksum()), + identifier: id, + sequence_number: seq, + }; + let msg = ispf::to_bytes_be(&pkt).unwrap(); + + match self.targets.lock().unwrap().get_mut(&id) { + Some(ref mut tgt) => { + tgt.sent = Some(Instant::now()); + tgt.tx_count = seq; + let sa: SockAddr = SocketAddrV4::new(dst, 0).into(); + 
self.sock.send_to(&msg, &sa).unwrap(); + } + None => continue, + } + + seq += 1; + sleep(interval); + }); + } + + fn expire(self: Arc) { + spawn(move || loop { + for (_, tgt) in self.targets.lock().unwrap().iter_mut() { + // Only start considering packets lost after the first packet + // is received. This allows the remote endpoint time to come + // online without considering initial packets lost while it's + // coming up. + if tgt.first != 0 { + tgt.lost = tgt + .tx_count + .saturating_sub(tgt.first) + .saturating_sub(tgt.rx_count) + as usize; + } + } + sleep(Duration::from_millis(10)); + }); + } + + fn rx(self: Arc) { + spawn(move || loop { + let mut ubuf = [MaybeUninit::new(0); 10240]; + if let Ok((sz, _)) = self.sock.recv_from(&mut ubuf) { + let buf = unsafe { &slice_assume_init_ref(&ubuf) }; + let msg: EchoRequest = match ispf::from_bytes_be(&buf[20..sz]) { + Ok(msg) => msg, + Err(_) => { + continue; + } + }; + match self.targets.lock().unwrap().get_mut(&msg.identifier) { + Some(ref mut target) => match target.sent { + Some(ref mut sent) => { + let t1 = Instant::now(); + let dt = t1 - *sent; + target.current = Some(dt); + if target.low == Duration::ZERO || dt < target.low { + target.low = dt; + } + if dt > target.high { + target.high = dt; + } + target.sum += dt; + target.current = Some(dt); + target.rx_count += 1; + if target.first == 0 { + target.first = target.tx_count; + } + } + None => { + println!("no sent"); + } + }, + None => { + println!("no target {}", msg.identifier); + } + } + } + }); + } +} + +// TODO: Use `MaybeUninit::slice_assume_init_ref` once it has stabilized +unsafe fn slice_assume_init_ref(slice: &[MaybeUninit]) -> &[T] { + unsafe { &*(slice as *const [MaybeUninit] as *const [T]) } +} diff --git a/end-to-end-tests/src/helpers/mod.rs b/end-to-end-tests/src/helpers/mod.rs index db039735556..b7cd6d55749 100644 --- a/end-to-end-tests/src/helpers/mod.rs +++ b/end-to-end-tests/src/helpers/mod.rs @@ -1,4 +1,6 @@ +pub mod cli; pub mod ctx; +pub 
mod icmp; use self::ctx::nexus_addr; use anyhow::{bail, Result}; diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ea80a6d34b6..e739e4c20d2 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -915,6 +915,10 @@ impl RunningZone { &self.inner.links } + pub fn links_mut(&mut self) -> &mut Vec { + &mut self.inner.links + } + /// Return the running processes associated with all the SMF services this /// zone is intended to run. pub fn service_processes( diff --git a/installinator/src/dispatch.rs b/installinator/src/dispatch.rs index 9bec14664c8..1fcf351a9b7 100644 --- a/installinator/src/dispatch.rs +++ b/installinator/src/dispatch.rs @@ -151,13 +151,13 @@ struct InstallOpts { #[clap(long)] install_on_gimlet: bool, - //TODO(ry) this probably needs to get plumbed somewhere instead of relying + //TODO this probably needs to get plumbed somewhere instead of relying //on a default. /// The first gimlet data link to use. #[clap(long, default_value = "cxgbe0")] data_link0: String, - //TODO(ry) this probably needs to get plumbed somewhere instead of relying + //TODO this probably needs to get plumbed somewhere instead of relying //on a default. /// The second gimlet data link to use. 
#[clap(long, default_value = "cxgbe1")] diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 25833ec104e..6f994c6cf28 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -32,6 +32,7 @@ http.workspace = true hyper.workspace = true internal-dns.workspace = true ipnetwork.workspace = true +itertools.workspace = true macaddr.workspace = true mime_guess.workspace = true # Not under "dev-dependencies"; these also need to be implemented for @@ -93,7 +94,6 @@ diesel.workspace = true dns-server.workspace = true expectorate.workspace = true hyper-rustls.workspace = true -itertools.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true hubtools.workspace = true diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 1a755f0396b..31415d574b2 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -21,6 +21,7 @@ use nexus_types::external_api::views; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; +use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use std::convert::TryFrom; @@ -32,7 +33,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_kind"))] pub struct IpKindEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Serialize, Deserialize, JsonSchema)] #[diesel(sql_type = IpKindEnum)] pub enum IpKind; @@ -51,7 +52,16 @@ impl_enum_type!( /// addresses and port ranges, while source NAT IPs are not discoverable in the /// API at all, and only provide outbound connectivity to instances, not /// inbound. 
-#[derive(Debug, Clone, Selectable, Queryable, Insertable)] +#[derive( + Debug, + Clone, + Selectable, + Queryable, + Insertable, + Serialize, + Deserialize, + JsonSchema, +)] #[diesel(table_name = external_ip)] pub struct ExternalIp { pub id: Uuid, @@ -120,6 +130,7 @@ pub struct IncompleteExternalIp { time_created: DateTime, kind: IpKind, is_service: bool, + is_probe: bool, parent_id: Option, pool_id: Uuid, project_id: Option, @@ -142,6 +153,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::SNat, is_service: false, + is_probe: false, parent_id: Some(instance_id), pool_id, project_id: None, @@ -158,6 +170,28 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::Ephemeral, is_service: false, + is_probe: false, + parent_id: Some(instance_id), + pool_id, + project_id: None, + explicit_ip: None, + explicit_port_range: None, + } + } + + pub fn for_ephemeral_probe( + id: Uuid, + instance_id: Uuid, + pool_id: Uuid, + ) -> Self { + Self { + id, + name: None, + description: None, + time_created: Utc::now(), + kind: IpKind::Ephemeral, + is_service: false, + is_probe: true, parent_id: Some(instance_id), pool_id, project_id: None, @@ -180,6 +214,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::Floating, is_service: false, + is_probe: false, parent_id: None, pool_id, project_id: Some(project_id), @@ -203,6 +238,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::Floating, is_service: false, + is_probe: false, parent_id: None, pool_id, project_id: Some(project_id), @@ -226,6 +262,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::Floating, is_service: true, + is_probe: false, parent_id: Some(service_id), pool_id, project_id: None, @@ -255,6 +292,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::SNat, is_service: true, + is_probe: false, parent_id: Some(service_id), pool_id, project_id: None, @@ -277,6 +315,7 @@ impl IncompleteExternalIp { 
time_created: Utc::now(), kind: IpKind::Floating, is_service: true, + is_probe: false, parent_id: Some(service_id), pool_id, project_id: None, @@ -293,6 +332,7 @@ impl IncompleteExternalIp { time_created: Utc::now(), kind: IpKind::SNat, is_service: true, + is_probe: false, parent_id: Some(service_id), pool_id, project_id: None, @@ -325,6 +365,10 @@ impl IncompleteExternalIp { &self.is_service } + pub fn is_probe(&self) -> &bool { + &self.is_probe + } + pub fn parent_id(&self) -> &Option { &self.parent_id } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 2c3433b2d31..0072a9677a6 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -44,6 +44,7 @@ mod network_interface; mod oximeter_info; mod physical_disk; mod physical_disk_kind; +mod probe; mod producer_endpoint; mod project; mod semver_version; @@ -139,6 +140,7 @@ pub use network_interface::*; pub use oximeter_info::*; pub use physical_disk::*; pub use physical_disk_kind::*; +pub use probe::*; pub use producer_endpoint::*; pub use project::*; pub use quota::*; diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index ada21485165..2f8793767a5 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -28,6 +28,7 @@ impl_enum_type! { Instance => b"instance" Service => b"service" + Probe => b"probe" } /// Generic Network Interface DB model. 
@@ -56,6 +57,40 @@ pub struct NetworkInterface { pub primary: bool, } +impl Into + for NetworkInterface +{ + fn into(self) -> omicron_common::api::internal::shared::NetworkInterface { + omicron_common::api::internal::shared::NetworkInterface { + id: self.id(), + kind: match self.kind { + NetworkInterfaceKind::Instance => + omicron_common::api::internal::shared::NetworkInterfaceKind::Instance { + id: self.parent_id + }, + NetworkInterfaceKind::Service => + omicron_common::api::internal::shared::NetworkInterfaceKind::Service { + id: self.parent_id + }, + NetworkInterfaceKind::Probe => + omicron_common::api::internal::shared::NetworkInterfaceKind::Probe { + id: self.parent_id + }, + }, + name: self.name().clone(), + ip: self.ip.ip(), + mac: self.mac.into(), + subnet: ipnetwork::IpNetwork::new( + self.ip.network(), + 24, //TODO + ).unwrap().into(), + vni: omicron_common::api::external::Vni::try_from(0).unwrap(), //TODO + primary: self.primary, + slot: self.slot.try_into().unwrap(), + } + } +} + /// Instance Network Interface DB model. /// /// The underlying "table" (`instance_network_interface`) is actually a view @@ -287,6 +322,25 @@ impl IncompleteNetworkInterface { mac, ) } + + pub fn new_probe( + interface_id: Uuid, + probe_id: Uuid, + subnet: VpcSubnet, + identity: external::IdentityMetadataCreateParams, + ip: Option, + mac: Option, + ) -> Result { + Self::new( + interface_id, + NetworkInterfaceKind::Probe, + probe_id, + subnet, + identity, + ip, + mac, + ) + } } /// Describes a set of updates for the [`NetworkInterface`] model. 
diff --git a/nexus/db-model/src/probe.rs b/nexus/db-model/src/probe.rs new file mode 100644 index 00000000000..be3576dfa0b --- /dev/null +++ b/nexus/db-model/src/probe.rs @@ -0,0 +1,50 @@ +use crate::schema::probe; +use db_macros::Resource; +use nexus_types::external_api::params; +use nexus_types::identity::Resource; +use omicron_common::api::external; +use omicron_common::api::external::IdentityMetadataCreateParams; +use serde::Deserialize; +use serde::Serialize; +use uuid::Uuid; + +#[derive( + Queryable, + Insertable, + Selectable, + Clone, + Debug, + Resource, + Serialize, + Deserialize, +)] +#[diesel(table_name = probe)] +pub struct Probe { + #[diesel(embed)] + pub identity: ProbeIdentity, + + pub project_id: Uuid, + pub sled: Uuid, +} + +impl Probe { + pub fn from_create(p: &params::ProbeCreate, project_id: Uuid) -> Self { + Self { + identity: ProbeIdentity::new( + Uuid::new_v4(), + IdentityMetadataCreateParams { + name: p.identity.name.clone(), + description: p.identity.description.clone(), + }, + ), + project_id, + sled: p.sled, + } + } +} + +impl Into<external::Probe> for Probe { + fn into(self) -> external::Probe { + external::Probe { identity: self.identity().clone(), sled: self.sled } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 7f4bf514873..ef4efd5b4f3 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(21, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(22, 0, 0); table! { disk (id) { @@ -567,6 +567,8 @@ table! { last_port -> Int4, project_id -> Nullable<Uuid>, + + is_probe -> Bool, } } @@ -1338,6 +1340,19 @@ table! { } } +table! 
{ + probe (id) { + id -> Uuid, + name -> Text, + description -> Text, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + project_id -> Uuid, + sled -> Uuid, + } +} + table! { db_metadata (singleton) { singleton -> Bool, diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 4210c3ee205..ac9a86db09b 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -31,6 +31,7 @@ impl_enum_type!( Tfport => b"tfport" Ntp => b"ntp" Mgd => b"mgd" + Probe => b"probe" ); impl TryFrom for ServiceUsingCertificate { @@ -90,6 +91,9 @@ impl From for ServiceKind { ServiceKind::Ntp } internal_api::params::ServiceKind::Mgd => ServiceKind::Mgd, + internal_api::params::ServiceKind::Probe { .. } => { + ServiceKind::Probe + } } } } diff --git a/nexus/db-model/src/unsigned.rs b/nexus/db-model/src/unsigned.rs index b4e9db2308a..920cad1cff9 100644 --- a/nexus/db-model/src/unsigned.rs +++ b/nexus/db-model/src/unsigned.rs @@ -7,6 +7,7 @@ use diesel::deserialize::{self, FromSql}; use diesel::pg::Pg; use diesel::serialize::{self, ToSql}; use diesel::sql_types; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::convert::TryFrom; @@ -76,6 +77,7 @@ where FromSqlRow, Serialize, Deserialize, + JsonSchema, )] #[diesel(sql_type = sql_types::Int4)] #[repr(transparent)] diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index d5320be7336..4c0140b3607 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -35,6 +35,7 @@ pq-sys = "*" rand.workspace = true ref-cast.workspace = true samael.workspace = true +schemars.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 2adeebd8198..b40f005ed66 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ 
b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -59,6 +59,32 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + /// Create an Ephemeral IP address for a probe. + pub async fn allocate_probe_ephemeral_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + probe_id: Uuid, + pool_name: Option, + ) -> CreateResult { + let pool = match pool_name { + Some(name) => { + let (.., pool) = LookupPath::new(opctx, &self) + .ip_pool_name(&name) + .fetch_for(authz::Action::CreateChild) + .await?; + pool + } + // If no name given, use the default pool + None => self.ip_pools_fetch_default(&opctx).await?, + }; + + let pool_id = pool.identity.id; + let data = + IncompleteExternalIp::for_ephemeral_probe(ip_id, probe_id, pool_id); + self.allocate_external_ip(opctx, data).await + } + /// Create an Ephemeral IP address for an instance. pub async fn allocate_instance_ephemeral_ip( &self, @@ -356,6 +382,7 @@ impl DataStore { diesel::update(dsl::external_ip) .filter(dsl::time_deleted.is_null()) .filter(dsl::is_service.eq(false)) + .filter(dsl::is_probe.eq(false)) .filter(dsl::parent_id.eq(instance_id)) .filter(dsl::kind.ne(IpKind::Floating)) .set(dsl::time_deleted.eq(now)) @@ -364,6 +391,30 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Delete all external IP addresses associated with the provided probe + /// ID. + /// + /// This method returns the number of records deleted, rather than the usual + /// `DeleteResult`. That's mostly useful for tests, but could be important + /// if callers have some invariants they'd like to check. 
+ pub async fn deallocate_external_ip_by_probe_id( + &self, + opctx: &OpContext, + probe_id: Uuid, + ) -> Result<usize, Error> { + use db::schema::external_ip::dsl; + let now = Utc::now(); + diesel::update(dsl::external_ip) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::is_probe.eq(true)) + .filter(dsl::parent_id.eq(probe_id)) + .filter(dsl::kind.ne(IpKind::Floating)) + .set(dsl::time_deleted.eq(now)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Detach an individual Floating IP address from its parent instance. /// /// As in `deallocate_external_ip_by_instance_id`, this method returns the @@ -394,6 +445,7 @@ impl DataStore { use db::schema::external_ip::dsl; dsl::external_ip .filter(dsl::is_service.eq(false)) + .filter(dsl::is_probe.eq(false)) .filter(dsl::parent_id.eq(instance_id)) .filter(dsl::time_deleted.is_null()) .select(ExternalIp::as_select()) @@ -402,6 +454,23 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Fetch all external IP addresses of any kind for the provided probe + pub async fn probe_lookup_external_ips( + &self, + opctx: &OpContext, + probe_id: Uuid, + ) -> LookupResult<Vec<ExternalIp>> { + use db::schema::external_ip::dsl; + dsl::external_ip + .filter(dsl::is_probe.eq(true)) + .filter(dsl::parent_id.eq(probe_id)) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Fetch all Floating IP addresses for the provided project. 
pub async fn floating_ips_list( &self, diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 93486771b5f..74b7946d7a2 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -67,6 +67,7 @@ mod ipv4_nat_entry; mod network_interface; mod oximeter; mod physical_disk; +mod probe; mod project; mod quota; mod rack; @@ -101,6 +102,7 @@ pub use db_metadata::{ pub use dns::DnsVersionUpdateBuilder; pub use instance::InstanceAndActiveVmm; pub use inventory::DataStoreInventoryTest; +pub use probe::ProbeInfo; pub use rack::RackInit; pub use silo::Discoverability; pub use switch_port::SwitchPortSettingsCombinedResult; diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 4d4e43c9a7c..8b9806bb7fc 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -34,11 +34,11 @@ use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use ref_cast::RefCast; -use sled_agent_client::types as sled_client_types; use uuid::Uuid; /// OPTE requires information that's currently split across the network @@ -58,8 +58,10 @@ struct NicInfo { slot: i16, } -impl From for sled_client_types::NetworkInterface { - fn from(nic: NicInfo) -> sled_client_types::NetworkInterface { +impl From for omicron_common::api::internal::shared::NetworkInterface { + fn from( + nic: NicInfo, + ) -> omicron_common::api::internal::shared::NetworkInterface { let ip_subnet = if nic.ip.is_ipv4() { external::IpNet::V4(nic.ipv4_block.0) } else { @@ 
-67,20 +69,23 @@ impl From for sled_client_types::NetworkInterface { }; let kind = match nic.kind { NetworkInterfaceKind::Instance => { - sled_client_types::NetworkInterfaceKind::Instance(nic.parent_id) + omicron_common::api::internal::shared::NetworkInterfaceKind::Instance{ id: nic.parent_id } } NetworkInterfaceKind::Service => { - sled_client_types::NetworkInterfaceKind::Service(nic.parent_id) + omicron_common::api::internal::shared::NetworkInterfaceKind::Service{ id: nic.parent_id } + } + NetworkInterfaceKind::Probe => { + omicron_common::api::internal::shared::NetworkInterfaceKind::Probe{ id: nic.parent_id } } }; - sled_client_types::NetworkInterface { + omicron_common::api::internal::shared::NetworkInterface { id: nic.id, kind, - name: sled_client_types::Name::from(&nic.name.0), + name: nic.name.into(), ip: nic.ip.ip(), - mac: sled_client_types::MacAddr::from(nic.mac.0), - subnet: sled_client_types::IpNet::from(ip_subnet), - vni: sled_client_types::Vni::from(nic.vni.0), + mac: nic.mac.into(), + subnet: ip_subnet, + vni: nic.vni.0, primary: nic.primary, slot: u8::try_from(nic.slot).unwrap(), } @@ -107,6 +112,14 @@ impl DataStore { self.instance_create_network_interface_raw(&opctx, interface).await } + pub async fn probe_create_network_interface( + &self, + opctx: &OpContext, + interface: IncompleteNetworkInterface, + ) -> Result { + self.create_network_interface_raw(&opctx, interface).await + } + pub(crate) async fn instance_create_network_interface_raw( &self, opctx: &OpContext, @@ -216,6 +229,33 @@ impl DataStore { Ok(()) } + /// Delete all network interfaces attached to the given probe. 
+ pub async fn probe_delete_all_network_interfaces( + &self, + opctx: &OpContext, + probe_id: Uuid, + ) -> DeleteResult { + use db::schema::network_interface::dsl; + let now = Utc::now(); + diesel::update(dsl::network_interface) + .filter(dsl::parent_id.eq(probe_id)) + .filter(dsl::kind.eq(NetworkInterfaceKind::Probe)) + .filter(dsl::time_deleted.is_null()) + .set(dsl::time_deleted.eq(now)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Probe, + LookupType::ById(probe_id), + ), + ) + })?; + Ok(()) + } + /// Delete an `InstanceNetworkInterface` attached to a provided instance. /// /// Note that the primary interface for an instance cannot be deleted if @@ -258,7 +298,8 @@ impl DataStore { &self, opctx: &OpContext, partial_query: BoxedQuery, - ) -> ListResultVec { + ) -> ListResultVec + { use db::schema::network_interface; use db::schema::vpc; use db::schema::vpc_subnet; @@ -294,7 +335,7 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; Ok(rows .into_iter() - .map(sled_client_types::NetworkInterface::from) + .map(omicron_common::api::internal::shared::NetworkInterface::from) .collect()) } @@ -304,7 +345,8 @@ impl DataStore { &self, opctx: &OpContext, authz_instance: &authz::Instance, - ) -> ListResultVec { + ) -> ListResultVec + { opctx.authorize(authz::Action::ListChildren, authz_instance).await?; use db::schema::network_interface; @@ -320,13 +362,31 @@ impl DataStore { .await } + pub async fn derive_probe_network_interface_info( + &self, + opctx: &OpContext, + probe_id: Uuid, + ) -> ListResultVec + { + use db::schema::network_interface; + self.derive_network_interface_info( + opctx, + network_interface::table + .filter(network_interface::parent_id.eq(probe_id)) + .filter(network_interface::kind.eq(NetworkInterfaceKind::Probe)) + .into_boxed(), + ) + .await + } + /// Return information about all VNICs 
connected to a VPC required /// for the sled agent to instantiate firewall rules via OPTE. pub async fn derive_vpc_network_interface_info( &self, opctx: &OpContext, authz_vpc: &authz::Vpc, - ) -> ListResultVec { + ) -> ListResultVec + { opctx.authorize(authz::Action::ListChildren, authz_vpc).await?; use db::schema::network_interface; @@ -345,7 +405,8 @@ impl DataStore { &self, opctx: &OpContext, authz_subnet: &authz::VpcSubnet, - ) -> ListResultVec { + ) -> ListResultVec + { opctx.authorize(authz::Action::ListChildren, authz_subnet).await?; use db::schema::network_interface; @@ -388,6 +449,25 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Get network interface associated with a given probe. + pub async fn probe_get_network_interface( + &self, + opctx: &OpContext, + probe_id: Uuid, + ) -> LookupResult { + use db::schema::network_interface::dsl; + + dsl::network_interface + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(probe_id)) + .select(NetworkInterface::as_select()) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Update a network interface associated with a given instance. 
pub async fn instance_update_network_interface( &self, diff --git a/nexus/db-queries/src/db/datastore/probe.rs b/nexus/db-queries/src/db/datastore/probe.rs new file mode 100644 index 00000000000..721b2eedc43 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/probe.rs @@ -0,0 +1,375 @@ +use std::net::IpAddr; + +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::lookup::LookupPath; +use crate::db::model::Name; +use crate::db::pagination::paginated; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use nexus_db_model::IncompleteNetworkInterface; +use nexus_db_model::Probe; +use nexus_types::external_api::params; +use nexus_types::identity::Resource; +use omicron_common::api::external::http_pagination::PaginatedBy; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; +use omicron_common::api::external::LookupType; +use omicron_common::api::external::NameOrId; +use omicron_common::api::external::ResourceType; +use omicron_common::api::internal::shared::NetworkInterface; +use ref_cast::RefCast; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] +pub struct ProbeInfo { + pub id: Uuid, + pub name: Name, + sled: Uuid, + pub external_ips: Vec, + pub interface: NetworkInterface, +} + +#[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] +pub struct ExternalIp { + ip: IpAddr, + first_port: u16, + last_port: u16, + kind: IpKind, +} + +impl From for ExternalIp { + fn from(value: nexus_db_model::ExternalIp) -> Self { + Self { + ip: 
value.ip.ip(), + first_port: value.first_port.0, + last_port: value.last_port.0, + kind: value.kind.into(), + } + } +} + +#[derive(Debug, Clone, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum IpKind { + Snat, + Floating, + Ephemeral, +} + +impl From for IpKind { + fn from(value: nexus_db_model::IpKind) -> Self { + match value { + nexus_db_model::IpKind::SNat => Self::Snat, + nexus_db_model::IpKind::Ephemeral => Self::Ephemeral, + nexus_db_model::IpKind::Floating => Self::Floating, + } + } +} + +impl super::DataStore { + pub async fn probe_list( + &self, + opctx: &OpContext, + authz_project: &authz::Project, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, authz_project).await?; + + use db::schema::probe::dsl; + + let pool = self.pool_connection_authorized(opctx).await?; + + let probes = match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(dsl::probe, dsl::id, &pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + dsl::probe, + dsl::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .filter(dsl::project_id.eq(authz_project.id())) + .filter(dsl::time_deleted.is_null()) + .select(Probe::as_select()) + .load_async(&*pool) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + let mut result = Vec::with_capacity(probes.len()); + + for probe in probes.into_iter() { + let external_ips = self + .probe_lookup_external_ips(opctx, probe.id()) + .await? 
+ .into_iter() + .map(Into::into) + .collect(); + + let interface = + self.probe_get_network_interface(opctx, probe.id()).await?; + + let vni = self.resolve_vpc_to_vni(opctx, interface.vpc_id).await?; + + let mut interface: omicron_common::api::internal::shared::NetworkInterface = + interface.into(); + interface.vni = vni.0; + + result.push(ProbeInfo { + id: probe.id(), + name: probe.name().clone().into(), + sled: probe.sled, + interface, + external_ips, + }) + } + + Ok(result) + } + + pub async fn probe_list_for_sled( + &self, + sled: Uuid, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::probe::dsl; + + let pool = self.pool_connection_authorized(opctx).await?; + + let probes = paginated(dsl::probe, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::sled.eq(sled)) + .select(Probe::as_select()) + .load_async(&*pool) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + let mut result = Vec::with_capacity(probes.len()); + + for probe in probes.into_iter() { + let external_ips = self + .probe_lookup_external_ips(opctx, probe.id()) + .await? 
+ .into_iter() + .map(Into::into) + .collect(); + + let interface = + self.probe_get_network_interface(opctx, probe.id()).await?; + + let vni = self.resolve_vpc_to_vni(opctx, interface.vpc_id).await?; + + let mut interface: omicron_common::api::internal::shared::NetworkInterface = + interface.into(); + interface.vni = vni.0; + + result.push(ProbeInfo { + id: probe.id(), + name: probe.name().clone().into(), + sled: probe.sled, + interface, + external_ips, + }) + } + + Ok(result) + } + + pub async fn probe_get( + &self, + opctx: &OpContext, + authz_project: &authz::Project, + name_or_id: &NameOrId, + ) -> LookupResult { + use db::schema::probe; + use db::schema::probe::dsl; + let pool = self.pool_connection_authorized(opctx).await?; + + let name_or_id = name_or_id.clone(); + + let probe = match name_or_id { + NameOrId::Name(name) => dsl::probe + .filter(probe::name.eq(name.to_string())) + .filter(probe::time_deleted.is_null()) + .filter(probe::project_id.eq(authz_project.id())) + .select(Probe::as_select()) + .limit(1) + .first_async::(&*pool) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Probe, + LookupType::ByName(name.to_string()), + ), + ) + }), + NameOrId::Id(id) => dsl::probe + .filter(probe::id.eq(id)) + .filter(probe::project_id.eq(authz_project.id())) + .select(Probe::as_select()) + .limit(1) + .first_async::(&*pool) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Probe, + LookupType::ById(id), + ), + ) + }), + }?; + + let external_ips = self + .probe_lookup_external_ips(opctx, probe.id()) + .await? 
+ .into_iter() + .map(Into::into) + .collect(); + + let interface = + self.probe_get_network_interface(opctx, probe.id()).await?; + + let vni = self.resolve_vpc_to_vni(opctx, interface.vpc_id).await?; + + let mut interface: omicron_common::api::internal::shared::NetworkInterface = + interface.into(); + interface.vni = vni.0; + + Ok(ProbeInfo { + id: probe.id(), + name: probe.name().clone().into(), + sled: probe.sled, + interface, + external_ips, + }) + } + + pub async fn probe_create( + &self, + opctx: &OpContext, + authz_project: &authz::Project, + new_probe: &params::ProbeCreate, + ) -> CreateResult<Probe> { + //TODO in transaction + use db::schema::probe::dsl; + let pool = self.pool_connection_authorized(opctx).await?; + + let probe = Probe::from_create(new_probe, authz_project.id()); + + let _eip = self + .allocate_probe_ephemeral_ip( + opctx, + Uuid::new_v4(), + probe.id(), + new_probe.ip_pool.clone().map(Into::into), + ) + .await?; + + let default_name = omicron_common::api::external::Name::try_from( + "default".to_string(), + ) + .unwrap(); + let internal_default_name = db::model::Name::from(default_name.clone()); + + let (.., db_subnet) = LookupPath::new(opctx, self) + .project_id(authz_project.id()) + .vpc_name(&internal_default_name) + .vpc_subnet_name(&internal_default_name) + .fetch() + .await?; + + let incomplete = IncompleteNetworkInterface::new_probe( + Uuid::new_v4(), + probe.id(), + db_subnet, //todo!(), //VpcSubnet + IdentityMetadataCreateParams { + name: probe.name().clone(), + description: format!( + "default primary interface for {}", + probe.name(), + ), + }, + None, //Request IP address assignment + None, //Request MAC address assignment + )?; + + let _ifx = self + .probe_create_network_interface(opctx, incomplete) + .await + .map_err(|e| { + omicron_common::api::external::Error::InternalError { + internal_message: format!( + "create network interface: {e:?}" + ), + } + })?; + + let result = diesel::insert_into(dsl::probe) + .values(probe.clone()) + 
.returning(Probe::as_returning()) + .get_result_async(&*pool) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(result) + } + + pub async fn probe_delete( + &self, + opctx: &OpContext, + authz_project: &authz::Project, + name_or_id: &NameOrId, + ) -> DeleteResult { + use db::schema::probe; + use db::schema::probe::dsl; + let pool = self.pool_connection_authorized(opctx).await?; + + let name_or_id = name_or_id.clone(); + + //TODO in transaction + let id = match name_or_id { + NameOrId::Name(name) => dsl::probe + .filter(probe::name.eq(name.to_string())) + .filter(probe::time_deleted.is_null()) + .filter(probe::project_id.eq(authz_project.id())) + .select(probe::id) + .limit(1) + .first_async::(&*pool) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?, + NameOrId::Id(id) => id, + }; + + self.deallocate_external_ip_by_probe_id(opctx, id).await?; + + self.probe_delete_all_network_interfaces(opctx, id).await?; + + diesel::update(dsl::probe) + .filter(dsl::id.eq(id)) + .filter(dsl::project_id.eq(authz_project.id())) + .set(dsl::time_deleted.eq(Utc::now())) + .execute_async(&*pool) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } +} diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 4f0245e283f..f5c714faffd 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1166,6 +1166,30 @@ impl DataStore { ) }) } + + /// Look up a VNI by VPC. + pub async fn resolve_vpc_to_vni( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> LookupResult { + use db::schema::vpc::dsl; + dsl::vpc + .filter(dsl::id.eq(vpc_id)) + .filter(dsl::time_deleted.is_null()) + .select(dsl::vni) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vpc, + LookupType::ByCompositeId("VNI".to_string()), + ), + ) + }) + } } #[cfg(test)] diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 2a76ea7408b..3c2f2bcbd10 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -378,6 +378,12 @@ impl NextExternalIp { out.push_bind_param::, Option>(self.ip.project_id())?; out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(", "); + + // is_probe flag + out.push_bind_param::(self.ip.is_probe())?; + out.push_sql(" AS "); + out.push_identifier(dsl::is_probe::NAME)?; out.push_sql(" FROM ("); self.push_address_sequence_subquery(out.reborrow())?; diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 6d00b4bc29c..a6ce94722a0 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -164,6 +164,9 @@ impl InsertError { InsertError::InterfaceAlreadyExists(_name, NetworkInterfaceKind::Service) => { unimplemented!("service network interface") } + InsertError::InterfaceAlreadyExists(_name, NetworkInterfaceKind::Probe) => { + unimplemented!("probe network interface") + } InsertError::NoAvailableIpAddresses => { external::Error::invalid_request( "No available IP addresses for interface", @@ -396,6 +399,9 @@ fn decode_database_error( NetworkInterfaceKind::Service => { external::ResourceType::ServiceNetworkInterface } + NetworkInterfaceKind::Probe => { + external::ResourceType::ProbeNetworkInterface + } }; InsertError::External(error::public_error_from_diesel( err, @@ -605,7 +611,7 @@ impl NextMacAddress { let min_shift = x - MacAddr::MIN_GUEST_ADDR; (base.into(), max_shift, min_shift) } - NetworkInterfaceKind::Service => { + 
NetworkInterfaceKind::Service | NetworkInterfaceKind::Probe => { let base = MacAddr::random_system(); let x = base.to_i64(); let max_shift = MacAddr::MAX_SYSTEM_ADDR - x; @@ -2538,6 +2544,7 @@ mod tests { NetworkInterfaceKind::Service => { (inserted.mac.is_system(), "system") } + NetworkInterfaceKind::Probe => (inserted.mac.is_system(), "probe"), }; assert!( mac_in_range, diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d30d2162c4c..8d582228eab 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -127,7 +127,7 @@ impl BackgroundTasks { let task_inventory_collection = { let collector = inventory_collection::InventoryCollector::new( datastore.clone(), - resolver, + resolver.clone(), &nexus_id.to_string(), config.inventory.nkeep, config.inventory.disable, @@ -149,7 +149,8 @@ impl BackgroundTasks { // Background task: phantom disk detection let task_phantom_disks = { - let detector = phantom_disks::PhantomDiskDetector::new(datastore); + let detector = + phantom_disks::PhantomDiskDetector::new(datastore.clone()); let task = driver.register( String::from("phantom_disks"), diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 40452698787..fa5f46230b8 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -39,7 +39,6 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; @@ -1089,10 +1088,9 @@ impl super::Nexus { // matter which one we use because all NICs must be in the // same VPC; see the check in project_create_instance.) 
let firewall_rules = if let Some(nic) = nics.first() { - let vni = Vni::try_from(nic.vni.0)?; let vpc = self .db_datastore - .resolve_vni_to_vpc(opctx, db::model::Vni(vni)) + .resolve_vni_to_vpc(opctx, db::model::Vni(nic.vni)) .await?; let (.., authz_vpc) = LookupPath::new(opctx, &self.db_datastore) .vpc_id(vpc.id()) diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 3db749f43b9..3718957f494 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -136,9 +136,9 @@ impl super::Nexus { let nic_id = nic.id; let mapping = SetVirtualNetworkInterfaceHost { virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), + virtual_mac: nic.mac.into(), physical_host_ip, - vni: nic.vni.clone(), + vni: nic.vni.into(), }; let log = self.log.clone(); @@ -225,7 +225,7 @@ impl super::Nexus { let nic_id = nic.id; let mapping = DeleteVirtualNetworkInterfaceHost { virtual_ip: nic.ip, - vni: nic.vni.clone(), + vni: nic.vni.into(), }; let log = self.log.clone(); @@ -389,11 +389,102 @@ impl super::Nexus { Ok(()) } + //TODO mostly copy pasta from function above + pub(crate) async fn probe_ensure_dpd_config( + &self, + opctx: &OpContext, + probe_id: Uuid, + sled_ip_address: std::net::Ipv6Addr, + ip_index_filter: Option, + dpd_client: &Arc, + ) -> Result<(), Error> { + let log = &self.log; + + // All external IPs map to the primary network interface, so find that + // interface. If there is no such interface, there's no way to route + // traffic destined to those IPs, so there's nothing to configure and + // it's safe to return early. + let network_interface = match self + .db_datastore + .derive_probe_network_interface_info(&opctx, probe_id) + .await? 
+ .into_iter() + .find(|interface| interface.primary) + { + Some(interface) => interface, + None => { + info!(log, "probe has no primary network interface"; + "probe_id" => %probe_id); + return Ok(()); + } + }; + + let mac_address = + macaddr::MacAddr6::from_str(&network_interface.mac.to_string()) + .map_err(|e| { + Error::internal_error(&format!( + "failed to convert mac address: {e}" + )) + })?; + + info!(log, "looking up probe's external IPs"; + "probe_id" => %probe_id); + + let ips = self + .db_datastore + .probe_lookup_external_ips(&opctx, probe_id) + .await?; + + if let Some(wanted_index) = ip_index_filter { + if let None = ips.get(wanted_index) { + return Err(Error::internal_error(&format!( + "failed to find external ip address at index: {}", + wanted_index + ))); + } + } + + let sled_address = + Ipv6Net(Ipv6Network::new(sled_ip_address, 128).unwrap()); + + for target_ip in ips + .iter() + .enumerate() + .filter(|(index, _)| { + if let Some(wanted_index) = ip_index_filter { + *index == wanted_index + } else { + true + } + }) + .map(|(_, ip)| ip) + { + // For each external ip, add a nat entry to the database + self.ensure_nat_entry( + target_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await?; + } + + // Notify dendrite that there are changes for it to reconcile. + // In the event of a failure to notify dendrite, we'll log an error + // and rely on dendrite's RPW timer to catch it up. 
+ if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { + error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); + }; + + Ok(()) + } + async fn ensure_nat_entry( &self, target_ip: &nexus_db_model::ExternalIp, sled_address: Ipv6Net, - network_interface: &sled_agent_client::types::NetworkInterface, + network_interface: &omicron_common::api::internal::shared::NetworkInterface, mac_address: macaddr::MacAddr6, opctx: &OpContext, ) -> Result<(), Error> { @@ -404,7 +495,7 @@ impl super::Nexus { first_port: target_ip.first_port, last_port: target_ip.last_port, sled_address: sled_address.into(), - vni: DbVni(network_interface.vni.clone().into()), + vni: DbVni(network_interface.vni), mac: nexus_db_model::MacAddr( omicron_common::api::external::MacAddr(mac_address), ), diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 5af45985db1..54daa153a09 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -50,6 +50,7 @@ mod ip_pool; mod metrics; mod network_interface; mod oximeter; +mod probe; mod project; mod quota; mod rack; diff --git a/nexus/src/app/probe.rs b/nexus/src/app/probe.rs new file mode 100644 index 00000000000..e779d7c3025 --- /dev/null +++ b/nexus/src/app/probe.rs @@ -0,0 +1,96 @@ +use nexus_db_model::Probe; +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::ProbeInfo; +use nexus_db_queries::db::lookup; +use nexus_types::external_api::params; +use nexus_types::identity::Resource; +use omicron_common::api::external::Error; +use omicron_common::api::external::{ + http_pagination::PaginatedBy, CreateResult, DataPageParams, DeleteResult, + ListResultVec, LookupResult, NameOrId, +}; +use uuid::Uuid; + +impl super::Nexus { + pub(crate) async fn probe_list( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec { + let (.., authz_project) = + 
project_lookup.lookup_for(authz::Action::ListChildren).await?; + self.db_datastore.probe_list(opctx, &authz_project, pagparams).await + } + + pub(crate) async fn probe_list_for_sled( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + sled: Uuid, + ) -> ListResultVec { + self.db_datastore.probe_list_for_sled(sled, opctx, pagparams).await + } + + pub(crate) async fn probe_get( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + name_or_id: NameOrId, + ) -> LookupResult { + let (.., authz_project) = + project_lookup.lookup_for(authz::Action::CreateChild).await?; + self.db_datastore.probe_get(opctx, &authz_project, &name_or_id).await + } + + pub(crate) async fn probe_create( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + new_probe_params: &params::ProbeCreate, + ) -> CreateResult { + let (.., authz_project) = + project_lookup.lookup_for(authz::Action::CreateChild).await?; + + let probe = self + .db_datastore + .probe_create(opctx, &authz_project, new_probe_params) + .await?; + + let (.., sled) = + self.sled_lookup(opctx, &new_probe_params.sled)?.fetch().await?; + + let boundary_switches = + self.boundary_switches(&self.opctx_alloc).await?; + + for switch in &boundary_switches { + let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "could not find dpd client for {switch}" + )) + })?; + self.probe_ensure_dpd_config( + opctx, + probe.id(), + sled.ip.into(), + None, + dpd_client, + ) + .await?; + } + + Ok(probe) + } + + pub(crate) async fn probe_delete( + &self, + opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + name_or_id: NameOrId, + ) -> DeleteResult { + let (.., authz_project) = + project_lookup.lookup_for(authz::Action::CreateChild).await?; + self.db_datastore.probe_delete(opctx, &authz_project, &name_or_id).await + } +} diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index a0dcb7fcb17..aa0903dc150 100644 --- a/nexus/src/app/rack.rs +++
b/nexus/src/app/rack.rs @@ -863,8 +863,9 @@ impl super::Nexus { ntp_servers: Vec::new(), //TODO rack_network_config: Some(RackNetworkConfigV1 { rack_subnet: subnet, - //TODO(ry) you are here. We need to remove these too. They are - // inconsistent with a generic set of addresses on ports. + //TODO: We need to remove these. They are inconsistent with + // a generic set of addresses on ports that may not be + // contiguous. infra_ip_first: Ipv4Addr::UNSPECIFIED, infra_ip_last: Ipv4Addr::UNSPECIFIED, ports, diff --git a/nexus/src/app/sagas/project_create.rs b/nexus/src/app/sagas/project_create.rs index 40acc822c08..b31dd821f03 100644 --- a/nexus/src/app/sagas/project_create.rs +++ b/nexus/src/app/sagas/project_create.rs @@ -245,7 +245,6 @@ mod test { .filter(dsl::collection_type.eq(nexus_db_queries::db::model::CollectionTypeProvisioned::Project.to_string())) // ignore built-in services project .filter(dsl::id.ne(*SERVICES_PROJECT_ID)) - .select(VirtualProvisioningCollection::as_select()) .get_results_async::(&conn) .await diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index b9f0f94fa0d..d94aafded63 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -40,9 +40,7 @@ impl super::Nexus { ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - //TODO(ry) race conditions on exists check versus update/create. - // Normally I would use a DB lock here, but not sure what - // the Omicron way of doing things here is. + //TODO race conditions on exists check versus update/create. 
match self .db_datastore diff --git a/nexus/src/app/vpc.rs b/nexus/src/app/vpc.rs index c47f499c419..d4a78968b75 100644 --- a/nexus/src/app/vpc.rs +++ b/nexus/src/app/vpc.rs @@ -28,10 +28,9 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use omicron_common::api::external::Vni; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::internal::nexus::HostIdentifier; -use sled_agent_client::types::NetworkInterface; +use omicron_common::api::internal::shared::NetworkInterface; use futures::future::join_all; use ipnetwork::IpNetwork; @@ -480,7 +479,7 @@ impl super::Nexus { let mut nics = HashSet::new(); let mut targets = Vec::with_capacity(rule.targets.len()); let mut push_target_nic = |nic: &NetworkInterface| { - if nics.insert((*nic.vni, (*nic.mac).clone())) { + if nics.insert((nic.vni, *nic.mac)) { targets.push(nic.clone()); } }; @@ -589,10 +588,8 @@ impl super::Nexus { .unwrap_or(&no_interfaces) { host_addrs.push( - HostIdentifier::Vpc(Vni::try_from( - *interface.vni, - )?) 
- .into(), + HostIdentifier::Vpc(interface.vni) + .into(), ) } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index dde641a4adc..fe469bc6a0e 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -38,13 +38,13 @@ use dropshot::{ channel, endpoint, WebsocketChannelResult, WebsocketConnection, }; use ipnetwork::IpNetwork; -use nexus_db_queries::authz; use nexus_db_queries::db; use nexus_db_queries::db::identity::AssetIdentityMetadata; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; +use nexus_db_queries::{authz, db::datastore::ProbeInfo}; use nexus_db_queries::{ authz::ApiResource, db::fixed_data::silo::INTERNAL_SILO_ID, }; @@ -53,6 +53,7 @@ use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; +use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::PaginatedByName; use omicron_common::api::external::http_pagination::PaginatedByNameOrId; @@ -76,6 +77,7 @@ use omicron_common::api::external::InstanceNetworkInterface; use omicron_common::api::external::InternalContext; use omicron_common::api::external::LoopbackAddress; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::Probe; use omicron_common::api::external::RouterRoute; use omicron_common::api::external::RouterRouteKind; use omicron_common::api::external::SwitchPort; @@ -347,6 +349,11 @@ pub(crate) fn external_api() -> NexusApiDescription { 
api.register(device_auth::device_auth_confirm)?; api.register(device_auth::device_access_token)?; + api.register(probe_list)?; + api.register(probe_view)?; + api.register(probe_create)?; + api.register(probe_delete)?; + Ok(()) } @@ -5820,6 +5827,125 @@ async fn current_user_ssh_key_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// List instrumentation probes. +#[endpoint { + method = GET, + path = "/v1/probes", + tags = ["probes"], +}] +async fn probe_list( + rqctx: RequestContext>, + query_params: Query>, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + let nexus = &apictx.nexus; + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + let project_lookup = + nexus.project_lookup(&opctx, scan_params.selector.clone())?; + + let probes = + nexus.probe_list(&opctx, &project_lookup, &paginated_by).await?; + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + probes, + &|_, p: &ProbeInfo| match paginated_by { + PaginatedBy::Id(_) => NameOrId::Id(p.id), + PaginatedBy::Name(_) => NameOrId::Name(p.name.clone().into()), + }, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// View an instrumentation probe. 
+#[endpoint { + method = GET, + path = "/v1/probes/{probe}", + tags = ["probes"], +}] +async fn probe_view( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = nexus.project_lookup(&opctx, project_selector)?; + let probe = + nexus.probe_get(&opctx, &project_lookup, path.probe).await?; + Ok(HttpResponseOk(probe)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Create an instrumentation probe. +#[endpoint { + method = POST, + path = "/v1/probes", + tags = ["probes"], +}] +async fn probe_create( + rqctx: RequestContext>, + query_params: Query, + new_probe: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let nexus = &apictx.nexus; + let new_probe_params = &new_probe.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = nexus.project_lookup(&opctx, project_selector)?; + let probe = nexus + .probe_create(&opctx, &project_lookup, &new_probe_params) + .await?; + Ok(HttpResponseCreated(probe.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Delete an instrumentation probe. 
+#[endpoint { + method = DELETE, + path = "/v1/probes/{probe}", + tags = ["probes"], +}] +async fn probe_delete( + rqctx: RequestContext>, + query_params: Query, + path_params: Path, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let project_selector = query_params.into_inner(); + let project_lookup = nexus.project_lookup(&opctx, project_selector)?; + nexus.probe_delete(&opctx, &project_lookup, path.probe).await?; + Ok(HttpResponseDeleted()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + #[cfg(test)] mod test { use super::external_api; diff --git a/nexus/src/external_api/tag-config.json b/nexus/src/external_api/tag-config.json index 3bc8006cee8..f56e1a9c9b6 100644 --- a/nexus/src/external_api/tag-config.json +++ b/nexus/src/external_api/tag-config.json @@ -86,6 +86,12 @@ "url": "http://docs.oxide.computer/api/vpcs" } }, + "probes": { + "description": "Probes for testing network connectivity", + "external_docs": { + "url": "http://docs.oxide.computer/api/probes" + } + }, "system/status": { "description": "Endpoints related to system health", "external_docs": { diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 9a209118930..7632ca77032 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -25,6 +25,8 @@ use dropshot::ResultsPage; use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; + +use nexus_db_queries::db::datastore::ProbeInfo; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -74,6 +76,8 @@ pub(crate) fn internal_api() -> NexusApiDescription { 
api.register(bgtask_list)?; api.register(bgtask_view)?; + api.register(probes_get)?; + Ok(()) } @@ -568,7 +572,7 @@ struct RpwNatQueryParam { /// change or until the `limit` is reached. If there are no changes, an /// empty vec is returned. #[endpoint { - method = GET, + method = GET, path = "/nat/ipv4/changeset/{from_gen}" }] async fn ipv4_nat_changeset( @@ -591,3 +595,32 @@ async fn ipv4_nat_changeset( }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } + +/// Path parameters for probes +#[derive(Deserialize, JsonSchema)] +struct ProbePathParam { + sled: Uuid, +} + +#[endpoint { + method = GET, + path = "/probes/{sled}" +}] +async fn probes_get( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let query = query_params.into_inner(); + let path = path_params.into_inner(); + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let pagparams = data_page_params_for(&rqctx, &query)?; + Ok(HttpResponseOk( + nexus.probe_list_for_sled(&opctx, &pagparams, path.sled).await?, + )) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 44b65fa67ba..dccc8890c9c 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -605,12 +605,7 @@ async fn test_instance_start_creates_networking_state( // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. 
if agent.id != sled_id { - assert_sled_v2p_mappings( - agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } else { assert!(agent.v2p_mappings.lock().await.is_empty()); } @@ -804,12 +799,8 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // all mappings explicitly (without skipping the instance's current // sled) this bifurcation should be removed. if sled_agent.id != original_sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } else { assert!(sled_agent.v2p_mappings.lock().await.is_empty()); } @@ -857,12 +848,8 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // agent will have updated any mappings there. Remove this bifurcation // when Nexus programs all mappings explicitly. if sled_agent.id != dst_sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } } } @@ -4099,12 +4086,8 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. 
if sled_agent.id != sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } else { assert!(sled_agent.v2p_mappings.lock().await.is_empty()); } diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 6cb99b9e458..c086d6a4be9 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -23,6 +23,7 @@ mod metrics; mod oximeter; mod pantry; mod password_login; +mod probe; mod projects; mod quotas; mod rack; diff --git a/nexus/tests/integration_tests/probe.rs b/nexus/tests/integration_tests/probe.rs new file mode 100644 index 00000000000..cf6c3f3387c --- /dev/null +++ b/nexus/tests/integration_tests/probe.rs @@ -0,0 +1,119 @@ +use dropshot::HttpErrorResponseBody; +use http::{Method, StatusCode}; +use nexus_db_queries::db::datastore::ProbeInfo; +use nexus_test_utils::{ + http_testing::{AuthnMode, NexusRequest}, + resource_helpers::{create_project, populate_ip_pool}, + SLED_AGENT_UUID, +}; +use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::params::ProbeCreate; +use omicron_common::api::external::{IdentityMetadataCreateParams, Probe}; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +#[nexus_test] +async fn test_probe_basic_crud(ctx: &ControlPlaneTestContext) { + let client = &ctx.external_client; + + populate_ip_pool(&client, "default", None).await; + create_project(&client, "nebula").await; + + let probes = NexusRequest::iter_collection_authn::( + client, + "/v1/probes?project=nebula", + "", + None, + ) + .await + .expect("Failed to list probes") + .all_items; + + assert_eq!(probes.len(), 0, "Expected zero probes"); + + let params = ProbeCreate { + identity: IdentityMetadataCreateParams { + name: "class1".parse().unwrap(), + description: "subspace relay probe".to_owned(), + }, + ip_pool: None, + sled: 
SLED_AGENT_UUID.parse().unwrap(), + }; + + let created: Probe = NexusRequest::objects_post( + client, + "/v1/probes?project=nebula", + &params, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + let probes = NexusRequest::iter_collection_authn::( + client, + "/v1/probes?project=nebula", + "", + None, + ) + .await + .expect("Failed to list probes") + .all_items; + + assert_eq!(probes.len(), 1, "Expected one probe"); + assert_eq!(probes[0].id, created.identity.id); + + let error: HttpErrorResponseBody = NexusRequest::expect_failure( + client, + StatusCode::NOT_FOUND, + Method::GET, + "/v1/probes/class2?project=nebula", + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "not found: probe with name \"class2\""); + + NexusRequest::object_get(client, "/v1/probes/class1?project=nebula") + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to view probe") + .parsed_body::() + .expect("failed to parse probe info"); + + let fetched: ProbeInfo = + NexusRequest::object_get(client, "/v1/probes/class1?project=nebula") + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + assert_eq!(fetched.id, created.identity.id); + + NexusRequest::object_delete(client, "/v1/probes/class1?project=nebula") + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + let probes = NexusRequest::iter_collection_authn::( + client, + "/v1/probes?project=nebula", + "", + None, + ) + .await + .expect("Failed to list probes after delete") + .all_items; + + assert_eq!(probes.len(), 0, "Expected zero probes"); +} diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index b607bbf1f34..1c9b7ba3bc7 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -69,6 +69,13 @@ OPERATION ID METHOD URL PATH
system_policy_update PUT /v1/system/policy system_policy_view GET /v1/system/policy +API operations found with tag "probes" +OPERATION ID METHOD URL PATH +probe_create POST /v1/probes +probe_delete DELETE /v1/probes/{probe} +probe_list GET /v1/probes +probe_view GET /v1/probes/{probe} + API operations found with tag "projects" OPERATION ID METHOD URL PATH project_create POST /v1/projects diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index d76d9c54954..2e7e297be7f 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -1,8 +1,12 @@ API endpoints with no coverage in authz tests: +probe_delete (delete "/v1/probes/{probe}") ping (get "/v1/ping") +probe_list (get "/v1/probes") +probe_view (get "/v1/probes/{probe}") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") login_saml (post "/login/{silo_name}/saml/{provider_name}") login_local (post "/v1/login/{silo_name}/local") logout (post "/v1/logout") +probe_create (post "/v1/probes") diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 6749794a9a8..906115b46fe 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -80,6 +80,7 @@ path_param!(ProviderPath, provider, "SAML identity provider"); path_param!(IpPoolPath, pool, "IP pool"); path_param!(SshKeyPath, ssh_key, "SSH key"); path_param!(AddressLotPath, address_lot, "address lot"); +path_param!(ProbePath, probe, "probe"); id_path_param!(GroupPath, group_id, "group"); @@ -1939,3 +1940,20 @@ pub struct UpdateableComponentCreate { pub component_type: shared::UpdateableComponentType, pub device_id: String, } + +// Probes + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct ProbeCreate { + #[serde(flatten)] + pub identity: IdentityMetadataCreateParams, + pub 
sled: Uuid, + pub ip_pool: Option, +} + +/// Select probes by an optional name or id. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct ProbeListSelector { + /// A name or id to use when selecting a probe. + pub name_or_id: Option, +} diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index bc25e8d4bda..ffcb6f8e2c7 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -183,6 +183,7 @@ pub enum ServiceKind { BoundaryNtp { snat: SourceNatConfig, nic: ServiceNic }, InternalNtp, Mgd, + Probe { nic: ServiceNic }, } impl fmt::Display for ServiceKind { @@ -202,6 +203,7 @@ impl fmt::Display for ServiceKind { CruciblePantry => "crucible_pantry", BoundaryNtp { .. } | InternalNtp => "ntp", Mgd => "mgd", + Probe { .. } => "probe", }; write!(f, "{}", s) } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index a1d70d838b1..b1b981f6e20 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -433,6 +433,74 @@ } } }, + "/probes/{sled}": { + "get": { + "operationId": "probes_get", + "parameters": [ + { + "in": "path", + "name": "sled", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ProbeInfo", + "type": "array", + "items": { + "$ref":
"#/components/schemas/ProbeInfo" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/racks/{rack_id}/initialization-complete": { "put": { "summary": "Report that the Rack Setup Service initialization is complete", @@ -2998,6 +3066,34 @@ "request_id" ] }, + "ExternalIp": { + "type": "object", + "properties": { + "first_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/components/schemas/IpKind" + }, + "last_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "first_port", + "ip", + "kind", + "last_port" + ] + }, "ExternalPortDiscovery": { "oneOf": [ { @@ -3825,6 +3921,34 @@ } ] }, + "IpKind": { + "type": "string", + "enum": [ + "snat", + "floating", + "ephemeral" + ] + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv6Net" + } + ] + } + ] + }, "IpNetwork": { "oneOf": [ { @@ -3912,6 +4036,13 @@ "vni" ] }, + "Ipv4Net": { + "example": "192.168.1.0/24", + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and subnet mask", + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$" + }, "Ipv4Network": { "type": "string", "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" @@ -3934,6 +4065,13 @@ "last" ] }, + "Ipv6Net": { + "example": "fd12:3456::/64", + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "type": "string", + "pattern": 
"^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$" + }, "Ipv6Network": { "type": "string", "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$" @@ -4272,6 +4410,119 @@ "minLength": 1, "maxLength": 63 }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/components/schemas/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ] + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { 
+ "type": "string", + "enum": [ + "instance" + ] + } + }, + "required": [ + "id", + "type" + ] + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + }, + "required": [ + "id", + "type" + ] + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + }, + "required": [ + "id", + "type" + ] + } + ] + }, "NewPasswordHash": { "title": "A password hash in PHC string format", "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", @@ -4455,6 +4706,38 @@ "speed400_g" ] }, + "ProbeInfo": { + "type": "object", + "properties": { + "external_ips": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ExternalIp" + } + }, + "id": { + "type": "string", + "format": "uuid" + }, + "interface": { + "$ref": "#/components/schemas/NetworkInterface" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "sled": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "external_ips", + "id", + "interface", + "name", + "sled" + ] + }, "ProducerEndpoint": { "description": "Information announced by a metric server, used so that clients can contact it and collect available metric data from it.", "type": "object", @@ -5306,6 +5589,32 @@ "required": [ "type" ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "nic": { + "$ref": "#/components/schemas/ServiceNic" + } + }, + "required": [ + "nic" + ] + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + }, + "required": [ + "content", + "type" + ] } ] }, diff --git 
a/openapi/nexus.json b/openapi/nexus.json index 815cc399aee..9852a6bc180 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3055,6 +3055,206 @@ } } }, + "/v1/probes": { + "get": { + "tags": [ + "probes" + ], + "summary": "List instrumentation probes.", + "operationId": "probe_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProbeInfoResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "project" + ] + } + }, + "post": { + "tags": [ + "probes" + ], + "summary": "Create an instrumentation probe.", + "operationId": "probe_create", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProbeCreate" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Probe" + 
} + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/probes/{probe}": { + "get": { + "tags": [ + "probes" + ], + "summary": "View an instrumentation probe.", + "operationId": "probe_view", + "parameters": [ + { + "in": "path", + "name": "probe", + "description": "Name or ID of the probe", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProbeInfo" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "probes" + ], + "summary": "Delete an instrumentation probe.", + "operationId": "probe_delete", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "path", + "name": "probe", + "description": "Name or ID of the probe", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/projects": { "get": { "tags": [ @@ -12862,6 +13062,119 @@ } ] }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": 
"#/components/schemas/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ] + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + }, + "required": [ + "id", + "type" + ] + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + }, + "required": [ + "id", + "type" + ] + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + }, + "required": [ + "id", + "type" + ] + } + ] + }, "Password": { "title": "A password used to authenticate a user", "description": "Passwords may be subject to additional constraints.", @@ -12967,6 +13280,133 @@ "ok" ] }, + "Probe": { + "description": "Identity-related metadata that's included in nearly all public API objects", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "name": { + "description": "unique, 
mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "sled": { + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "name", + "sled", + "time_created", + "time_modified" + ] + }, + "ProbeCreate": { + "description": "Create-time identity-related parameters", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "ip_pool": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "sled": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "description", + "name", + "sled" + ] + }, + "ProbeInfo": { + "type": "object", + "properties": { + "external_ips": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ExternalIp" + } + }, + "id": { + "type": "string", + "format": "uuid" + }, + "interface": { + "$ref": "#/components/schemas/NetworkInterface" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "sled": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "external_ips", + "id", + "interface", + "name", + "sled" + ] + }, + "ProbeInfoResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/ProbeInfo" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "Project": { "description": "View of a Project", "type": "object", @@ 
-15199,6 +15639,12 @@ "storage" ] }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "Vpc": { "description": "View of a VPC", "type": "object", @@ -16092,6 +16538,13 @@ "url": "http://docs.oxide.computer/api/policy" } }, + { + "name": "probes", + "description": "Probes for testing network connectivity", + "externalDocs": { + "url": "http://docs.oxide.computer/api/probes" + } + }, { "name": "projects", "description": "Projects are a grouping of associated resources such as instances and disks within a silo for purposes of billing and access control.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 467fd32cb84..a9246b02105 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5340,6 +5340,26 @@ "id", "type" ] + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + }, + "required": [ + "id", + "type" + ] } ] }, diff --git a/package-manifest.toml b/package-manifest.toml index 6bd40c320d7..1c4a478634d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -548,6 +548,15 @@ output.type = "zone" output.intermediate_only = true setup_hint = "Run `./tools/ci_download_transceiver_control` to download the necessary binaries" +[package.thundermuffin] +service_name = "thundermuffin" +source.type = "prebuilt" +source.repo = "thundermuffin" +source.commit = "a4a6108d7c9aac2464a0b6898e88132a8f701a13" +source.sha256 = "dc55a2accd33a347df4cbdc0026cbaccea2c004940c3fec8cadcdd633d440dfa" +output.type = "zone" +output.intermediate_only = true + # To package and install the asic variant of the switch, do: # # $ cargo run --release --bin omicron-package -- -t default target create -i standard -m gimlet -s asic @@ -613,3 +622,11 @@ source.packages = [ "sp-sim-softnpu.tar.gz" ] output.type = "zone" + 
+[package.probe] +service_name = "probe" +source.type = "composite" +source.packages = [ + "thundermuffin.tar.gz", +] +output.type = "zone" diff --git a/schema/all-zone-requests.json b/schema/all-zone-requests.json index 8c324a15bd4..e37fbfde590 100644 --- a/schema/all-zone-requests.json +++ b/schema/all-zone-requests.json @@ -302,6 +302,26 @@ ] } } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } } ] }, diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 7a07e2f9aeb..0ac9e760a83 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -186,6 +186,26 @@ ] } } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } } ] }, diff --git a/schema/crdb/22.0.0/up1.sql b/schema/crdb/22.0.0/up1.sql new file mode 100644 index 00000000000..7fc8c017139 --- /dev/null +++ b/schema/crdb/22.0.0/up1.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS omicron.public.probe ( + id UUID NOT NULL PRIMARY KEY, + name STRING(63) NOT NULL, + description STRING(512) NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + project_id UUID NOT NULL, + sled UUID NOT NULL +); diff --git a/schema/crdb/22.0.0/up2.sql b/schema/crdb/22.0.0/up2.sql new file mode 100644 index 00000000000..6c070463a47 --- /dev/null +++ b/schema/crdb/22.0.0/up2.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_probe_by_name ON omicron.public.probe ( + name +) WHERE + time_deleted IS NULL; diff --git a/schema/crdb/22.0.0/up3.sql b/schema/crdb/22.0.0/up3.sql new file mode 100644 index 
00000000000..3b71ba43137 --- /dev/null +++ b/schema/crdb/22.0.0/up3.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.external_ip ADD COLUMN IF NOT EXISTS is_probe BOOL NOT NULL DEFAULT false; diff --git a/schema/crdb/22.0.0/up4.sql b/schema/crdb/22.0.0/up4.sql new file mode 100644 index 00000000000..6c989cf8c88 --- /dev/null +++ b/schema/crdb/22.0.0/up4.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.network_interface_kind ADD VALUE IF NOT EXISTS 'probe'; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index cc611480483..3018f20d896 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3089,6 +3089,26 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; +CREATE TABLE IF NOT EXISTS omicron.public.probe ( + id UUID NOT NULL PRIMARY KEY, + name STRING(63) NOT NULL, + description STRING(512) NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + project_id UUID NOT NULL, + sled UUID NOT NULL +); + +CREATE UNIQUE INDEX IF NOT EXISTS lookup_probe_by_name ON omicron.public.probe ( + name +) WHERE + time_deleted IS NULL; + +ALTER TABLE omicron.public.external_ip ADD COLUMN IF NOT EXISTS is_probe BOOL NOT NULL DEFAULT false; + +ALTER TYPE omicron.public.network_interface_kind ADD VALUE IF NOT EXISTS 'probe'; + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3096,7 +3116,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '21.0.0', NULL) + ( TRUE, NOW(), NOW(), '22.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-service-plan-v2.json b/schema/rss-service-plan-v2.json index 62ce358938a..0874aaace52 100644 --- a/schema/rss-service-plan-v2.json +++ b/schema/rss-service-plan-v2.json @@ -271,6 +271,26 @@ ] } } + }, + { + "description": "A vNIC associated with a probe", + "type": 
"object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } } ] }, diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 527b483ee81..61c0bcf3d24 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -30,6 +30,7 @@ mod long_running_tasks; mod metrics; mod nexus; pub mod params; +mod probe_manager; mod profile; pub mod rack_setup; pub mod server; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index a7d91e2b932..b3170767360 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -701,7 +701,7 @@ impl From for sled_agent_client::types::OmicronZoneType { domain, ntp_servers, snat_cfg: snat_cfg.into(), - nic: nic.into(), + nic: nic, }, OmicronZoneType::Clickhouse { address, dataset } => { Other::Clickhouse { @@ -737,7 +737,7 @@ impl From for sled_agent_client::types::OmicronZoneType { dataset: dataset.into(), http_address: http_address.to_string(), dns_address: dns_address.to_string(), - nic: nic.into(), + nic: nic, }, OmicronZoneType::InternalDns { dataset, @@ -774,7 +774,7 @@ impl From for sled_agent_client::types::OmicronZoneType { external_ip, external_tls, internal_address: internal_address.to_string(), - nic: nic.into(), + nic: nic, }, OmicronZoneType::Oximeter { address } => { Other::Oximeter { address: address.to_string() } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs new file mode 100644 index 00000000000..d576b822638 --- /dev/null +++ b/sled-agent/src/probe_manager.rs @@ -0,0 +1,342 @@ +use crate::nexus::NexusClientWithResolver; +use anyhow::{anyhow, Result}; +use illumos_utils::dladm::Etherstub; +use illumos_utils::link::VnicAllocator; +use illumos_utils::opte::params::VpcFirewallRule; +use illumos_utils::opte::{DhcpCfg, PortManager}; +use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; +use illumos_utils::zone::Zones; +use 
nexus_client::types::{ExternalIp, ProbeInfo}; +use omicron_common::api::external::{ + VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, + VpcFirewallRuleStatus, +}; +use omicron_common::api::internal::shared::NetworkInterface; +use rand::prelude::SliceRandom; +use rand::SeedableRng; +use sled_storage::dataset::ZONE_DATASET; +use sled_storage::manager::StorageHandle; +use slog::{error, warn, Logger}; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::Mutex; +use tokio::task::JoinHandle; +use tokio::time::sleep; +use uuid::Uuid; +use zone::Zone; + +const PROBE_ZONE_PREFIX: &str = "oxz_probe"; + +pub struct ProbeManager { + inner: Arc, +} + +pub struct ProbeManagerInner { + join_handle: Mutex>>, + nexus_client: NexusClientWithResolver, + log: Logger, + sled_id: Uuid, + vnic_allocator: VnicAllocator, + storage: StorageHandle, + port_manager: PortManager, + running_probes: Mutex>, +} + +impl ProbeManager { + pub fn new( + sled_id: Uuid, + nexus_client: NexusClientWithResolver, + etherstub: Etherstub, + storage: StorageHandle, + port_manager: PortManager, + log: Logger, + ) -> Self { + Self { + inner: Arc::new(ProbeManagerInner { + join_handle: Mutex::new(None), + vnic_allocator: VnicAllocator::new("probe", etherstub), + running_probes: Mutex::new(HashMap::new()), + nexus_client, + log, + sled_id, + storage, + port_manager, + }), + } + } + + pub async fn run(&self) { + self.inner.run().await; + } +} + +#[derive(Debug, Clone)] +struct ProbeState { + id: Uuid, + status: zone::State, + external_ips: Vec, + interface: Option, +} + +impl PartialEq for ProbeState { + fn eq(&self, other: &Self) -> bool { + self.id.eq(&other.id) + } +} + +impl Eq for ProbeState {} + +impl Hash for ProbeState { + fn hash(&self, state: &mut H) { + self.id.hash(state) + } +} + +impl From for ProbeState { + fn from(value: ProbeInfo) -> Self { + Self { + id: value.id, + status: 
zone::State::Running, + external_ips: value.external_ips, + interface: Some(value.interface), + } + } +} + +impl TryFrom for ProbeState { + type Error = String; + fn try_from(value: Zone) -> std::result::Result { + Ok(Self { + id: value + .name() + .strip_prefix(&format!("{PROBE_ZONE_PREFIX}_")) + .ok_or(String::from("not a probe prefix"))? + .parse() + .map_err(|e| format!("invalid uuid: {e}"))?, + status: value.state(), + external_ips: Vec::new(), + interface: None, + }) + } +} + +impl ProbeManagerInner { + async fn run(self: &Arc) { + let mut join_handle = self.join_handle.lock().await; + if join_handle.is_none() { + *join_handle = Some(self.clone().reconciler()) + } + } + + fn reconciler(self: Arc) -> JoinHandle<()> { + tokio::spawn(async move { + loop { + sleep(Duration::from_secs(1)).await; + + let target = match self.target_state().await { + Ok(state) => state, + Err(e) => { + error!(self.log, "get target probe state: {e}"); + continue; + } + }; + + let current = match self.current_state().await { + Ok(state) => state, + Err(e) => { + error!(self.log, "get current probe state: {e}"); + continue; + } + }; + + self.add(target.difference(¤t)).await; + self.remove(current.difference(&target)).await; + self.check(current.intersection(&target)).await; + } + }) + } + + async fn add<'a, I>(self: &Arc, probes: I) + where + I: Iterator, + { + for probe in probes { + info!(self.log, "adding probe {}", probe.id); + if let Err(e) = self.add_probe(probe).await { + error!(self.log, "add probe: {e}"); + } + } + } + + async fn add_probe(self: &Arc, probe: &ProbeState) -> Result<()> { + let mut rng = rand::rngs::StdRng::from_entropy(); + let root = self + .storage + .get_latest_resources() + .await + .all_u2_mountpoints(ZONE_DATASET) + .choose(&mut rng) + .ok_or_else(|| anyhow!("u2 not found"))? 
+ .clone(); + + let nic = probe + .interface + .as_ref() + .ok_or(anyhow!("no interface specified for probe"))?; + + let eip = probe + .external_ips + .get(0) + .ok_or(anyhow!("expected an external ip"))?; + + let port = self.port_manager.create_port( + &nic, + None, + Some(eip.ip), + &[], // floating ips + &[VpcFirewallRule { + status: VpcFirewallRuleStatus::Enabled, + direction: VpcFirewallRuleDirection::Inbound, + targets: vec![nic.clone()], + filter_hosts: None, + filter_ports: None, + filter_protocols: None, + action: VpcFirewallRuleAction::Allow, + priority: VpcFirewallRulePriority(100), + }], + DhcpCfg::default(), + )?; + + let installed_zone = ZoneBuilderFactory::default() + .builder() + .with_log(self.log.clone()) + .with_underlay_vnic_allocator(&self.vnic_allocator) + .with_zone_root_path(&root) + .with_zone_image_paths(&["/opt/oxide".into()]) + .with_zone_type("probe") + .with_unique_name(probe.id) + .with_datasets(&[]) + .with_filesystems(&[]) + .with_data_links(&[]) + .with_devices(&[]) + .with_opte_ports(vec![port]) + .with_links(vec![]) + .with_limit_priv(vec![]) + .install() + .await?; + + info!(self.log, "installed probe {}", probe.id); + + //TODO(ry) SMF properties? 
+ + let rz = RunningZone::boot(installed_zone).await?; + rz.ensure_address_for_port("overlay", 0).await?; + info!(self.log, "started probe {}", probe.id); + + self.running_probes.lock().await.insert(probe.id, rz); + + Ok(()) + } + + async fn remove<'a, I>(self: &Arc, probes: I) + where + I: Iterator, + { + for probe in probes { + info!(self.log, "removing probe {}", probe.id); + self.remove_probe(probe.id).await; + } + } + + async fn remove_probe(self: &Arc, id: Uuid) { + match self.running_probes.lock().await.remove(&id) { + Some(mut running_zone) => { + for l in running_zone.links_mut() { + if let Err(e) = l.delete() { + error!(self.log, "delete probe link {}: {e}", l.name()); + } + } + running_zone.release_opte_ports(); + if let Err(e) = running_zone.stop().await { + error!(self.log, "stop probe: {e}") + } + } + None => { + warn!(self.log, "attempt to stop non-running probe: {id}") + } + } + } + + async fn check<'a, I>(self: &Arc, probes: I) + where + I: Iterator, + { + for probe in probes { + if probe.status == zone::State::Running { + continue; + } + warn!( + self.log, + "probe {} found in unexpected state {:?}", + probe.id, + probe.status + ) + //TODO(ry) handle the hooligans here + } + } + + async fn target_state(self: &Arc) -> Result> { + Ok(self + .nexus_client + .client() + .probes_get( + &self.sled_id, + None, //limit + None, //page token + None, //sort by + ) + .await? + .into_inner() + .into_iter() + .map(Into::into) + .collect()) + } + + async fn current_state(self: &Arc) -> Result> { + Ok(Zones::get() + .await? 
+ .into_iter() + .filter_map(|z| ProbeState::try_from(z).ok()) + .collect()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use uuid::Uuid; + + #[test] + fn probe_state_set_ops() { + let a = ProbeState { + id: Uuid::new_v4(), + status: zone::State::Configured, + external_ips: Vec::new(), + interface: None, + }; + + let mut b = a.clone(); + b.status = zone::State::Running; + + let target = HashSet::from([a]); + let current = HashSet::from([b]); + + let to_add = target.difference(¤t); + let to_remove = current.difference(&target); + + assert_eq!(to_add.count(), 0); + assert_eq!(to_remove.count(), 0); + } +} diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 621d0032682..b1d6e8b2981 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -21,6 +21,7 @@ use crate::params::{ InstanceUnregisterResponse, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, }; +use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::UnderlayAccess; use crate::updates::{ConfigUpdates, UpdateManager}; @@ -268,6 +269,9 @@ struct SledAgentInner { // Handle to the traffic manager for writing OS updates to our boot disks. boot_disk_os_writer: BootDiskOsWriter, + + // Component of Sled Agent responsible for managing instrumentation probes. 
+ probes: ProbeManager, } impl SledAgentInner { @@ -525,6 +529,15 @@ impl SledAgent { endpoint, )); + let probes = ProbeManager::new( + request.body.id, + nexus_client.clone(), + etherstub.clone(), + storage_manager.clone(), + port_manager.clone(), + log.new(o!("component" => "ProbeManager")), + ); + let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, @@ -532,6 +545,7 @@ impl SledAgent { start_request: request, storage: long_running_task_handles.storage_manager.clone(), instances, + probes, hardware: long_running_task_handles.hardware_manager.clone(), updates, port_manager, @@ -554,6 +568,8 @@ impl SledAgent { log: log.clone(), }; + sled_agent.inner.probes.run().await; + // We immediately add a notification to the request queue about our // existence. If inspection of the hardware later informs us that we're // actually running on a scrimlet, that's fine, the updated value will diff --git a/tools/ci_download_maghemite_mgd b/tools/ci_download_maghemite_mgd index eff680d7fd0..cb154e46da1 100755 --- a/tools/ci_download_maghemite_mgd +++ b/tools/ci_download_maghemite_mgd @@ -27,8 +27,11 @@ ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" REPO='oxidecomputer/maghemite' PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/image/$COMMIT" + function main { + rm -rf $DOWNLOAD_DIR/root + # # Process command-line arguments. We generally don't expect any, but # we allow callers to specify a value to override OSTYPE, just for diff --git a/tools/ci_download_maghemite_openapi b/tools/ci_download_maghemite_openapi index db53f68d2c7..6bc36f150a9 100755 --- a/tools/ci_download_maghemite_openapi +++ b/tools/ci_download_maghemite_openapi @@ -15,10 +15,10 @@ TARGET_DIR="out" # Location where intermediate artifacts are downloaded / unpacked. 
DOWNLOAD_DIR="$TARGET_DIR/downloads" - - function main { + rm -rf $DOWNLOAD_DIR/root + if [[ $# != 0 ]]; then echo "unexpected arguments" >&2 exit 2 diff --git a/tools/ci_download_thundermuffin b/tools/ci_download_thundermuffin new file mode 100755 index 00000000000..53e54ddcbef --- /dev/null +++ b/tools/ci_download_thundermuffin @@ -0,0 +1,153 @@ +#!/bin/bash + +# +# ci_download_thundermuffin: fetches thundermuffin binary tarball package, +# unpacks it, and creates a copy, all in the current directory +# + +set -o pipefail +set -o xtrace +set -o errexit + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +ARG0="$(basename "${BASH_SOURCE[0]}")" + +source "$SOURCE_DIR/thundermuffin_checksums" +source "$SOURCE_DIR/thundermuffin_version" + +TARGET_DIR="out" +# Location where intermediate artifacts are downloaded / unpacked. +DOWNLOAD_DIR="$TARGET_DIR/downloads" +# Location where the final thundermuffin directory should end up. +DEST_DIR="./$TARGET_DIR/thundermuffin" +BIN_DIR="$DEST_DIR/root/opt/oxide/thundermuffin/bin" + +ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" + +REPO='oxidecomputer/thundermuffin' +PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/image/$COMMIT" + +function main +{ + rm -rf $DOWNLOAD_DIR/root + + # + # Process command-line arguments. We generally don't expect any, but + # we allow callers to specify a value to override OSTYPE, just for + # testing. + # + if [[ $# != 0 ]]; then + CIDL_OS="$1" + shift + else + CIDL_OS="$OSTYPE" + fi + + if [[ $# != 0 ]]; then + echo "unexpected arguments" >&2 + exit 2 + fi + + # Configure this program + configure_os "$CIDL_OS" + + CIDL_SHA256FUNC="do_sha256sum" + TARBALL_FILENAME="thundermuffin.tar.gz" + PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" + TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" + + # Download the file.
+ echo "URL: $PACKAGE_URL" + echo "Local file: $TARBALL_FILE" + + mkdir -p "$DOWNLOAD_DIR" + mkdir -p "$DEST_DIR" + + fetch_and_verify + + do_untar "$TARBALL_FILE" + + do_assemble + + $SET_BINARIES +} + +function fail +{ + echo "$ARG0: $@" >&2 + exit 1 +} + +function configure_os +{ + echo "current directory: $PWD" + echo "configuring based on OS: \"$1\"" + case "$1" in + solaris*) + SET_BINARIES="" + ;; + *) + echo "WARNING: binaries for $1 are not published by thundermuffin" + SET_BINARIES="unsupported_os" + ;; + esac +} + +function do_download_curl +{ + curl --silent --show-error --fail --location --output "$2" "$1" +} + +function do_sha256sum +{ + sha256sum < "$1" | awk '{print $1}' +} + +function do_untar +{ + tar xzf "$1" -C "$DOWNLOAD_DIR" +} + +function do_assemble +{ + rm -r "$DEST_DIR" || true + mkdir "$DEST_DIR" + cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" +} + +function fetch_and_verify +{ + local DO_DOWNLOAD="true" + if [[ -f "$TARBALL_FILE" ]]; then + # If the file exists with a valid checksum, we can skip downloading. + calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ + fail "failed to calculate sha256sum" + if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then + DO_DOWNLOAD="false" + fi + fi + + if [ "$DO_DOWNLOAD" == "true" ]; then + echo "Downloading..." + do_download_curl "$PACKAGE_URL" "$TARBALL_FILE" || \ + fail "failed to download file" + + # Verify the sha256sum. 
+ calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ + fail "failed to calculate sha256sum" + if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then + fail "sha256sum mismatch \ + (expected $CIDL_SHA256, found $calculated_sha256)" + fi + fi + +} + +function unsupported_os +{ + mkdir -p "$BIN_DIR" + echo "echo 'unsupported os' && exit 1" >> "$BIN_DIR/thundermuffin" + chmod +x "$BIN_DIR/thundermuffin" +} + +main "$@" diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index 1ce133dff3d..ffa0c78e991 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -207,6 +207,9 @@ retry ./tools/ci_download_maghemite_mgd # xcvradm binary which is bundled with the switch zone. retry ./tools/ci_download_transceiver_control +# Download thundermuffin. This is required to launch network probes. +retry ./tools/ci_download_thundermuffin + # Validate the PATH: expected_in_path=( 'pg_config' diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh index 42347f518d7..2a29e97085f 100755 --- a/tools/install_runner_prerequisites.sh +++ b/tools/install_runner_prerequisites.sh @@ -120,8 +120,6 @@ function install_packages { exit "$rc" fi - pfexec svcadm enable chrony - pkg list -v "${packages[@]}" elif [[ "${HOST_OS}" == "Linux" ]]; then packages=( diff --git a/tools/thundermuffin_checksums b/tools/thundermuffin_checksums new file mode 100644 index 00000000000..5e10539bdd5 --- /dev/null +++ b/tools/thundermuffin_checksums @@ -0,0 +1 @@ +CIDL_SHA256="dc55a2accd33a347df4cbdc0026cbaccea2c004940c3fec8cadcdd633d440dfa" diff --git a/tools/thundermuffin_version b/tools/thundermuffin_version new file mode 100644 index 00000000000..cbca739f5c8 --- /dev/null +++ b/tools/thundermuffin_version @@ -0,0 +1 @@ +COMMIT="a4a6108d7c9aac2464a0b6898e88132a8f701a13" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8998f7594ba..2e7a0dd01c0 100644 --- a/workspace-hack/Cargo.toml +++
b/workspace-hack/Cargo.toml @@ -27,8 +27,8 @@ byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } -clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.4.3", features = ["derive", "env", "string", "wrap_help"] } +clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "string", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } @@ -41,13 +41,13 @@ either = { version = "1.9.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } -futures = { version = "0.3.29" } -futures-channel = { version = "0.3.29", features = ["sink"] } -futures-core = { version = "0.3.29" } -futures-io = { version = "0.3.29", default-features = false, features = ["std"] } -futures-sink = { version = "0.3.29" } -futures-task = { version = "0.3.29", default-features = false, features = ["std"] } -futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } +futures = { version = "0.3.30" } +futures-channel = { version = "0.3.30", features = ["sink"] } +futures-core = { version = "0.3.30" } +futures-io = { version = "0.3.30", default-features = false, features = ["std"] } +futures-sink = { version = "0.3.30" } +futures-task = { version = "0.3.30", default-features = false, features = ["std"] } +futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] } gateway-messages = { 
git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } @@ -77,13 +77,13 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } -proc-macro2 = { version = "1.0.69" } +proc-macro2 = { version = "1.0.71" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } -reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } +reqwest = { version = "0.11.23", features = ["blocking", "cookies", "json", "native-tls-vendored", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } @@ -93,6 +93,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ 
-130,8 +131,8 @@ byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } -clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.4.3", features = ["derive", "env", "string", "wrap_help"] } +clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "string", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } @@ -144,13 +145,13 @@ either = { version = "1.9.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } -futures = { version = "0.3.29" } -futures-channel = { version = "0.3.29", features = ["sink"] } -futures-core = { version = "0.3.29" } -futures-io = { version = "0.3.29", default-features = false, features = ["std"] } -futures-sink = { version = "0.3.29" } -futures-task = { version = "0.3.29", default-features = false, features = ["std"] } -futures-util = { version = "0.3.29", features = ["channel", "io", "sink"] } +futures = { version = "0.3.30" } +futures-channel = { version = "0.3.30", features = ["sink"] } +futures-core = { version = "0.3.30" } +futures-io = { version = "0.3.30", default-features = false, features = ["std"] } +futures-sink = { version = "0.3.30" } +futures-task = { version = "0.3.30", default-features = false, features = ["std"] } +futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] } gateway-messages = { git = 
"https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } @@ -180,13 +181,13 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } -proc-macro2 = { version = "1.0.69" } +proc-macro2 = { version = "1.0.71" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.2" } regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } -reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } +reqwest = { version = "0.11.23", features = ["blocking", "cookies", "json", "native-tls-vendored", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.13", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.20", features = ["serde"] } @@ -196,6 +197,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ 
-224,14 +226,20 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } +openssl = { version = "0.10.60", features = ["vendored"] } +openssl-sys = { version = "0.9.96", default-features = false, features = ["vendored"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } +openssl = { version = "0.10.60", features = ["vendored"] } +openssl-sys = { version = "0.9.96", default-features = false, features = ["vendored"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] @@ -239,6 +247,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -247,6 +256,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = 
["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -255,6 +265,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -263,6 +274,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -271,7 +283,10 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } +openssl = { version = "0.10.60", features = ["vendored"] } +openssl-sys = { version = "0.9.96", default-features = false, features = ["vendored"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = 
["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } @@ -281,7 +296,10 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f errno = { version = "0.3.2", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } +native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } once_cell = { version = "1.19.0", features = ["unstable"] } +openssl = { version = "0.10.60", features = ["vendored"] } +openssl-sys = { version = "0.9.96", default-features = false, features = ["vendored"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] }