diff --git a/.config/nextest.toml b/.config/nextest.toml index b2a8b360bb..ba07186c8a 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -8,7 +8,9 @@ nextest-version = { required = "0.9.59", recommended = "0.9.59" } experimental = ["setup-scripts"] [[profile.default.scripts]] -filter = 'rdeps(nexus-test-utils)' +# Exclude omicron-dev tests from crdb-seed as we explicitly want to simulate an +# environment where the seed file doesn't exist. +filter = 'rdeps(nexus-test-utils) - package(omicron-dev)' setup = 'crdb-seed' [profile.ci] diff --git a/.github/buildomat/config.toml b/.github/buildomat/config.toml index 922de631f2..419173fa50 100644 --- a/.github/buildomat/config.toml +++ b/.github/buildomat/config.toml @@ -17,5 +17,6 @@ org_only = true allow_users = [ "dependabot[bot]", "oxide-reflector-bot[bot]", + "oxide-renovate[bot]", "renovate[bot]", ] diff --git a/.github/renovate.json b/.github/renovate.json new file mode 100644 index 0000000000..405a3e282b --- /dev/null +++ b/.github/renovate.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "local>oxidecomputer/renovate-config", + "local>oxidecomputer/renovate-config//rust/autocreate", + "local>oxidecomputer/renovate-config:post-upgrade", + "helpers:pinGitHubActionDigests" + ] +} diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index 3b57f2795f..a8e18f080e 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -9,7 +9,7 @@ jobs: check-opte-ver: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 9611c4103c..521afa7359 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -10,6 +10,6 @@ jobs: check-workspace-deps: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Check Workspace Dependencies run: cargo xtask check-workspace-deps diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d79196d318..df4cbc9b59 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -17,21 +17,21 @@ jobs: env: RUSTFLAGS: -D warnings steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4 + - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@e659bf85ee986e37e35cc1c53bfeebe044d8133e # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: generate --diff - name: Check all crates depend on workspace-hack - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: manage-deps --dry-run diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f5cf1dc885..873b316e16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,7 @@ jobs: check-style: runs-on: ubuntu-22.04 steps: 
- - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -27,8 +27,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -53,8 +53,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -79,8 +79,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version diff --git a/.github/workflows/update-dendrite.yml b/.github/workflows/update-dendrite.yml index 86049dcafc..10d8ef7618 100644 --- a/.github/workflows/update-dendrite.yml +++ b/.github/workflows/update-dendrite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index 07fe329af3..7aa2b8b6c8 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 06fc7526a8..1d6c152296 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -10,8 +10,8 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 - - uses: actions/setup-node@v3.6.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/setup-node@64ed1c7eab4cce3362f8c340dee64e5eaeef8f7c # v3.6.0 with: node-version: '18' - name: Install our tools diff --git a/Cargo.lock b/Cargo.lock index 18e0e15c3b..d58ba77133 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -283,7 +283,7 @@ dependencies = [ "anstyle", "bstr 
1.6.0", "doc-comment", - "predicates 3.0.3", + "predicates 3.0.4", "predicates-core", "predicates-tree", "wait-timeout", @@ -366,6 +366,17 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" +[[package]] +name = "atomicwrites" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1163d9d7c51de51a2b79d6df5e8888d11e9df17c752ce4a285fb6ca1580734e" +dependencies = [ + "rustix 0.37.23", + "tempfile", + "windows-sys 0.48.0", +] + [[package]] name = "atty" version = "0.2.14" @@ -485,7 +496,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "bhyve_api_sys", "libc", @@ -495,7 +506,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "strum", @@ -1225,7 +1236,7 @@ dependencies = [ [[package]] name = "cpuid_profile_config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "propolis", "serde", @@ -1268,13 +1279,10 @@ dependencies = [ name = "crdb-seed" version = "0.1.0" dependencies = [ - "camino", - "camino-tempfile", + "anyhow", "dropshot", - "hex", "omicron-test-utils", "omicron-workspace-hack", - "ring", "slog", "tokio", ] @@ -2016,7 +2024,7 @@ checksum = "7e1a8646b2c125eeb9a84ef0faa6d2d102ea0d5da60b824ade2743263117b848" [[package]] name = "dladm" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "strum", @@ -5338,11 +5346,15 @@ name = "omicron-test-utils" version = "0.1.0" dependencies = [ "anyhow", + "atomicwrites", "camino", + "camino-tempfile", "dropshot", "expectorate", + "filetime", "futures", "headers", + "hex", "http", "libc", "omicron-common 0.1.0", @@ -5351,9 +5363,11 @@ dependencies = [ "rcgen", "regex", "reqwest", + "ring", "rustls", "slog", "subprocess", + "tar", "tempfile", "thiserror", "tokio", @@ -5428,7 +5442,7 @@ dependencies = [ "phf_shared 0.11.2", "postgres-types", "ppv-lite86", - "predicates 3.0.3", + "predicates 3.0.4", "rand 0.8.5", "rand_chacha 0.3.1", "regex", @@ -5436,6 +5450,7 @@ dependencies = [ "regex-syntax 0.7.5", "reqwest", "ring", + "rustix 0.37.23", "rustix 0.38.9", "schemars", "semver 1.0.18", @@ -6422,14 +6437,14 @@ dependencies = [ [[package]] name = "predicates" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" +checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0" dependencies = [ "anstyle", "difflib", "float-cmp", - "itertools 0.10.5", + "itertools 0.11.0", "normalize-line-endings", "predicates-core", "regex", @@ -6593,7 +6608,7 @@ dependencies = [ [[package]] name = "propolis" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "anyhow", "bhyve_api", @@ -6626,7 +6641,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "async-trait", "base64 0.21.4", @@ -6650,7 +6665,7 @@ dependencies = [ [[package]] name = "propolis-server" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "anyhow", "async-trait", @@ -6702,7 +6717,7 @@ dependencies = [ [[package]] name = "propolis-server-config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "cpuid_profile_config", "serde", @@ -6714,7 +6729,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "schemars", "serde", @@ -6722,19 +6737,19 @@ dependencies = [ [[package]] name = "proptest" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" +checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" dependencies = [ "bit-set", - "bitflags 1.3.2", - "byteorder", + "bit-vec", + "bitflags 2.4.0", "lazy_static", "num-traits", "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - "regex-syntax 0.6.29", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", @@ -9359,7 +9374,7 @@ dependencies = [ "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", - "predicates 3.0.3", + "predicates 3.0.4", "slog", "slog-async", "slog-envlogger", @@ -9456,8 +9471,8 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", - "rand 0.4.6", + "cfg-if 1.0.0", + "rand 0.8.5", "static_assertions", ] @@ -9761,7 +9776,7 @@ checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "viona_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "viona_api_sys", @@ -9770,7 +9785,7 @@ dependencies = [ [[package]] name = "viona_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 3b83b2f7c5..832b8663e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ members = [ "clients/sled-agent-client", "clients/wicketd-client", "common", - "crdb-seed", + "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", @@ -83,6 +83,7 @@ default-members = [ "clients/sled-agent-client", "clients/wicketd-client", "common", + "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", @@ -137,6 +138,7 @@ assert_matches = "1.5.0" assert_cmd = "2.0.12" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "da04c087f835a51e0441addb19c5ef4986e1fcf2" } async-trait = "0.1.73" +atomicwrites = "0.4.1" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.21.4" @@ -182,6 +184,7 @@ dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", either = "1.9.0" expectorate = "1.1.0" fatfs = "0.3.6" +filetime = "0.2.22" flate2 = "1.0.27" flume = "0.11.0" foreign-types = "0.3.2" @@ -271,16 +274,16 @@ percent-encoding = "2.2.0" pem = "1.1" petgraph = "0.6.4" postgres-protocol = "0.6.6" -predicates = "3.0.3" +predicates = "3.0.4" pretty_assertions = "1.4.0" pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f", default-features = false, features = ["mock-only"] } -proptest = "1.2.0" +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", features = [ "generated-migration" ] } +propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", default-features = false, features = ["mock-only"] } +proptest = "1.3.1" quote = "1.0" rand = "0.8.5" ratatui = "0.23.0" diff --git a/crdb-seed/src/main.rs b/crdb-seed/src/main.rs deleted file mode 100644 index b8572bd886..0000000000 --- a/crdb-seed/src/main.rs +++ /dev/null @@ -1,92 +0,0 @@ -use 
camino::Utf8PathBuf; -use dropshot::{test_util::LogContext, ConfigLogging, ConfigLoggingLevel}; -use omicron_test_utils::dev; -use slog::Logger; -use std::io::Write; - -// Creates a string identifier for the current DB schema and version. -// -// The goal here is to allow to create different "seed" directories -// for each revision of the DB. -fn digest_unique_to_schema() -> String { - let schema = include_str!("../../schema/crdb/dbinit.sql"); - let crdb_version = include_str!("../../tools/cockroachdb_version"); - let mut ctx = ring::digest::Context::new(&ring::digest::SHA256); - ctx.update(&schema.as_bytes()); - ctx.update(&crdb_version.as_bytes()); - let digest = ctx.finish(); - hex::encode(digest.as_ref()) -} - -enum SeedDirectoryStatus { - Created, - Existing, -} - -async fn ensure_seed_directory_exists( - log: &Logger, -) -> (Utf8PathBuf, SeedDirectoryStatus) { - let base_seed_dir = Utf8PathBuf::from_path_buf(std::env::temp_dir()) - .expect("Not a UTF-8 path") - .join("crdb-base"); - std::fs::create_dir_all(&base_seed_dir).unwrap(); - let desired_seed_dir = base_seed_dir.join(digest_unique_to_schema()); - - if desired_seed_dir.exists() { - return (desired_seed_dir, SeedDirectoryStatus::Existing); - } - - // The directory didn't exist when we started, so try to create it. - // - // Nextest will execute it just once, but it is possible for a user to start - // up multiple nextest processes to be running at the same time. So we - // should consider it possible for another caller to create this seed - // directory before we finish setting it up ourselves. - let tmp_seed_dir = - camino_tempfile::Utf8TempDir::new_in(base_seed_dir).unwrap(); - dev::test_setup_database_seed(log, tmp_seed_dir.path()).await; - - // If we can successfully perform the rename, there was either no - // contention or we won a creation race. - // - // If we couldn't perform the rename, the directory might already exist. - // Check that this is the error we encountered -- otherwise, we're - // struggling. 
- if let Err(err) = std::fs::rename(tmp_seed_dir.path(), &desired_seed_dir) { - if !desired_seed_dir.exists() { - panic!("Cannot rename seed directory for CockroachDB: {err}"); - } - } - - (desired_seed_dir, SeedDirectoryStatus::Created) -} - -#[tokio::main] -async fn main() { - // TODO: dropshot is v heavyweight for this, we should be able to pull in a - // smaller binary - let logctx = LogContext::new( - "crdb_seeding", - &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, - ); - let (dir, status) = ensure_seed_directory_exists(&logctx.log).await; - match status { - SeedDirectoryStatus::Created => { - slog::info!(logctx.log, "Created seed directory: `{dir}`"); - } - SeedDirectoryStatus::Existing => { - slog::info!(logctx.log, "Using existing seed directory: `{dir}`"); - } - } - if let Ok(env_path) = std::env::var("NEXTEST_ENV") { - let mut file = std::fs::File::create(&env_path) - .expect("failed to open NEXTEST_ENV file"); - writeln!(file, "CRDB_SEED_DIR={dir}") - .expect("failed to write to NEXTEST_ENV file"); - } else { - slog::warn!( - logctx.log, - "NEXTEST_ENV not set (is this script running under nextest?)" - ); - } -} diff --git a/crdb-seed/Cargo.toml b/dev-tools/crdb-seed/Cargo.toml similarity index 60% rename from crdb-seed/Cargo.toml rename to dev-tools/crdb-seed/Cargo.toml index 8d6d570d08..aff26995dc 100644 --- a/crdb-seed/Cargo.toml +++ b/dev-tools/crdb-seed/Cargo.toml @@ -3,14 +3,12 @@ name = "crdb-seed" version = "0.1.0" edition = "2021" license = "MPL-2.0" +readme = "README.md" [dependencies] -camino.workspace = true -camino-tempfile.workspace = true +anyhow.workspace = true dropshot.workspace = true -hex.workspace = true -omicron-test-utils.workspace = true -ring.workspace = true +omicron-test-utils = { workspace = true, features = ["seed-gen"] } slog.workspace = true tokio.workspace = true omicron-workspace-hack.workspace = true diff --git a/dev-tools/crdb-seed/README.md b/dev-tools/crdb-seed/README.md new file mode 100644 index 0000000000..3b77f23066 --- /dev/null +++ b/dev-tools/crdb-seed/README.md @@ -0,0 +1,11 @@ +# crdb-seed + +This is a small utility that creates a seed tarball for our CockroachDB instance +in the temporary directory. It is used as a setup script for nextest (see +`.config/nextest.toml`). + +This utility hashes inputs and attempts to reuse a tarball if it already exists +(see `digest_unique_to_schema` in `omicron/test-utils/src/dev/seed.rs`). + +To invalidate the tarball and cause it to be recreated from scratch, set +`CRDB_SEED_INVALIDATE=1` in the environment. diff --git a/dev-tools/crdb-seed/src/main.rs b/dev-tools/crdb-seed/src/main.rs new file mode 100644 index 0000000000..26b0e19410 --- /dev/null +++ b/dev-tools/crdb-seed/src/main.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ +use anyhow::{Context, Result}; +use dropshot::{test_util::LogContext, ConfigLogging, ConfigLoggingLevel}; +use omicron_test_utils::dev::seed::{ + ensure_seed_tarball_exists, should_invalidate_seed, +}; +use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; +use std::io::Write; + +#[tokio::main] +async fn main() -> Result<()> { + // TODO: dropshot is v heavyweight for this, we should be able to pull in a + // smaller binary + let logctx = LogContext::new( + "crdb_seeding", + &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, + ); + let (seed_tar, status) = + ensure_seed_tarball_exists(&logctx.log, should_invalidate_seed()) + .await?; + status.log(&logctx.log, &seed_tar); + + if let Ok(env_path) = std::env::var("NEXTEST_ENV") { + let mut file = std::fs::File::create(&env_path) + .context("failed to open NEXTEST_ENV file")?; + writeln!(file, "{CRDB_SEED_TAR_ENV}={seed_tar}") + .context("failed to write to NEXTEST_ENV file")?; + } else { + slog::warn!( + logctx.log, + "NEXTEST_ENV not set (is this script running under nextest?)" + ); + } + + Ok(()) +} diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index 251ee16c01..ec7cafb559 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -16,7 +16,7 @@ futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true libc.workspace = true -nexus-test-utils.workspace = true +nexus-test-utils = { workspace = true, features = ["omicron-dev"] } nexus-test-interface.workspace = true omicron-common.workspace = true omicron-nexus.workspace = true diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index 9107766d8a..e79184f7e5 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -14,7 +14,6 @@ use futures::stream::StreamExt; use nexus_test_interface::NexusServer; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use omicron_sled_agent::sim; use omicron_test_utils::dev; use signal_hook::consts::signal::SIGINT; use signal_hook_tokio::Signals; @@ -348,13 +347,12 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { config.deployment.dropshot_external.dropshot.bind_address.set_port(p); } - // Start up a ControlPlaneTestContext, which tautologically sets up - // everything needed for a simulated control plane. println!("omicron-dev: setting up all services ... "); - let cptestctx = nexus_test_utils::test_setup_with_config::< + let cptestctx = nexus_test_utils::omicron_dev_setup_with_config::< omicron_nexus::Server, - >("omicron-dev", &mut config, sim::SimMode::Auto, None) - .await; + >(&mut config) + .await + .context("error setting up services")?; println!("omicron-dev: services are running."); // Print out basic information about what was started. 
diff --git a/dev-tools/omicron-dev/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs index f855d8935d..f1e8177243 100644 --- a/dev-tools/omicron-dev/tests/test_omicron_dev.rs +++ b/dev-tools/omicron-dev/tests/test_omicron_dev.rs @@ -13,6 +13,7 @@ use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use omicron_test_utils::dev::test_cmds::EXIT_USAGE; +use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; use oxide_client::ClientHiddenExt; use std::io::BufRead; use std::path::Path; @@ -389,6 +390,16 @@ async fn test_db_run() { // This mirrors the `test_db_run()` test. #[tokio::test] async fn test_run_all() { + // Ensure that the CRDB_SEED_TAR environment variable is not set. We want to + // simulate a user running omicron-dev without the test environment, so + // panic early if it is set. + if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) { + panic!( + "CRDB_SEED_TAR_ENV should not be set here, but is set to {}", + val + ); + } + let cmd_path = path_to_omicron_dev(); let cmdstr = format!( diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index ba8cd8c84a..9118a9a3cd 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -61,6 +61,9 @@ enum EnsureFilesystemErrorRaw { #[error("Failed to mount encrypted filesystem: {0}")] MountEncryptedFsFailed(crate::ExecutionError), + + #[error("Failed to mount overlay filesystem: {0}")] + MountOverlayFsFailed(crate::ExecutionError), } /// Error returned by [`Zfs::ensure_filesystem`]. @@ -202,6 +205,7 @@ impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists. /// /// Applies an optional quota, provided _in bytes_. + #[allow(clippy::too_many_arguments)] pub fn ensure_filesystem( name: &str, mountpoint: Mountpoint, @@ -209,6 +213,7 @@ do_format: bool, encryption_details: Option<EncryptionDetails>, size_details: Option<SizeDetails>, + additional_options: Option<Vec<String>>, ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; if exists { @@ -261,7 +266,14 @@ ]); } + if let Some(opts) = additional_options { + for o in &opts { + cmd.args(&["-o", &o]); + } + } + cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); + execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), mountpoint: mountpoint.clone(), @@ -322,6 +334,20 @@ Ok(()) } + pub fn mount_overlay_dataset( + name: &str, + mountpoint: &Mountpoint, + ) -> Result<(), EnsureFilesystemError> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ZFS, "mount", "-O", name]); + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err), + })?; + Ok(()) + } + // Return (true, mounted) if the dataset exists, (false, false) otherwise, // where mounted is if the dataset is mounted. fn dataset_exists( @@ -385,7 +411,7 @@ Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value( + pub fn get_value( filesystem_name: &str, name: &str, ) -> Result<String, GetValueError> { @@ -422,13 +448,12 @@ pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result<Vec<String>> { let internal = pool.kind() == crate::zpool::ZpoolKind::Internal; let pool = pool.to_string(); for dataset in &Zfs::list_datasets(&pool)?
{ - // Avoid erasing crashdump datasets on internal pools - if dataset == "crash" && internal { - continue; - } - - // The swap device might be in use, so don't assert that it can be deleted. - if dataset == "swap" && internal { + // Avoid erasing crashdump, backing data and swap datasets on + // internal pools. The swap device may be in use. + if internal + && (["crash", "backing", "swap"].contains(&dataset.as_str()) + || dataset.starts_with("backing/")) + { continue; } diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs index 9addeb83e3..af1cf8b2b3 100644 --- a/nexus/db-model/src/region_snapshot.rs +++ b/nexus/db-model/src/region_snapshot.rs @@ -32,4 +32,7 @@ pub struct RegionSnapshot { // how many volumes reference this? pub volume_references: i64, + + // true if part of a volume's `resources_to_clean_up` already + pub deleting: bool, } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 2153dfaa7a..2d6970452d 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -867,6 +867,7 @@ table! { snapshot_id -> Uuid, snapshot_addr -> Text, volume_references -> Int8, + deleting -> Bool, } } diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 99972459c8..0b26789e8f 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -13,15 +13,31 @@ use crate::db::error::ErrorHandler; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::Zpool; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; use omicron_common::api::external::CreateResult; use omicron_common::api::external::Error; +use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use uuid::Uuid; impl DataStore { + pub async fn dataset_get(&self, dataset_id: Uuid) -> LookupResult<Dataset> { + use db::schema::dataset::dsl; + + dsl::dataset + .filter(dsl::id.eq(dataset_id)) + .select(Dataset::as_select()) + .first_async::<Dataset>( + &*self.pool_connection_unauthorized().await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Stores a new dataset in the database.
pub async fn dataset_upsert( &self, diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 0a707e4504..148cfe4812 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -10,9 +10,11 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::RegionSnapshot; use async_bb8_diesel::AsyncRunQueryDsl; +use async_bb8_diesel::OptionalExtension; use diesel::prelude::*; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::LookupResult; use uuid::Uuid; impl DataStore { @@ -31,6 +33,27 @@ .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + pub async fn region_snapshot_get( + &self, + dataset_id: Uuid, + region_id: Uuid, + snapshot_id: Uuid, + ) -> LookupResult<Option<RegionSnapshot>> { + use db::schema::region_snapshot::dsl; + + dsl::region_snapshot + .filter(dsl::dataset_id.eq(dataset_id)) + .filter(dsl::region_id.eq(region_id)) + .filter(dsl::snapshot_id.eq(snapshot_id)) + .select(RegionSnapshot::as_select()) + .first_async::<RegionSnapshot>( + &*self.pool_connection_unauthorized().await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + pub async fn region_snapshot_remove( + &self, + dataset_id: Uuid, diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index b3e82886de..b97b8451cf 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -119,6 +119,7 @@ impl DataStore { .filter( rs_dsl::snapshot_addr.eq(read_only_target.clone()), ) + .filter(rs_dsl::deleting.eq(false)) .set( rs_dsl::volume_references .eq(rs_dsl::volume_references + 1), @@ -573,9 +574,7 @@ impl DataStore { // multiple times, and that is done by soft-deleting the volume during // the transaction, and returning the previously serialized list of // resources to clean up if a soft-delete has already occurred. - // - // TODO it would be nice to make this transaction_async, but I couldn't - // get the async optional extension to work. + self.pool_connection_unauthorized() .await? .transaction_async(|conn| async move { @@ -639,7 +638,9 @@ } }; - // Decrease the number of uses for each referenced region snapshot. + // Decrease the number of uses for each non-deleted referenced + // region snapshot. + use db::schema::region_snapshot::dsl; diesel::update(dsl::region_snapshot) @@ -647,12 +648,40 @@ dsl::snapshot_addr .eq_any(crucible_targets.read_only_targets.clone()), ) + .filter(dsl::volume_references.gt(0)) + .filter(dsl::deleting.eq(false)) .set(dsl::volume_references.eq(dsl::volume_references - 1)) .execute_async(&conn) .await?; + // Then, note anything that was set to zero from the above + // UPDATE, and then mark all those as deleted.
+ let snapshots_to_delete: Vec<RegionSnapshot> = + dsl::region_snapshot + .filter( + dsl::snapshot_addr.eq_any( + crucible_targets.read_only_targets.clone(), + ), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .select(RegionSnapshot::as_select()) + .load_async(&conn) + .await?; + + diesel::update(dsl::region_snapshot) + .filter( + dsl::snapshot_addr + .eq_any(crucible_targets.read_only_targets.clone()), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .set(dsl::deleting.eq(true)) + .execute_async(&conn) + .await?; + // Return what results can be cleaned up - let result = CrucibleResources::V1(CrucibleResourcesV1 { + let result = CrucibleResources::V2(CrucibleResourcesV2 { // The only use of a read-write region will be at the top level of a // Volume. These are not shared, but if any snapshots are taken this // will prevent deletion of the region. Filter out any regions that @@ -681,6 +710,7 @@ .eq(0) // Despite the SQL specifying that this column is NOT NULL, // this null check is required for this function to work! + // The left join of region_snapshot might cause a null here. .or(dsl::volume_references.is_null()), ) .select((Dataset::as_select(), Region::as_select())) @@ -688,46 +718,17 @@ .await? }, - // A volume (for a disk or snapshot) may reference another nested - // volume as a read-only parent, and this may be arbitrarily deep. - // After decrementing volume_references above, get the region - // snapshot records for these read_only_targets where the - // volume_references has gone to 0. Consumers of this struct will - // be responsible for deleting the read-only downstairs running - // for the snapshot and the snapshot itself. - datasets_and_snapshots: { - use db::schema::dataset::dsl as dataset_dsl; - - dsl::region_snapshot - // Only return region_snapshot records related to - // this volume that have zero references. This will - // only happen one time, on the last decrease of a - // volume containing these read-only targets. - // - // It's important to not return *every* region - // snapshot with zero references: multiple volume - // delete sub-sagas will then be issues duplicate - // DELETE calls to Crucible agents, and a request to - // delete a read-only downstairs running for a - // snapshot that doesn't exist will return a 404, - // causing the saga to error and unwind. - .filter(dsl::snapshot_addr.eq_any( - crucible_targets.read_only_targets.clone(), - )) - .filter(dsl::volume_references.eq(0)) - .inner_join( - dataset_dsl::dataset - .on(dsl::dataset_id.eq(dataset_dsl::id)), - ) - .select(( - Dataset::as_select(), - RegionSnapshot::as_select(), - )) - .get_results_async::<(Dataset, RegionSnapshot)>( - &conn, - ) - .await? - }, + // Consumers of this struct will be responsible for deleting + // the read-only downstairs running for the snapshot and the + // snapshot itself. + // + // It's important to not return *every* region snapshot with + // zero references: multiple volume delete sub-sagas will + // then issue duplicate DELETE calls to Crucible agents, + // and a request to delete a read-only downstairs running + // for a snapshot that doesn't exist will return a 404, + // causing the saga to error and unwind.
+ snapshots_to_delete, }); // Soft delete this volume, and serialize the resources that are to @@ -967,7 +968,7 @@ impl DataStore { #[derive(Default, Debug, Serialize, Deserialize)] pub struct CrucibleTargets { - read_only_targets: Vec<String>, + pub read_only_targets: Vec<String>, } // Serialize this enum into the `resources_to_clean_up` column to handle @@ -975,6 +976,7 @@ #[derive(Debug, Serialize, Deserialize)] pub enum CrucibleResources { V1(CrucibleResourcesV1), + V2(CrucibleResourcesV2), } #[derive(Debug, Default, Serialize, Deserialize)] @@ -983,6 +985,12 @@ pub struct CrucibleResourcesV1 { pub datasets_and_snapshots: Vec<(Dataset, RegionSnapshot)>, } +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct CrucibleResourcesV2 { + pub datasets_and_regions: Vec<(Dataset, Region)>, + pub snapshots_to_delete: Vec<RegionSnapshot>, +} + /// Return the targets from a VolumeConstructionRequest. /// /// The targets of a volume construction request map to resources. diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 315f91fa59..079b6af4a4 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1261,6 +1261,7 @@ async fn ssc_start_running_snapshot( snapshot_id, snapshot_addr, volume_references: 0, // to be filled later + deleting: false, }) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index 4cd633f575..d6358d5435 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -155,39 +155,39 @@ async fn svd_delete_crucible_regions( sagactx.lookup::<CrucibleResources>("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - match crucible_resources_to_delete { + let datasets_and_regions = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_regions( - log, - crucible_resources_to_delete.datasets_and_regions.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_regions: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_regions } - // Remove DB records - let region_ids_to_delete = crucible_resources_to_delete - .datasets_and_regions - .iter() - .map(|(_, r)| r.id()) - .collect(); - - osagactx - .datastore() - .regions_hard_delete(log, region_ids_to_delete) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to regions_hard_delete: {:?}", - e, - )) - })?; + CrucibleResources::V2(crucible_resources_to_delete) => { + crucible_resources_to_delete.datasets_and_regions } - } + }; + + delete_crucible_regions(log, datasets_and_regions.clone()).await.map_err( + |e| { + ActionError::action_failed(format!( + "failed to delete_crucible_regions: {:?}", + e, + )) + }, + )?; + + // Remove DB records + let region_ids_to_delete = + datasets_and_regions.iter().map(|(_, r)| r.id()).collect(); + + osagactx + .datastore() + .regions_hard_delete(log, region_ids_to_delete) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to regions_hard_delete: {:?}", + e, + )) + })?; Ok(()) } @@ -202,26 +202,46 @@ async fn svd_delete_crucible_running_snapshots( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); let crucible_resources_to_delete = sagactx.lookup::<CrucibleResources>("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding
Crucible agents - match crucible_resources_to_delete { + let datasets_and_snapshots = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_running_snapshots( - log, - crucible_resources_to_delete.datasets_and_snapshots.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_running_snapshots: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_snapshots } - } + + CrucibleResources::V2(crucible_resources_to_delete) => { + let mut datasets_and_snapshots: Vec<_> = Vec::with_capacity( + crucible_resources_to_delete.snapshots_to_delete.len(), + ); + + for region_snapshot in + crucible_resources_to_delete.snapshots_to_delete + { + let dataset = osagactx + .datastore() + .dataset_get(region_snapshot.dataset_id) + .await + .map_err(ActionError::action_failed)?; + + datasets_and_snapshots.push((dataset, region_snapshot)); + } + + datasets_and_snapshots + } + }; + + delete_crucible_running_snapshots(log, datasets_and_snapshots.clone()) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to delete_crucible_running_snapshots: {:?}", + e, + )) + })?; Ok(()) } @@ -235,26 +255,46 @@ async fn svd_delete_crucible_snapshots( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); let crucible_resources_to_delete = sagactx.lookup::<CrucibleResources>("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - match crucible_resources_to_delete { + let datasets_and_snapshots = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_snapshots( - log, - crucible_resources_to_delete.datasets_and_snapshots.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_snapshots: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_snapshots } - } + + CrucibleResources::V2(crucible_resources_to_delete) => { + let mut datasets_and_snapshots: Vec<_> = Vec::with_capacity( + crucible_resources_to_delete.snapshots_to_delete.len(), + ); + + for region_snapshot in + crucible_resources_to_delete.snapshots_to_delete + { + let dataset = osagactx + .datastore() + .dataset_get(region_snapshot.dataset_id) + .await + .map_err(ActionError::action_failed)?; + + datasets_and_snapshots.push((dataset, region_snapshot)); + } + + datasets_and_snapshots + } + }; + + delete_crucible_snapshots(log, datasets_and_snapshots.clone()) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to delete_crucible_snapshots: {:?}", + e, + )) + })?; Ok(()) } @@ -293,6 +333,31 @@ async fn svd_delete_crucible_snapshot_records( })?; } } + + CrucibleResources::V2(crucible_resources_to_delete) => { + // Remove DB records + for region_snapshot in + &crucible_resources_to_delete.snapshots_to_delete + { + osagactx + .datastore() + .region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to region_snapshot_remove {} {} {}: {:?}", + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + e, + )) + })?; + } + } } Ok(()) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 4921f52f90..1fddfba85b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++
b/nexus/src/external_api/http_entrypoints.rs @@ -98,6 +98,8 @@ type NexusApiDescription = ApiDescription<Arc<ServerContext>>; /// Returns a description of the external nexus API pub(crate) fn external_api() -> NexusApiDescription { fn register_endpoints(api: &mut NexusApiDescription) -> Result<(), String> { + api.register(ping)?; + api.register(system_policy_view)?; api.register(system_policy_update)?; @@ -364,6 +366,20 @@ // clients. Client generators use operationId to name API methods, so changing // a function name is a breaking change from a client perspective. +/// Ping API +/// +/// Always responds with Ok if it responds at all. +#[endpoint { + method = GET, + path = "/v1/ping", + tags = ["system/status"], +}] +async fn ping( + _rqctx: RequestContext<Arc<ServerContext>>, +) -> Result<HttpResponseOk<views::Ping>, HttpError> { + Ok(HttpResponseOk(views::Ping { status: views::PingStatus::Ok })) +} + /// Fetch the top-level IAM policy #[endpoint { method = GET, diff --git a/nexus/src/external_api/tag-config.json b/nexus/src/external_api/tag-config.json index e985ea7db4..07eb198016 100644 --- a/nexus/src/external_api/tag-config.json +++ b/nexus/src/external_api/tag-config.json @@ -80,6 +80,12 @@ "url": "http://docs.oxide.computer/api/vpcs" } }, + "system/status": { + "description": "Endpoints related to system health", + "external_docs": { + "url": "http://docs.oxide.computer/api/system-status" + } + }, "system/hardware": { "description": "These operations pertain to hardware inventory and management. Racks are the unit of expansion of an Oxide deployment. Racks are in turn composed of sleds, switches, power supplies, and a cabled backplane.", "external_docs": { diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 8eb8df4a5b..8cd25582be 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -39,3 +39,6 @@ trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true + +[features] +omicron-dev = ["omicron-test-utils/seed-gen"] diff --git a/nexus/test-utils/src/db.rs b/nexus/test-utils/src/db.rs index 37d7128c49..ff23f35df0 100644 --- a/nexus/test-utils/src/db.rs +++ b/nexus/test-utils/src/db.rs @@ -8,7 +8,7 @@ use camino::Utf8PathBuf; use omicron_test_utils::dev; use slog::Logger; -/// Path to the "seed" CockroachDB directory. +/// Path to the "seed" CockroachDB tarball. /// /// Populating CockroachDB unfortunately isn't free - creation of /// tables, indices, and users takes several seconds to complete. @@ -16,20 +16,39 @@ use slog::Logger; /// By creating a "seed" version of the database, we can cut down /// on the time spent performing this operation. Instead, we opt /// to copy the database from this seed location. -fn seed_dir() -> Utf8PathBuf { +fn seed_tar() -> Utf8PathBuf { // The setup script should set this environment variable. - let seed_dir = std::env::var("CRDB_SEED_DIR") - .expect("CRDB_SEED_DIR missing -- are you running this test with `cargo nextest run`?"); + let seed_dir = std::env::var(dev::CRDB_SEED_TAR_ENV).unwrap_or_else(|_| { + panic!( + "{} missing -- are you running this test \ + with `cargo nextest run`?", + dev::CRDB_SEED_TAR_ENV, + ) + }); seed_dir.into() } -/// Wrapper around [`dev::test_setup_database`] which uses a a -/// seed directory provided at build-time. +/// Wrapper around [`dev::test_setup_database`] which uses a seed tarball +/// provided from the environment.
pub async fn test_setup_database(log: &Logger) -> dev::db::CockroachInstance { - let dir = seed_dir(); + let input_tar = seed_tar(); dev::test_setup_database( log, - dev::StorageSource::CopyFromSeed { input_dir: dir }, + dev::StorageSource::CopyFromSeed { input_tar }, + ) + .await +} + +/// Wrapper around [`dev::test_setup_database`] which uses a seed tarball +/// provided as an argument. +#[cfg(feature = "omicron-dev")] +pub async fn test_setup_database_from_seed( + log: &Logger, + input_tar: Utf8PathBuf, +) -> dev::db::CockroachInstance { + dev::test_setup_database( + log, + dev::StorageSource::CopyFromSeed { input_tar }, ) .await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index d219da7e96..34c218b3e2 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -5,6 +5,7 @@ //! Integration testing facilities for Nexus use anyhow::Context; +use anyhow::Result; use camino::Utf8Path; use dns_service_client::types::DnsConfigParams; use dropshot::test_util::ClientTestContext; @@ -284,14 +285,30 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { } pub async fn start_crdb(&mut self, populate: bool) { + let populate = if populate { + PopulateCrdb::FromEnvironmentSeed + } else { + PopulateCrdb::Empty + }; + self.start_crdb_impl(populate).await; + } + + /// Private implementation of `start_crdb` that allows for a seed tarball to + /// be passed in. See [`PopulateCrdb`] for more details. + async fn start_crdb_impl(&mut self, populate: PopulateCrdb) { let log = &self.logctx.log; debug!(log, "Starting CRDB"); // Start up CockroachDB. - let database = if populate { - db::test_setup_database(log).await - } else { - db::test_setup_database_empty(log).await + let database = match populate { + PopulateCrdb::FromEnvironmentSeed => { + db::test_setup_database(log).await + } + #[cfg(feature = "omicron-dev")] + PopulateCrdb::FromSeed { input_tar } => { + db::test_setup_database_from_seed(log, input_tar).await + } + PopulateCrdb::Empty => db::test_setup_database_empty(log).await, }; eprintln!("DB URL: {}", database.pg_config()); @@ -759,17 +776,89 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { } } +/// How to populate CockroachDB. +/// +/// This is private because we want to ensure that tests use the setup script +/// rather than trying to create their own seed tarballs. This may need to be +/// revisited if circumstances change. +#[derive(Clone, Debug)] +enum PopulateCrdb { + /// Populate Cockroach from the `CRDB_SEED_TAR_ENV` environment variable. + /// + /// Any tests that depend on nexus-test-utils should have this environment + /// variable available. + FromEnvironmentSeed, + + /// Populate Cockroach from the seed located at this path. + #[cfg(feature = "omicron-dev")] + FromSeed { input_tar: camino::Utf8PathBuf }, + + /// Do not populate Cockroach. + Empty, +} + +/// Setup routine to use for `omicron-dev`. Use [`test_setup_with_config`] for +/// tests. +/// +/// The main difference from tests is that this routine ensures the seed tarball +/// exists (or creates a seed tarball if it doesn't exist). For tests, this +/// should be done in the `crdb-seed` setup script. 
+#[cfg(feature = "omicron-dev")] +pub async fn omicron_dev_setup_with_config<N: NexusServer>( + config: &mut omicron_common::nexus_config::Config, +) -> Result<ControlPlaneTestContext<N>> { + let builder = + ControlPlaneTestContextBuilder::<N>::new("omicron-dev", config); + + let log = &builder.logctx.log; + debug!(log, "Ensuring seed tarball exists"); + + // Start up a ControlPlaneTestContext, which tautologically sets up + // everything needed for a simulated control plane. + let why_invalidate = + omicron_test_utils::dev::seed::should_invalidate_seed(); + let (seed_tar, status) = + omicron_test_utils::dev::seed::ensure_seed_tarball_exists( + log, + why_invalidate, + ) + .await + .context("error ensuring seed tarball exists")?; + status.log(log, &seed_tar); + + Ok(setup_with_config_impl( + builder, + PopulateCrdb::FromSeed { input_tar: seed_tar }, + sim::SimMode::Auto, + None, + ) + .await) +} + +/// Setup routine to use for tests. pub async fn test_setup_with_config<N: NexusServer>( test_name: &str, config: &mut omicron_common::nexus_config::Config, sim_mode: sim::SimMode, initial_cert: Option, ) -> ControlPlaneTestContext<N> { - let mut builder = - ControlPlaneTestContextBuilder::<N>::new(test_name, config); + let builder = ControlPlaneTestContextBuilder::<N>::new(test_name, config); + setup_with_config_impl( + builder, + PopulateCrdb::FromEnvironmentSeed, + sim_mode, + initial_cert, + ) + .await +} - let populate = true; - builder.start_crdb(populate).await; +async fn setup_with_config_impl<N: NexusServer>( + mut builder: ControlPlaneTestContextBuilder<'_, N>, + populate: PopulateCrdb, + sim_mode: sim::SimMode, + initial_cert: Option, +) -> ControlPlaneTestContext<N> { + builder.start_crdb_impl(populate).await; builder.start_clickhouse().await; builder.start_dendrite(SwitchLocation::Switch0).await; builder.start_dendrite(SwitchLocation::Switch1).await; diff --git a/nexus/tests/integration_tests/basic.rs b/nexus/tests/integration_tests/basic.rs index ab54c97197..282ec0cd96 100644 --- a/nexus/tests/integration_tests/basic.rs +++ b/nexus/tests/integration_tests/basic.rs @@ -10,7 +10,8 @@ use dropshot::HttpErrorResponseBody; use http::method::Method; use http::StatusCode; -use nexus_types::external_api::{params, views::Project}; +use nexus_types::external_api::params; +use nexus_types::external_api::views::{self, Project}; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::Name; @@ -546,3 +547,13 @@ async fn test_projects_list(cptestctx: &ControlPlaneTestContext) { .collect::>() ); } + +#[nexus_test] +async fn test_ping(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let health = NexusRequest::object_get(client, "/v1/ping") + .execute_and_parse_unwrap::<views::Ping>() + .await; + assert_eq!(health.status, views::PingStatus::Ok); +} diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index e04d26cc45..e9ae11c21f 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -1876,6 +1876,5 @@ lazy_static!
{ AllowedMethod::GetNonexistent ], }, - ]; } diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index d212175415..68f4cdadd2 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -1094,6 +1094,7 @@ async fn test_region_snapshot_create_idempotent( snapshot_addr: "[::]:12345".to_string(), volume_references: 1, + deleting: false, }; datastore.region_snapshot_create(region_snapshot.clone()).await.unwrap(); @@ -1287,13 +1288,16 @@ async fn test_multiple_deletes_not_sent(cptestctx: &ControlPlaneTestContext) { .unwrap(); let resources_1 = match resources_1 { - db::datastore::CrucibleResources::V1(resources_1) => resources_1, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_1) => resources_1, }; let resources_2 = match resources_2 { - db::datastore::CrucibleResources::V1(resources_2) => resources_2, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_2) => resources_2, }; let resources_3 = match resources_3 { - db::datastore::CrucibleResources::V1(resources_3) => resources_3, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_3) => resources_3, }; // No region deletions yet, these are just snapshot deletes @@ -1304,24 +1308,24 @@ // But there are snapshots to delete - assert!(!resources_1.datasets_and_snapshots.is_empty()); - assert!(!resources_2.datasets_and_snapshots.is_empty()); - assert!(!resources_3.datasets_and_snapshots.is_empty()); + assert!(!resources_1.snapshots_to_delete.is_empty()); + assert!(!resources_2.snapshots_to_delete.is_empty()); + assert!(!resources_3.snapshots_to_delete.is_empty()); - // Assert there are no overlaps in the datasets_and_snapshots to delete. + // Assert there are no overlaps in the snapshots_to_delete.
- for tuple in &resources_1.datasets_and_snapshots { - assert!(!resources_2.datasets_and_snapshots.contains(tuple)); - assert!(!resources_3.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_1.snapshots_to_delete { + assert!(!resources_2.snapshots_to_delete.contains(tuple)); + assert!(!resources_3.snapshots_to_delete.contains(tuple)); } - for tuple in &resources_2.datasets_and_snapshots { - assert!(!resources_1.datasets_and_snapshots.contains(tuple)); - assert!(!resources_3.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_2.snapshots_to_delete { + assert!(!resources_1.snapshots_to_delete.contains(tuple)); + assert!(!resources_3.snapshots_to_delete.contains(tuple)); } - for tuple in &resources_3.datasets_and_snapshots { - assert!(!resources_1.datasets_and_snapshots.contains(tuple)); - assert!(!resources_2.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_3.snapshots_to_delete { + assert!(!resources_1.snapshots_to_delete.contains(tuple)); + assert!(!resources_2.snapshots_to_delete.contains(tuple)); } } diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 70d34fb778..e263593def 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -19,6 +19,7 @@ use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::views; +use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -1813,6 +1814,313 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( volume_match_gen(new_vol, vec![Some(8), None, Some(10)]); } +/// Test that the Crucible agent's port reuse does not confuse +/// `decrease_crucible_resource_count_and_soft_delete_volume`, due to the +/// `[ipv6]:port` targets being reused. +#[nexus_test] +async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + + // Four zpools, one dataset each + let mut disk_test = DiskTest::new(&cptestctx).await; + disk_test + .add_zpool_with_dataset(&cptestctx, DiskTest::DEFAULT_ZPOOL_SIZE_GIB) + .await; + + // This bug occurs when region_snapshot records share a snapshot_addr, so + // insert those here manually. + + // (dataset_id, region_id, snapshot_id, snapshot_addr) + let region_snapshots = vec![ + // first snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), + ), + ( + disk_test.zpools[1].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:102:7]:19016"), + ), + ( + disk_test.zpools[2].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:103:7]:19016"), + ), + // second snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), // duplicate! 
+        ),
+        (
+            disk_test.zpools[3].datasets[0].id,
+            Uuid::new_v4(),
+            Uuid::new_v4(),
+            String::from("[fd00:1122:3344:104:7]:19016"),
+        ),
+        (
+            disk_test.zpools[2].datasets[0].id,
+            Uuid::new_v4(),
+            Uuid::new_v4(),
+            String::from("[fd00:1122:3344:103:7]:19017"),
+        ),
+    ];
+
+    // First, three `region_snapshot` records created in the snapshot-create
+    // saga, which are then used to make the snapshot's volume construction
+    // request
+
+    for i in 0..3 {
+        let (dataset_id, region_id, snapshot_id, snapshot_addr) =
+            &region_snapshots[i];
+        datastore
+            .region_snapshot_create(nexus_db_model::RegionSnapshot {
+                dataset_id: *dataset_id,
+                region_id: *region_id,
+                snapshot_id: *snapshot_id,
+                snapshot_addr: snapshot_addr.clone(),
+                volume_references: 0,
+                deleting: false,
+            })
+            .await
+            .unwrap();
+    }
+
+    let volume_id = Uuid::new_v4();
+    let volume = datastore
+        .volume_create(nexus_db_model::Volume::new(
+            volume_id,
+            serde_json::to_string(&VolumeConstructionRequest::Volume {
+                id: volume_id,
+                block_size: 512,
+                sub_volumes: vec![],
+                read_only_parent: Some(Box::new(
+                    VolumeConstructionRequest::Region {
+                        block_size: 512,
+                        blocks_per_extent: 1,
+                        extent_count: 1,
+                        gen: 1,
+                        opts: CrucibleOpts {
+                            id: Uuid::new_v4(),
+                            target: vec![
+                                region_snapshots[0].3.clone(),
+                                region_snapshots[1].3.clone(),
+                                region_snapshots[2].3.clone(),
+                            ],
+                            lossy: false,
+                            flush_timeout: None,
+                            key: None,
+                            cert_pem: None,
+                            key_pem: None,
+                            root_cert_pem: None,
+                            control: None,
+                            read_only: true,
+                        },
+                    },
+                )),
+            })
+            .unwrap(),
+        ))
+        .await
+        .unwrap();
+
+    // Sanity check
+
+    assert_eq!(volume.id(), volume_id);
+
+    // Make sure the volume has only three read-only targets:
+
+    let crucible_targets = datastore
+        .read_only_resources_associated_with_volume(volume_id)
+        .await
+        .unwrap();
+    assert_eq!(crucible_targets.read_only_targets.len(), 3);
+
+    // Also validate the volume's region_snapshots got incremented by
+    // volume_create
+
+    for i in 0..3 {
+        let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i];
+        let region_snapshot = datastore
+            .region_snapshot_get(dataset_id, region_id, snapshot_id)
+            .await
+            .unwrap()
+            .unwrap();
+
+        assert_eq!(region_snapshot.volume_references, 1);
+        assert_eq!(region_snapshot.deleting, false);
+    }
+
+    // Soft delete the volume, and validate that only three region_snapshot
+    // records are returned.
+
+    let cr = datastore
+        .decrease_crucible_resource_count_and_soft_delete_volume(volume_id)
+        .await
+        .unwrap();
+
+    for i in 0..3 {
+        let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i];
+        let region_snapshot = datastore
+            .region_snapshot_get(dataset_id, region_id, snapshot_id)
+            .await
+            .unwrap()
+            .unwrap();
+
+        assert_eq!(region_snapshot.volume_references, 0);
+        assert_eq!(region_snapshot.deleting, true);
+    }
+
+    match cr {
+        nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => {
+            assert!(cr.datasets_and_regions.is_empty());
+            assert_eq!(cr.datasets_and_snapshots.len(), 3);
+        }
+
+        nexus_db_queries::db::datastore::CrucibleResources::V2(cr) => {
+            assert!(cr.datasets_and_regions.is_empty());
+            assert_eq!(cr.snapshots_to_delete.len(), 3);
+        }
+    }
+
+    // Now, let's say we're at a spot where the running snapshots have been
+    // deleted, but before volume_hard_delete or region_snapshot_remove are
+    // called. Pretend another snapshot-create and snapshot-delete snuck in
+    // here, and the second snapshot hits an agent that reuses the first target.
+ + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, snapshot_addr) = + ®ion_snapshots[i]; + datastore + .region_snapshot_create(nexus_db_model::RegionSnapshot { + dataset_id: *dataset_id, + region_id: *region_id, + snapshot_id: *snapshot_id, + snapshot_addr: snapshot_addr.clone(), + volume_references: 0, + deleting: false, + }) + .await + .unwrap(); + } + + let volume_id = Uuid::new_v4(); + let volume = datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + region_snapshots[3].3.clone(), + region_snapshots[4].3.clone(), + region_snapshots[5].3.clone(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Sanity check + + assert_eq!(volume.id(), volume_id); + + // Make sure the volume has only three read-only targets: + + let crucible_targets = datastore + .read_only_resources_associated_with_volume(volume_id) + .await + .unwrap(); + assert_eq!(crucible_targets.read_only_targets.len(), 3); + + // Also validate only the volume's region_snapshots got incremented by + // volume_create. + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 1); + assert_eq!(region_snapshot.deleting, false); + } + + // Soft delete the volume, and validate that only three region_snapshot + // records are returned. 
+ + let cr = datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + // Make sure every region_snapshot is now 0, and deleting + + for i in 0..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + + match cr { + nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.datasets_and_snapshots.len(), 3); + } + + nexus_db_queries::db::datastore::CrucibleResources::V2(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.snapshots_to_delete.len(), 3); + } + } +} + #[nexus_test] async fn test_disk_create_saga_unwinds_correctly( cptestctx: &ControlPlaneTestContext, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index ca2f737cb0..1d7f5556c2 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -172,6 +172,10 @@ silo_view GET /v1/system/silos/{silo} user_builtin_list GET /v1/system/users-builtin user_builtin_view GET /v1/system/users-builtin/{user} +API operations found with tag "system/status" +OPERATION ID METHOD URL PATH +ping GET /v1/ping + API operations found with tag "vpcs" OPERATION ID METHOD URL PATH vpc_create POST /v1/vpcs diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index 0e53222a8a..d76d9c5495 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -1,4 +1,5 @@ API endpoints with no coverage in authz tests: +ping (get "/v1/ping") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 4b30b0be1c..ef3835c618 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -522,3 +522,18 @@ pub struct UpdateDeployment { pub version: SemverVersion, pub status: UpdateStatus, } + +// SYSTEM HEALTH + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum PingStatus { + Ok, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] +pub struct Ping { + /// Whether the external API is reachable. Will always be Ok if the endpoint + /// returns anything at all. + pub status: PingStatus, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index 9330b0ef47..9dda94f283 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2816,6 +2816,34 @@ } } }, + "/v1/ping": { + "get": { + "tags": [ + "system/status" + ], + "summary": "Ping API", + "description": "Always responds with Ok if it responds at all.", + "operationId": "ping", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Ping" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/policy": { "get": { "tags": [ @@ -12031,6 +12059,28 @@ "items" ] }, + "Ping": { + "type": "object", + "properties": { + "status": { + "description": "Whether the external API is reachable. 
Will always be Ok if the endpoint returns anything at all.",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/PingStatus"
+              }
+            ]
+          }
+        },
+        "required": [
+          "status"
+        ]
+      },
+      "PingStatus": {
+        "type": "string",
+        "enum": [
+          "ok"
+        ]
+      },
       "Project": {
         "description": "View of a Project",
         "type": "object",
@@ -15277,6 +15327,13 @@
         "url": "http://docs.oxide.computer/api/system-silos"
       }
     },
+    {
+      "name": "system/status",
+      "description": "Endpoints related to system health",
+      "externalDocs": {
+        "url": "http://docs.oxide.computer/api/system-status"
+      }
+    },
     {
       "name": "system/update"
     },
diff --git a/package-manifest.toml b/package-manifest.toml
index a7f8683eee..7cf235c24a 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -406,10 +406,10 @@ service_name = "propolis-server"
 only_for_targets.image = "standard"
 source.type = "prebuilt"
 source.repo = "propolis"
-source.commit = "42c878b71a58d430dfc306126af5d40ca816d70f"
+source.commit = "901b710b6e5bd05a94a323693c2b971e7e7b240e"
 # The SHA256 digest is automatically posted to:
 # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt
-source.sha256 = "dce4d82bb936e990262abcaa279eee7e33a19930880b23f49fa3851cded18567"
+source.sha256 = "0f681cdbe7312f66fd3c99fe033b379e49c59fa4ad04d307f68b12514307e976"
 output.type = "zone"
 
 [package.maghemite]
diff --git a/schema/crdb/6.0.0/up1.sql b/schema/crdb/6.0.0/up1.sql
new file mode 100644
index 0000000000..4a3cdc302e
--- /dev/null
+++ b/schema/crdb/6.0.0/up1.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.region_snapshot ADD COLUMN IF NOT EXISTS deleting BOOL NOT NULL DEFAULT false;
diff --git a/schema/crdb/6.0.0/up2.sql b/schema/crdb/6.0.0/up2.sql
new file mode 100644
index 0000000000..77c136a3bf
--- /dev/null
+++ b/schema/crdb/6.0.0/up2.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.region_snapshot ALTER COLUMN deleting DROP DEFAULT;
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index d817e20c7b..2b06e4cbd6 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -505,6 +505,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot (
     /* How many volumes reference this? */
     volume_references INT8 NOT NULL,
 
+    /* Is this currently part of some resources_to_delete? */
+    deleting BOOL NOT NULL,
+
     PRIMARY KEY (dataset_id, region_id, snapshot_id)
 );
 
diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs
new file mode 100644
index 0000000000..5014ac5999
--- /dev/null
+++ b/sled-agent/src/backing_fs.rs
@@ -0,0 +1,178 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Operations for dealing with persistent backing mounts for OS data
+
+// On Oxide hardware, the root filesystem is backed by a ramdisk and
+// non-persistent. However, there are several things within the root filesystem
+// which are useful to preserve across reboots, and these are backed by
+// persistent datasets on the boot disk.
+//
+// Each boot disk contains a dataset sled_hardware::disk::M2_BACKING_DATASET
+// and for each backing mount, a child dataset is created under there that
+// is configured with the desired mountpoint in the root filesystem. Since
+// there are multiple disks which can be used to boot, these datasets are also
+// marked with the "canmount=noauto" attribute so that they do not all try to
+// mount automatically and race -- only one could ever succeed. This allows us
+// to come along later and specifically mount the one that we want (the one from
+// the current boot disk) and also perform an overlay mount so that it succeeds
+// even if there is content from the ramdisk image or early boot services
+// present underneath. The overlay mount action is optionally bracketed with a
+// service stop/start.
+
+use camino::Utf8PathBuf;
+use illumos_utils::zfs::{
+    EnsureFilesystemError, GetValueError, Mountpoint, SizeDetails, Zfs,
+};
+
+#[derive(Debug, thiserror::Error)]
+pub enum BackingFsError {
+    #[error("Error administering service: {0}")]
+    Adm(#[from] smf::AdmError),
+
+    #[error("Error retrieving dataset property: {0}")]
+    DatasetProperty(#[from] GetValueError),
+
+    #[error("Error initializing dataset: {0}")]
+    Mount(#[from] EnsureFilesystemError),
+}
+
+struct BackingFs {
+    // Dataset name
+    name: &'static str,
+    // Mountpoint
+    mountpoint: &'static str,
+    // Optional quota, in _bytes_
+    quota: Option<usize>,
+    // Optional compression mode
+    compression: Option<&'static str>,
+    // Linked service
+    service: Option<&'static str>,
+}
+
+impl BackingFs {
+    const fn new(name: &'static str) -> Self {
+        Self {
+            name,
+            mountpoint: "legacy",
+            quota: None,
+            compression: None,
+            service: None,
+        }
+    }
+
+    const fn mountpoint(mut self, mountpoint: &'static str) -> Self {
+        self.mountpoint = mountpoint;
+        self
+    }
+
+    const fn quota(mut self, quota: usize) -> Self {
+        self.quota = Some(quota);
+        self
+    }
+
+    const fn compression(mut self, compression: &'static str) -> Self {
+        self.compression = Some(compression);
+        self
+    }
+
+    const fn service(mut self, service: &'static str) -> Self {
+        self.service = Some(service);
+        self
+    }
+}
+
+const BACKING_FMD_DATASET: &'static str = "fmd";
+const BACKING_FMD_MOUNTPOINT: &'static str = "/var/fm/fmd";
+const BACKING_FMD_SERVICE: &'static str = "svc:/system/fmd:default";
+const BACKING_FMD_QUOTA: usize = 500 * (1 << 20); // 500 MiB
+
+const BACKING_COMPRESSION: &'static str = "on";
+
+const BACKINGFS_COUNT: usize = 1;
+static BACKINGFS: [BackingFs; BACKINGFS_COUNT] =
+    [BackingFs::new(BACKING_FMD_DATASET)
+        .mountpoint(BACKING_FMD_MOUNTPOINT)
+        .quota(BACKING_FMD_QUOTA)
+        .compression(BACKING_COMPRESSION)
+        .service(BACKING_FMD_SERVICE)];
+
+/// Ensure that the backing filesystems are mounted.
+/// If the underlying dataset for a backing fs does not exist on the specified
+/// boot disk then it will be created.
+pub(crate) fn ensure_backing_fs(
+    log: &slog::Logger,
+    boot_zpool_name: &illumos_utils::zpool::ZpoolName,
+) -> Result<(), BackingFsError> {
+    let log = log.new(o!(
+        "component" => "BackingFs",
+    ));
+    for bfs in BACKINGFS.iter() {
+        info!(log, "Processing {}", bfs.name);
+
+        let dataset = format!(
+            "{}/{}/{}",
+            boot_zpool_name,
+            sled_hardware::disk::M2_BACKING_DATASET,
+            bfs.name
+        );
+        let mountpoint = Mountpoint::Path(Utf8PathBuf::from(bfs.mountpoint));
+
+        info!(log, "Ensuring dataset {}", dataset);
+
+        let size_details = Some(SizeDetails {
+            quota: bfs.quota,
+            compression: bfs.compression,
+        });
+
+        Zfs::ensure_filesystem(
+            &dataset,
+            mountpoint.clone(),
+            false, // zoned
+            true,  // do_format
+            None,  // encryption_details,
+            size_details,
+            Some(vec!["canmount=noauto".to_string()]), // options
+        )?;
+
+        // Check if a ZFS filesystem is already mounted on bfs.mountpoint by
+        // retrieving the ZFS `mountpoint` property and comparing it. This
+        // might seem counter-intuitive but if there is a filesystem mounted
+        // there, its mountpoint will match, and if not then we will retrieve
+        // the mountpoint of a higher level filesystem, such as '/'. If we
+        // can't retrieve the property at all, then there is definitely no ZFS
+        // filesystem mounted there - most likely we are running with a non-ZFS
+        // root, such as when net booted during CI.
+        if Zfs::get_value(&bfs.mountpoint, "mountpoint")
+            .unwrap_or("not-zfs".to_string())
+            == bfs.mountpoint
+        {
+            info!(log, "{} is already mounted", bfs.mountpoint);
+            continue;
+        }
+
+        if let Some(service) = bfs.service {
+            info!(log, "Stopping service {}", service);
+            smf::Adm::new()
+                .disable()
+                .temporary()
+                .synchronous()
+                .run(smf::AdmSelection::ByPattern(&[service]))?;
+        }
+
+        info!(log, "Mounting {} on {}", dataset, mountpoint);
+
+        Zfs::mount_overlay_dataset(&dataset, &mountpoint)?;
+
+        if let Some(service) = bfs.service {
+            info!(log, "Starting service {}", service);
+            smf::Adm::new()
+                .enable()
+                .synchronous()
+                .run(smf::AdmSelection::ByPattern(&[service]))?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs
index 0899bdd82f..71325fef3d 100644
--- a/sled-agent/src/bootstrap/pre_server.rs
+++ b/sled-agent/src/bootstrap/pre_server.rs
@@ -381,6 +381,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> {
         do_format,
         encryption_details,
         quota,
+        None,
     )
     .map_err(StartError::EnsureZfsRamdiskDataset)
 }
diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs
index 5c4dbd8310..4e7921c605 100644
--- a/sled-agent/src/lib.rs
+++ b/sled-agent/src/lib.rs
@@ -17,6 +17,7 @@ pub mod sim;
 pub mod common;
 
 // Modules for the non-simulated sled agent.
+mod backing_fs;
 pub mod bootstrap;
 pub mod config;
 mod http_entrypoints;
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index 273bb58873..b6f910220e 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -62,9 +62,15 @@ use illumos_utils::{dladm::MockDladm as Dladm, zone::MockZones as Zones};
 
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
+    #[error("Could not find boot disk")]
+    BootDiskNotFound,
+
     #[error("Configuration error: {0}")]
     Config(#[from] crate::config::ConfigError),
 
+    #[error("Error setting up backing filesystems: {0}")]
+    BackingFs(#[from] crate::backing_fs::BackingFsError),
+
     #[error("Error setting up swap device: {0}")]
     SwapDevice(#[from] crate::swap_device::SwapDeviceError),
 
@@ -271,14 +277,17 @@ impl SledAgent {
         ));
         info!(&log, "SledAgent::new(..) starting");
 
-        // Configure a swap device of the configured size before other system setup.
+        let boot_disk = storage
+            .resources()
+            .boot_disk()
+            .await
+            .ok_or_else(|| Error::BootDiskNotFound)?;
+
+        // Configure a swap device of the configured size before other system
+        // setup.
         match config.swap_device_size_gb {
             Some(sz) if sz > 0 => {
                 info!(log, "Requested swap device of size {} GiB", sz);
-                let boot_disk =
-                    storage.resources().boot_disk().await.ok_or_else(|| {
-                        crate::swap_device::SwapDeviceError::BootDiskNotFound
-                    })?;
                 crate::swap_device::ensure_swap_device(
                     &parent_log,
                     &boot_disk.1,
@@ -293,6 +302,9 @@ impl SledAgent {
             }
         }
 
+        info!(log, "Mounting backing filesystems");
+        crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1)?;
+
         // Ensure we have a thread that automatically reaps process contracts
        // when they become empty. See the comments in
        // illumos-utils/src/running_zone.rs for more detail.
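A note on the mounted-check inside ensure_backing_fs above: the detection asks ZFS for the `mountpoint` property of the *path* rather than of a dataset. If a filesystem is mounted there, the property comes back equal to the path; if not, ZFS answers for whichever higher-level filesystem contains the path (for example '/'), and the comparison fails. A minimal sketch of that predicate, assuming the same `Zfs::get_value` helper from illumos_utils that the patch uses; the standalone function name `is_zfs_mounted_at` is illustrative only and not part of the patch:

    /// Returns true if a ZFS filesystem is mounted exactly at `path`.
    /// If `path` is not ZFS-backed at all, the property lookup fails and we
    /// report it as unmounted (e.g. a non-ZFS root when net booted in CI).
    fn is_zfs_mounted_at(path: &str) -> bool {
        illumos_utils::zfs::Zfs::get_value(path, "mountpoint")
            .map(|mountpoint| mountpoint == path)
            .unwrap_or(false)
    }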
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bd71371396..c31a4dc0bc 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -417,6 +417,7 @@ impl StorageWorker { do_format, encryption_details, size_details, + None, )?; // Ensure the dataset has a usable UUID. if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 5a8f40adbd..6a00b42672 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -9,9 +9,6 @@ use zeroize::Zeroize; #[derive(Debug, thiserror::Error)] pub enum SwapDeviceError { - #[error("Could not find boot disk")] - BootDiskNotFound, - #[error("Error running ZFS command: {0}")] Zfs(illumos_utils::ExecutionError), diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index aec99ae3f8..e3078cbeea 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -256,6 +256,7 @@ pub const CRASH_DATASET: &'static str = "crash"; pub const CLUSTER_DATASET: &'static str = "cluster"; pub const CONFIG_DATASET: &'static str = "config"; pub const M2_DEBUG_DATASET: &'static str = "debug"; +pub const M2_BACKING_DATASET: &'static str = "backing"; // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be // tuned as needed. pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); @@ -282,7 +283,7 @@ static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ .compression(DUMP_DATASET_COMPRESSION), ]; -const M2_EXPECTED_DATASET_COUNT: usize = 5; +const M2_EXPECTED_DATASET_COUNT: usize = 6; static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ // Stores software images. // @@ -290,7 +291,11 @@ static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ ExpectedDataset::new(INSTALL_DATASET), // Stores crash dumps. ExpectedDataset::new(CRASH_DATASET), - // Stores cluter configuration information. + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), + // Stores cluster configuration information. // // Should be duplicated to both M.2s. ExpectedDataset::new(CLUSTER_DATASET), @@ -524,6 +529,7 @@ impl Disk { do_format, Some(encryption_details), None, + None, ); keyfile.zero_and_unlink().await.map_err(|error| { @@ -562,8 +568,8 @@ impl Disk { "Automatically destroying dataset: {}", name ); Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it might - // not have been formatted yet. + // If we can't find the dataset, that's fine -- it + // might not have been formatted yet. 
                         if let DestroyDatasetErrorVariant::NotFound = err.err {
@@ -588,6 +594,7 @@ impl Disk {
             do_format,
             encryption_details,
             size_details,
+            None,
         )?;
 
         if dataset.wipe {
diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml
index 9e21f3ca12..7b1f70c79e 100644
--- a/test-utils/Cargo.toml
+++ b/test-utils/Cargo.toml
@@ -6,20 +6,26 @@ license = "MPL-2.0"
 
 [dependencies]
 anyhow.workspace = true
+atomicwrites.workspace = true
 camino.workspace = true
+camino-tempfile.workspace = true
 dropshot.workspace = true
+filetime = { workspace = true, optional = true }
 futures.workspace = true
 headers.workspace = true
+hex.workspace = true
 http.workspace = true
 libc.workspace = true
 omicron-common.workspace = true
 pem.workspace = true
+ring.workspace = true
 rustls.workspace = true
 slog.workspace = true
 subprocess.workspace = true
 tempfile.workspace = true
 thiserror.workspace = true
-tokio = { workspace = true, features = [ "full" ] }
+tar.workspace = true
+tokio = { workspace = true, features = ["full"] }
 tokio-postgres.workspace = true
 usdt.workspace = true
 rcgen.workspace = true
@@ -29,3 +35,6 @@ omicron-workspace-hack.workspace = true
 
 [dev-dependencies]
 expectorate.workspace = true
+
+[features]
+seed-gen = ["dep:filetime"]
diff --git a/test-utils/src/dev/mod.rs b/test-utils/src/dev/mod.rs
index ea95a1de76..dbd66fe1f8 100644
--- a/test-utils/src/dev/mod.rs
+++ b/test-utils/src/dev/mod.rs
@@ -9,55 +9,21 @@ pub mod clickhouse;
 pub mod db;
 pub mod dendrite;
 pub mod poll;
+#[cfg(feature = "seed-gen")]
+pub mod seed;
 pub mod test_cmds;
 
-use anyhow::Context;
-use camino::Utf8Path;
+use anyhow::{Context, Result};
 use camino::Utf8PathBuf;
 pub use dropshot::test_util::LogContext;
 use dropshot::ConfigLogging;
 use dropshot::ConfigLoggingIfExists;
 use dropshot::ConfigLoggingLevel;
 use slog::Logger;
-use std::path::Path;
+use std::io::BufReader;
 
-// Helper for copying all the files in one directory to another.
-fn copy_dir(
-    src: impl AsRef<Path>,
-    dst: impl AsRef<Path>,
-) -> Result<(), anyhow::Error> {
-    let src = src.as_ref();
-    let dst = dst.as_ref();
-    std::fs::create_dir_all(&dst)
-        .with_context(|| format!("Failed to create dst {}", dst.display()))?;
-    for entry in std::fs::read_dir(src)
-        .with_context(|| format!("Failed to read_dir {}", src.display()))?
-    {
-        let entry = entry.with_context(|| {
-            format!("Failed to read entry in {}", src.display())
-        })?;
-        let ty = entry.file_type().context("Failed to access file type")?;
-        let target = dst.join(entry.file_name());
-        if ty.is_dir() {
-            copy_dir(entry.path(), &target).with_context(|| {
-                format!(
-                    "Failed to copy subdirectory {} to {}",
-                    entry.path().display(),
-                    target.display()
-                )
-            })?;
-        } else {
-            std::fs::copy(entry.path(), &target).with_context(|| {
-                format!(
-                    "Failed to copy file at {} to {}",
-                    entry.path().display(),
-                    target.display()
-                )
-            })?;
-        }
-    }
-    Ok(())
-}
+/// The environment variable via which the path to the seed tarball is passed.
+pub static CRDB_SEED_TAR_ENV: &str = "CRDB_SEED_TAR";
 
 /// Set up a [`dropshot::test_util::LogContext`] appropriate for a test named
 /// `test_name`
@@ -80,36 +46,9 @@ pub enum StorageSource {
     DoNotPopulate,
     /// Populate the latest version of the database.
     PopulateLatest { output_dir: Utf8PathBuf },
-    /// Copy the database from a seed directory, which has previously
+    /// Copy the database from a seed tarball, which has previously
     /// been created with `PopulateLatest`.
-    CopyFromSeed { input_dir: Utf8PathBuf },
-}
-
-/// Creates a [`db::CockroachInstance`] with a populated storage directory.
-///
-/// This is intended to optimize subsequent calls to [`test_setup_database`]
-/// by reducing the latency of populating the storage directory.
-pub async fn test_setup_database_seed(log: &Logger, dir: &Utf8Path) {
-    let _ = std::fs::remove_dir_all(dir);
-    std::fs::create_dir_all(dir).unwrap();
-    let mut db = setup_database(
-        log,
-        StorageSource::PopulateLatest { output_dir: dir.to_owned() },
-    )
-    .await;
-    db.cleanup().await.unwrap();
-
-    // See https://github.com/cockroachdb/cockroach/issues/74231 for context on
-    // this. We use this assertion to check that our seed directory won't point
-    // back to itself, even if it is copied elsewhere.
-    assert_eq!(
-        0,
-        dir.join("temp-dirs-record.txt")
-            .metadata()
-            .expect("Cannot access metadata")
-            .len(),
-        "Temporary directory record should be empty after graceful shutdown",
-    );
+    CopyFromSeed { input_tar: Utf8PathBuf },
 }
 
 /// Set up a [`db::CockroachInstance`] for running tests.
@@ -118,13 +57,15 @@ pub async fn test_setup_database(
     source: StorageSource,
 ) -> db::CockroachInstance {
     usdt::register_probes().expect("Failed to register USDT DTrace probes");
-    setup_database(log, source).await
+    setup_database(log, source).await.unwrap()
 }
 
+// TODO: switch to anyhow entirely -- this function is currently a mishmash of
+// anyhow and unwrap/expect calls.
 async fn setup_database(
     log: &Logger,
     storage_source: StorageSource,
-) -> db::CockroachInstance {
+) -> Result<db::CockroachInstance> {
     let builder = db::CockroachStarterBuilder::new();
     let mut builder = match &storage_source {
         StorageSource::DoNotPopulate | StorageSource::CopyFromSeed { .. } => {
@@ -135,7 +76,7 @@ async fn setup_database(
         }
     };
     builder.redirect_stdio_to_files();
-    let starter = builder.build().unwrap();
+    let starter = builder.build().context("error building CockroachStarter")?;
     info!(
         &log,
         "cockroach temporary directory: {}",
@@ -147,13 +88,22 @@ async fn setup_database(
     match &storage_source {
         StorageSource::DoNotPopulate | StorageSource::PopulateLatest { .. } => {
        }
         StorageSource::CopyFromSeed { input_tar } => {
             info!(&log,
-                "cockroach: copying from seed directory ({}) to storage directory ({})",
-                input_dir, starter.store_dir().to_string_lossy(),
+                "cockroach: copying from seed tarball ({}) to storage directory ({})",
+                input_tar, starter.store_dir().to_string_lossy(),
             );
-            copy_dir(input_dir, starter.store_dir())
-                .expect("Cannot copy storage from seed directory");
+            let reader = std::fs::File::open(input_tar).with_context(|| {
+                format!("cannot open input tar {}", input_tar)
+            })?;
+            let mut tar = tar::Archive::new(BufReader::new(reader));
+            tar.unpack(starter.store_dir()).with_context(|| {
+                format!(
+                    "cannot unpack input tar {} into {}",
+                    input_tar,
+                    starter.store_dir().display()
+                )
+            })?;
         }
     }
 
@@ -184,7 +134,8 @@ async fn setup_database(
             info!(&log, "cockroach: populated");
         }
     }
-    database
+
+    Ok(database)
 }
 
 /// Returns whether the given process is currently running
diff --git a/test-utils/src/dev/seed.rs b/test-utils/src/dev/seed.rs
new file mode 100644
index 0000000000..841ecd5f35
--- /dev/null
+++ b/test-utils/src/dev/seed.rs
@@ -0,0 +1,239 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::io::{BufWriter, Write};
+
+use anyhow::{ensure, Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use filetime::FileTime;
+use slog::Logger;
+
+use super::CRDB_SEED_TAR_ENV;
+
+/// Creates a string identifier for the current DB schema and version.
+///
+/// The goal here is to allow creating different "seed" tarballs
+/// for each revision of the DB.
+pub fn digest_unique_to_schema() -> String {
+    let schema = include_str!("../../../schema/crdb/dbinit.sql");
+    let crdb_version = include_str!("../../../tools/cockroachdb_version");
+    let mut ctx = ring::digest::Context::new(&ring::digest::SHA256);
+    ctx.update(&schema.as_bytes());
+    ctx.update(&crdb_version.as_bytes());
+    let digest = ctx.finish();
+    hex::encode(digest.as_ref())
+}
+
+/// Looks up the standard environment variable `CRDB_SEED_INVALIDATE` to check
+/// if a seed should be invalidated. Returns a string to pass in as the
+/// `why_invalidate` argument of [`ensure_seed_tarball_exists`].
+pub fn should_invalidate_seed() -> Option<&'static str> {
+    (std::env::var("CRDB_SEED_INVALIDATE").as_deref() == Ok("1"))
+        .then_some("CRDB_SEED_INVALIDATE=1 set in environment")
+}
+
+/// The return value of [`ensure_seed_tarball_exists`].
+#[derive(Clone, Copy, Debug)]
+pub enum SeedTarballStatus {
+    Created,
+    Invalidated,
+    Existing,
+}
+
+impl SeedTarballStatus {
+    pub fn log(self, log: &Logger, seed_tar: &Utf8Path) {
+        match self {
+            SeedTarballStatus::Created => {
+                info!(log, "Created CRDB seed tarball: `{seed_tar}`");
+            }
+            SeedTarballStatus::Invalidated => {
+                info!(
+                    log,
+                    "Invalidated and created new CRDB seed tarball: `{seed_tar}`",
+                );
+            }
+            SeedTarballStatus::Existing => {
+                info!(log, "Using existing CRDB seed tarball: `{seed_tar}`");
+            }
+        }
+    }
+}
+
+/// Ensures that a seed tarball corresponding to the schema returned by
+/// [`digest_unique_to_schema`] exists, recreating it if necessary.
+///
+/// This used to create a directory rather than a tarball, but that was changed
+/// due to [Omicron issue
+/// #4193](https://github.com/oxidecomputer/omicron/issues/4193).
+///
+/// If `why_invalidate` is `Some`, then if the seed tarball exists, it will be
+/// deleted before being recreated.
+///
+/// # Notes
+///
+/// This method should _not_ be used by tests. Instead, rely on the `crdb-seed`
+/// setup script.
+pub async fn ensure_seed_tarball_exists(
+    log: &Logger,
+    why_invalidate: Option<&str>,
+) -> Result<(Utf8PathBuf, SeedTarballStatus)> {
+    // If the CRDB_SEED_TAR_ENV variable is set, return an error.
+    //
+    // Even though this module is gated behind a feature flag, omicron-dev needs
+    // this function -- and so, if you're doing a top-level `cargo nextest run`
+    // like CI does, feature unification would mean this gets included in test
+    // binaries anyway. So this acts as a belt-and-suspenders check.
+    if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) {
+        anyhow::bail!(
+            "{CRDB_SEED_TAR_ENV} is set to `{val}` -- implying that a test called \
+             ensure_seed_tarball_exists. Instead, tests should rely on the `crdb-seed` \
+             setup script."
+        );
+    }
+
+    // XXX: we aren't considering cross-user permissions for this file. Might be
+    // worth setting more restrictive permissions on it, or using a per-user
+    // cache dir.
+    let base_seed_dir = Utf8PathBuf::from_path_buf(std::env::temp_dir())
+        .expect("Not a UTF-8 path")
+        .join("crdb-base");
+    std::fs::create_dir_all(&base_seed_dir).unwrap();
+    let mut desired_seed_tar = base_seed_dir.join(digest_unique_to_schema());
+    desired_seed_tar.set_extension("tar");
+
+    let invalidated = match (desired_seed_tar.exists(), why_invalidate) {
+        (true, Some(why)) => {
+            slog::info!(
+                log,
+                "{why}: invalidating seed tarball: `{desired_seed_tar}`",
+            );
+            std::fs::remove_file(&desired_seed_tar)
+                .context("failed to remove seed tarball")?;
+            true
+        }
+        (true, None) => {
+            // The tarball exists. Update its atime and mtime (i.e. `touch` it)
+            // to ensure that it doesn't get deleted by a /tmp cleaner.
+            let now = FileTime::now();
+            filetime::set_file_times(&desired_seed_tar, now, now)
+                .context("failed to update seed tarball atime and mtime")?;
+            return Ok((desired_seed_tar, SeedTarballStatus::Existing));
+        }
+        (false, Some(why)) => {
+            slog::info!(
+                log,
+                "{why}, but seed tarball does not exist: `{desired_seed_tar}`",
+            );
+            false
+        }
+        (false, None) => {
+            // The tarball doesn't exist.
+            false
+        }
+    };
+
+    // At this point the tarball does not exist (either because it didn't exist
+    // in the first place or because it was deleted above), so try to create it.
+    //
+    // Nextest will execute this function just once via the `crdb-seed` binary,
+    // but it is possible for a user to start up multiple nextest processes at
+    // the same time. So we should consider it possible for another caller to
+    // create this seed tarball before we finish setting it up ourselves.
+    test_setup_database_seed(log, &desired_seed_tar)
+        .await
+        .context("failed to setup seed tarball")?;
+
+    let status = if invalidated {
+        SeedTarballStatus::Invalidated
+    } else {
+        SeedTarballStatus::Created
+    };
+    Ok((desired_seed_tar, status))
+}
+
+/// Creates a seed tarball for a Cockroach database at `output_tar`.
+///
+/// This is intended to optimize subsequent calls to
+/// [`test_setup_database`](super::test_setup_database) by reducing the latency
+/// of populating the storage directory.
+pub async fn test_setup_database_seed(
+    log: &Logger,
+    output_tar: &Utf8Path,
+) -> Result<()> {
+    let base_seed_dir = output_tar.parent().unwrap();
+    let tmp_seed_dir = camino_tempfile::Utf8TempDir::new_in(base_seed_dir)
+        .context("failed to create temporary seed directory")?;
+
+    let mut db = super::setup_database(
+        log,
+        super::StorageSource::PopulateLatest {
+            output_dir: tmp_seed_dir.path().to_owned(),
+        },
+    )
+    .await
+    .context("failed to setup database")?;
+    db.cleanup().await.context("failed to cleanup database")?;
+
+    // See https://github.com/cockroachdb/cockroach/issues/74231 for context on
+    // this. We use this assertion to check that our seed directory won't point
+    // back to itself, even if it is copied elsewhere.
+    let dirs_record_path = tmp_seed_dir.path().join("temp-dirs-record.txt");
+    let dirs_record_len = dirs_record_path
+        .metadata()
+        .with_context(|| {
+            format!("cannot access metadata for {dirs_record_path}")
+        })?
+        .len();
+    ensure!(
+        dirs_record_len == 0,
+        "Temporary directory record should be empty (was {dirs_record_len}) \
+         after graceful shutdown",
+    );
+
+    let output_tar = output_tar.to_owned();
+
+    tokio::task::spawn_blocking(move || {
+        // Tar up the directory -- this prevents issues where some but not all of
+        // the files get cleaned up by /tmp cleaners. See
+        // https://github.com/oxidecomputer/omicron/issues/4193.
+ let atomic_file = atomicwrites::AtomicFile::new( + &output_tar, + // We don't expect this to exist, but if it does, we want to overwrite + // it. That is because there's a remote possibility that multiple + // instances of test_setup_database_seed are running simultaneously. + atomicwrites::OverwriteBehavior::AllowOverwrite, + ); + let res = atomic_file.write(|f| { + // Tar up the directory here. + let writer = BufWriter::new(f); + let mut tar = tar::Builder::new(writer); + tar.follow_symlinks(false); + tar.append_dir_all(".", tmp_seed_dir.path()).with_context( + || { + format!( + "failed to append directory `{}` to tarball", + tmp_seed_dir.path(), + ) + }, + )?; + + let mut writer = + tar.into_inner().context("failed to finish writing tarball")?; + writer.flush().context("failed to flush tarball")?; + + Ok::<_, anyhow::Error>(()) + }); + match res { + Ok(()) => Ok(()), + Err(atomicwrites::Error::Internal(error)) => Err(error) + .with_context(|| { + format!("failed to write seed tarball: `{}`", output_tar) + }), + Err(atomicwrites::Error::User(error)) => Err(error), + } + }) + .await + .context("error in task to tar up contents")? +} diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index d37d428476..7975a310f0 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="41b3a67b3d44f51528816ff8e539b4001df48305" +SOFTNPU_COMMIT="eb27e6a00f1082c9faac7cf997e57d0609f7a309" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index dd6d9af9dd..95c2aa63df 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -37,7 +37,7 @@ function ensure_simulated_links { dladm create-simnet -t "net$I" dladm create-simnet -t "sc${I}_0" dladm modify-simnet -t -p "net$I" "sc${I}_0" - dladm set-linkprop -p mtu=1600 "sc${I}_0" # encap headroom + dladm set-linkprop -p mtu=9000 "sc${I}_0" # match emulated devices fi success "Simnet net$I/sc${I}_0 exists" done diff --git a/tools/destroy_virtual_hardware.sh b/tools/destroy_virtual_hardware.sh index ae6fef0673..46c6f117c4 100755 --- a/tools/destroy_virtual_hardware.sh +++ b/tools/destroy_virtual_hardware.sh @@ -56,7 +56,23 @@ function remove_softnpu_zone { --ports sc0_1,tfportqsfp0_0 } +# Some services have their working data overlaid by backing mounts from the +# internal boot disk. Before we can destroy the ZFS pools, we need to unmount +# these. + +BACKED_SERVICES="svc:/system/fmd:default" + +function demount_backingfs { + svcadm disable -st $BACKED_SERVICES + zpool list -Hpo name | grep '^oxi_' \ + | xargs -i zfs list -Hpo name,canmount,mounted -r {}/backing \ + | awk '$3 == "yes" && $2 == "noauto" { print $1 }' \ + | xargs -l zfs umount + svcadm enable -st $BACKED_SERVICES +} + verify_omicron_uninstalled +demount_backingfs unload_xde_driver remove_softnpu_zone try_remove_vnics diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh new file mode 100755 index 0000000000..2699f9f6a0 --- /dev/null +++ b/tools/renovate-post-upgrade.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# This script is run after Renovate upgrades dependencies or lock files. + +set -euo pipefail + +# Function to retry a command up to 3 times. +function retry_command { + local retries=3 + local delay=5 + local count=0 + until "$@"; do + exit_code=$? 
+ count=$((count+1)) + if [ $count -lt $retries ]; then + echo "Command failed with exit code $exit_code. Retrying in $delay seconds..." + sleep $delay + else + echo "Command failed with exit code $exit_code after $count attempts." + return $exit_code + fi + done +} + +# If cargo isn't present, skip this -- it implies that a non-Rust dependency was +# updated. +if ! command -v cargo &> /dev/null; then + echo "Skipping cargo-hakari update because cargo is not present." + exit 0 +fi + +# Download and install cargo-hakari if it is not already installed. +if ! command -v cargo-hakari &> /dev/null; then + # Need cargo-binstall to install cargo-hakari. + if ! command -v cargo-binstall &> /dev/null; then + # Fetch cargo binstall. + echo "Installing cargo-binstall..." + curl --retry 3 -L --proto '=https' --tlsv1.2 -sSfO https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh + retry_command bash install-from-binstall-release.sh + fi + + # Install cargo-hakari. + echo "Installing cargo-hakari..." + retry_command cargo binstall cargo-hakari --no-confirm +fi + +# Run cargo hakari to regenerate the workspace-hack file. +echo "Running cargo-hakari..." +cargo hakari generate diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 3de0e45f24..1779ef7da6 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -1389,7 +1389,10 @@ mod tests { test_cx .run_filtered_test( "all events passed in", - |buffer, event| buffer.add_event(event.clone()), + |buffer, event| { + buffer.add_event(event.clone()); + true + }, WithDeltas::No, ) .unwrap(); @@ -1397,10 +1400,12 @@ mod tests { test_cx .run_filtered_test( "progress events skipped", - |buffer, event| { - if let Event::Step(event) = event { + |buffer, event| match event { + Event::Step(event) => { buffer.add_step_event(event.clone()); + true } + Event::Progress(_) => false, }, WithDeltas::Both, ) @@ -1410,13 +1415,16 @@ mod tests { .run_filtered_test( "low-priority events skipped", |buffer, event| match event { - Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(event) => { buffer.add_progress_event(event.clone()); + true } }, WithDeltas::Both, @@ -1427,13 +1435,16 @@ mod tests { .run_filtered_test( "low-priority and progress events skipped", |buffer, event| match event { - Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(_) => { - // Don't add progress events either. + // Don't add progress events. 
+                        false
                     }
                 },
                 WithDeltas::Both,
             )
             .unwrap();
@@ -1565,7 +1576,10 @@ fn run_filtered_test(
         &self,
         event_fn_description: &str,
-        mut event_fn: impl FnMut(&mut EventBuffer<TestSpec>, &Event<TestSpec>),
+        mut event_fn: impl FnMut(
+            &mut EventBuffer<TestSpec>,
+            &Event<TestSpec>,
+        ) -> bool,
         with_deltas: WithDeltas,
     ) -> anyhow::Result<()> {
         match with_deltas {
@@ -1590,7 +1604,10 @@ fn run_filtered_test_inner(
         &self,
-        mut event_fn: impl FnMut(&mut EventBuffer<TestSpec>, &Event<TestSpec>),
+        mut event_fn: impl FnMut(
+            &mut EventBuffer<TestSpec>,
+            &Event<TestSpec>,
+        ) -> bool,
         with_deltas: bool,
     ) -> anyhow::Result<()> {
         let description = format!("with deltas = {with_deltas}");
@@ -1608,8 +1625,9 @@
         let mut last_seen_opt = with_deltas.then_some(None);
 
         for (i, event) in self.generated_events.iter().enumerate() {
-            (event_fn)(&mut buffer, event);
-            buffer.add_event(event.clone());
+            // Going to use event_added in an upcoming commit.
+            let _event_added = (event_fn)(&mut buffer, event);
+
             let report = match last_seen_opt {
                 Some(last_seen) => buffer.generate_report_since(last_seen),
                 None => buffer.generate_report(),
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 8854ef27bc..a91477678b 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -73,7 +73,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] }
 phf_shared = { version = "0.11.2" }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
-predicates = { version = "3.0.3" }
+predicates = { version = "3.0.4" }
 rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] }
 rand_chacha = { version = "0.3.1" }
 regex = { version = "1.9.5" }
@@ -171,7 +171,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] }
 phf_shared = { version = "0.11.2" }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
 ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
-predicates = { version = "3.0.3" }
+predicates = { version = "3.0.4" }
 rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] }
 rand_chacha = { version = "0.3.1" }
 regex = { version = "1.9.5" }
@@ -215,49 +215,56 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f
 hyper-rustls = { version = "0.24.1" }
 mio = { version = "0.8.8", features = ["net", "os-ext"] }
 once_cell = { version = "1.18.0", features = ["unstable"] }
-rustix = { version = "0.38.9", features = ["fs", "termios"] }
+rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] }
+rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] }
 
 [target.x86_64-unknown-linux-gnu.build-dependencies]
 bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] }
 hyper-rustls = { version = "0.24.1" }
 mio = { version = "0.8.8", features = ["net", "os-ext"] }
 once_cell = { version = "1.18.0", features = ["unstable"] }
-rustix = { version = "0.38.9", features = ["fs", "termios"] }
+rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] }
+rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] }
 
 [target.x86_64-apple-darwin.dependencies]
 bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0",
default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } toml_datetime = { version = "0.6.3", default-features = false, features = ["serde"] } toml_edit = { version = "0.19.15", features = ["serde"] } @@ -266,7 +273,8 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } toml_datetime = { version = "0.6.3", default-features = false, features = ["serde"] } 
toml_edit = { version = "0.19.15", features = ["serde"] }
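For completeness, here is roughly how the consumer side of the new seed tarball flow fits together, as a hedged sketch: the exact wiring lives in nexus-test-utils and the `crdb-seed` setup script, neither of which appears in this diff, and it assumes the test-utils crate is imported as `omicron_test_utils` with a `slog::Logger` named `log` already in scope.

    use omicron_test_utils::dev;

    // The `crdb-seed` setup script exports the tarball path via the
    // CRDB_SEED_TAR environment variable; hand that to test_setup_database,
    // which unpacks it into a fresh CockroachDB storage directory via
    // StorageSource::CopyFromSeed.
    let input_tar = camino::Utf8PathBuf::from(
        std::env::var(dev::CRDB_SEED_TAR_ENV)
            .expect("the crdb-seed setup script should have set this"),
    );
    let db = dev::test_setup_database(
        &log,
        dev::StorageSource::CopyFromSeed { input_tar },
    )
    .await;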